#include "header.i"

/*******************************************************************************/
/* Builds the optimal decision tree of depth at most Dep <= 2 for Items Fp..Lp */
/* PriviousClass is the most frequent class of the parent node                 */
/*                                                                             */
/* Returns a bare leaf when the item range is empty, when the items are        */
/* already pure (ERROR_ALL == 0), or when no depth budget remains; otherwise   */
/* delegates to Build() and frees the provisional leaf.                        */
/*******************************************************************************/

Tree BuildTree(ItemNo Fp, ItemNo Lp, int Dep, ClassNo PriviousClass)
/*   ---------  */
{
    Tree Node, BestNode;

    /* Empty range: predict the parent's majority class with zero support */
    if( Lp < Fp ) return MakeLeaf(PriviousClass, 0.0, 0.0);

    /* InitFreq fills the global tables read via BEST_ALL/FREQ_ALL/ERROR_ALL */
    InitFreq(Fp, Lp, None);
    Node = MakeLeaf(BEST_ALL, FREQ_ALL, ERROR_ALL);

    if( ERROR_ALL == 0.0 || Dep == 0 )
    {
        return Node;
    }
    else
    {
        BestNode = Build(Fp, Lp, Dep, Node);
        ReleaseTree(Node);          /* Build() copies the leaf; the original can go */
        return BestNode;
    }
}


/****************************************************************/
/* Constructs an optimal decision tree of depth at most Dep <=2 */
/* for Items Fp..Lp by trying a split on every usable attribute */
/* Root is the optimal leaf for Items Fp..Lp                    */
/****************************************************************/

Tree Build(ItemNo Fp, ItemNo Lp, int Dep, Tree Root)
/*   -----  */
{
    Attribute Att;
    Tree Node, BestNode;

    BestNode = CopyTree(Root);      /* fallback: keep the plain leaf */

    ForEach(Att, 0, MaxAtt)
    {
        if( SpecialStatus[Att] == IGNORE ) continue;

        /* MaxAttVal[Att] == 0 marks a continuous attribute */
        if( MaxAttVal[Att] == 0 )
            Node = Build2Contin(Fp, Lp, Att, Dep, Root);
        else
            Node = Build2Discr(Fp, Lp, Att, Dep, Root);

        if( Node->Error < BestNode->Error )
        {
            ReleaseTree(BestNode);
            BestNode = Node;
        }
        else
        {
            ReleaseTree(Node);      /* FIX: rejected candidate was previously leaked */
        }
    }
    return BestNode;
}


/*****************************************************************/
/* Constructs the optimal decision tree of depth at most Dep <=2 */
/* for Items Fp..Lp where the split in the root is               */
/* on the continuous attribute Att1                              */
/* Root is the optimal leaf for Items Fp..Lp                     */
/*****************************************************************/

Tree Build2Contin(ItemNo Fp, ItemNo Lp, Attribute Att1, int Dep, Tree Root)
/*   ------------  */
{
    ItemNo Kp, i, BestSplit;
    Attribute Att2;
    Tree BestNode;

    if( Dep == 1)
    {
        /* Depth 1: the best interval split of Att1 alone.  SecondSplitContin  */
        /* with Att1 == None deposits its result at index Fp (IntSpl_U[Fp]).   */
        Error_L[Fp] = Infinity;
        Error_U[Fp] = Infinity;
        SecondSplitContin(Fp, Lp, None, Att1);
        return IntervalTest(Att1, IntSpl_U[Fp]);
    }
    else
    {
        /* Sort on Att1; Items Fp..Kp-1 have unknown Att1 values */
        Kp = InitContinArrays(Fp, Lp, Att1);
        if( Kp > Lp ) return CopyTree(Root);    /* Att1 unknown everywhere */

        /* For every candidate threshold position i, record in Error_L[i] /    */
        /* Error_U[i] the best second-level split below / above the threshold  */
        ForEach(Att2, 0, MaxAtt)
        {
            if( SpecialStatus[Att2] == IGNORE ) continue;
            if( MaxAttVal[Att2] != 0 ) SecondSplitDiscr(Kp, Lp, Att1, Att2);
            if( MaxAttVal[Att2] == 0 ) SecondSplitContin(Kp, Lp, Att1, Att2);
        }

        /* Search for the best split of Att1 and construct the corresponding
           decision tree */
        BestSplit = Kp;
        ForEach(i, Kp+1 , Lp)
        {
            if( Error_L[i] + Error_U[i] < Error_L[BestSplit] + Error_U[BestSplit] )
                BestSplit = i;
        }

        BestNode = CopyTree(Root);
        BestNode->NodeType = BrIntervals;
        BestNode->Tested = Att1;
        BestNode->Forks = 2;
        /* NOTE(review): Cut is allocated as ItemCount here but as float in    */
        /* IntervalTest — confirm ItemCount matches the Cut element type       */
        BestNode->Cut = (ItemCount*) calloc(2, sizeof(ItemCount));
        BestNode->Cut[1] = CVal(Item[BestSplit], Att1);
        BestNode->Branch = (Tree*) calloc(3, sizeof(Tree));

        /* Branch 0: items with unknown Att1; branches 1/2: below/above cut */
        BestNode->Branch[0] = BuildTree(Fp, Kp-1, 1, Root->BestClass);
        if( MaxAttVal[Att_L[BestSplit]] == 0 )
        {
            BestNode->Branch[1] = IntervalTest(Att_L[BestSplit], IntSpl_L[BestSplit]);
        }
        else
        {
            BestNode->Branch[1] = DiscrTest(Kp, BestSplit-1, Att_L[BestSplit]);
        }
        if( MaxAttVal[Att_U[BestSplit]] == 0 )
        {
            BestNode->Branch[2] = IntervalTest(Att_U[BestSplit], IntSpl_U[BestSplit]);
        }
        else
        {
            BestNode->Branch[2] = DiscrTest(BestSplit, Lp, Att_U[BestSplit]);
        }

        BestNode->Error = BestNode->Branch[0]->Error;
        BestNode->Error += BestNode->Branch[1]->Error;
        BestNode->Error += BestNode->Branch[2]->Error;
        return BestNode;
    }
}


/******************************************************************/
/* Constructs the optimal decision tree of depth at most Dep <= 2 */
/* for Items Fp..Lp where the split in the root is                */
/* on the discrete attribute Att                                  */
/* Root is the optimal leaf for Items Fp..Lp                      */
/******************************************************************/

Tree Build2Discr(ItemNo Fp, ItemNo Lp, Attribute Att, int Dep, Tree Root)
/*   -----------  */
{
    ItemNo Kp;
    DiscrValue v;
    Tree BestNode;

    BestNode = CopyTree(Root);
    BestNode->NodeType = BrDiscr;
    BestNode->Tested = Att;
    BestNode->Forks = MaxAttVal[Att];
    BestNode->Branch = (Tree*) calloc(MaxAttVal[Att]+1, sizeof(Tree));
    BestNode->Error = 0;

    /* Group() gathers the items with value v into Fp..Kp; recurse per value */
    ForEach(v, 0, MaxAttVal[Att])
    {
        Kp = Group(Fp, Lp, Att, v);
        BestNode->Branch[v] = BuildTree(Fp, Kp, Dep-1, Root->BestClass);
        BestNode->Error += BestNode->Branch[v]->Error;
        Fp = Kp+1;
    }
    return BestNode;
}


/***************************************************************************************/
/* Sorts Item[Fp..Lp] on Att and returns the ItemNo of the first known attribute Att.  */
/* Furthermore sets Error_L,Error_U to Infinity for the known-value range.             */
/***************************************************************************************/

ItemNo InitContinArrays(ItemNo Fp, ItemNo Lp, Attribute Att)
/*     ----------------  */
{
    ItemNo Kp, i;

    Sort(Fp, Lp, Att);

    /* Unknown values sort first; skip them */
    for( Kp = Fp ; Kp <= Lp && CVal(Item[Kp], Att) == Unknown ; Kp++ )
        ;

    ForEach(i, Kp, Lp)
    {
        Error_L[i] = Infinity;
        Error_U[i] = Infinity;
    }
    return Kp;
}


/*************************************************************************************/
/* For each i=Fp..Lp the best decision tree with continuous split of Att1 at Item[i] */
/* and continuous split on Att2 is calculated; whenever it beats the incumbent in    */
/* Error_L[i]/Error_U[i] the split is recorded in Att_L/Att_U and IntSpl_L/IntSpl_U. */
/* Attribute Att1 might be None (then only one y-group exists and index Fp is used). */
/*************************************************************************************/

void SecondSplitContin(ItemNo Fp, ItemNo Lp, Attribute Att1, Attribute Att2)
/*   -----------------  */
{
    ItemNo i;
    ClassNo c1, c2;
    SuperList SL;
    AllList AP;
    int k;

    SL = InitSuperList(Fp, Lp, Att1, Att2);
    MergeSuperList(SL);

    i = Fp;
    /* Sanity check: with Att1 == None there must be exactly one y-group */
    if( Att1 == None && SL->alllist->next->next != Nil )
    {
        Error(7,"SecondSplitContin","1");
        exit(1);
    }

    for( AP = SL->alllist ; AP->next != Nil ; AP = AP->next )
    {
        /* Each list record must line up with the next distinct Att1 value */
        if( Att1 != None && AP->y_val != CVal(Item[i], Att1) )
        {
            Error(7,"SecondSplitContin","2");
            exit(1);
        }

        /* Scan all (class,intervals,class) combinations for a better split */
        ForAllIntervals(c1, k, c2)
        {
            if( AP->allint_L[c1][k][c2].Error < Error_L[i] )
            {
                Att_L[i] = Att2;
                Error_L[i] = AP->allint_L[c1][k][c2].Error;
                MoveIntervals(&AP->allint_L[c1][k][c2], &IntSpl_L[i]);
            }
            if( AP->allint_U[c1][k][c2].Error < Error_U[i] )
            {
                Att_U[i] = Att2;
                Error_U[i] = AP->allint_U[c1][k][c2].Error;
                MoveIntervals(&AP->allint_U[c1][k][c2], &IntSpl_U[i]);
            }
        }

        /* Advance i past all items sharing this Att1 value */
        while( i <= Lp && ( Att1 == None || CVal(Item[i], Att1) == AP->y_val ) ) i++;
    }

    /* Recycle the list records onto the global free list, then drop SL */
    AP = SL->alllist;
    while( AP->next != Nil ) AP = AP->next;
    AP->next = LIST_ALRecord;
    LIST_ALRecord = SL->alllist;
    SL = RemoveElSL(SL);
}


/*************************************************************************************/
/* For each i=Fp..Lp the best decision tree with continuous split of Att1 at Item[i] */
/* and discrete split on Att2 is calculated; improvements are recorded in            */
/* Error_L/Error_U and Att_L/Att_U.                                                  */
/* NOTE: destructively shifts weight from the global _Freq/FREQ_ALL tables into      */
/* _Freq_L/TotalFreq_L as the threshold sweeps forward.                              */
/*************************************************************************************/

void SecondSplitDiscr(ItemNo Fp, ItemNo Lp, Attribute Att1, Attribute Att2)
/*   ----------------  */
{
    ItemNo i;
    ClassNo c, BestClass_L, BestClass_U;
    ItemCount TotalFreq_L, Correct_L, Correct_U;
    DiscrValue v;

    /* _Freq/FREQ_ALL start holding everything ("upper" side); lower side empty */
    InitFreq(Fp, Lp, Att2);
    ForEach(c, 0, MaxClass)
        ForEach(v, 0, MaxAttVal[Att2])
            _Freq_L[c][v] = 0;
    TotalFreq_L = 0;

    for( i = Fp ; i <= Lp ; )
    {
        /* Count correctly classified weight on each side if we split here */
        Correct_L = 0;
        Correct_U = 0;
        ForEach(v, 0, MaxAttVal[Att2])
        {
            BestClass_L = 0;
            BestClass_U = 0;
            ForEach(c, 1, MaxClass)
            {
                if( _Freq_L[c][v] > _Freq_L[BestClass_L][v] ) BestClass_L = c;
                if( _Freq[c][v] > _Freq[BestClass_U][v] ) BestClass_U = c;
            }
            Correct_L += _Freq_L[BestClass_L][v];
            Correct_U += _Freq[BestClass_U][v];
        }

        if( TotalFreq_L - Correct_L < Error_L[i] )
        {
            Att_L[i] = Att2;
            Error_L[i] = TotalFreq_L - Correct_L;
        }
        if( FREQ_ALL - Correct_U < Error_U[i] )
        {
            Att_U[i] = Att2;
            Error_U[i] = FREQ_ALL - Correct_U;
        }

        /* Move the whole run of equal Att1 values from the upper to the lower side */
        do
        {
            _Freq_L[ Class(Item[i]) ][ DVal(Item[i], Att2) ] += Weight(Item[i]);
            _Freq[ Class(Item[i])][ DVal(Item[i], Att2) ] -= Weight(Item[i]);
            TotalFreq_L += Weight(Item[i]);
            FREQ_ALL -= Weight(Item[i]);
            i++;
        } while( i <= Lp && CVal(Item[i], Att1) == CVal(Item[i-1], Att1) );
    }
}


/*******************************************************************/
/* Builds a decision tree of depth 1 with an interval split on Att */
/* into Int at the root                                            */
/*******************************************************************/

Tree IntervalTest(Attribute Att, IntervalSplit IntSpl)
/*   ------------  */
{
    Tree Node;
    int k;

    Node = MakeLeaf(IntSpl.BestClass, IntSpl.Freq, IntSpl.Error);
    Node->NodeType = BrIntervals;
    Node->Tested = Att;
    Node->Forks = IntSpl.NoIntervals;
    Node->Cut = (float *) calloc(Node->Forks, sizeof(float));
    Node->Branch = (Tree *) calloc(Node->Forks+1, sizeof(Tree));

    /* Cut[1..NoIntervals-1] are the interval thresholds; Cut[0] stays unused */
    ForEach(k, 2, IntSpl.NoIntervals)
        Node->Cut[k-1] = IntSpl.Split[k].Threshold;

    /* Branch[0] is the unknown-value branch; Branch[1..NoIntervals] the intervals */
    ForEach(k, 0, IntSpl.NoIntervals)
    {
        Node->Branch[k] = MakeLeaf(IntSpl.Split[k].BestClass, IntSpl.Split[k].Freq, IntSpl.Split[k].Error);
    }

    /* An empty unknown-branch inherits the node's majority class */
    if( Node->Branch[0]->Freq == 0 ) Node->Branch[0]->BestClass = Node->BestClass;
    return Node;
}


/************************************************************************************/
/* Builds a decision tree of depth 1 for Item[Fp..Lp] with a discrete split on Att  */
/* at the root                                                                      */
/************************************************************************************/

Tree DiscrTest(ItemNo Fp, ItemNo Lp, Attribute Att)
/*   ---------  */
{
    DiscrValue v;
    Tree Node;

    /* Refresh the per-value global tables read via BEST/FREQ/ERROR */
    InitFreq(Fp, Lp, Att);

    Node = MakeLeaf(BEST_ALL, FREQ_ALL, 0.0);   /* Error accumulated below */
    Node->NodeType = BrDiscr;
    Node->Tested = Att;
    Node->Forks = MaxAttVal[Att];
    Node->Branch = (Tree *) calloc(MaxAttVal[Att]+1, sizeof(Tree));

    ForEach(v, 0, MaxAttVal[Att])
    {
        Node->Branch[v] = MakeLeaf(BEST[v], FREQ[v], ERROR[v]);
        Node->Error += ERROR[v];
    }
    return Node;
}


/*************************/
/* Returns the leaf node */
/*************************/

Tree MakeLeaf(ClassNo C, ItemCount Freq, ItemCount Error)
/*   --------  */
{
    Tree Node;

    /* calloc zero-fills, so all branch/cut pointers start out Nil */
    Node = (Tree) calloc(1, sizeof(TreeRec));
    Node->NodeType = BrNone;
    Node->BestClass = C;
    Node->Freq = Freq;
    Node->Error = Error;
    return Node;
}