#include "header.i"

#include <errno.h>
#include <limits.h>
#include <stdlib.h>


/*************************************************************************/
/*  Parses Text as a base-10 integer that fits in an int.                */
/*  Stores the result in *Value and returns 1 on success; returns 0 and  */
/*  leaves *Value untouched when Text holds no digits, has trailing      */
/*  characters after the number, or is out of range for int.             */
/*************************************************************************/

static int ParseIntOption(const char *Text, int *Value)
/*         --------------  */
{
    char *End;
    long Parsed;

    errno = 0;
    Parsed = strtol(Text, &End, 10);

    if ( End == Text || *End != '\0' || errno == ERANGE
         || Parsed < INT_MIN || Parsed > INT_MAX )
    {
        return 0;
    }

    *Value = (int) Parsed;
    return 1;
}


/*****************************/
/* Reads the process options */
/*****************************/

/*  Recognised options (getopt string "f:u012i:h"):                      */
/*    -f FILE   sets FileName to FILE and echoes it                      */
/*    -u        sets UNSEENS to true                                     */
/*    -0 -1 -2  set MAXDEPTH to 0, 1 or 2 and echo it                    */
/*    -i INT    sets MAXINTERVALS to INT; a non-integer argument is      */
/*              reported on stderr and the process exits with status 1   */
/*    -h        prints the usage text and exits with status 1            */
/*  Options getopt does not recognise are skipped here and parsing       */
/*  continues with the next one.                                         */

void CommandInput(int Argc, char *Argv[])
/*   ------------  */
{
    int o;
    int Intervals;
    extern char *optarg;

    /*  getopt signals the end of the options with -1  */

    while ( (o = getopt(Argc, Argv, "f:u012i:h")) != -1 )
    {
        switch (o)
        {
            case 'f':
                FileName = optarg;
                printf("\tFile stem <%s>\n", FileName);
                break;

            case 'u':
                UNSEENS = true;
                printf("\tTrees evaluated on unseen cases\n");
                break;

            case '0':
            case '1':
            case '2':
                /*  The option letter is the digit of the requested depth  */
                MAXDEPTH = o - '0';
                printf("\tDepth of tree is %d\n", MAXDEPTH);
                break;

            case 'i':
                if ( ! ParseIntOption(optarg, &Intervals) )
                {
                    fprintf(stderr,
                            "Option -i needs an integer argument, got <%s>\n",
                            optarg);
                    exit(1);
                }
                MAXINTERVALS = Intervals;
                break;

            case 'h':
                printf("Options of T2: -f FILE use FILE.names, FILE.data, FILE.test\n");
                printf(" default FILE='DF'\n");
                printf(" -u evaluate on test set\n");
                printf(" default don't\n");
                printf(" -0 -1 -2 compute 0,1,2-level tree\n");
                printf(" default -2\n");
                printf(" -i INT use splits into INT intervals\n");
                printf(" at the bottom nodes\n");
                printf(" default INT=Number of Classes + 1\n\n");
                exit(1);

            default:
                /*  Unrecognised option: nothing is set for it and the   */
                /*  remaining options are still processed                */
                break;
        }
    }
}


/*************************************************************************/
/*  Classify a case description using the given subtree                  */
/*************************************************************************/

/*  Leaf (NodeType 0):  returns T->BestClass.                            */
/*  BrDiscr:            follows Branch[v] for the case's value v of the  */
/*                      tested attribute when v <= T->Forks, otherwise   */
/*                      Branch[0].                                       */
/*  BrIntervals:        follows Branch[0] when the value equals Unknown; */
/*                      otherwise the first Branch[k], k = 1..Forks-1,   */
/*                      with value < Cut[k]; otherwise Branch[Forks].    */
/*  Any other NodeType: returns None.                                    */

ClassNo Classify(Description CaseDesc, Tree T)
/*      --------  */
{
    DiscrValue v;
    float Cv;
    Attribute a;
    int k;

    switch ( T->NodeType )
    {
        case 0:  /* leaf */

            return T->BestClass;

        case BrDiscr:  /* test of discrete attribute */

            a = T->Tested;
            v = DVal(CaseDesc, a);

            if ( v <= T->Forks )  /*  Make sure not new discrete value  */
            {
                return Classify(CaseDesc, T->Branch[v]);
            }

            return Classify(CaseDesc, T->Branch[0]);

        case BrIntervals:  /* test of continuous attribute */

            a = T->Tested;
            Cv = CVal(CaseDesc, a);

            if ( Cv == Unknown )
            {
                return Classify(CaseDesc, T->Branch[0]);
            }

            ForEach(k, 1, T->Forks-1)
            {
                if ( Cv < T->Cut[k] )
                {
                    return Classify(CaseDesc, T->Branch[k]);
                }
            }

            /*  Not below any of the tested cuts: take the last branch  */

            return Classify(CaseDesc, T->Branch[T->Forks]);

        default:
            break;
    }

    return None;
}


/**********************************************************************/
/* Sorts Item[Fp..Lp] according to the continuous attribute Att       */
/**********************************************************************/

/*  Builds an item list with MakeItemList(Fp, Lp), has it ordered by  */
/*  SortItemList, writes the items back into Item[Fp..Lp] in list     */
/*  order and releases the list.                                      */

void Sort(ItemNo Fp, ItemNo Lp, Attribute Att)
/*   ----  */
{
    ItemList IL, IH;
    ItemNo i;

    IL = MakeItemList(Fp, Lp);
    IL = SortItemList(IL, Lp - Fp + 1, Att);

    IH = IL;
    ForEach(i, Fp, Lp)
    {
        Item[i] = IH->item;
        IH = IH->next;
    }

    ReleaseItemList(IL);
}


/*************************************************************************/
/*  Group together the items corresponding to value V of discrete Att    */
/*  and return the index of the last such                                */
/*************************************************************************/

/*  Matching items are swapped to the front of Item[Fp..Lp].  When no    */
/*  item has value V the result is Fp-1.                                 */

ItemNo Group(ItemNo Fp, ItemNo Lp, Attribute Att, DiscrValue V)
/*     -----  */
{
    ItemNo i, Kp;

    Kp = Fp - 1;  /*  index of the last grouped item so far  */

    ForEach(i, Fp, Lp)
    {
        if ( DVal(Item[i], Att) == V )
        {
            Swap(++Kp, i);
        }
    }

    return Kp;
}


/*************************************************************************/
/*  Exchange items at a and b                                            */
/*************************************************************************/

void Swap(ItemNo a, ItemNo b)
/*   ----  */
{
    Description Hold;

    Hold = Item[a];
    Item[a] = Item[b];
    Item[b] = Hold;
}


/*************************************************************************/
/*  Moves the content of IntSplS to IntSplD.  The content of IntSplS is  */
/*  destroyed.                                                           */
/*************************************************************************/

/*  *IntSplD receives a structure copy of *IntSplS (including its Split  */
/*  pointer); IntSplS is then given the Split pointer that IntSplD held  */
/*  before the copy, so the two structures exchange Split pointers.      */

void MoveIntervals(IntervalSplit *IntSplS, IntervalSplit *IntSplD)
/*   -------------  */
{
    Segment *seg;

    seg = IntSplD->Split;  /*  remember the destination's old Split  */
    *IntSplD = *IntSplS;
    IntSplS->Split = seg;
}


/*************************************************************************/
/*  Calculates the class distribution of Item[Fp..Lp].                   */
/*  If Att != None, Att is assumed to be a discrete attribute, and the   */
/*  class distribution with respect to all attribute values is           */
/*  calculated, too.  The best class and the corresponding quantities    */
/*  are stored in BEST_ALL, FREQ_ALL, ERROR_ALL, BEST[], FREQ[], ERROR[] */
/*************************************************************************/

void InitFreq(ItemNo Fp, ItemNo Lp, Attribute Att)
/*   --------  */
{
    ClassNo c;
    ItemNo i;
    DiscrValue v;

    /*  Weighted class totals over all items  */

    ForEach(c, 0, MaxClass)
    {
        _FreqA[c] = 0;
    }
    FREQ_ALL = 0;

    ForEach(i, Fp, Lp)
    {
        _FreqA[ Class(Item[i]) ] += Weight(Item[i]);
        FREQ_ALL += Weight(Item[i]);
    }

    /*  Best class overall; ties keep the lowest class number  */

    BEST_ALL = 0;
    ForEach(c, 1, MaxClass)
    {
        if ( _FreqA[c] > _FreqA[BEST_ALL] )
        {
            BEST_ALL = c;
        }
    }
    ERROR_ALL = FREQ_ALL - _FreqA[BEST_ALL];

    if ( Att != None )
    {
        /*  Weighted class totals for each value of Att  */

        ForEach(v, 0, MaxAttVal[Att])
        {
            ForEach(c, 0, MaxClass)
            {
                _Freq[c][v] = 0;
            }
            FREQ[v] = 0;
        }

        ForEach(i, Fp, Lp)
        {
            _Freq[ Class(Item[i]) ][ DVal(Item[i], Att) ] += Weight(Item[i]);
            FREQ[ DVal(Item[i], Att) ] += Weight(Item[i]);
        }

        /*  Best class and error for each value; ties keep the lowest  */
        /*  class number                                               */

        ForEach(v, 0, MaxAttVal[Att])
        {
            BEST[v] = 0;
            ForEach(c, 1, MaxClass)
            {
                if ( _Freq[c][v] > _Freq[BEST[v]][v] )
                {
                    BEST[v] = c;
                }
            }
            ERROR[v] = FREQ[v] - _Freq[BEST[v]][v];
        }
    }
}