/* /--------------------------------------------------------------------\ | File : CN2.C | |--------------------------------------------------------------------| | Written by : Umit CATALYUREK | | Date : 1.6.1993 | |--------------------------------------------------------------------| | Description : Implementation of CN2 Induction Algorithm for | | Machine Learning Course. | | Main File | |--------------------------------------------------------------------| | Last Modification Date : | | By : | | Description : | \____________________________________________________________________/ */ #include #include #include #include #include "prepare.h" #include "complex.h" extern Description Dscr; extern DataList Data; extern PComplexList selectors; #define _COMPREHENSIVE_ #define MAXSTAR 15 RuleList rulelist; PComplexList star; PComplex Find_Best_Complex() { PComplex best_cpx=NULL; PComplexList sp, prev; int i, cnt=0; star = Selectors2ComplexList(); while (star != NULL #ifdef _COMPREHENSIVE_ && cnt < 6 #endif ) { #ifdef _COMPREHENSIVE_ cnt++; #endif Evaluate(&star); if (star != NULL) { if (best_cpx==NULL) best_cpx = CopyComplex(star->cpx); else if (BetterComplex(star->cpx, best_cpx)) { FreeComplex(best_cpx); best_cpx = CopyComplex(star->cpx); } for (i=0, sp = star; sp !=NULL && i < MAXSTAR; i++, sp = sp->next) prev = sp; if (sp!=NULL) { prev->next = NULL; DeallocateComplexList(sp); } Intersect(); } } #ifdef _COMPREHENSIVE_ DeallocateComplexList(star); #endif return best_cpx; } void cn2() { PComplex best_cpx=NULL; int i, moc; PInstance I; do { best_cpx = Find_Best_Complex(); if (best_cpx != NULL) { for (i=1, moc = 0; iclassdistr[i] > best_cpx->classdistr[moc]) moc = i; AddRule(best_cpx, moc); for (I=Data.head; I!=NULL; ) if (ComplexCover(I, best_cpx)) { PInstance P=I; I=I->next; DeleteData(P); } else I=I->next; } } while (best_cpx !=NULL && Data.numofinstances); /**** Add default rule *****/ best_cpx = (PComplex) malloc(sizeof(Complex)); best_cpx->sellist = NULL; best_cpx->numofselectors = 0; for (i=1, moc=0; i Dscr.occurs[moc]) moc = i; for (i=0; iclassdistr[i] = 0; for (I=Data.head; I!=NULL; I=I->next) best_cpx->classdistr[I->class]++; AddRule(best_cpx, moc); } main(argc, argv) int argc; char *argv[]; { char fname[100]; FILE *namesf, *dataf; int outf; if (argc!=2) { printf("\n Usage : cn2 \n\n"); exit(0); } printf("\n CN2 Induction Algorithm Ver 1.0"); printf("\n Implemented by : Umit CATALYUREK\n"); printf("\n Implementation for Machine Learning Course.\n Date : Spring 1992/93\n"); strcpy(fname, "Data/"); strcat(fname, argv[1]); strcat(fname, ".names"); namesf = fopen(fname, "r"); if (namesf == NULL) { printf("\n I could not open description file : %s\n\n", fname); exit(0); } strcpy(fname, "Data/"); strcat(fname, argv[1]); strcat(fname, ".data"); dataf = fopen(fname, "r"); if (dataf == NULL) { printf("\n I could not open data file : %s\n\n", fname); exit(0); } strcpy(fname, "Data/"); strcat(fname, argv[1]); strcat(fname, ".cn2"); outf = creat(fname, 0666); Initialize(); rulelist.head = rulelist.tail = NULL; ReadDescrFile(namesf); /* printdescription(); */ ReadDataFile(dataf); /* printdata(); exit(0); */ EliminateUnknownData(); printf("\n Description File : %s.names", argv[1]); printf("\n Instance File : %s.data", argv[1]); printf("\n #Attributes : %5d #Instances : %5d\n", Dscr.numofattributes, Data.numofinstances); InitSelectors(); cn2(); printrulelist(); WriteOccurences(outf); WriteRuleList(outf); fclose(namesf); fclose(dataf); close(outf); Deallocate(); DeallocateRuleList(); DeallocateSelectors(); }