/* /--------------------------------------------------------------------\ | File : Complex.C | |--------------------------------------------------------------------| | Written by : Umit CATALYUREK | | Date : 1.6.1993 | |--------------------------------------------------------------------| | Description : Implementation of CN2 for Machine Learning Course. | | Main File For Selectors & Complexes | |--------------------------------------------------------------------| | Last Modification Date : | | By : | | Description : | \____________________________________________________________________/ */ #include #include #include #include #include "prepare.h" #include "complex.h" /* #define _DEBUG1_ */ char _debug1_fl_ = 0; #define ZERO 0.000001 #define zabs(x) (((x) < 0.0) ? (-1*(x)) : (x)) extern Description Dscr; extern DataList Data; extern RuleList rulelist; extern PComplexList star; PSelector selectors=NULL; int numofselectors=0; char *opersigns[NOTEQUAL+1] = { "<=", ">", "=", "!="}; CreateSelector(sc, a, com, val) int *sc, a, val; char com; { PSelector s=selectors+*sc; s->attrind = a; s->comp = com; s->valind = val; (*sc)++; } int SelectorCover(sc, I) int sc; PInstance I; { PAttrData D=I->head; PSelector s=selectors+sc; PAttribute A; D += s->attrind; A = Dscr.attrs[s->attrind]; switch (s->comp) { case LESS_EQ : return (D->val.value <= A->vl[s->valind].value); case GREATER : return (D->val.value > A->vl[s->valind].value); case EQUAL : return (D->val.discr == s->valind); case NOTEQUAL : return (D->val.discr != s->valind); } } int ComplexCover(I, cpx) PInstance I; PComplex cpx; { int fl=1; PComplexNode p=cpx->sellist; while (p!=NULL && fl) { fl = SelectorCover(p->sel, I); p = p->next; } return fl; } void InitSelectors() { int i, j, selcnt; numofselectors = 0; for (i=0; iflag == ATTR_DISCRETE && Dscr.attrs[i]->numofdiscrval==2) numofselectors += 2; else numofselectors += 2*Dscr.attrs[i]->numofdiscrval; selectors = (PSelector) malloc(sizeof(Selector)*numofselectors); for (i=0, selcnt=0; iflag == ATTR_CONTINUOUS) for (j=0; jnumofdiscrval; j++) { CreateSelector(&selcnt, i, LESS_EQ, j); CreateSelector(&selcnt, i, GREATER, j); } else for (j=0; jnumofdiscrval; j++) { CreateSelector(&selcnt, i, EQUAL, j); if (A->numofdiscrval!=2) CreateSelector(&selcnt, i, NOTEQUAL, j); } } } void DeallocateSelectors() { free(selectors); } int CheckSelectors(sc1, sc2) /* -1 : Conjucntion produce 0 don't conjunct them 0 : If conjunction is valid tests are on different argument 1 : Valid conjuction test on same attribute 2 : Take only the first one 3 : Take only the second one *****************************/ int sc1, sc2; { PSelector s1=selectors+sc1, s2=selectors+sc2; PAttribute A; if (sc1==sc2) /* ---> Reduction 1 : p and p = p <<<<<< */ return 2; else if (s1->attrind != s2->attrind) /* No reduction can be done ****/ return 0; else if (s1->valind == s2->valind) /* ---> Reduction 2 : p and not p = 0 */ return -1; else if ((A=Dscr.attrs[s1->attrind])->flag == ATTR_DISCRETE) { if (s1->comp == EQUAL && s2->comp == EQUAL) return -1; /* ---> Reduction 3 : a=5 and a=6 = 0 */ else if (A->numofdiscrval==2) if (s1->comp == s2->comp) return -1; /* --->Reduction 4 : since values are different and there exist 2 alternative produces : 0 */ else /* (a = y) and (a != not y) return one of them my selection return with = sign */ if (s1->comp == EQUAL) return 2; else return 3; else if (s1->comp == EQUAL && s2->comp == EQUAL) return -1; } else { if (s1->comp==s2->comp) if (s1->comp==LESS_EQ) /* --->Reduction 5 : a < 5 and a < 10 produces a < 5 */ if (s1->valind < s2->valind) return 2; else return 3; else /* --->Reduction 6 : a > 5 and a > 10 produces a > 10 */ if (s1->valind < s2->valind) return 3; else return 2; else if ((s1->comp==LESS_EQ && s1->valind < s2->valind) || (s2->comp==LESS_EQ && s2->valind < s1->valind)) return -1; /* --->Reduction 7 : a<=5 and a>10 produces 0 */ } return 0; } char *PrintSelector(sc) int sc; { static char buf[150]; char buf2[50]; PSelector s; s = selectors + sc; if (_debug1_fl_) sprintf(buf, "(%d)", sc); else { sprintf(buf, "(%s %s ", Dscr.attrs[s->attrind]->attrname, opersigns[s->comp]); if (Dscr.attrs[s->attrind]->flag == ATTR_CONTINUOUS) sprintf(buf2, "%8.3f)", Dscr.attrs[s->attrind]->vl[s->valind].value); else sprintf(buf2, "%s)", Dscr.attrs[s->attrind]->vl[s->valind].discr); strcat(buf, buf2); } return buf; } void printselectors() { int i; printf("\n Here is the selector list :"); for (i=0; isellist; while (q!=NULL) { PComplexNode p=q; q = q->next; free(p); } free(cpx); } } int CompareComplexes(cp1, cp2) PComplex cp1, cp2; { PComplexNode c1=cp1->sellist, c2=cp2->sellist; if (cp1->numofselectors != cp2->numofselectors) return 0; else { while (c1 != NULL && c2 != NULL && c1->sel == c2->sel) { c1 = c1->next; c2 = c2->next; } return (c1==NULL && c2==NULL); } } int ExistInList(p, cpx) PComplexList p; PComplex cpx; { for (; p!=NULL && !CompareComplexes(p->cpx, cpx); p=p->next); return (p!=NULL); } AddComplex2List(cpxl, compx) PComplexList *cpxl; PComplex compx; { PComplexList p; if (compx!=NULL) { if (ExistInList(*cpxl, compx)) FreeComplex(compx); else { p = (PComplexList) malloc(sizeof(ComplexList)); p->cpx = compx; p->prev = NULL; p->next = *cpxl; if (*cpxl!=NULL) (*cpxl)->prev = p; *cpxl = p; } } } void AddSelector2Complex(cpx, sc) PComplex *cpx; int sc; { PComplexNode c, p, q; c = (PComplexNode) malloc(sizeof(ComplexNode)); c->sel = sc; if (*cpx==NULL) { int i; *cpx = (PComplex) malloc(sizeof(Complex)); (*cpx)->numofexamplescovered = (*cpx)->numofselectors = 0; (*cpx)->entropy = (*cpx)->significance = 0.0; for (i=0; iclassdistr[i] = 0; (*cpx)->sellist = NULL; } for (p = NULL, q=(*cpx)->sellist; q!=NULL && q->selnext) p = q; if (p==NULL) { c->next = q; (*cpx)->sellist = c; } else { p->next = c; c->next = q; } ((*cpx)->numofselectors)++; } PComplex CopyComplex(c) PComplex c; { PComplex cpx=NULL; if (c!=NULL) { PComplexNode q=c->sellist; int i; while (q!=NULL) { AddSelector2Complex(&cpx, q->sel); q = q->next; } cpx->numofexamplescovered = c->numofexamplescovered; cpx->entropy = c->entropy; cpx->significance = c->significance; for (i=0; iclassdistr[i] = c->classdistr[i]; } return cpx; } PComplexList Selectors2ComplexList() { int sc; PComplexList cpxl=NULL; for (sc=0; sccpx)); cpxl = cpxl->next; } return c; } void DeallocateComplexList(cpxl) PComplexList cpxl; { while (cpxl!=NULL) { PComplexList p = cpxl; cpxl = cpxl->next; FreeComplex(p->cpx); free(p); } } char *PrintComplex(cx) PComplex cx; { static char buf[150]; char buf2[50]; PComplexNode c=NULL; if (cx != NULL) { sprintf(buf, "if "); c=cx->sellist; } for (; c!=NULL; c=c->next) { sprintf(buf2, "%s", PrintSelector(c->sel)); strcat(buf, buf2); if (c->next !=NULL) strcat(buf, " and "); } #ifdef _DEBUG1_ sprintf(buf2, "(%d, %8.6lf, %8.6lf)", cx->numofexamplescovered, cx->entropy, cx->significance); strcat(buf, buf2); #endif return buf; } void printcomplexlist(cpxl) PComplexList cpxl; { int cnt=0, i; printf("\n Complex List is : "); while (cpxl!=NULL) { printf("\n %s (%d)\n", PrintComplex(cpxl->cpx), cpxl->cpx->numofexamplescovered); for (i=0; icpx->classdistr[i]); printf(" <%8.6lf, %8.6lf>", cpxl->cpx->entropy, cpxl->cpx->significance); cpxl = cpxl->next; cnt++; } printf("\n End of List %d item is listed", cnt); } void AddRule(cplx, clss) PComplex cplx; int clss; { PRule r; r = (PRule) malloc(sizeof(Rule)); r->cpx = cplx; r->class = clss; r->next = NULL; if (rulelist.tail == NULL) rulelist.head = rulelist.tail = r; else { rulelist.tail->next = r; rulelist.tail = r; } } void DeallocateRuleList() { while (rulelist.head!=NULL) { rulelist.tail = rulelist.head; rulelist.head = rulelist.head->next; free(rulelist.tail); } } void printrulelist() { PRule r=rulelist.head; int sum=0, i; int cnt=0, selsum=0; printf("\n Decision List : \n"); while (r!=NULL && r->cpx->sellist != NULL) { cnt++; selsum += r->cpx->numofselectors; printf("\n %s then\n\t predict class : %s", PrintComplex(r->cpx), Dscr.classes[r->class]); for (i=0, sum=0; iclass) sum += r->cpx->classdistr[i]; printf(" (%d/%d)", r->cpx->classdistr[r->class], sum); r = r->next; } if (r != NULL) { printf("\n Default Rule :\n\t predict class : %s", Dscr.classes[r->class]); for (i=0, sum=0; iclass) sum += r->cpx->classdistr[i]; printf(" (%d/%d)\n", r->cpx->classdistr[r->class], sum); } else printf("\n"); printf("\n %d Rule(s) Total %d selector(s) Average Selector : %8.3f\n", cnt, selsum, (float) selsum / (float) cnt); } void WriteRuleList(o) int o; { PRule r; int i, cnt; cnt = 0; r = rulelist.head; while (r!=NULL) { r = r->next; cnt++; } write(o, &cnt, sizeof(cnt)); r = rulelist.head; while (r != NULL) { PComplexNode p; write(o, &(r->cpx->numofselectors), sizeof(int)); for (p=r->cpx->sellist; p!=NULL; p=p->next) write(o, &(p->sel), sizeof(int)); write(o, &(r->class), sizeof(int)); r = r->next; } } void ReadRuleList(in) int in; { int cnt, i, selcnt, j, sc, class; DeallocateRuleList(); rulelist.head = rulelist.tail = NULL; read(in, &cnt, sizeof(cnt)); for (i=0; inumofselectors = 0; c->sellist = NULL; } read(in, &class, sizeof(int)); AddRule(c, class); } } int RuleListMatch(I) PInstance I; { int fl=0; PRule r; r = rulelist.head; while (r!=NULL && !fl) { fl = ComplexCover(I, r->cpx); if (!fl) r = r->next; } if (r==NULL) return 0; else return (r->class == I->class); } PComplex Conjunct(cx, sc) PComplex cx; { PComplex rc=NULL; char fl = 1; PComplexNode c; if (cx!=NULL) { c = cx->sellist; while (c != NULL) { switch(CheckSelectors(c->sel, sc)) { case -1 : c = NULL; /* To terminate loop */ FreeComplex(rc); rc = NULL; fl = 0; break; /* Valid conjcutions */ case 0 : case 1 : AddSelector2Complex(&rc, c->sel); break; /* Take the first one */ case 2 : fl = 0; AddSelector2Complex(&rc, c->sel); break; /* Take the second one */ case 3 : break; } if (c!=NULL) c = c->next; } } if (fl) AddSelector2Complex(&rc, sc); return rc; } void Intersect() /* specialization of star vy intersecting star with selectors */ { PComplexList newstar=NULL; PComplexList sp=star; int sc; PComplex cpx; for (sp=star; sp != NULL; sp=sp->next) for (sc=0; sccpx, sc); if (cpx!=NULL) if (!ExistInList(star, cpx)) AddComplex2List(&newstar, cpx); else FreeComplex(cpx); } DeallocateComplexList(star); star = newstar; } void EvaluateComplex(cpx) PComplex cpx; { PInstance I=Data.head; int i; cpx->significance = 0.0; cpx->entropy = 2.0; if (cpx->sellist!=NULL) { for (i=0; iclassdistr[i] = 0; cpx->numofexamplescovered=0; while (I!=NULL) { if (ComplexCover(I, cpx)) { cpx->numofexamplescovered++; cpx->classdistr[I->class]++; } I = I->next; } if (cpx->numofexamplescovered>0) { cpx->entropy = cpx->significance = 0.0; for (i=0; iclassdistr[i]>0) { p = (double) cpx->classdistr[i] / (double) cpx->numofexamplescovered; cpx->entropy += p * log2(p); if (Dscr.occurs[i]>0) cpx->significance += (p * log2(p/Dscr.expect[i])); } } if (cpx->entropy<0) cpx->entropy *= -1; cpx->significance *= 2; } } } int BetterComplex(c1, c2) /* Returns > 0 if first one (c1) is better a complex than c2 */ PComplex c1, c2; { if (c1->entropy-c2->entropyentropy-c2->entropy>-ZERO) { #ifdef _DEBUG1_ printf("\n[<%s>", PrintComplex(c1)); printf("<%s>]", PrintComplex(c2)); #endif if (c1->numofexamplescovered==c2->numofexamplescovered) return (c1->numofselectors < c2->numofselectors); else return (c1->numofexamplescovered > c2->numofexamplescovered); } else return (c1->entropy < c2->entropy); } void AddComplex2SortedList(cl, cpx) PComplexList *cl; PComplex cpx; { PComplexList c, p, q; /* printf("\n ADDIND : %s (%d)", PrintComplex(cpx), cpx->numofexamplescovered); */ c = (PComplexList) malloc(sizeof(ComplexList)); c->cpx = cpx; for (p = NULL, q = *cl; q!=NULL && BetterComplex(q->cpx, cpx); q=q->next) p = q; if (p==NULL) *cl = c; else p->next = c; c->next = q; c->prev = p; if (q!=NULL) q->prev = c; /* printf("\n After Adding :"); printcomplexlist(*cl); */ } void Evaluate(cpxl) PComplexList *cpxl; { PComplexList nl=NULL; PComplexList sp; for (sp=*cpxl; sp != NULL; sp=sp->next) { EvaluateComplex(sp->cpx); if (sp->cpx->significance> 0.05) AddComplex2SortedList(&nl, CopyComplex(sp->cpx)); } DeallocateComplexList(*cpxl); *cpxl = nl; }