/************************************************************************ * * * Program package T O O L D I A G * * * * Version 1.5 * * Date: Tue Feb 8 13:39:04 1994 * * * * NOTE: This program package is copyrighted in the sense that it * * may be used for scientific purposes. The package as a whole, or * * parts thereof, cannot be included or used in any commercial * * application without written permission granted by the author. * * No programs contained in this package may be copied for commercial * * distribution. * * * * All comments concerning this program package may be sent to the * * e-mail address 'tr@fct.unl.pt'. * * * ************************************************************************/ #include #include #include "def.h" extern universe *U; extern float selectMultivariate(); extern double bin_coeff(); static str80 buf; static FeatSelectVector Xi = NULL, Xd = NULL, Psi = NULL, auxFeatSet = NULL; static float B; static int D, d; static int r; /* Cardinality of Psi */ static double leaf_visits; static void free_globals() { FREE( Xi ); FREE( Xd ); FREE( Psi ); FREE( auxFeatSet ); } static void init_b_and_b() { int i; D = U->nrFeat; free_globals(); Xi = (feature*) malloc( D * sizeof(struct feature_) ); CHKPTR(Xi); Xd = (feature*) malloc( D * sizeof(struct feature_) ); CHKPTR(Xd); Psi = (feature*) malloc( D * sizeof(struct feature_) ); CHKPTR(Psi); auxFeatSet = (feature*) malloc( (D-1) * sizeof(struct feature_) ); CHKPTR(auxFeatSet); for( i = 0; i < D; i++ ) { Xi[i].rank = i; Xi[i].crit = 0.0; Xd[i].rank = i; Xd[i].crit = 0.0; Psi[i].rank = i; Psi[i].crit = 0.0; } B = -INFINITY; r = D; leaf_visits = 0.0; } static int order_crits( feat1, feat2 ) feature *feat1, *feat2; { if( feat1->crit > feat2->crit ) return((int)1); if( feat1->crit < feat2->crit ) return((int)-1); return((int)0); } static void addFeat( set, len, feat ) FeatSelectVector set; int len; feature *feat; { copy_feature( feat, &(set[len]) ); } static void delFeat( set, len, feat ) FeatSelectVector set; int len; feature *feat; { int i, j; bool found = FALSE; i = 0; j = 0; while( ! found && i < len ) { found = set[i].rank == feat->rank; if( ! found ) i++; } if( ! found ) { fprintf(stderr,"Could not find element - exit...\n"); exit(1); } for( j = i; j < len-1; j++ ) copy_feature( &(set[j+1]), &(set[j]) ); } /* delete all elements from set1 that are in set2 * Assumptions: len1 >= len2 * all elements in set2 appear in set1 */ static void diffSets( set1, set2, len1, len2 ) FeatSelectVector set1, set2; int len1, len2; { int i; for( i = 0; i < len2; i++ ) delFeat( set1, len1 - i, &(set2[i]) ); } static void copySet( set1, set2, len ) FeatSelectVector set1, set2; int len; { int i; for( i = 0; i < len; i++ ) copy_feature( &(set1[i]), &(set2[i]) ); } static void b_and_b( crit, level ) int crit, level; { int q; /* Number of succeeding elements */ FeatSelectVector Q_seq; /* Succeeding elements */ char *syntacPos; /* Occurence ; syntactic position */ int cardAux, i, j, p; bool leaf; float selCrit; printf("Leaf level=%d --- Actual level=%d \r", D-d-1, level ); fflush(stdout); /* calculate the q: number of possible subtrees */ q = r - (D - d - level - 1); /* determine the criterion all potential features to be discarded */ /* all potential features to be discarded are in Psi */ Q_seq = (feature*) malloc( q * sizeof(struct feature_) ); CHKPTR(Q_seq); /* printf(" ****** Xi=\n\t"); show_FeatSelectVector( stdout, Xi, D - level ); /**/ for( j = 0; j < r; j++ ) { /* generate the potential feature set */ p = 0; cardAux = 0; while( p < D - level ) { if( Psi[j].rank != Xi[p].rank ) { auxFeatSet[cardAux].rank = Xi[p].rank; cardAux++; } p++; } /* printf("\n $$$$$$ Calculating selection criterion from set=\n\t"); show_FeatSelectVector( stdout, auxFeatSet, cardAux ); /**/ selCrit = selectMultivariate( crit, auxFeatSet, cardAux ); /* printf("{%d} = %f ", Psi[j].rank, selCrit ); /**/ Psi[j].crit = selCrit; } /* sort the Psi set in order to satisfy the monotonicity condition */ qsort( Psi, r, sizeof(feature), order_crits ); /* printf("Psi sorted=\n\t"); show_FeatSelectVector( stdout, Psi, r ); /**/ copySet( Psi, Q_seq, q ); diffSets( Psi, Q_seq, r, q ); r = r - q; for( i = q-1; i >=0; i-- ) { /* printf("====== Level=%d i=%d Q_seq[%d]=(%d,%f) Bound=%f\n", level, i, i, Q_seq[i].rank, Q_seq[i].crit, B ); /**/ if( level+1 == D-d ) leaf_visits += 1.0; /* check bound */ if( Q_seq[i].crit >= B ) { /* remove features from selected ones, except feature i */ delFeat( Xi, D - level, &(Q_seq[i]) ); /* printf("Xi=\n\t"); show_FeatSelectVector( stdout, Xi, D-level-1 ); /**/ /* test if leaf is reached */ leaf = (level+1 == D-d); if( leaf ) { /* Bound updating */ B = Q_seq[i].crit; /* save the best feature set so far */ copySet( Xi, Xd, D - level - 1 ); fprintf( stdout, "BOUND = %f --- Best selected set so far:\n\t", B ); show_FeatSelectVector( stdout, Xd, d ); } else { b_and_b( crit, level+1 ); } addFeat( Xi, D - level - 1, &(Q_seq[i]) ); } /* prepare for backtracking */ addFeat( Psi, r, &(Q_seq[i]) ); r++; } } void selectFeaturesB_AND_B( crit ) int crit; { int k; double exhaustive_selection; /* all possible subsets - for comparison */ /* binomial coefficient (D over d) */ printf("Number of features to be selected from the %d possible: ", U->nrFeat ); get_d_range( &d, 1, U->nrFeat-1, LEFT_CLOSED__RIGHT_CLOSED ); init_b_and_b(); printf("Branch and bound - PACIENCIA - PATIENCE - PACIENCE - GEDULD! ...\n\n"); /* select */ b_and_b( crit, 0 ); exhaustive_selection = bin_coeff( D, d ); printf("\n\nSTATISTICS OF THE SEARCH PROCESS:\n"); printf(" Number of leafs visited = %.0lf of %.0lf possible = %5.2lf%%\n", leaf_visits, exhaustive_selection, 100.0*leaf_visits/exhaustive_selection ); /* copy the selected set to the global varaible U->FSV */ U->nrSelFeat = d; FREE( U->FSV ); U->FSV = (feature*) malloc( U->nrSelFeat * sizeof(struct feature_) ); CHKPTR(U->FSV); for( k = 0; k < U->nrSelFeat; k++ ) { U->FSV[k].rank = Xd[k].rank; U->FSV[k].crit = (float)EMPTY; /* does not matter */ } show_selected_feats( stdout, U->FSV, U->nrSelFeat ); free_globals(); }