/************************************************************************ * * * Program package T O O L D I A G * * * * Version 1.5 * * Date: Tue Feb 8 13:39:04 1994 * * * * NOTE: This program package is copyrighted in the sense that it * * may be used for scientific purposes. The package as a whole, or * * parts thereof, cannot be included or used in any commercial * * application without written permission granted by the author. * * No programs contained in this package may be copied for commercial * * distribution. * * * * All comments concerning this program package may be sent to the * * e-mail address 'tr@fct.unl.pt'. * * * ************************************************************************/ #include #include #include #include #include #include "def.h" #ifdef DOS #include #endif /* * Check if a file is binary or ascii * returns EMPTY if file cannot be found */ bool is_ascii_file( fname ) char *fname; { FILE *f; byte b; bool ascii_file; f = fopen( fname, f_open_bin_r ); if( f == NULL ) { /* printf("Cannot open %s!...\n", fname ); */ return( EMPTY ); } ascii_file = TRUE; while( !feof( f ) && ascii_file ) { fread( &b, sizeof(byte), 1, f ); if( !feof( f ) ) { /* check if there is a non-ascii character */ ascii_file = ascii_file && (bool)isascii((int)b); } } fclose( f ); return( ascii_file ); } /*----------------------------------------------*/ /* Random number generation */ static int seed; static double maxRand; void init_random_generator() { #ifdef DOS time_t t; srand((unsigned) time(&t)); #else #ifdef ONLY_RAND time_t t; srand((unsigned) time(&t)); maxRand = (pow(2.0, RANGE) - 1.0); #else maxRand = (pow(2.0, 31.0) - 1.0); (void)time( &seed ); srandom( *(int *)&seed ); #endif #endif } /* return an integer random number between 0 and max */ void get_random( max, ran ) int max, *ran; { int i; double r; #ifdef DOS i = random( max ); #else #ifdef ONLY_RAND r = rand(); #else r = random(); #endif r = r / maxRand * (double)max; i = (int)(r); #endif *ran = i; } void copy_FV( src, dest, dim ) FeatVector src, dest; int dim; { int i; for( i = 0; i < dim; i++ ) dest[i] = src[i]; } void showFV( numFeat, FV ) int numFeat; FeatVector FV; { int i; printf(" ("); for( i = 0; i < numFeat; i++ ) printf(" %.5f ", FV[i]); printf(")\n"); } /* * random splitting of the entire data set into x% training samples * and 100-x% test samples */ bool *train_smp = NULL, *test_smp = NULL; /* Lookup for samples */ static int nrSamplesTest, nrSamplesTrain; static void init_lookups( nrSmp ) int nrSmp; { int i; FREE( train_smp ); FREE( test_smp ); train_smp = (bool*) malloc( nrSmp * sizeof( bool ) ); test_smp = (bool*) malloc( nrSmp * sizeof( bool ) ); for( i = 0; i < nrSmp; i++ ) { train_smp[i] = EMPTY; test_smp[i] = EMPTY; } } static void peep_debug_tt( nrSamples, str ) int nrSamples; char *str; { int s, train = 0, test = 0; fprintf( stderr, "%s\n", str); for( s = 0; s < nrSamples; s++ ) { fprintf( stderr, "%d of %d train: %d test: %d\n", s+1, nrSamples, train_smp[s], test_smp[s] ); if( !train_smp[s] && !test_smp[s] ) {fprintf(stderr,"Sample type error; exit...\n"); exit(1);} train += (int)train_smp[s]; test += (int)test_smp[s]; } fprintf( stderr, "No. train=%d(%7.2f%%) No. test=%d(%7.2f%%) All=%d\n", train, (float)train/(float)nrSamples, test, (float)test/(float)nrSamples, nrSamples ); } static void gen_train_test( len, percent, nrTrainElem ) int len; float percent; int *nrTrainElem; { int i, j, k, filled = 0, already; double r; k = 0; *nrTrainElem = (int)(len * percent / 100.0); while( ! filled ) { get_random( len, &j ); already = train_smp[j] == TRUE; if( ! already ) { train_smp[j] = TRUE; k++; } filled = k == *nrTrainElem; } for( i = 0; i < len; i++ ) if( train_smp[i] == TRUE ) test_smp[i] = FALSE; else { train_smp[i] = FALSE; test_smp[i] = TRUE; } } void split_tt( nrSamples, percent_train ) int nrSamples; float percent_train; { int i; if( percent_train < 0.0 || percent_train > 100.0 ) { fprintf( stderr, "Percent error: %5.2f %%\n", percent_train); exit( 1 ); } FREE( train_smp ); FREE( test_smp ); init_lookups( nrSamples ); gen_train_test( nrSamples, percent_train, &nrSamplesTrain ); /* peep_debug_tt( nrSamples, "Peeping..." ); /**/ nrSamplesTest = nrSamples - nrSamplesTrain; } #define LEN 100 void dataline( f, line ) FILE *f; char *line; { char buf[LEN]; int i; for( i = 0; i < LEN; i++ ) buf[i] = '\0'; do { fgets( buf, LEN, f ); /* printf("%s", buf ); /**/ } while( buf[0] == '#' ); i = 0; while( (buf[i] != '\0') && (buf[i] != '\n') && (buf[i] != '#') ) i++; buf[i] = '\0'; /* printf("buf=>>>%s<<<", buf ); /**/ strcpy( line, buf ); } /* factorial of n */ #define MAX_FACTORIAL 100 double factorial( n ) int n; { double prod = 1.0; int i; if( n < 0 || n > MAX_FACTORIAL ) return( EMPTY ); if( n == 0 ) return( 1 ); for( i = 1; i <= n; i++ ) prod *= (double)i; return( prod ); } /* binomial coefficient */ double bin_coeff( n, k ) int n, k; { double bc, nom = 1.0, denom = 1.0; int i, q; if( k > n ) return( EMPTY );; if( k == n ) return( 1 ); if( k > n/2 ) q = k; else q = n - k; for( i = q+1; i <= n; i++ ) nom *= (double)i; for( i = 1; i <= n-q; i++ ) denom *= (double)i; bc = nom / denom; return( bc ); } void get_d( d ) int *d; { char buf[81]; buf[0] = '\0'; gets(buf); if( buf[0] == '\0' ) *d = EMPTY; else sscanf(buf,"%d",d); } void get_f( f ) float *f; { char buf[81]; buf[0]='\0'; gets(buf); if( buf[0] == '\0' ) *f = (float)EMPTY; else sscanf(buf,"%f",f); } void get_d_range( d, min, max, bound ) int *d, min, max, bound; { int old; bool ok; old = *d; do { get_d( d ); if( *d == EMPTY ) *d = old; switch( bound ) { case LEFT_CLOSED__RIGHT_CLOSED : ok = ( *d >= min && *d <= max ); break; case LEFT_CLOSED__RIGHT_OPEN : ok = ( *d >= min && *d < max ); break; case LEFT_OPEN__RIGHT_CLOSED : ok = ( *d > min && *d <= max ); break; case LEFT_OPEN__RIGHT_OPEN : ok = ( *d > min && *d < max ); break; default : fprintf(stderr,"Trouble with interval - exit...\n"); exit(1); }; if( ! ok ) printf("Invalid value! Again ? "); } while( ! ok ); } void get_f_range( f, min, max, bound ) float *f, min, max; int bound; { float old; bool ok; old = *f; do { get_f( f ); if( *f == (float)EMPTY ) *f = old; switch( bound ) { case LEFT_CLOSED__RIGHT_CLOSED : ok = ( *f >= min && *f <= max ); break; case LEFT_CLOSED__RIGHT_OPEN : ok = ( *f >= min && *f < max ); break; case LEFT_OPEN__RIGHT_CLOSED : ok = ( *f > min && *f <= max ); break; case LEFT_OPEN__RIGHT_OPEN : ok = ( *f > min && *f < max ); break; default : fprintf(stderr,"Trouble with interval - exit...\n"); exit(1); }; if( ! ok ) printf("Invalid value! Again ? "); } while( ! ok ); }