/************************************************************************ * * * Program package T O O L D I A G * * * * Version 1.5 * * Date: Tue Feb 8 13:39:02 1994 * * * * NOTE: This program package is copyrighted in the sense that it * * may be used for scientific purposes. The package as a whole, or * * parts thereof, cannot be included or used in any commercial * * application without written permission granted by the author. * * No programs contained in this package may be copied for commercial * * distribution. * * * * All comments concerning this program package may be sent to the * * e-mail address 'tr@fct.unl.pt'. * * * ************************************************************************/ #include #include #include #include #include "def.h" #define MINSAMPLE /*10*/ 6 #define MAXSTRLEN 100 extern bool err_directory(); extern bool next_directory(); extern bool verbose; extern bool is_ascii_file(); /* global variables */ universe univ, *U = NULL; str100 dataDir, dataFile, featNameFile; static str100 classFile; static str80 linebuf, buf; static char wheel[] = "|/-\\"; static bool ascii_file = FALSE; #ifdef DOS static char slash = '\\'; #else static char slash = '/'; #endif void init_Class( C ) Class *C; { C->name[0] = '\0'; C->numSampl = 0; C->S = NULL; C->mean = NULL; C->stddev = NULL; C->sqrsum = NULL; C->a_priori_prob = 0.0; } void init_universe() { strcpy( U->name, "UNDEFINED" ); U->nrClass = 0; U->sumSampl = 0; U->nrFeat = 0; U->normalized = FALSE; U->min = NULL; U->max = NULL; U->nrSelFeat = 0; U->FSV = NULL; U->C = NULL; } void free_universe() { int i; if( U == NULL ) return; for( i = 0; i < U->nrClass; i++ ) { FREE( U->C[i].S ); FREE( U->C[i].mean ); FREE( U->C[i].stddev ); FREE( U->C[i].sqrsum ); init_Class( &(U->C[i]) ); } FREE( U->min ); FREE( U->max ); FREE( U->FSV ); FREE( U->C ); init_universe(); free_feat_names(); } void init_admin() { U = &univ; init_universe(); } void showUniv( f ) FILE *f; { int i; fprintf( f, "\n-----------------------------------------------------------\n"); fprintf( f, " UNIVERSE : %20s", U->name ); if( U->normalized ) fprintf( f, " (normalized to [0,1])"); fprintf( f, "\n OBJECT CLASSES : %20d\n", U->nrClass ); fprintf( f, " Nr. of all samples : %20d\n", U->sumSampl ); fprintf( f, " Nr. of all features : %20d\n", U->nrFeat ); fprintf( f, " Nr. of selected features : %20d\n", U->nrSelFeat ); fprintf( f, " Nr. of samples\n" ); for( i = 0; i < U->nrClass; i++ ) fprintf( f, " Nr. %3d : %30s %5d\n", i+1, U->C[i].name, U->C[i].numSampl); fprintf( f, "-----------------------------------------------------------\n\n"); } void loadUnivDir() { FILE *cf = NULL; int i, j, row, k, w = 0, dim, cptr, sumSampl = 0, offset = 1; bool found; str100 fileName; float value, bias; char chr; if( verbose ) { fprintf( stderr, "Searching sample file in %s\n", dataDir ); } else { fprintf( stderr, "Searching directory...\n"); } /* first count the files */ open_directory( dataDir ); if( err_directory() ) { printf("Cannot open directory %s ...", dataDir ); gets(linebuf); return; } free_universe(); strcpy( U->name, dataDir ); if( dataDir[strlen(dataDir)-1] != slash ) { dataDir[strlen(dataDir)] = slash; dataDir[strlen(dataDir)+1] = '\0'; } /* copy only the name of the data file to the universe name */ i = strlen( dataDir ) - 2; while( dataDir[i] != slash && i > 0 ) i--; if( i == 0 ) offset = 0; strncpy( U->name, &(dataDir[i+offset]), strlen(&(dataDir[i+offset]))-1 ); U->name[strlen(&(dataDir[i+offset]))-1] = '\0'; do { found = next_directory( dataDir, fileName ); if( found ) (U->nrClass)++; } while( found ); close_directory(); if( verbose ) printf("Found %d files\n", U->nrClass ); if( U->nrClass <= 0 || U->nrClass > MAXCLASS ) { printf(" Number of classes invalid! Exitus...\n" ); exit(1); } /* allocate space for the universe */ FREE( U->C ); U->C = (Class*)malloc( U->nrClass* sizeof( struct Class_ ) ); for( i = 0; i < U->nrClass; i++ ) init_Class( &(U->C[i]) ); /* scan through the files */ open_directory( dataDir ); i = 0; if( ! verbose ) { fprintf( stderr, "Loading ... "); fflush( stderr ); } do { found = next_directory( dataDir, fileName ); if( found ) { if( ! verbose ) fprintf( stderr,"%c\b", wheel[(w++) % strlen(wheel)] ); fflush( stderr ); strcpy( classFile, dataDir ); strcat( classFile, fileName ); /* check the first file in the directory if all data is in ascii 1.) Everything ascii data 2.) Feature data in binary format */ if( i == 0 ) ascii_file = is_ascii_file( classFile ); if( ascii_file == EMPTY ) { printf("Cannot open %s! Exitus...\n", classFile ); exit(1); } if( ascii_file ) cf = fopen( classFile, f_open_text_r ); else cf = fopen( classFile, f_open_bin_r ); if( cf == NULL ) { printf("Cannot open %s! Exitus...\n", classFile ); exit(1); } if( verbose ) printf("Scanning file: %s\n", fileName ); /* read the name of the class */ dataline( cf, linebuf ); sscanf( linebuf, "%s", U->C[i].name ); /* groupname if exists */ dataline( cf, linebuf ); /* if the group name exits ignore it */ cptr = 0; while( linebuf[cptr] == ' ' ) cptr++; if( linebuf[cptr] >= '0' && linebuf[cptr] <= '9' ) sscanf( linebuf, "%d", &dim ); else { /* Feature vector dimension */ dataline( cf, linebuf ); sscanf( linebuf, "%d", &dim ); } /* check consistency */ if( i == 0 ) /* first file ? */ { U->nrFeat = dim; FREE( U->min ); FREE( U->max ); U->min = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat); U->max = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat); for( k = 0; k < U->nrFeat; k++ ) { U->min[k] = INFINITY; U->max[k] = -INFINITY; } } else if( dim != U->nrFeat ) { printf("Dim. of feature vector inconsistent: %d != %d! Exitus..\n", dim, U->nrFeat ); exit(1); } /* feature vector dimension */ dataline( cf, linebuf ); sscanf( linebuf, "%d", &(U->C[i].numSampl) ); if( U->C[i].numSampl < MINSAMPLE ) printf("Warning file %s has only %d samples!\n", fileName, U->C[i].numSampl ); sumSampl += U->C[i].numSampl; /* now allocate space for one class */ dim = U->nrFeat * U->C[i].numSampl; FREE( U->C[i].S ); FREE( U->C[i].mean ); FREE( U->C[i].stddev ); FREE( U->C[i].sqrsum ); U->C[i].S = (FeatVector) malloc(dim*sizeof(FeatVector*)); U->C[i].mean = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat); U->C[i].stddev = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat); U->C[i].sqrsum = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat); CHKPTR(U->C[i].S); CHKPTR(U->C[i].mean); CHKPTR(U->C[i].stddev);CHKPTR(U->C[i].sqrsum); /* init mean and standard deviation and minimum and maximum values */ for( k = 0; k < U->nrFeat; k++ ) { U->C[i].mean[k] = 0.0; U->C[i].stddev[k] = 0.0; U->C[i].sqrsum[k] = 0.0; } /* read the samples and update mean and standard deviation */ for( row = 0; row < U->C[i].numSampl; row++ ) { for( k = 0; k < U->nrFeat; k++ ) { if( ascii_file ) fscanf( cf, "%f", &value ); else fread( &value, sizeof(float), 1, cf ); /* printf("i=%d row=%d k=%d ascii=%d value=%f\n", i, row, k, ascii_file, value ); DBG; /**/ U->C[i].S[row*U->nrFeat+k] = value; /* new mean value */ U->C[i].mean[k] = 1.0/((float)(row+1))*(row*U->C[i].mean[k]+value); /* new auxiliary variable "sqrsum" */ U->C[i].sqrsum[k] += value * value; /* new standart deviation */ U->C[i].stddev[k] = (float)sqrt(fabs((1.0/(float)(row+1) * U->C[i].sqrsum[k] - (U->C[i].mean[k] * U->C[i].mean[k])))); /* update minimum and maximum */ if( value < U->min[k] ) U->min[k] = value; if( value > U->max[k] ) U->max[k] = value; } /* showFV( U->nrFeat, &(U->C[i].S[row*U->nrFeat]) ); /**/ } /* compensate for the bias of the standard deviation */ if( U->C[i].numSampl > 1 ) { bias = (float)(U->C[i].numSampl)/(float)(U->C[i].numSampl-1); for( k = 0; k < U->nrFeat; k++ ) { /* Check standard deviation */ if( U->C[i].stddev[k] == 0.0 ) { fprintf(stderr,"WARNING: Standard deviation is zero for class %d", i+1); fprintf(stderr," and feature %d\n", k+1 ); fprintf(stderr," This might cause some trouble. Continue (y/n)?y\b"); gets( buf ); if( buf[0] == 'n' ) { printf("...Exitus...\n"); exit(1); } } U->C[i].stddev[k] *= bias; } } /* printf("mean[%d]: ",i); showFV( U->nrFeat, U->C[i].mean ); /**/ /* printf("stand.dev[%d]: ",i); showFV( U->nrFeat, U->C[i].stddev ); /**/ fclose( cf ); i++; /* next class */ } } while( found ); if( ! verbose ) printf("\n"); U->sumSampl = sumSampl; for( i = 0; i < U->nrClass; i++ ) { U->C[i].a_priori_prob = (float)U->C[i].numSampl / (float)U->sumSampl; if( verbose ) printf(" A priori probability of class %d=%5.2f%%\n", i+1, 100.0*U->C[i].a_priori_prob ); } close_directory(); } void loadUnivFile() { FILE *data = NULL; char **classNames = NULL, **buf1 = NULL; int *samplesPerClass = NULL, *buf2 = NULL; str100 nameBuf; int nrClass = 0, featDim, k, i, j, w = 0, c, dim, row, offset = 1; float dummy, value, bias; FeatVector featBuf = NULL; /* look first type of file */ ascii_file = is_ascii_file( dataFile ); if( ascii_file == EMPTY ) { printf("Cannot open %s!...", dataFile ); gets( linebuf ); return; } if( ascii_file ) data = fopen( dataFile, f_open_text_r ); else data = fopen( dataFile, f_open_bin_r ); if( data == NULL ) { printf("Cannot open %s!...\n", dataFile ); gets( linebuf ); return; } free_universe(); /* copy only the name of the data file to the universe name */ i = strlen( dataFile ) - 1; while( dataFile[i] != slash && i > 0 ) i--; if( i == 0 ) offset = 0; strncpy( U->name, &(dataFile[i+offset]), strlen(&(dataFile[i+offset])) ); U->name[strlen(&(dataFile[i+offset]))] = '\0'; fprintf( stderr, "Scanning file: %s\n", dataFile ); /* feature vector dimension */ dataline( data, linebuf ); sscanf( linebuf, "%d", &featDim ); /* count the classes */ while( !feof( data ) ) { k = 0; while( k < featDim && !feof( data ) ) { if( ascii_file ) fscanf( data, "%f", &dummy ); else fread( &dummy, sizeof(float), 1, data ); k++; } if( !feof( data ) ) { if( ascii_file ) fscanf( data, "%s", nameBuf ); else { j = 0; do { fread( &(nameBuf[j]), sizeof(char), 1, data ); if( nameBuf[j] != '\n' ) j++; } while( nameBuf[j] != '\n' && j < MAXSTRLEN ); nameBuf[j] = '\0'; } if( nameBuf[0] == '\0' ) /* empty name, error */ { fprintf( stderr, "Found an empty class name. Exit...\n"); exit(1); } /* check if the name already exist */ c = 0; while( c < nrClass && !(strcmp(nameBuf,classNames[c]) == 0) ) c++; if( c == nrClass ) { /* new class */ buf1 = (char**) malloc( (1+nrClass) * sizeof(char*) ); buf2 = (int*) malloc( (1+nrClass) * sizeof(int) ); /* copy old values */ for( i = 0; i < nrClass; i++ ) { buf1[i] = (char*) malloc( (1+strlen(classNames[i])) * sizeof(char) ); strcpy( buf1[i], classNames[i] ); FREE( classNames[i] ); buf2[i] = samplesPerClass[i]; } /* insert new value */ buf1[nrClass] = (char*) malloc( (1+strlen(nameBuf)) * sizeof(char) ); strcpy( buf1[nrClass], nameBuf ); buf2[nrClass] = 1; FREE( classNames ); FREE( samplesPerClass ); classNames = buf1; samplesPerClass = buf2; buf1 = NULL; buf2 = NULL; nrClass++; } else (samplesPerClass[c])++; } } fclose( data ); for( i = 0; i < nrClass; i++ ) U->sumSampl += samplesPerClass[i]; U->nrFeat = featDim; FREE( U->min ); FREE( U->max ); U->min = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat); U->max = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat); for( k = 0; k < U->nrFeat; k++ ) { U->min[k] = INFINITY; U->max[k] = -INFINITY; } U->nrClass = nrClass; if( U->nrClass <= 0 || U->nrClass > MAXCLASS ) { printf(" Number of classes invalid! Exitus...\n" ); exit(1); } /* allocate space for the universe */ FREE( U->C ); U->C = (Class*)malloc( U->nrClass* sizeof( struct Class_ ) ); for( i = 0; i < U->nrClass; i++ ) { init_Class( &(U->C[i]) ); strcpy( U->C[i].name, classNames[i] ); FREE( classNames[i] ); /* now allocate space for one class */ dim = U->nrFeat * samplesPerClass[i]; FREE( U->C[i].S ); FREE( U->C[i].mean ); FREE( U->C[i].stddev ); FREE( U->C[i].sqrsum ); U->C[i].S = (FeatVector) malloc(dim*sizeof(FeatVector*)); U->C[i].mean = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat); U->C[i].stddev = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat); U->C[i].sqrsum = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat); CHKPTR(U->C[i].S); CHKPTR(U->C[i].mean); CHKPTR(U->C[i].stddev); CHKPTR(U->C[i].sqrsum); /* init mean and standard deviation */ for( k = 0; k < U->nrFeat; k++ ) { U->C[i].mean[k] = 0.0; U->C[i].stddev[k] = 0.0; U->C[i].sqrsum[k] = 0.0; } } FREE( classNames ); /* open file again to read the feature values */ if( ascii_file ) data = fopen( dataFile, f_open_text_r); else data = fopen(dataFile, f_open_bin_r); if( data == NULL ) { printf("Cannot open %s! Exitus...\n", dataFile ); exit(1); } featBuf = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat); /* feature vector dimension */ dataline( data, linebuf ); sscanf( linebuf, "%f", &dummy ); fprintf( stderr, "Loading ... "); fflush( stderr ); for( i = 0; i < U->sumSampl; i++ ) { fprintf( stderr, "%c\b", wheel[(w++) % strlen(wheel)] ); fflush( stderr ); if( ascii_file ) { for( k = 0; k < U->nrFeat; k++ ) fscanf( data, "%f", &(featBuf[k]) ); fscanf( data, "%s", nameBuf ); } else { fread( featBuf, sizeof(float), U->nrFeat, data ); j = 0; do { fread( &(nameBuf[j]), sizeof(char), 1, data ); if( nameBuf[j] != '\n' ) j++; } while( nameBuf[j] != '\n' && j < MAXSTRLEN ); nameBuf[j] = '\0'; } c = 0; while( c < U->nrClass && !(strcmp(nameBuf,U->C[c].name) == 0) ) c++; if( c == U->nrClass ) { printf("loadUnivFile> Cannot find class %s\n Exitus...\n",nameBuf ); exit(1); } /* copy the sample buffer to the class */ row = U->C[c].numSampl; for( k = 0; k < U->nrFeat; k++ ) { U->C[c].S[row*U->nrFeat+k] = featBuf[k]; value = featBuf[k]; /* new mean value */ U->C[c].mean[k] = 1.0/((float)(row+1))*(row*U->C[c].mean[k]+value); /* new auxiliary variable "sqrsum" */ U->C[c].sqrsum[k] += value * value; /* new standart deviation */ U->C[c].stddev[k] = (float)sqrt(fabs((1.0/(float)(row+1) * U->C[c].sqrsum[k] - (U->C[c].mean[k] * U->C[c].mean[k])))); /* update minimum and maximum */ if( value < U->min[k] ) U->min[k] = value; if( value > U->max[k] ) U->max[k] = value; } (U->C[c].numSampl)++; } fprintf( stderr, "\n"); for( i = 0; i < U->nrClass; i++ ) { U->C[i].a_priori_prob = (float)U->C[i].numSampl / (float)U->sumSampl; /* compensate for the bias of the standard deviation */ if( U->C[i].numSampl > 1 ) { bias = (float)(U->C[i].numSampl)/(float)(U->C[i].numSampl-1); for( k = 0; k < U->nrFeat; k++ ) { /* Check standard deviation */ if( U->C[i].stddev[k] == 0.0 ) { fprintf(stderr,"WARNING: Standard deviation is zero for class %d",i+1); fprintf(stderr," and feature %d\n", k+1 ); fprintf(stderr," This might cause some trouble. Continue (y/n)?y\b"); gets( buf ); if( buf[0] == 'n' ) { printf("...Exitus...\n"); exit(1); } } U->C[i].stddev[k] *= bias; } } if( verbose ) printf(" A priori probability of class %d=%5.2f%%\n", i+1, 100.0*U->C[i].a_priori_prob ); if( U->C[i].numSampl != samplesPerClass[i] ) { printf("loadUnivFile> Inconsistent data\n Exitus...\n"); printf("U->C[%d].numSampl=%d != samplesPerClass[%d]=%d\n", i, U->C[i].numSampl, i, samplesPerClass[i] ); exit(1); } } /* for( c = 0; c < U->nrClass; c++ ) for( i = 0; i < U->C[c].numSampl; i++ ) { for( k = 0; k < U->nrFeat; k++ ) printf("%f ", U->C[c].S[i*U->nrFeat+k] ); printf(" %s\n", U->C[c].name ); } printf(" MIN MAX\n"); for( k = 0; k < U->nrFeat; k++ ) printf("%f ", U->min[k] ); printf("\n"); for( k = 0; k < U->nrFeat; k++ ) printf("%f ", U->max[k] ); printf("\n"); */ FREE( samplesPerClass ); FREE( featBuf ); fclose( data ); } void loadUniv() { bool ok = FALSE; while( ! ok ) { printf("Load data from (f)ile or from (d)irectory? "); gets( linebuf ); switch( linebuf[0] ) { case '\0' : return; case 'f': case 'F': printf("Load data from file? "); gets( dataFile ); loadUnivFile(); ok = TRUE; break; case 'd': case 'D': printf("Load data from directory? "); gets( dataDir ); loadUnivDir(); ok = TRUE; break; default: printf("Unknown option - nothing done..."); gets(linebuf); break; } } }