/************************************************************************ * * * Program package T O O L D I A G * * * * Version 1.5 * * Date: Tue Feb 8 13:39:03 1994 * * * * NOTE: This program package is copyrighted in the sense that it * * may be used for scientific purposes. The package as a whole, or * * parts thereof, cannot be included or used in any commercial * * application without written permission granted by the author. * * No programs contained in this package may be copied for commercial * * distribution. * * * * All comments concerning this program package may be sent to the * * e-mail address 'tr@fct.unl.pt'. * * * ************************************************************************/ #include #include #include "def.h" extern universe *U; extern bool verbose; extern bool is_ascii_file(); extern bool *train_smp, *test_smp; static bool done; static str80 buf; static str100 linebuf; static char defNameTrain[] = "_train", defNameTest[] = "_test"; #define MAXSTRLEN 100 static void merge_data_files() { FILE *in1 = NULL, *in2 = NULL, *out = NULL; str80 in1Nam, in2Nam, outNam; str80 classNam1, classNam2; bool in1Ascii, in2Ascii, outAscii; FeatVector featVec1 = NULL, featVec2 = NULL, featVec3 = NULL; int i, j, k1, k2, dim1, dim2, dim3; printf("\n--- Merging two data files ---\n"); printf("\tMerge file: "); gets( in1Nam ); if( in1Nam[0] == '\0' ) { printf("File name empty..."); gets(buf); return; } in1Ascii = is_ascii_file( in1Nam ); switch( in1Ascii ) { case TRUE : in1 = fopen( in1Nam, f_open_text_r ); break; case FALSE : in1 = fopen( in1Nam, f_open_bin_r ); break; case EMPTY : printf("File not found..."); gets(buf); return; default: fprintf(stderr, "Error in merge_data_files, exit...\n"); exit(1); } printf("\t and file: "); gets( in2Nam ); if( in2Nam[0] == '\0' ) { printf("File name empty..."); gets(buf); return; } if( strcmp(in1Nam,in2Nam)==0 ) { printf("File names identical..."); gets(buf); return; } in2Ascii = is_ascii_file( in2Nam ); switch( in2Ascii ) { case TRUE : in2 = fopen( in2Nam, f_open_text_r ); break; case FALSE : in2 = fopen( in2Nam, f_open_bin_r ); break; case EMPTY : printf("File not found..."); gets(buf); fclose(in1); return; default: fprintf(stderr, "Error in merge_data_files, exit...\n"); exit(1); } printf("\t to file: "); gets( outNam ); if( outNam[0] == '\0' ) { printf("File name empty..."); gets(buf); return; } if( strcmp(in1Nam,outNam)==0 ) { printf("File names identical..."); gets(buf); return; } if( strcmp(outNam,in2Nam)==0 ) { printf("File names identical..."); gets(buf); return; } outAscii = FALSE; printf(" Write %s in b)inary or a)scii mode ?b\b", outNam ); gets( buf ); if( buf[0] == 'a' ) outAscii = TRUE; if( outAscii ) out = fopen( outNam, f_open_text_w ); else out = fopen( outNam, f_open_bin_w ); if( out == NULL ) { printf("Cannot open %s! Exitus...\n", outNam ); fclose(in1); fclose(in2); } if( verbose ) printf("\tMerging %s and %s to %s\n", in1Nam, in2Nam, outNam ); dataline( in1, linebuf ); sscanf( linebuf, "%d", &dim1 ); dataline( in2, linebuf ); sscanf( linebuf, "%d", &dim2 ); dim3 = dim1 + dim2; featVec1 = (FeatVector) malloc(sizeof(FeatVector*) * dim1); featVec2 = (FeatVector) malloc(sizeof(FeatVector*) * dim2); featVec3 = (FeatVector) malloc(sizeof(FeatVector*) * dim3); fprintf( out, "%d\n", dim3 ); /* scan the input files line by line and write the result to the output */ while( !feof( in1 ) && !feof( in2 ) ) { k1 = 0; k2 = 0; while( k1 < dim1 && !feof( in1 ) ) { if( in1Ascii ) fscanf( in1, "%f", &(featVec1[k1]) ); else fread( &(featVec1[k1]), sizeof(float), 1, in1 ); k1++; } while( k2 < dim2 && !feof( in2 ) ) { if( in2Ascii ) fscanf( in2, "%f", &(featVec2[k2]) ); else fread( &(featVec2[k2]), sizeof(float), 1, in2 ); k2++; } /* get the first name */ if( !feof( in1 ) ) { if( in1Ascii ) fscanf( in1, "%s", classNam1 ); else { j = 0; do { fread( &(classNam1[j]), sizeof(char), 1, in1 ); if( classNam1[j] != '\n' ) j++; } while( classNam1[j] != '\n' && j < MAXSTRLEN ); classNam1[j] = '\0'; } } /* get the second name */ if( !feof( in2 ) ) { if( in2Ascii ) fscanf( in2, "%s", classNam2 ); else { j = 0; do { fread( &(classNam2[j]), sizeof(char), 1, in2 ); if( classNam2[j] != '\n' ) j++; } while( classNam2[j] != '\n' && j < MAXSTRLEN ); classNam2[j] = '\0'; } } if( !feof( in1 ) && !feof( in2 ) ) { if( strcmp(classNam1,classNam2) == 0 ) { /* write the new line */ if( outAscii ) { for( k1 = 0; k1 < dim1; k1++ ) fprintf( out, "%f ", featVec1[k1] ); for( k2 = 0; k2 < dim2; k2++ ) fprintf( out, "%f ", featVec2[k2] ); fprintf( out, "%s\n", classNam1 ); } else { fwrite( featVec1, sizeof(FeatVector*), dim1, out ); fwrite( featVec2, sizeof(FeatVector*), dim2, out ); fprintf( out, "%s\n", classNam1 ); } } else { printf("ERROR: Detected different class names in files %s and %s\n", in1Nam, in2Nam ); fprintf(stderr,"\t>>>%s<<< ---- >>>%s<<< ...", classNam1, classNam2 ); gets( buf ); FREE( featVec1 ); FREE( featVec2 ); FREE( featVec3 ); fclose(in1); fclose(in2); fclose(out); return; } } } FREE( featVec1 ); FREE( featVec2 ); FREE( featVec3 ); fclose(in1); fclose(in2); fclose(out); } static void split_train_test() { FILE *outTrain = NULL, *outTest = NULL, *out; str80 outNam1, outNam2, classNam, tmpStr; bool outAscii, ok; float percentTrain, f = 70.0; int s, k, dim, class, smp; printf("\n--- Splitting the actual data set into training and test ---\n"); printf("\tTraining file name: %s\b\b\b\b\b\b", defNameTrain ); gets( linebuf ); strcpy( outNam1, DATA_DIR ); if( linebuf[0] != '\0' ) strcpy( tmpStr, linebuf ); else strcpy( tmpStr, defNameTrain ); strcat( outNam1, tmpStr ); strcpy( outNam2, DATA_DIR ); printf("\t Test file name: %s\b\b\b\b\b", defNameTest ); gets( linebuf ); if( linebuf[0] != '\0' ) { if( strcmp( tmpStr, linebuf ) == 0 ) { printf("File names identical, using default.\n"); strcpy( outNam1, DATA_DIR ); strcat( outNam1, defNameTrain ); strcpy( tmpStr, defNameTest ); } else strcpy( tmpStr, linebuf ); } else strcpy( tmpStr, defNameTest ); strcat( outNam2, tmpStr ); outAscii = FALSE; printf(" Write in b)inary or a)scii mode ?b\b" ); gets( buf ); if( buf[0] == 'a' ) outAscii = TRUE; if( outAscii ) { outTrain = fopen( outNam1, f_open_text_w ); outTest = fopen( outNam2, f_open_text_w ); } else { outTrain = fopen( outNam1, f_open_bin_w ); outTest = fopen( outNam2, f_open_bin_w ); } if( outTrain == NULL || outTest == NULL ) { if( outTrain == NULL ) printf("Cannot open %s!\n", outNam1 ); if( outTest == NULL ) printf("Cannot open %s!\n", outNam2 ); printf("\tExitus...\n" ); if( outTrain != NULL ) fclose( outTrain ); if( outTest != NULL ) fclose( outTest ); exit(1); } printf("Split data into ? percent training data? 70\b\b"); do { gets( linebuf ); if( linebuf[0] == '\0' ) ok = TRUE; else { sscanf( linebuf,"%f", &f ); ok = ( f >= 0.0 && f <= 100.0 ); } if( ! ok ) printf("Invalid value! Again ? "); else percentTrain = f; } while( ! ok ); if( verbose ) printf("\tSplitting samples into\n\t%s and\n\t%s\n", outNam1, outNam2 ); split_tt( U->sumSampl, percentTrain ); fprintf( outTrain, "%d\n", U->nrFeat ); fprintf( outTest, "%d\n", U->nrFeat ); class = 0; smp = 0; for( s = 0; s < U->sumSampl; s++ ) { if( train_smp[s] ) out = outTrain; else if ( test_smp[s] ) out = outTest; else { fprintf(stderr,"Sample neither train nor test. Exit...\n");exit(1);} if( outAscii ) for( k = 0; k < U->nrFeat; k++ ) fprintf( out, "%f ", U->C[class].S[smp*U->nrFeat+k] ); else fwrite( &(U->C[class].S[smp*U->nrFeat]), sizeof(FeatVector*), U->nrFeat, out ); fprintf( out, "%s\n", U->C[class].name ); /* fprintf( stdout, "out=%d s=%d smp=%d train=%d test=%d %s\n", (int)out, s, smp, train_smp[s], test_smp[s], U->C[class].name ); /**/ smp++; if( smp == U->C[class].numSampl ) { class++; smp = 0; } } fclose(outTrain); fclose(outTest); out = NULL; } void intFaceLoop() { printf("\n>>>>>----- INTERFACE MENU -----<<<<<<\n"); printf("(1) Learning Vector Quantization (LVQ)\n"); printf("(2) Stuttgart Neural Network Simulator (SNNS)\n"); printf("(3) Merge two data files to a single data file\n"); printf("(4) Split a data file randomly into two (train & test)\n"); printf("(Q)uit\n\n"); printf("Choice: "); gets(buf); done = FALSE; switch( buf[0] ) { case '?': help( LOOP_INTERFACE, buf ); break; case '1': lvqLoop(); break; case '2': snnsLoop(); break; case '3': merge_data_files(); break; case '4': split_train_test(); break; case 'q': case 'Q': done = TRUE; break; default: showUniv( stdout ); break; } } void intface() { if( U->nrClass > 1 ) do { intFaceLoop(); } while( !done ); else { printf("Please load universe first !...");gets( buf );} }