/************************************************************************ * * * Program package T O O L D I A G * * * * Version 1.5 * * Date: Tue Feb 8 13:39:03 1994 * * * * NOTE: This program package is copyrighted in the sense that it * * may be used for scientific purposes. The package as a whole, or * * parts thereof, cannot be included or used in any commercial * * application without written permission granted by the author. * * No programs contained in this package may be copied for commercial * * distribution. * * * * All comments concerning this program package may be sent to the * * e-mail address 'tr@fct.unl.pt'. * * * ************************************************************************/ /* * COPYRIGHT COMMENT */ /************************************************************************ * * * Program package 'lvq_pak': * * * * sammon.c * * -generates a Sammon mapping from a given list * * * * Version 2.0 * * Date: 31 Jan 1992 * * * * NOTE: This program package is copyrighted in the sense that it * * may be used for scientific purposes. The package as a whole, or * * parts thereof, cannot be included or used in any commercial * * application without written permission granted by its producents. * * No programs contained in this package may be copied for commercial * * distribution. * * * * All comments concerning this program package may be sent to the * * e-mail address 'lvq@cochlea.hut.fi'. * * * ************************************************************************/ #include #include #include #include #include "def.h" #define MAGIC (0.2) #define EPSILON (0.00000000001) extern universe *U; extern bool verbose; extern float Euclidian_Distance(); extern bool feat_description; extern char **feature_desc; static str80 buf, linebuf, name, lvqFile, gnuFile, featNameFile; static char cmd[200]; static bool done; void gen_lvq_data_file() { int i, j, k; FILE *lvq = NULL, *feats = NULL; if( U->nrSelFeat == 0 ) { printf(" Select features first please!..." ); gets( buf ); return; } strcpy( lvqFile, DATA_DIR ); printf("Saving the data in LVQ format in file:\n\t\t%s", lvqFile ); gets( name ); if( name[0] != '\0' ) { strcat( lvqFile, name ); lvq = fopen( lvqFile, f_open_text_w ); if( lvq == NULL ) { printf("Cannot open %s! Exitus...\n", lvqFile ); exit(1); } if( verbose ) printf("Generating LVQ-File: %s\n", lvqFile ); fprintf( lvq, "%d\n", U->nrSelFeat ); for( i = 0; i < U->nrClass; i++ ) { for( j = 0; j < U->C[i].numSampl; j++ ) { for( k = 0; k < U->nrSelFeat; k++ ) fprintf( lvq, "%f ", U->C[i].S[j*U->nrFeat+U->FSV[k].rank] ); fprintf( lvq, "%s\n", U->C[i].name ); } } fclose( lvq ); } if( ! feat_description ) return; strcpy( featNameFile, DATA_DIR ); printf("Saving the feature names in file:\n\t\t%s", featNameFile ); gets( name ); if( name[0] != '\0' ) { strcat( featNameFile, name ); feats = fopen( featNameFile, f_open_text_w ); if( feats == NULL ) { printf("Cannot open %s! Exitus...\n", featNameFile ); exit(1); } if( verbose ) printf("Generating feature description-File: %s\n", featNameFile ); fprintf( feats, "%d\n", U->nrSelFeat ); for( k = 0; k < U->nrSelFeat; k++ ) fprintf( feats, "%s\n", feature_desc[U->FSV[k].rank] ); fclose( feats ); } } static void filter_selected_lvq() { str80 raw, filtered; FILE *r, *f; str100 nameBuf; int featDim, k, i; FeatVector featBuf = NULL; if( U->nrSelFeat == 0 ) { printf(" Select features first please!..." ); gets( buf ); return; } printf("Name of the file to be filtered? "); gets( raw ); r = fopen( raw, f_open_text_r ); if( r == NULL ) { printf(" Cannot open %s...", raw ); gets( buf ); return; } strcpy( filtered, DATA_DIR ); printf("Name of the output file with filtered features?\n\t\t%s", filtered ); gets( buf ); if( (strcmp(raw,buf) == 0) ) { printf(" Sorry file names are the same!..." ); gets( buf ); return; } strcat( filtered, buf ); f = fopen( filtered, f_open_text_w ); if( f == NULL ) { printf(" Cannot open %s...", raw ); gets( buf ); return; } if( verbose ) printf("Filtering\n\t\t%s\nTO\n\t\t%s\n", raw, filtered ); /* try to read the dimension */ dataline( r, buf ); sscanf( buf, "%d", &featDim ); if( featDim != U->nrFeat ) { printf("Universe and file have different feature dimensions: %d != %d...", U->nrFeat, featDim ); gets( buf ); fclose( r ); fclose( f ); return; } featBuf = (FeatVector) malloc(sizeof(FeatVector*) * U->nrFeat); fprintf( f, "%d\n", U->nrSelFeat ); while( !feof( r ) ) { for( k = 0; k < U->nrFeat; k++ ) { fscanf( r, "%f", &(featBuf[k]) ); /* normalize, if training data was also normalized */ if( U->normalized ) featBuf[k] = (featBuf[k] - U->min[k]) / (U->max[k] - U->min[k]); } fscanf( r, "%s", nameBuf ); if( !feof( r ) ) { for( i = 0; i < U->nrSelFeat; i++ ) fprintf( f, "%f ", featBuf[ U->FSV[i].rank ] ); fprintf( f, "%s\n", nameBuf ); } } FREE( featBuf ); fclose( r ); fclose( f ); } #ifdef DOS #define EXEC "" #else #define EXEC "exec " #endif #define OFFSET (1.0) #define TOL (10.0) static void gen_plot( x, y ) float *x, *y; { FILE *gf = NULL; int i, j, row; str20 cntStr; float minX = INFINITY, minY = INFINITY, maxX = -INFINITY, maxY = -INFINITY; float dx, dy, delta; /* Copy the data to gnuplot data files: one for each class */ row = 0; for( i = 0; i < U->nrClass; i++ ) { strcpy( gnuFile, DATA_DIR ); #ifdef DOS sprintf( cntStr, "_class.%d", i ); strcat( gnuFile, cntStr ); #else strcat( gnuFile, U->C[i].name ); #endif gf = fopen( gnuFile, f_open_text_w ); if( gf == NULL ) { printf("Cannot open %s! Exitus...\n", gnuFile ); exit(1); } if( verbose ) printf("Dumping %s\n", gnuFile ); fprintf( gf, "# %s\n", U->C[i].name ); for( j = 0; j < U->C[i].numSampl; j++ ) { if( x[row] < minX ) minX = x[row]; if( x[row] > maxX ) maxX = x[row]; if( y[row] < minY ) minY = y[row]; if( y[row] > maxY ) maxY = y[row]; fprintf( gf, "%f\t%f\n", x[row], y[row] ); /* printf("x[%d] = %f y[%d] = %f class = %s", row, x[row], row, y[row], U->C[i].name ); DBG; /**/ row++; } fclose( gf ); } dx = maxX - minX; dy = maxY - minY; if( dx == 0.0 ) delta = OFFSET; else delta = dx / TOL; minX -= delta; maxX += delta; if( dy == 0.0 ) delta = OFFSET; else delta = dy / TOL; minY -= delta; maxY += delta; /* generate the gnuplot batch file */ strcpy( gnuFile, DATA_DIR ); strcat( gnuFile, "_tmp.gnu" ); gf = fopen( gnuFile, f_open_text_w ); if( gf == NULL ) { printf("Cannot open %s! Exitus...\n", gnuFile ); exit(1); } if( verbose ) printf("Dumping %s\n", gnuFile ); fprintf( gf, "#\n# Batch file to visualize sammon plot\n" ); fprintf( gf, "# Generated automatically !\n#\n" ); fprintf( gf, "# Universe %s\n", U->name ); fprintf( gf, "set title \"SAMMON PLOT\"\n"); fprintf( gf, "set xrange [%f:%f]\n", minX, maxX ); fprintf( gf, "set yrange [%f:%f]\n", minY, maxY ); fprintf( gf, "plot " ); for( i = 0; i < U->nrClass-1; i++ ) #ifdef DOS fprintf( gf, "\"_class.%d\", ", i ); fprintf( gf, "\"_class.%d\"\n", U->nrClass-1 ); #else fprintf( gf, "\"%s\", ", U->C[i].name ); fprintf( gf, "\"%s\"\n", U->C[U->nrClass-1].name ); #endif fprintf( gf, "pause -1 \"Hit return to exit...\"" ); fclose( gf ); sprintf( cmd, "cd %s\n\t%sgnuplot %s", DATA_DIR, EXEC, gnuFile ); if( verbose ) printf("\n --- Execute:\n\t%s\n", cmd ); #ifdef DOS #else system( cmd ); #endif } #define RLEN_MAX 32000 #define RLEN_DEFAULT 100 void sammon() { int rlen; int i, j, k, row; int noc = 0; float e1x, e1y, e2x, e2y; float dpj; float dq, dr, dt; float *x = NULL, *y = NULL; float *xu = NULL, *yu = NULL, *dd = NULL; float xd, yd; float xx, yy; float e, tot; int mutual; float d, ee; FeatVector Samples = NULL, smp = NULL, smp1 = NULL; if( U->nrClass == 0 ) { printf(" Please load universe first !..." ); gets( buf ); return; } if( U->nrSelFeat == 0 ) { printf(" Select features first please!..." ); gets( buf ); return; } if( U->nrSelFeat == 1 ) { printf("It does not make sense to map unidimensional "); printf("data to n dimensions!..." ); gets( buf ); return; } /* Load samples with selected features only in buffer */ Samples = (FeatVector)malloc(U->nrSelFeat*U->sumSampl*sizeof(FeatVector*)); if( Samples == NULL ) { printf("No space for buffer 'Samples'! Exitus...\n"); exit(1); } row = 0; for( i = 0; i < U->nrClass; i++ ) for( j = 0; j < U->C[i].numSampl; j++ ) { for( k = 0; k < U->nrSelFeat; k++ ) Samples[ row*U->nrSelFeat + k ] = U->C[i].S[j*U->nrFeat+U->FSV[k].rank]; row++; } /* How many entries? */ noc = U->sumSampl; /* Allocate dynamical memory */ x = (float *) malloc(sizeof(float) * noc); y = (float *) malloc(sizeof(float) * noc); if( U->nrSelFeat == 2 ) { printf("Number of selected features is 2 - Mapping the data directly\n"); printf(" to 2 dimensions, without using the Sammon mapping.\n"); for( i = 0; i < row; i++ ) { x[i] = Samples[ i*U->nrSelFeat + 0 ]; y[i] = Samples[ i*U->nrSelFeat + 1 ]; } gen_plot( x, y ); FREE( Samples ); FREE( x ); FREE( y ); return; } rlen = RLEN_DEFAULT; printf("Sammon: Number of iterations (default=%d): ", rlen ); gets(buf); if( buf[0] != '\0' ) sscanf( buf, "%d", &rlen ); if( rlen <= 0 || rlen >= RLEN_MAX ) { printf("Value %d for iterations is invalid. Setting to default %d...", rlen, RLEN_DEFAULT ); rlen = RLEN_DEFAULT; gets( buf ); } xu = (float *) malloc(sizeof(float) * noc); yu = (float *) malloc(sizeof(float) * noc); dd = (float *) malloc(sizeof(float) * (noc * (noc - 1) / 2)); /* Initialize the tables */ for (i = 0; i < noc; i++) { x[i] = (float) i / (float) noc; y[i] = (float) (i + i % 2) / (float) noc; } /* Compute the mutual distances between entries */ mutual = 0; for (j = 1; j < noc; j++) { smp = &(Samples[j*U->nrSelFeat]); for (k = 0; k < j; k++) { smp1 = &(Samples[k*U->nrSelFeat]); dd[mutual] = Euclidian_Distance( smp, smp1, U->nrSelFeat ); if( dd[mutual] == 0.0 ) { printf("Warning: distance between samples %d and %d is 0\n", j, k ); printf("\tSetting distance to a minimum %e\n", EPSILON ); dd[mutual] = EPSILON; } /* showFV( U->nrSelFeat, smp ); showFV( U->nrSelFeat, smp1 ); printf("Mutual[%d][%d] = %7.5f", j, k, dd[mutual] ); NL; /**/ mutual++; } } /* Iterate */ for (i = 0; i < rlen; i++) { /* printf("\r iterations to go: %6d - sample=", rlen - i -1 ); /**/ printf("\r iterations to go: %6d", rlen-i-1 ); fflush(stdout); /**/ for (j = 0; j < noc; j++) { /* printf("%5d\b\b\b\b\b", j ); fflush(stdout); /**/ e1x = e1y = e2x = e2y = 0.0; for (k = 0; k < noc; k++) { if (j == k) continue; xd = x[j] - x[k]; yd = y[j] - y[k]; dpj = (float) sqrt((double) (xd * xd + yd * yd)); /* Calculate derivatives */ if (k > j) dt = dd[k * (k - 1) / 2 + j]; else dt = dd[j * (j - 1) / 2 + k]; if( dt != 0.0 ) { dq = dt - dpj; dr = dt * dpj; e1x += xd * dq / dr; e1y += yd * dq / dr; e2x += (dq - xd * xd * (1.0 + dq / dpj) / dpj) / dr; e2y += (dq - yd * yd * (1.0 + dq / dpj) / dpj) / dr; } } /* Correction */ xu[j] = x[j] + MAGIC * e1x / (float)fabs((double)e2x); yu[j] = y[j] + MAGIC * e1y / (float)fabs((double)e2y); } /* Move the center of mass to the center of picture */ xx = yy = 0.0; for (j = 0; j < noc; j ++) { xx += xu[j]; yy += yu[j]; } xx /= (float)noc; yy /= (float)noc; for (j = 0; j < noc; j ++) { x[j] = xu[j] - xx; y[j] = yu[j] - yy; } /* Error in distances */ e = tot = 0.0; mutual = 0; for (j = 1; j < noc; j ++) { for (k = 0; k < j; k ++) { d = dd[mutual]; tot += d; xd = x[j] - x[k]; yd = y[j] - y[k]; ee = d - (float) sqrt((double)( xd * xd + yd * yd )); e += (ee * ee / d); mutual++; } } e /= tot; /* fprintf(stdout, "Mapping error: %7.3f\n", e); /**/ } FREE( xu ); FREE( yu ); FREE( dd ); printf("\n"); gen_plot( x, y ); FREE( Samples ); FREE( x ); FREE( y ); } void lvqLoop() { printf("\n>>>>>----- Learning Vector Quantization (LVQ) -----<<<<<<\n"); printf("(1) Generate data file\n"); printf("(2) Filter only selected features from a file\n"); printf("(Q)uit\n\n"); printf("Choice: "); gets(buf); done = FALSE; switch( buf[0] ) { case '?': help( LOOP_LVQ, buf ); break; case '1': gen_lvq_data_file(); break; case '2': filter_selected_lvq(); break; case 'q': case 'Q': done = TRUE; break; default: showUniv( stdout ); break; } }