/************************************************************************ * * * Program package T O O L D I A G * * * * Version 1.5 * * Date: Tue Feb 8 13:39:05 1994 * * * * NOTE: This program package is copyrighted in the sense that it * * may be used for scientific purposes. The package as a whole, or * * parts thereof, cannot be included or used in any commercial * * application without written permission granted by the author. * * No programs contained in this package may be copied for commercial * * distribution. * * * * All comments concerning this program package may be sent to the * * e-mail address 'tr@fct.unl.pt'. * * * ************************************************************************/ #include #include #include #include "def.h" extern universe *U; extern bool verbose; static str80 buf, gnuFile; static char cmd[200]; #ifdef DOS #define EXEC "" #else #define EXEC "exec " #endif #define OFFSET (1.0) #define TOL (10.0) /* Tolerance for the plotting */ static void gen_gnuplot( x, y, dim, first, second, nrClass, classes ) float *x, *y; int dim; int nrClass, *classes; { int i; FILE *gf = NULL; float minX = INFINITY, minY = INFINITY, maxX = -INFINITY, maxY = -INFINITY; float dx, dy, delta; strcpy( gnuFile, DATA_DIR ); strcat( gnuFile, "_correl_" ); gf = fopen( gnuFile, f_open_text_w ); if( gf == NULL ) { printf("Cannot open %s! Exitus...\n", gnuFile ); exit(1); } if( verbose ) printf("Dumping %s\n", gnuFile ); for( i = 0; i < dim; i++ ) { fprintf( gf, "%f %f\n", x[i], y[i] ); if( x[i] < minX ) minX = x[i]; if( x[i] > maxX ) maxX = x[i]; if( y[i] < minY ) minY = y[i]; if( y[i] > maxY ) maxY = y[i]; } fclose( gf ); dx = maxX - minX; dy = maxY - minY; if( dx == 0.0 ) delta = OFFSET; else delta = dx / TOL; minX -= delta; maxX += delta; if( dy == 0.0 ) delta = OFFSET; else delta = dy / TOL; minY -= delta; maxY += delta; /* generate the gnuplot batch file */ strcpy( gnuFile, DATA_DIR ); strcat( gnuFile, "_tmp.gnu" ); gf = fopen( gnuFile, f_open_text_w ); if( gf == NULL ) { printf("Cannot open %s! Exitus...\n", gnuFile ); exit(1); } if( verbose ) printf("Dumping %s\n", gnuFile ); fprintf( gf, "#\n# Batch file to visualize linear correlation\n" ); fprintf( gf, "# Generated automatically !\n#\n" ); fprintf( gf, "# Universe %s\n", U->name ); fprintf( gf, "set title \"LINEAR CORRELATION FOR CLASS"); if( nrClass > 1 ) fprintf( gf, "ES" ); fprintf( gf, " " ); for( i = 0; i < nrClass; i++ ) fprintf( gf, "%d ", classes[i] ); fprintf( gf, "\"\n"); fprintf( gf, "set xlabel \"First feature = %d\"\n", first ); fprintf( gf, "set ylabel \"Second feature = %d\"\n", second ); fprintf( gf, "set xrange [%f:%f]\n", minX, maxX ); fprintf( gf, "set yrange [%f:%f]\n", minY, maxY ); fprintf( gf, "plot \"_correl_\"\n" ); fprintf( gf, "pause -1 \"Hit return to exit...\"\n" ); fclose( gf ); sprintf( cmd, "cd %s\n\t%sgnuplot %s", DATA_DIR, EXEC, gnuFile ); if( verbose ) printf("\n --- Execute:\n\t%s\n", cmd ); #ifdef DOS #else system( cmd ); #endif fclose( gf ); } void corr( allClass, nrClass, classes, first, second ) bool allClass; int nrClass, *classes, first, second; { int i, j, row, class, nrPoints = 0; float *x = NULL, *y = NULL; float Ex = 0.0, Ey = 0.0, Exy = 0.0; /* expected values */ float Sx = 0.0, Sy = 0.0; /* standard deviation */ float Cov, CorCoeff; /* Covariance and linear correlation coefficient */ if( allClass ) nrPoints = U->sumSampl; else for( i = 0; i < nrClass; i++ ) { class = classes[i]-1; if( U->C[class].numSampl < 2 ) { fprintf(stderr,"corr> Not enough samples: %d; Exit...\n", U->C[class].numSampl ); exit(1); } nrPoints += U->C[class].numSampl; } x = (float*) malloc( nrPoints * sizeof(float) ); y = (float*) malloc( nrPoints * sizeof(float) ); row = 0; for( i = 0; i < nrClass; i++ ) { class = classes[i]-1; for( j = 0; j < U->C[class].numSampl; j++ ) { x[row] = U->C[class].S[j*U->nrFeat+(first-1)]; y[row] = U->C[class].S[j*U->nrFeat+(second-1)]; /* printf("x[%d]=%f y[%d]=%f\n", row, x[row], row, y[row] ); /**/ row++; } } /* all x and y values have been determined now: calculate linear correlation coefficient */ for( i = 0; i < row; i++ ) { Ex += x[i]; Ey += y[i]; Exy += x[i] * y[i]; } Ex /= (float)row; Ey /= (float)row; Exy /= (float)row; Cov = Exy - Ex*Ey; for( i = 0; i < row; i++ ) { Sx += (x[i]-Ex)*(x[i]-Ex); Sy += (y[i]-Ey)*(y[i]-Ey); } Sx = (float)sqrt( (double)(Sx/(float)(row-1)) ); Sy = (float)sqrt( (double)(Sy/(float)(row-1)) ); if( Sx == 0.0 || Sy == 0.0 ) { fprintf(stderr,"corr> Cannot calculate correlation coefficient"); fprintf(stderr," because standard deviation is 0; Exit...\n" ); exit(1); } CorCoeff = Cov / (Sx*Sy); printf("\n>>>----------------------------------------------------<<<\n"); printf(" RESULT OF LINEAR CORRELATION ANALYSIS \n"); printf(" Feature nr 1: %d\n", first ); printf(" Mean=%7.3f Standard deviation=%7.3f\n", Ex, Sx ); printf(" Feature nr 2: %d\n", second ); printf(" Mean=%7.3f Standard deviation=%7.3f\n\n", Ey, Sy ); printf(" Covariance=%7.3f\n", Cov ); printf(" ===> Correlation coefficient=%7.3f <===\n", CorCoeff ); printf(">>>----------------------------------------------------<<<\n\n"); gen_gnuplot( x, y, row, first, second, nrClass, classes ); FREE( x ); FREE( y ); } void correlation() { bool ok, allClass, already; int i, j, first, second, nrClass, *classes = NULL; printf(">>>--- Correlation analysis between two features ---<<<\n"); printf("First feature? "); do { get_d( &first ); ok = ( first > 0 && first <= U->nrFeat ); if( ! ok ) printf("Invalid value! Again ? "); } while( ! ok ); printf("Second feature? "); do { get_d( &second ); ok = ( second > 0 && second <= U->nrFeat && first != second ); if( ! ok ) printf("Invalid value! Again ? "); } while( ! ok ); printf("Correlation of all classes (y/n)?y\b"); gets( buf ); allClass = ((buf[0]=='y') || (buf[0]=='Y') || (buf[0]=='\0')); if( allClass ) { nrClass = U->nrClass; classes = (int*) malloc( U->nrClass * sizeof(int) ); } if( ! allClass ) { printf("Number of classes to be analyzed? "); do { get_d( &nrClass ); ok = ( nrClass > 0 && nrClass < U->nrClass ); if( ! ok ) printf("Invalid value! Again ? "); } while( ! ok ); classes = (int*) malloc( nrClass * sizeof(int) ); for( i = 0; i < nrClass; i++ ) { printf("Class nr.%d ? ", i+1 ); do { get_d( &(classes[i]) ); ok = ( classes[i] > 0 && classes[i] <= U->nrClass ); already = FALSE; if( ok ) for( j = 0; j < i; j++ ) already = already || (classes[j] == classes[i]); ok = ! already && ok; if( ! ok ) printf("Invalid value! Again ? "); } while( ! ok ); } } else for( i = 0; i < nrClass; i++ ) classes[i] = i+1; corr( allClass, nrClass, classes, first, second ); FREE( classes ); }