/* ====================================================================== David W. Aha NGE: nge.c: Top-level file for the NGE algorithm Assumptions: 1. Only 1 predictee (the last argument) 2. All symbolic values are given from 0 through n 3. None of the maximum settings in datastructures.h are violated by the database. Some are checked in the read routine. Not all. 4. Missing values in the database are denoted by '?' 5. Number of classes is less than 10 (assumed when reading) 6. Does not test for overflow in size of exemplars[] array, which stores the stored exemplars. 7. Requires that the number of values for symbolic-valued attributes be included after them in the namesfile. ====================================================================== */ /*==== Accessing the declarations. ====*/ #define MAIN 1 #include "nge.h" #undef MAIN /* ====================================================================== Main Function ====================================================================== */ main (argc, argv) int argc; char *argv[]; { int able_to_interpret_options(), experiment(); void print_usage(); if (argc < (1 + NUMBER_OF_REQUIRED_INPUTS)) /*==== Missing some required inputs ====*/ print_usage(); else { /*==== Read the required input parameters ====*/ strcpy(namesfile,argv[1]); strcpy(trainingfile,argv[2]); strcpy(testingfile,argv[3]); strcpy(outputfile,argv[4]); number_of_seeds = atoi(argv[5]); srandom((unsigned)atoi(argv[6])); /*==== Interpret the options, if possible ====*/ if (able_to_interpret_options(&argv[0],argc)) { if (experiment() == FALSE) print_usage(); } else print_usage(); } } /* ====================================================================== Prints usage information for the program. ====================================================================== */ void print_usage() { printf("Usage: namesfile trainfile testfile outputfile #seeds seed [options]\n"); printf("\nRequired Parameters:\n"); printf(" namesfile contains the datafile's format information\n"); printf(" trainfile contains training instances\n"); printf(" testfile contains testing instances\n"); printf(" outputfile will contain the experiment's results\n"); printf(" Number of seeds to initialize the exemplar base\n"); printf(" seed is used to initialize random variable generator\n"); printf("\nUser Parameters: (name, default, and brief description)\n"); printf(" -greedy (off) if set, selects greedy variant of NGE\n"); printf(" -far (20) (divided by 100) feature_adjustment_rate\n"); printf("\nConvenience Options:\n"); printf(" -testrate (100) How often to run on test set\n"); printf(" -testlast (off) Test after finished?\n"); } /* ====================================================================== Tries to interpret the options. Yields 0 iff something is wrong. Else 1. ====================================================================== */ int able_to_interpret_options(argument,argc) int *argument,argc; { /*==== Local Variables ====*/ register int i,arg; char argstr[30]; /*==== 1. Setup optional and convenience argument defaults. ====*/ feature_adjustment_rate = 0.2; greedy = FALSE; testrate = 100; testlast = FALSE; /*==== 2. Stop now if number of seeds is illogical ====*/ if ((number_of_seeds < 1) || (number_of_seeds > MAX_NUMBER_OF_TRAINING_INSTANCES)) { printf("Fatal error: Number of seeds must be between 1 and %d\n", MAX_NUMBER_OF_TRAINING_INSTANCES); return(FALSE); } /*==== 3. Process the options. ====*/ for(i=NUMBER_OF_REQUIRED_INPUTS+1; i 20000)) { printf("Test rate argument x: 1 <= x <= 20000.\n"); return(FALSE); } else { testrate = arg; i++; } } } else if (strcmp(*(argument+i),"-testlast") == 0) testlast = TRUE; else if (strcmp(*(argument+i),"-greedy") == 0) greedy = TRUE; else if (strcmp(*(argument+i),"-far") == 0) { if (i == (argc)) { printf("Feature adjustment rate option has integer argument\n"); return(FALSE);} else { arg = atoi(*(argument+i+1)); if ((arg<1) || (arg>100)) { printf("Feature adjustment rate value not in [1,100]\n"); return(FALSE); } else { feature_adjustment_rate = 0.01 * (double)arg; i++; } } } else { printf("Unknown argument to NGE: %s\n",*(argument+i)); return(FALSE); } } /*==== 4. Done ====*/ return(TRUE); } /* ====================================================================== Runs the experiment ====================================================================== */ int experiment() { /*==== Subfunctions ====*/ int initialization(); extern void train_and_test(); /* training.c */ /*==== 1. Initialize ====*/ printf("Initializing...\n"); if (initialization() == FALSE) { printf("Fatal error during initialization.\n"); return(FALSE); } /*==== 2. Go for it ====*/ printf("Training...\n"); train_and_test(); /*==== 3. Done ====*/ printf("Finished Experiment.\n"); return(TRUE); } /* ====================================================================== Initializes system variables and windows. ====================================================================== */ int initialization() { /*==== Subfunctions ====*/ extern int read_data_format(); /*==== Utility ====*/ extern int read_and_normalize_data(); /*==== Utility ====*/ /*==== Local variables ====*/ register int i; /*==== 1. Set Simple variables ====*/ my_infinity = 999999.0; /*==== Occasional usage ====*/ number_of_exemplars = 0; number_of_attributes = 0; /*==== 2. Initialize information for setting exemplar weights ====*/ for(i=0; i num_training_instances) { printf("Fatal error: Number of seeds must be larger than "); printf("number of training instances.\n"); printf("Currently, they are %d and %d respectively.\n", number_of_seeds,num_training_instances); return(FALSE); } } /*==== 4. They're not all used as seeds, I hope ====*/ for(i=0; i