/* ====================================================================== David W. Aha NGE: training.c: Training/testing procedures for NGE ====================================================================== */ #include "nge.h" /* ====================================================================== Seeding, training, and testing. ====================================================================== */ void train_and_test() { /*==== Subfunctions ====*/ void seed_exemplar_base(); int process_instance(); extern void print_general_info(), /*==== Printing ====*/ print_classification_results(), /*==== Printing ====*/ print_feature_weights(); /*==== Local Variables ====*/ FILE *output_file; register int num_processed=number_of_seeds, inst_num, attribute; /*==== 1. Setup ====*/ output_file = fopen(outputfile,"w"); print_general_info(output_file); seed_exemplar_base(); for(attribute=0; attributeexemplars[e][a][UPPER]) sum += square(feature_weights[a] * (instance[a]-exemplars[e][a][UPPER])); } } distances[e] = ((float)number_of_uses[e]/(float)number_correct[e]) * (float)sqrt((double)sum); /* if (mode == TESTING) { printf("yes, sum is %4.2f\n",sum); print_feature_weights(); print_instance(inst_num,TESTING); print_exemplar_distance(e); printf("Testing distances[%d] = %4.2f\n",e,distances[e]); printf("(Uses/Correct)*sqrt(Sum) is (%d/%d)/sqrt(%4.2f)\n", number_of_uses[e],number_correct[e], sum); } */ /*==== 1.2 Insert into nearest_neighbors[] array if needed ====*/ if ((num_stored == 0) || (distances[e] < distances[nearest_neighbors[0]])) num_stored = insert(e,0,num_stored); else if ((num_stored == 1) || (distances[e] <= distances[nearest_neighbors[1]])) num_stored = insert(e,1,num_stored); } return(num_stored); } /* ====================================================================== Inserts an exemplar index into the nearest_neighbors[] array. Returns the number currently stored after the insertion. Cares only about the ordering of the nearest 2 exemplars and those tied with it (wrt distance). ====================================================================== */ int insert(exemplar_num,insertion_location,num_stored) register int exemplar_num, insertion_location, num_stored; { /*==== Local variables ====*/ register int i; if (insertion_location == 0) { for(i=num_stored; i>0; i--) nearest_neighbors[i] = nearest_neighbors[i-1]; nearest_neighbors[0] = exemplar_num; if ((num_stored >= 2) && (distances[nearest_neighbors[1]] < distances[nearest_neighbors[2]])) return(2); else return(num_stored+1); } else /*==== insertion_location is 1 ====*/ { if ((num_stored == 1) || (distances[exemplar_num] < distances[nearest_neighbors[1]])) { nearest_neighbors[1] = exemplar_num; return(2); } else /*==== (num_stored > 1) and it's a tie ====*/ { nearest_neighbors[num_stored] = exemplar_num; return(num_stored+1); } } } /* ====================================================================== Classifies the given instance, using the info in distances[]. Updates hyperrectangles and weights appropriately. ====================================================================== */ void classify_and_update(inst_num,num_stored) register int inst_num, num_stored; { /*==== Subfunctions ====*/ void update(), insert_exemplar(), find_nearest_exemplar(); void print_instance(), print_exemplars_distances(); /*==== Local variables ====*/ register int result; /*==== 0. Setup (temporary) ====*/ /* printf("\n"); print_instance(inst_num,TRAINING); print_exemplars_distances(); */ /*==== 1. Classify and update wrt nearest exemplar ====*/ find_nearest_exemplar(0,num_stored); /*==== In nearest_neighbors[0] ====*/ if (class[nearest_neighbors[0]] == (int)training_instances[inst_num][number_of_attributes]) { /*==== 1.1 Correct classification ====*/ update(nearest_neighbors[0],inst_num,CORRECT); result = CORRECT; } else { /*==== 1.2 Incorrect classification ====*/ update(nearest_neighbors[0],inst_num,INCORRECT); result = INCORRECT; } /*==== 2. If not greedy, process also with 2nd nearest exemplar ====*/ if (!greedy && (number_of_exemplars > 1)) { find_nearest_exemplar(1,num_stored); if (class[nearest_neighbors[1]] == (int)training_instances[inst_num][number_of_attributes]) { /*==== 1.1 Correct classification ====*/ update(nearest_neighbors[1],inst_num,CORRECT); result = CORRECT; } else { /*==== 1.2 Incorrect classification ====*/ update(nearest_neighbors[1],inst_num,INCORRECT); result = INCORRECT; } } /*==== 3. If incorrect classification, then store it ====*/ if (result == INCORRECT) insert_exemplar(inst_num); } /* ====================================================================== Finds nearest or second-nearest exemplar (determined by given index). Assumes that there are at least "index+1" numbers of stored exemplars. ====================================================================== */ void find_nearest_exemplar(index,num_stored) register int index, num_stored; { /*==== Subfunctions ====*/ float area_of_exemplar(); /*==== Local variables ====*/ register int i, num_tied=1, original, selected; int tied_exemplar[MAX_NUMBER_OF_EXEMPLARS]; float area_tied_exemplars, area; if ((num_stored >= (index+1)) && (distances[nearest_neighbors[index]] < distances[nearest_neighbors[index+1]])) /*==== 1. Nothing to do; no tie here ====*/ return; else { /*==== 2.1 Find set of tied exemplars ====*/ tied_exemplar[0] = original = nearest_neighbors[index]; area_tied_exemplars = area_of_exemplar(tied_exemplar[0]); for(i=index+1; i= training_instances[inst_num][a])) return(TRUE); else return(FALSE); } /* ====================================================================== Updates an attribute weight ====================================================================== */ void update_attribute_weight(match_result,classification_result,attribute_num) register int match_result, classification_result, attribute_num; { if (((classification_result == CORRECT) && (match_result == TRUE)) || ((classification_result == INCORRECT) && (match_result == FALSE))) feature_weights[attribute_num] *= (1.0 + feature_adjustment_rate); else feature_weights[attribute_num] *= (1.0 - feature_adjustment_rate); } /* ====================================================================== Updates the size of the exemplar's hyperrectangle after a correct classification guess. 1 attribute at a time. e=exemplar_num and a=attribute_num. ====================================================================== */ void update_hypperrectangle(e,inst_num,a) register int e,inst_num,a; { if (attribute_type[a] == SYMBOLIC_TYPE) exemplars[e][a][(int)training_instances[inst_num][a]] = INCLUDED; else /*==== Numeric-valued attribute ====*/ { if (exemplars[e][a][LOWER] > training_instances[inst_num][a]) exemplars[e][a][LOWER] = training_instances[inst_num][a]; if (exemplars[e][a][UPPER] < training_instances[inst_num][a]) exemplars[e][a][UPPER] = training_instances[inst_num][a]; } } /* ====================================================================== Selects the seeds to use to initialize the exemplar base. ====================================================================== */ void seed_exemplar_base() { /*==== Subfunctions ====*/ void insert_exemplar(); /*==== Local variables ====*/ register int i, seed; /*==== Select the seeds ====*/ for(i=0; i