/* ======================================================================
   David W. Aha
   ibl.h: Datatypes for the implementation of the IBL algorithms
   Topic: IB1, IB2, IB3, and now IB4
   July, 1990

   Usage: the main source file defines MAIN before including this header,
   which makes EXTERN expand to nothing so the objects below are defined
   there.  Every other source file includes it without MAIN and sees the
   same objects as `extern` declarations.
   ====================================================================== */

#ifndef IBL_H
#define IBL_H

/*==== Pre-processor commands to handle storage multi-accessing problem. ====*/
/*==== General idea: only the main source file reads these declarations
       as is.  All others read them as external declarations.            ====*/
#ifdef MAIN
#define EXTERN
#else
#define EXTERN extern
#endif

/*==== Accessing Library Files ====*/
/* NOTE(review): the three header names were missing from the copy of this
   file under review; the names below are a best guess -- confirm against
   the original distribution. */
#include <stdio.h>
#include <math.h>
#include <string.h>

/*==== Maximum settings ====*/
#define MAX_NUMBER_OF_ATTRIBUTES      220
/* One less than MAX_NUMBER_OF_ATTRIBUTES; presumably the two must be kept
   in step -- TODO confirm. */
#define MAX_NUMBER_OF_PREDICTORS      219
#define MAX_NUMBER_OF_PREDICTEES      10
#define MAX_NUMBER_OF_INSTANCES       1650
#define MAX_NUMBER_OF_TEST_INSTANCES  400
#define MAX_NUMBER_OF_VALUES          10
#define MAXLINE                       2000
#define MAX_NAME_LENGTH               1000
#define MAX_FILENAME_SIZE             100   /* 20 gave me a problem here! */
#define MAX_VALUE_OF_K                25

/*==== Simple constants ====*/
#define NUMBER_OF_REQUIRED_INPUTS     6
#define NUMERIC_TYPE                  0     /* for attributes */
#define NOMINAL_TYPE                  1     /* for attributes */
#define BOOLEAN_TYPE                  2     /* for attributes */
#define FALSE                         0
#define TRUE                          1
#define INCORRECT                     0     /* Index to counts array (incorrect) */
#define CORRECT                       1     /* Index to counts array (correct) */
#define REPORT                        0     /* Index for print_recent... arrays */
#define TEST                          1     /* Index for print_recent... arrays */

/*==== These 2 guys are for setting the weight-updating method for IB4 ====*/
#define PROBABILITY_WEIGHTS           0
#define SIMPLE                        1

/*==== ---------------- Instance Structure Definitions ---------- ====*/
EXTERN int
    number_of_attributes,                         /* Synonym */
    attribute_type[MAX_NUMBER_OF_ATTRIBUTES],     /* 1 of 3 types */
    num_values[MAX_NUMBER_OF_ATTRIBUTES],         /* For nominals. */
    predictee[MAX_NUMBER_OF_PREDICTEES],          /* Which to predict */
    predictor[MAX_NUMBER_OF_PREDICTORS],          /* Which to use */
    number_of_predictors,
    number_of_predictees;

/*==== predictee_value_name is used only when ib4 && overlap is true ====*/
EXTERN char
    value_name[MAX_NUMBER_OF_ATTRIBUTES][MAX_NUMBER_OF_VALUES][MAX_NAME_LENGTH],
    predictee_value_name[MAX_NUMBER_OF_VALUES][MAX_NAME_LENGTH];

/*==== Structure of instances. ====*/
typedef struct instance     /* A record rep'n for instances. */
{
    double attributes[MAX_NUMBER_OF_ATTRIBUTES];
    int instance_number;    /* An identifier into instances array */
} instance, *ptr_instance;

/*==== Instance-holding and related arrays ====*/
EXTERN ptr_instance
    raw_instances[MAX_NUMBER_OF_INSTANCES],            /* Unnormalized */
    instances[MAX_NUMBER_OF_INSTANCES],                /* Normalized */
    raw_test_instances[MAX_NUMBER_OF_TEST_INSTANCES];

EXTERN int
    data_set[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_INSTANCES],
    num_uses[MAX_NUMBER_OF_INSTANCES],
    number_of_test_instances;

/*==== ---------------- System Inputs ----------------------- ====*/
EXTERN char
    descriptionfile[MAX_FILENAME_SIZE],  /* Documentation file on instances */
    namesfile[MAX_FILENAME_SIZE],        /* Documentation file on instances */
    trainingfile[MAX_FILENAME_SIZE],     /* Training set file name */
    testingfile[MAX_FILENAME_SIZE],      /* Test file name */
    outputfile[MAX_FILENAME_SIZE];       /* Output file name */

/*==== Data structures for recording optional system inputs. ====*/
EXTERN int
    testrate,            /* Frequency of testing in instances */
    reportrate,          /* Frequency of reporting in instances */
    startup,             /*==== Used to control when to output ===*/
    ib1,                 /*==== Default is off ====*/
    ib2,                 /*==== Default is off ====*/
    ib3,                 /*==== Default is on ====*/
    ib4,                 /*==== Default is off ====*/
    k,                   /*==== Default is 1 ====*/
    overlap,             /*==== Default is FALSE ====*/
    best_concept_only,   /*==== Default is FALSE ====*/
    printweights,        /*==== Default is FALSE ====*/
    testlast,            /*==== Default is FALSE ====*/
    norm_none,           /*==== Default is FALSE ====*/
    norm_linear,         /*==== Default is TRUE ====*/
    norm_sd,             /*==== Default is FALSE ====*/
    missing_maxdiff,     /*==== Default is TRUE ====*/
    missing_ave,         /*==== Default is FALSE ====*/
    missing_ignore,      /*==== Default is FALSE ====*/
    storeall,            /*==== Default is FALSE ====*/
    multiline;           /*==== Default is FALSE ====*/

EXTERN double
    signif_accept,       /* Confidence interval above class freq */
    signif_drop,         /* Confidence interval below class freq */
    learning_rate;       /*==== default=0.10, range [0.01,0.25] ==*/

/*==== Recording the actual confidence levels (print_general_info) ====*/
EXTERN double
    signif_accept_level,
    signif_drop_level;

/*==== Used during classification. ====*/
EXTERN int
    /*==== # of Concept description instances (per description & total) ==*/
    num_data_in_alg,
    num_data_in_concept[MAX_NUMBER_OF_PREDICTEES],

    /*==== Number of training instances (counting positive examples only) ====*/
    num_train_with_value[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_VALUES],
    num_train_in_concept[MAX_NUMBER_OF_PREDICTEES],

    /*==== Number of testing query instances ====*/
    num_queries_in_concept[MAX_NUMBER_OF_PREDICTEES],
    num_queries_in_alg,

    /*==== Number of correct/incorrect classifications ====*/
    num_correct_in_alg,
    num_correct_in_concept[MAX_NUMBER_OF_PREDICTEES],
    num_incorrect_in_alg,
    num_incorrect_in_concept[MAX_NUMBER_OF_PREDICTEES],

    /*==== Counting the number of instances dropped per description ====*/
    num_dropped_in_alg,
    num_dropped_in_concept[MAX_NUMBER_OF_PREDICTEES],

    /*==== Counting the number of correct classifications for this concept ====*/
    num_training_correct_in_alg,
    num_training_correct_in_concept[MAX_NUMBER_OF_PREDICTEES],
    num_training_incorrect_in_alg,
    num_training_incorrect_in_concept[MAX_NUMBER_OF_PREDICTEES],
    num_training_correct_in_alg_previous[2],   /*==== 2 means: stdio or file ====*/
    num_training_correct_in_concept_previous[2][MAX_NUMBER_OF_PREDICTEES],
    num_training_incorrect_in_alg_previous[2],
    num_training_incorrect_in_concept_previous[2][MAX_NUMBER_OF_PREDICTEES];

EXTERN double
    /*==== Percent of test instances correctly classified ====*/
    percent_correct_in_alg,
    percent_correct_in_concept[MAX_NUMBER_OF_PREDICTEES],

    /*==== Percent of training instances saved ====*/
    percent_data_in_alg,
    percent_data_in_concept[MAX_NUMBER_OF_PREDICTEES];

/*==== Things having to do with the distinct algorithms. ====*/
typedef char algorithmname[40];
EXTERN algorithmname algorithm_string;

EXTERN int instance_number;    /*==== Training Instance Counter ====*/

EXTERN double
    unknown_value,             /*==== Missing Values ID ====*/
    my_infinity,               /*==== Highest Val ====*/
    initusages,                /*==== Init for newly saved instance ====*/
    initcorrectcount,          /*==== Init for newly saved instance ====*/
    initincorrectcount,        /*==== Init for newly saved instance ====*/
    /* Last index of counts is INCORRECT or CORRECT. */
    counts[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_INSTANCES][2],
    usages[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_INSTANCES];

/*==== Used during ordering of similarity procedures. ====*/
EXTERN int
    num_rejected,
    rejected_dataid[MAX_NUMBER_OF_INSTANCES],
    num_accepted,
    accepted_dataid[MAX_NUMBER_OF_INSTANCES];

EXTERN double
    accepted_distance[MAX_NUMBER_OF_INSTANCES],
    rejected_distance[MAX_NUMBER_OF_INSTANCES];

/*==== Only a subset are acceptable (used during testing) ====*/
EXTERN int
    num_accepted_in_concept[MAX_NUMBER_OF_PREDICTEES],
    accepted_id[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_INSTANCES];

/*==== Need to normalize instances in this domain. ====*/
EXTERN double
    scalemin[MAX_NUMBER_OF_ATTRIBUTES],
    scalemax[MAX_NUMBER_OF_ATTRIBUTES];

/*==== Holds distances between current training instance and all others. ====*/
EXTERN double distances[MAX_NUMBER_OF_INSTANCES];

/*==== Used in training.c: update_classification_records ====*/
EXTERN int
    to_be_dropped[50],
    num_to_be_dropped;

/*==== Used in concept_instance_accepted and noisy_instance.
       Updated in update_confidence_intervals, called by
       update_classification_records and train_and_test ====*/
EXTERN double
    class_accept_threshold[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_VALUES],
    class_drop_threshold[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_VALUES];

/*==== For Similarity-modifying algorithms (IB4) ====*/
EXTERN double
    /*==== Numerator ====*/
    attribute_weight_total[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_PREDICTORS],
    /*==== Denominator ====*/
    weight_size[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_PREDICTORS],
    /*==== Quotient ====*/
    attribute_weight[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_PREDICTORS],
    /*==== The real weight that is used ====*/
    attribute_weight_used[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_PREDICTORS],
    /*==== For the probability weights option for updating weights ====*/
    conditional_probability[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_PREDICTORS];

/*==== Used in utility.c: read_description and translate_instance.
       If ib4 && overlap, then it's different (smaller) than number
       of attributes.  Used to hide the fact that we want to increase
       the value of number_of_attributes only once!  Here. ====*/
EXTERN int num_attributes_to_translate;

/*==== IB4 best_concept_only needs ====*/
EXTERN double
    probability_of_membership[MAX_NUMBER_OF_TEST_INSTANCES][MAX_NUMBER_OF_PREDICTEES];

EXTERN int weight_method;

/*==== Norm_sd option, for normalizing by the standard deviation. ====*/
EXTERN double
    sum_values[MAX_NUMBER_OF_ATTRIBUTES],
    sum_squared_values[MAX_NUMBER_OF_ATTRIBUTES],
    number_of_known_values[MAX_NUMBER_OF_ATTRIBUTES],
    standard_deviation[MAX_NUMBER_OF_ATTRIBUTES];

EXTERN algorithmname normalization_string;

/*==== Missing attribute values ====*/
EXTERN int
    num_predictor_training_values[MAX_NUMBER_OF_ATTRIBUTES],
    num_training_instances_with_value[MAX_NUMBER_OF_ATTRIBUTES][MAX_NUMBER_OF_VALUES];

EXTERN double sum_predictor_values[MAX_NUMBER_OF_ATTRIBUTES];

EXTERN algorithmname missing_string;

#endif /* IBL_H */