/* ======================================================================
   David W. Aha
   ibl.h: Datatypes for the implementation of the IBL algorithms 
   Topic: IB1, IB2, IB3, and now IB4
   July, 1990
   ====================================================================== */

/*==== Storage-class switch for every global declared in this header. ====*/
/*==== A source file that defines MAIN before including this header gets
       EXTERN expanded to nothing, so the declarations are read as is.
       All other source files get EXTERN expanded to `extern`. ====*/
#ifndef MAIN
#define EXTERN extern
#else
#define EXTERN
#endif

/*==== Accessing Library Files ====*/
#include <stdio.h>
#include <math.h>
#include <strings.h>

/*==== Compile-time upper limits; several of them size the arrays
       declared further down in this header. ====*/
#define MAX_NUMBER_OF_ATTRIBUTES      220
#define MAX_NUMBER_OF_PREDICTORS      219
#define MAX_NUMBER_OF_PREDICTEES      10
#define MAX_NUMBER_OF_INSTANCES       1650
#define MAX_NUMBER_OF_TEST_INSTANCES  400
#define MAX_NUMBER_OF_VALUES          10
#define MAXLINE                       2000
#define MAX_NAME_LENGTH               1000
/*==== Author's note: a size of 20 caused a problem for filenames. ====*/
#define MAX_FILENAME_SIZE             100
#define MAX_VALUE_OF_K                25

/*==== Simple constants ====*/
#define NUMBER_OF_REQUIRED_INPUTS  6

/*==== Attribute type codes ====*/
#define NUMERIC_TYPE               0
#define NOMINAL_TYPE               1
#define BOOLEAN_TYPE               2

/*==== Truth values ====*/
#define FALSE                      0
#define TRUE                       1

/*==== Indices into the counts array ====*/
#define INCORRECT                  0
#define CORRECT                    1

/*==== Indices for the print_recent... arrays ====*/
#define REPORT                     0
#define TEST                       1

/*==== Choices for the weight-updating method of IB4 ====*/
#define PROBABILITY_WEIGHTS        0
#define SIMPLE                     1

/*==== ---------------- Instance Structure Definitions ---------- ====*/
/*==== Attribute bookkeeping, one declaration per line. ====*/
EXTERN int number_of_attributes;                      /* Synonym */
EXTERN int attribute_type[MAX_NUMBER_OF_ATTRIBUTES];  /* One of the 3 types */
EXTERN int num_values[MAX_NUMBER_OF_ATTRIBUTES];      /* For nominals */
EXTERN int predictee[MAX_NUMBER_OF_PREDICTEES];       /* Which to predict */
EXTERN int predictor[MAX_NUMBER_OF_PREDICTORS];       /* Which to use */
EXTERN int number_of_predictors;
EXTERN int number_of_predictees;

/*==== Name tables.  predictee_value_name is used only when
       ib4 && overlap is true. ====*/
EXTERN char value_name[MAX_NUMBER_OF_ATTRIBUTES][MAX_NUMBER_OF_VALUES][MAX_NAME_LENGTH];
EXTERN char predictee_value_name[MAX_NUMBER_OF_VALUES][MAX_NAME_LENGTH];

/*==== Record representation of one instance, plus a pointer alias. ====*/
typedef struct instance
{
   /* Attribute values; room for MAX_NUMBER_OF_ATTRIBUTES of them */
   double attributes[MAX_NUMBER_OF_ATTRIBUTES];
   /* An identifier into the instances array */
   int instance_number;
} instance, *ptr_instance;

/*==== Instance-holding and related arrays ====*/
EXTERN ptr_instance raw_instances[MAX_NUMBER_OF_INSTANCES],  /* Unnormalized */
                    instances[MAX_NUMBER_OF_INSTANCES],      /* Normalized */
                    raw_test_instances[MAX_NUMBER_OF_TEST_INSTANCES];
EXTERN int data_set[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_INSTANCES],
           num_uses[MAX_NUMBER_OF_INSTANCES],
           number_of_test_instances;

/*==== ---------------- System Inputs ----------------------- ====*/
/*==== File name buffers, each MAX_FILENAME_SIZE characters. ====*/
EXTERN char descriptionfile[MAX_FILENAME_SIZE]; /* Documentation file on instances */
/* NOTE(review): the original comment on namesfile repeated the one on
   descriptionfile word for word; confirm what this file actually holds. */
EXTERN char namesfile[MAX_FILENAME_SIZE];       /* Documentation file on instances */
EXTERN char trainingfile[MAX_FILENAME_SIZE];    /* Training set file name */
EXTERN char testingfile[MAX_FILENAME_SIZE];     /* Test file name */
EXTERN char outputfile[MAX_FILENAME_SIZE];      /* Output file name */

/*==== Optional system inputs.  The defaults quoted below are the ones
       documented by the original author; nothing is initialized in this
       header. ====*/
EXTERN int testrate;           /* Frequency of testing in instances */
EXTERN int reportrate;         /* Frequency of reporting in instances */
EXTERN int startup;            /* Used to control when to output */
EXTERN int ib1;                /* Default: off */
EXTERN int ib2;                /* Default: off */
EXTERN int ib3;                /* Default: on */
EXTERN int ib4;                /* Default: off */
EXTERN int k;                  /* Default: 1 */
EXTERN int overlap;            /* Default: FALSE */
EXTERN int best_concept_only;  /* Default: FALSE */
EXTERN int printweights;       /* Default: FALSE */
EXTERN int testlast;           /* Default: FALSE */
EXTERN int norm_none;          /* Default: FALSE */
EXTERN int norm_linear;        /* Default: TRUE */
EXTERN int norm_sd;            /* Default: FALSE */
EXTERN int missing_maxdiff;    /* Default: TRUE */
EXTERN int missing_ave;        /* Default: FALSE */
EXTERN int missing_ignore;     /* Default: FALSE */
EXTERN int storeall;           /* Default: FALSE */
EXTERN int multiline;          /* Default: FALSE */

EXTERN double signif_accept;   /* Confidence interval above class freq */
EXTERN double signif_drop;     /* Confidence interval below class freq */
EXTERN double learning_rate;   /* Default: 0.10, range [0.01,0.25] */

/*==== The actual confidence levels (see print_general_info) ====*/
EXTERN double signif_accept_level;
EXTERN double signif_drop_level;

/*==== Counters used during classification.  The *_in_alg variables are
       totals; the *_in_concept arrays hold one entry per description. ====*/

/*==== Number of concept description instances ====*/
EXTERN int num_data_in_alg;
EXTERN int num_data_in_concept[MAX_NUMBER_OF_PREDICTEES];

/*==== Number of training instances (counting positive examples only) ====*/
EXTERN int num_train_with_value[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_VALUES];
EXTERN int num_train_in_concept[MAX_NUMBER_OF_PREDICTEES];

/*==== Number of testing query instances ====*/
EXTERN int num_queries_in_concept[MAX_NUMBER_OF_PREDICTEES];
EXTERN int num_queries_in_alg;

/*==== Number of correct/incorrect classifications ====*/
EXTERN int num_correct_in_alg;
EXTERN int num_correct_in_concept[MAX_NUMBER_OF_PREDICTEES];
EXTERN int num_incorrect_in_alg;
EXTERN int num_incorrect_in_concept[MAX_NUMBER_OF_PREDICTEES];

/*==== Number of instances dropped per description ====*/
EXTERN int num_dropped_in_alg;
EXTERN int num_dropped_in_concept[MAX_NUMBER_OF_PREDICTEES];

/*==== Number of correct/incorrect classifications during training ====*/
EXTERN int num_training_correct_in_alg;
EXTERN int num_training_correct_in_concept[MAX_NUMBER_OF_PREDICTEES];
EXTERN int num_training_incorrect_in_alg;
EXTERN int num_training_incorrect_in_concept[MAX_NUMBER_OF_PREDICTEES];

/*==== "Previous" copies of the training counters.  Per the original
       note, the dimension of 2 means: stdio or file. ====*/
EXTERN int num_training_correct_in_alg_previous[2];
EXTERN int num_training_correct_in_concept_previous[2][MAX_NUMBER_OF_PREDICTEES];
EXTERN int num_training_incorrect_in_alg_previous[2];
EXTERN int num_training_incorrect_in_concept_previous[2][MAX_NUMBER_OF_PREDICTEES];

/*==== Percent of test instances correctly classified ====*/
EXTERN double percent_correct_in_alg;
EXTERN double percent_correct_in_concept[MAX_NUMBER_OF_PREDICTEES];

/*==== Percent of training instances saved ====*/
EXTERN double percent_data_in_alg;
EXTERN double percent_data_in_concept[MAX_NUMBER_OF_PREDICTEES];

/*==== Fixed-size (40 char) string type for descriptive names, and the
       string associated with the distinct algorithms. ====*/
typedef char algorithmname[40];

EXTERN algorithmname algorithm_string;

/*==== Training instance counter ====*/
EXTERN int instance_number;

/*==== Special values ====*/
EXTERN double unknown_value;       /* Missing values ID */
EXTERN double my_infinity;         /* Highest value */

/*==== Initial values given to a newly saved instance ====*/
EXTERN double initusages;
EXTERN double initcorrectcount;
EXTERN double initincorrectcount;

/*==== Per-description, per-instance records.  The last dimension of
       counts is indexed by INCORRECT / CORRECT. ====*/
EXTERN double counts[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_INSTANCES][2];
EXTERN double usages[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_INSTANCES];

/*==== Working storage for the similarity-ordering procedures ====*/
EXTERN int num_rejected;
EXTERN int rejected_dataid[MAX_NUMBER_OF_INSTANCES];
EXTERN int num_accepted;
EXTERN int accepted_dataid[MAX_NUMBER_OF_INSTANCES];

EXTERN double accepted_distance[MAX_NUMBER_OF_INSTANCES];
EXTERN double rejected_distance[MAX_NUMBER_OF_INSTANCES];

/*==== Used during testing: only a subset of instances is acceptable ====*/
EXTERN int num_accepted_in_concept[MAX_NUMBER_OF_PREDICTEES];
EXTERN int accepted_id[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_INSTANCES];

/*==== Per-attribute bounds, needed to normalize instances in this domain ====*/
EXTERN double scalemin[MAX_NUMBER_OF_ATTRIBUTES];
EXTERN double scalemax[MAX_NUMBER_OF_ATTRIBUTES];

/*==== Distances between the current training instance and all others ====*/
EXTERN double distances[MAX_NUMBER_OF_INSTANCES];

/*==== Used in training.c: update_classification_records ====*/
/* NOTE(review): the capacity of 50 is an unnamed literal; confirm that
   callers never queue more than 50 entries. */
EXTERN int to_be_dropped[50];
EXTERN int num_to_be_dropped;

/*==== Acceptance and drop thresholds, per description and value.
       Read in concept_instance_accepted and noisy_instance.
       Written in update_confidence_intervals, which is called by
       update_classification_records and train_and_test. ====*/
EXTERN double class_accept_threshold[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_VALUES];
EXTERN double class_drop_threshold[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_VALUES];

/*==== For similarity-modifying algorithms (IB4) ====*/
/*==== Numerator ====*/
EXTERN double attribute_weight_total[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_PREDICTORS];
/*==== Denominator ====*/
EXTERN double weight_size[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_PREDICTORS];
/*==== Quotient ====*/
EXTERN double attribute_weight[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_PREDICTORS];
/*==== The real weight that is used ====*/
EXTERN double attribute_weight_used[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_PREDICTORS];
/*==== For the probability weights option for updating weights ====*/
EXTERN double conditional_probability[MAX_NUMBER_OF_PREDICTEES][MAX_NUMBER_OF_PREDICTORS];

/*==== Used in utility.c by read_description and translate_instance.
       When ib4 && overlap holds, this is smaller than
       number_of_attributes.  It exists so that number_of_attributes
       needs to be increased only once. ====*/
EXTERN int num_attributes_to_translate;

/*==== Needed by the IB4 best_concept_only option ====*/
EXTERN double probability_of_membership[MAX_NUMBER_OF_TEST_INSTANCES][MAX_NUMBER_OF_PREDICTEES];

EXTERN int weight_method;

/*==== Per-attribute data for the norm_sd option (normalizing by the
       standard deviation). ====*/
EXTERN double sum_values[MAX_NUMBER_OF_ATTRIBUTES];
EXTERN double sum_squared_values[MAX_NUMBER_OF_ATTRIBUTES];
/* NOTE(review): declared double although the name suggests a count. */
EXTERN double number_of_known_values[MAX_NUMBER_OF_ATTRIBUTES];
EXTERN double standard_deviation[MAX_NUMBER_OF_ATTRIBUTES];

EXTERN algorithmname normalization_string;

/*==== Data kept for handling missing attribute values ====*/
EXTERN int num_predictor_training_values[MAX_NUMBER_OF_ATTRIBUTES];
EXTERN int num_training_instances_with_value[MAX_NUMBER_OF_ATTRIBUTES][MAX_NUMBER_OF_VALUES];

EXTERN double sum_predictor_values[MAX_NUMBER_OF_ATTRIBUTES];

EXTERN algorithmname missing_string;