/* Copyright (c) 1993 by The Johns Hopkins University */
 
/* 

 PEBLS:  Parallel Exemplar-Based Learning System


 FILE:  INIT.C:  Initialization Routines 

*/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "config.h"
#include "pebls.h"


extern config_type 	CONFIG;
extern instance_type 	data[INSTANCES_MAX];



/* ------------------------------------------------------------ */
/* INITIALIZE_DATA:  Put every entry of the global instance     */
/*   table `data` into its default state.                       */
/* INPUTS:  None.                                               */
/* OUTPUT:  None (all INSTANCES_MAX entries of `data` are       */
/*          overwritten).                                       */

void initialize_data(void)
{
    int slot = INSTANCES_MAX;

    /* Entries are independent of each other, so walk the table */
    /* from the last slot down to the first.                    */
    while (slot-- > 0)
    {
        /* Status flags. */
        data[slot].trained         = FALSE;
        data[slot].weighted        = FALSE;

        /* Exemplar weight and the two counters that start at one. */
        data[slot].used            = 1;
        data[slot].correct         = 1;
        data[slot].weight          = 1.0;

        /* Class labels and the classification error tally. */
        data[slot].classify_errors = 0;
        data[slot].class_pp        = 0;
        data[slot].class_nearest   = 0;
        data[slot].class_true      = 0;
    }
}



/* ------------------------------------------------------------ */
/* INITIALIZE_CONFIGURATION:  Load the global CONFIG structure  */
/*   with the defaults that are in effect before any entry of   */
/*   the PCF has been processed.                                */
/* INPUTS:  None.                                               */
/* OUTPUT:  None (CONFIG is overwritten).                       */

void initialize_configuration(void)
{
    int feature;

    /* Modes and switches. */
    CONFIG.operating_mode     = TEST;
    CONFIG.training_mode      = UNKNOWN;
    CONFIG.output_mode        = AVERAGES_ONLY;
    CONFIG.nearest_voting     = MAJORITY;
    CONFIG.exemplar_weighting = OFF;
    CONFIG.feature_weighting  = OFF;
    CONFIG.post_processing    = OFF;
    CONFIG.debug              = OFF;

    /* Description of the data set; nothing is known yet. */
    strcpy(CONFIG.data_file,  "");
    CONFIG.data_format   = UNKNOWN;
    CONFIG.value_spacing = UNKNOWN;
    CONFIG.classes       = UNKNOWN;
    CONFIG.features      = UNKNOWN;
    CONFIG.values        = UNKNOWN;
    CONFIG.common_values = UNKNOWN;

    /* Instance bookkeeping. */
    CONFIG.instances          = 0;
    CONFIG.training_instances = 0;
    CONFIG.test_instances     = 0;
    CONFIG.training_size      = 1.00;

    /* Numeric parameters. */
    CONFIG.trials           = 1;
    CONFIG.nearest_neighbor = 1;
    CONFIG.K = 1.0f;
    CONFIG.R = 2.0f;

    /* Every feature starts out with unit weight. */
    feature = 0;
    while (feature < FEATURES_MAX)
    {
        CONFIG.feature_weights[feature] = 1.00;
        feature++;
    }
}






/* ------------------------------------------------------------ */
/* CHECK_CONFIGURATION:  Checks to make sure that the config-   */
/*  uration is correct and legal.				*/

void check_configuration(void)
{
    float train_size = CONFIG.training_size;
    int f;

    if ((train_size != (float) UNKNOWN) &&
	((train_size <= 0.0) ||	(train_size > 1.0)))  error(TRAIN_SIZE_ERR, NULL);

    if (CONFIG.trials > TRIALS_MAX) error(TRIALS_ERR, NULL);
    if (CONFIG.classes > CLASSES_MAX) error(CLASSES_ERR, NULL);
    if (CONFIG.features> FEATURES_MAX) error(FEATURES_ERR, NULL);
    if (CONFIG.values >  VALUES_MAX) error(VALUES_ERR, NULL);
    
    if (CONFIG.common_values) 
 	for (f=0; f<FEATURES_MAX; f++)
	    CONFIG.nvalues[f] = CONFIG.values;

    if (CONFIG.nearest_neighbor < 1) error(K_NEIGHBOR_ERR, NULL);
    if (CONFIG.nearest_neighbor > K_NEIGHBOR_MAX) error(K_NEIGHBOR2_ERR, NULL);


    if ((CONFIG.nearest_voting < 60) || (CONFIG.nearest_voting > 69))
	error(VOTING_ERR, NULL);

    if ((CONFIG.exemplar_weighting != OFF) &&
	((CONFIG.exemplar_weighting < 70) || (CONFIG.exemplar_weighting > 79)))
      error(UNK_EWEIGHT_ERR, NULL);

    if ((CONFIG.feature_weighting != OFF) &&
	((CONFIG.feature_weighting < 80) || (CONFIG.feature_weighting > 89)))
      error(UNK_FWEIGHT_ERR, NULL);

    if ((CONFIG.post_processing != OFF) &&
	((CONFIG.post_processing < 90) || (CONFIG.post_processing > 99)))
      error(UNK_POSTPROC_ERR, NULL);


}




/* ------------------------------------------------------------ */
/* CONSTANT_TRANSLATE:  Translate constant string to 		*/
/*   corresponding constant value.				*/

int constant_translate(char *value)
{
     if (strcasecmp(value,"ON")==0) return ON;
     else if (strcasecmp(value,"OFF")==0) return OFF;
     else if (strcasecmp(value,"TRUE")==0) return TRUE;
     else if (strcasecmp(value,"FALSE")==0) return FALSE;
     else if (strcasecmp(value, "TEST")==0) return TEST;
     else if (strcasecmp(value, "CLASSIFY")==0) return CLASSIFY;
     else if (strcasecmp(value,"STANDARD")==0) return STANDARD;
     else if (strcasecmp(value,"SUBUNITS")==0) return SUBUNITS;
     else if (strcasecmp(value,"SUBSET")==0) return SUBSET;
     else if (strcasecmp(value,"SPECIFIED_GROUP")==0) return SPECIFIED_GROUP;
     else if (strcasecmp(value,"LEAVE_ONE_OUT")==0) return LEAVE_ONE_OUT;
     else if (strcasecmp(value,"AVERAGES_ONLY")==0) return AVERAGES_ONLY;
     else if (strcasecmp(value,"DETAILED")==0) return DETAILED;
     else if (strcasecmp(value,"COMPLETE")==0) return COMPLETE;
     else if (strcasecmp(value,"PROTEIN_STANDARD")==0) return PROTEIN_STANDARD;
     else if (strcasecmp(value,"PROTEIN_SMOOTH")==0) return PROTEIN_SMOOTH;
     else if (strcasecmp(value,"PROTEIN_SMOOTH_ONLY")==0) return PROTEIN_SMOOTH_ONLY;
     else if (strcasecmp(value,"TRIANGLE")==0) return TRIANGLE;
     else if (strcasecmp(value,"GENETIC")==0) return GENETIC;
     else if (strcasecmp(value,"USED_CORRECT")==0) return USED_CORRECT;
     else if (strcasecmp(value,"ONE_PASS")==0) return ONE_PASS;
     else if (strcasecmp(value,"INCREMENT")==0) return INCREMENT;
     else if (strcasecmp(value,"MAJORITY")==0) return MAJORITY;
     else if (strcasecmp(value,"WEIGHTED_DISTANCE")==0) return WEIGHTED_DISTANCE;
     else if (strcasecmp(value,"THRESHOLD")==0) return THRESHOLD;
     else if (strcasecmp(value,"USER_EXEMPLAR_1")==0) return USER_EXEMPLAR_1;
     else if (strcasecmp(value,"USER_EXEMPLAR_2")==0) return USER_EXEMPLAR_2;
     else if (strcasecmp(value,"USER_EXEMPLAR_3")==0) return USER_EXEMPLAR_3;
     else if (strcasecmp(value,"USER_FEATURE_1")==0) return USER_FEATURE_1;
     else if (strcasecmp(value,"USER_FEATURE_2")==0) return USER_FEATURE_2;
     else if (strcasecmp(value,"USER_FEATURE_3")==0) return USER_FEATURE_3;
     else if (strcasecmp(value,"USER_POSTPROC_1")==0) return USER_POSTPROC_1;
     else if (strcasecmp(value,"USER_POSTPROC_2")==0) return USER_POSTPROC_2;
     else if (strcasecmp(value,"USER_POSTPROC_3")==0) return USER_POSTPROC_3;
     else error(UNK_CONST_ERR, NULL);

}



/* ------------------------------------------------------------ */
/* PROCESS_CONFIGURATION_ENTRY:  Process a line in the		*/
/*    configuration file.					*/
/* INPUTS:  line = A line from the PCF				*/
/* OUTPUT:  None.  						*/

void process_configuration_entry(char line[])
{  
    char delim[5];
    char *config_param;
    char *config_value;
    int  value_counter, class_counter, feature_index;
    int i, classes = CONFIG.classes;

    printf("%s",line);

    strcpy(delim, " =\t\n");  
    config_param = (char *) strtok(line,delim);  




					/* CASE I: Blank Line */

    if (config_param == NULL)
	    /* do nothing (blank) */ ;

    else if (config_param[0] == '#')
          /* do nothing (comment) */ ;



					/* CASE II:  Class names */

    else if (strcasecmp(config_param, "CLASS_NAMES")==0)
    {
	class_counter = 0;
	while ((config_value = strtok(NULL, delim)) != NULL)
	{
	    strcpy((char *) CONFIG.class_name[class_counter], config_value);
	    classtab_insert(config_value, class_counter++);
	}
    }






					/* CASE III: Common values */

    else if (strcasecmp(config_param,"COMMON_VALUES")==0)
    {
	CONFIG.common_values = TRUE;
	if (CONFIG.data_format == SUBUNITS) value_counter = 1;
	else value_counter = 0;

	while ((config_value = strtok(NULL, delim)) != NULL)
	    symtab_insert(0, config_value, value_counter++);
	CONFIG.values = value_counter;
    }





					/* CASE IV: Unique Feature Values */

    else if (strcasecmp(config_param, "FEATURE_VALUES")==0)
    {
	CONFIG.common_values = FALSE;
	value_counter = 0;
	feature_index = atoi(config_value = strtok(NULL, delim)) - 1;
	if ((feature_index+1 > CONFIG.features) ||
	    (feature_index < 0))
	  error(FEATURE_VALUE_ERR, NULL);
	while ((config_value = strtok(NULL, delim)) != NULL)
	    symtab_insert(feature_index, config_value, value_counter++);
	CONFIG.nvalues[feature_index] = value_counter;
    }





					/* CASE V: Feature Parameters */


    else if (strcasecmp(config_param, "FEATURE_WEIGHT")==0)
    {
	int feature_index = atoi(config_value = strtok(NULL, delim)) - 1;
	float f_weight =  atof(strtok(NULL, delim));

	if ((feature_index+1 > CONFIG.features) ||
	    (feature_index < 0))
	  error(FEATURE_WEIGHT_ERR, NULL);

	CONFIG.feature_weights[feature_index] = f_weight;
	CONFIG.feature_weighting = USER_DEFINED;
    }
         


					/* CASE VI: Simple Parameters */
    else
    {
	config_value = strtok(NULL, delim);

	if (strcasecmp(config_param, "OPERATING_MODE")==0)
	   CONFIG.operating_mode = constant_translate(config_value);

	else if (strcasecmp(config_param, "OUTPUT_MODE")==0)
	   CONFIG.output_mode = constant_translate(config_value);

	else if (strcasecmp(config_param, "TRAINING_MODE")==0)
	   CONFIG.training_mode = constant_translate(config_value);

        else if (strcasecmp(config_param, "TRAINING_SIZE")==0)
           CONFIG.training_size = atof(config_value);

	else if (strcasecmp(config_param, "DATA_FORMAT")==0)
	   CONFIG.data_format = constant_translate(config_value);

	else if (strcasecmp(config_param, "DATA_FILE")==0)
	   strcpy(CONFIG.data_file, config_value);

        else if (strcasecmp(config_param, "FEATURES")==0)
	   CONFIG.features = atof(config_value);


	else if (strcasecmp(config_param, "VALUE_SPACING")==0)
	   CONFIG.value_spacing = constant_translate(config_value);

	else if (strcasecmp(config_param, "EXEMPLAR_WEIGHTING")==0)
	   CONFIG.exemplar_weighting = constant_translate(config_value);

	else if (strcasecmp(config_param, "FEATURE_WEIGHTING")==0)
	{
	   CONFIG.feature_weighting = constant_translate(config_value);
	   if (CONFIG.feature_weighting == GENETIC)
	   {
		CONFIG.genetic_count = atoi(strtok(NULL,delim));
		CONFIG.genetic_adj   = atof(strtok(NULL, delim));
	   }
	}

	else if (strcasecmp(config_param, "POST_PROCESSING")==0)
	{
	    CONFIG.post_processing = constant_translate(config_value);
	    if ((CONFIG.post_processing == PROTEIN_SMOOTH) ||
		(CONFIG.post_processing == PROTEIN_SMOOTH_ONLY))
		CONFIG.smooth_window = atoi(strtok(NULL,delim));
	}

	else if (strcasecmp(config_param, "DEBUG")==0)
	   CONFIG.debug = constant_translate(config_value);

	else if (strcasecmp(config_param, "CLASSES")==0)
	    CONFIG.classes = atoi(config_value);

	else if (strcasecmp(config_param, "VALUES")==0)
	    CONFIG.values = atoi(config_value);

	else if (strcasecmp(config_param, "K")==0)
	    CONFIG.K = atof(config_value);

	else if (strcasecmp(config_param, "R")==0)
	    CONFIG.R = atof(config_value);

	else if (strcasecmp(config_param, "TRIALS")==0)
	    CONFIG.trials = atoi(config_value);

	else if (strcasecmp(config_param, "NEAREST_NEIGHBOR")==0)
	    CONFIG.nearest_neighbor = atoi(config_value);

	else if (strcasecmp(config_param, "NEAREST_VOTING")==0)
        {
	    CONFIG.nearest_voting = constant_translate(config_value);

	    if (CONFIG.nearest_voting == THRESHOLD)
	    {
	        for (i=0; i<classes; i++)
		{
		    CONFIG.precedence[i] = classtab_lookup(strtok(NULL,delim));
		    CONFIG.threshold[i] = atoi(strtok(NULL,delim));
		    printf("%d %d \n", CONFIG.precedence[i], CONFIG.threshold[i]);
		}
	    }
	}

	else error(UNK_PARAMETER_ERR, config_param);
    }
}






/* ------------------------------------------------------------ */
/* READ_CONFIGURATION_FILE:  Load information contained in      */
/*   configuration file (filename) into main configuration type */
/*   CONFIG is first reset to its defaults, then every line of  */
/*   the file is handed to process_configuration_entry(), and   */
/*   finally the result is validated by check_configuration().  */
/* INPUTS:  filename = name of the configuration file           */
/* OUTPUT:  None.  If the file cannot be opened, a message is   */
/*          printed and the program exits with status 1.        */

void read_configuration_file(char filename[])
{
    FILE *config_file;
    char line[LINE_MAX];

    initialize_configuration();

    config_file = fopen(filename, "r");
    if (config_file == NULL)
    {
        printf("%s does not exist.\n", filename);
        exit(1);
    }

    /* fgets() stores at most LINE_MAX-1 characters per call, so the */
    /* remainder of a longer line arrives as a separate entry.       */
    while (fgets(line, LINE_MAX, config_file) != NULL)
        process_configuration_entry(line);

    /* Release the stream; it was previously left open. */
    fclose(config_file);

    check_configuration();
}




/* ------------------------------------------------------------ */
/* INITIALIZE:  Bring the PEBLS program into its start state.   */
/*   Seeds the random number generator, initializes the class   */
/*   table, symbol table, output module and instance table,     */
/*   reads the configuration file, invokes the reader for the   */
/*   configured data format and, if requested, sets the feature */
/*   weights.                                                   */
/* INPUTS:  Name of configuration file. (.PCF)                  */
/* OUTPUT:  None.                                               */

void initialize(char filename[])
{
    /* Seed rand() from the wall clock. */
    srand((unsigned) time(NULL));

    initialize_classtab();
    initialize_symtab();
    initialize_output();
    initialize_data();

    read_configuration_file(filename);

    /* Only the STANDARD and SUBUNITS formats have a reader; for any */
    /* other format no reader is invoked.                            */
    if (CONFIG.data_format == STANDARD)
        standard_reader();
    else if (CONFIG.data_format == SUBUNITS)
        subunit_reader();

    /* Unless a training group was specified, all instances count */
    /* as training instances.                                     */
    if (!(CONFIG.training_mode == SPECIFIED_GROUP))
        CONFIG.training_instances = CONFIG.instances;

    if (!(CONFIG.feature_weighting == OFF))
        set_feature_weights(CONFIG.feature_weighting);
}

