/* Copyright (c) 1993 by The Johns Hopkins University */



/* UTIL.C:  Misc. Support routines including random number generation  	*/
/*          and error handler						*/



/* Written by John N. Rachlin 	*/
/* Johns Hopkins University   	*/
/* Summer 1993			*/





#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include "config.h"
#include "pebls.h"

extern instance_type data[INSTANCES_MAX];
extern config_type CONFIG;



/* ------------------------------------------------------------ */
/* F_RANDOM: Pseudo-random float scaled from rand()                */
/* INPUTS: max = value the random fraction is multiplied by        */
/* OUTPUT: max * k / 32767 for an integer k in 0..32766, so the    */
/*         fraction applied to max is always strictly below 1      */

float f_random(float max)
{
    /* rand() is folded into 0..32766 so the fraction never hits 1 */
    int raw = rand() % 32767;
    float result = max * (float) raw / 32767.0;

    return result;
}





/* ------------------------------------------------------------ */
/* I_RANDOM: Pseudo-random integer scaled from f_random()          */
/* INPUTS: max = size of the integer range                         */
/* OUTPUT: (float) max * f_random(1.0), truncated by the cast to   */
/*         int; since f_random(1.0) stays below 1 this is intended */
/*         to fall in 0..max-1 for positive max                    */

int i_random(int max)
{
    float unit = f_random(1.0);
    float scaled = (float) max * unit;

    return (int) scaled;
}



/* ------------------------------------------------------------ */
/* DEBUG: Write the integer i, followed by a newline, to stdout    */

void DEBUG(int i)
{
    fprintf(stdout, "%d\n", i);
}




/* ------------------------------------------------------------ */
/* ROUND:  Round to the nearest integer, halves away from zero     */
/* INPUTS: x = value to round                                      */
/* OUTPUT: nearest int; e.g. 2.4 -> 2, 2.5 -> 3, -1.6 -> -2        */
/*                                                                 */
/* The cast to int truncates toward zero, so adding 0.5 is only    */
/* correct for non-negative x; negative x needs 0.5 subtracted.    */
/*                                                                 */
/* NOTE(review): C99 and later declare double round(double) in     */
/* <math.h>, which this file includes.  This definition has a      */
/* different signature, so the file may be rejected by a C99+      */
/* compiler - confirm the build flags or consider renaming.        */

int round(float x)
{
    if (x < 0)
        return (int) (x - 0.5);

    return (int) (x + 0.5);
}



/* ------------------------------------------------------------ */
float corr_coeff(double p, double n, double u, double o)
{
    double x,y;

    x = (p * n - u * o);
    y = ((n+u)*(n+o)*(p+u)*(p+o));
    return ( (float) (x / sqrt (y)) );
}







/* ------------------------------------------------------------ */
/* SHUFFLE: Fill arr with a random permutation of 0..tinst-1       */
/* INPUTS: arr   = output array with room for tinst ints           */
/*         tinst = number of entries to generate                   */
/*                                                                 */
/* Uses the Fisher-Yates scheme: position i is swapped with a      */
/* position drawn from 0..i only.  Drawing the partner from the    */
/* whole array at every step (tinst^tinst equally likely swap      */
/* sequences mapped onto tinst! orderings) cannot give every       */
/* permutation the same probability.                               */

void shuffle(int arr[], int tinst)
{
    int i, s, temp;

    for (i = 0; i < tinst; i++)
        arr[i] = i;

    for (i = tinst - 1; i > 0; i--)
    {
        s = i_random(i + 1);      /* documented range: 0..i */
        temp = arr[s];
        arr[s] = arr[i];
        arr[i] = temp;
    }
}





/* ------------------------------------------------------------ */

/* ERROR: Report a fatal error and terminate the program           */
/* INPUTS: code   = one of the *_ERR constants selecting the       */
/*                  message                                        */
/*         string = extra text inserted into the message for       */
/*                  UNDECLARED_VALUE_ERR, UNDECLARED_CLASS_ERR and */
/*                  UNK_PARAMETER_ERR; unused by the other codes   */
/* OUTPUT: none - prints a "PEBLS ERROR" banner and the message    */
/*         on stdout, then calls exit(1)                           */

void error(int code, char *string)
{
    /* Passing a null pointer to %s is undefined, so substitute a  */
    /* visible marker when the caller supplied no text.            */
    const char *detail = string ? string : "(null)";

    printf("\nPEBLS ERROR\n");

    switch (code)
    {
        case OP_MODE_ERR:
            puts("\nNo Operating Mode Specified");
            break;

        case USAGE_ERR:
            puts("\nUSAGE:   pebls <filename.pcf>");
            break;

        case TRAIN_SIZE_ERR:
            puts("\nIllegal Training Size");
            break;

        case TRIALS_ERR:
            puts("\nTrial size exceeds TRIALS_MAX (modify config.h)");
            break;

        case CLASSES_ERR:
            puts("\nNumber of classes exceeds CLASSES_MAX (modify config.h)");
            break;

        case FEATURES_ERR:
            puts("\nNumber of features exceeds FEATURES_MAX (modify config.h)");
            break;

        case VALUES_ERR:
            puts("\nNumber of values per feature exceeds VALUES_MAX (modify config.h)");
            break;

        case INSTANCES_ERR:
            printf("\nNumber of instances exceeds INSTANCES_MAX");
            printf(" (Modify config.h)");
            break;

        case NO_TRAIN_ERR:
            printf("\nNo training instances specified in %s",
                   CONFIG.data_file);
            break;

        case UNK_CONST_ERR:
            puts("\nUnknown constant in Configuration file");
            break;

        case UNK_EWEIGHT_ERR:
            puts("\nUnknown exemplar weighting technique");
            break;

        case UNK_FWEIGHT_ERR:
            puts("\nUnknown feature weighting technique");
            break;

        case UNK_POSTPROC_ERR:
            puts("\nUnknown post processing technique");
            break;

        case K_NEIGHBOR_ERR:
            puts("\nNearest neighbor parameter must be greater than 1");
            break;

        case K_NEIGHBOR2_ERR:
            printf("\nNumber of nearest neighbors exceeds K_NEIGHBOR_MAX ");
            printf("(modify config.h)");
            break;

        case GENETIC_ERR:
            printf("\nGENETIC feature weighting is only for use with \n");
            printf("SPECIFIED_GROUP training mode.\n");
            break;

        case DATAFILE_ERR:
            printf("\nData File %s  does not exist.\n", CONFIG.data_file);
            break;

        case UNDECLARED_VALUE_ERR:
            printf("\nUndeclared feature value encountered in instance data: \"%s\"\n", detail);
            break;

        case UNDECLARED_CLASS_ERR:
            printf("\nUndeclared class encountered in instance data: \"%s\"\n", detail);
            break;

        case UNK_PARAMETER_ERR:
            printf("\nUnknown Configuration Parameter: %s\n", detail);
            break;

        case VOTING_ERR:
            printf("\nIllegal value specified for nearest-neighbor voting method. \n");
            break;

        case FEATURE_VALUE_ERR:
            printf("\nFeature value index is illegal. \n");
            break;

        case FEATURE_WEIGHT_ERR:
            printf("\nFeature weight index is illegal. \n");
            break;

        default:
            /* Without this, an unlisted code would print only the */
            /* banner and exit with no hint of what went wrong.    */
            printf("\nUnrecognized error code %d\n", code);
            break;
    }

    printf("\n\n");
    exit(1);
}






/* ------------------------------------------------------------ */
int majority_class(int i, int window_size)
{
    int lower = i - window_size / 2;
    int upper = i + window_size / 2;
    int j;
    int class_count[3];
    int max, max_index;

    for (j=0; j<3; j++)
	class_count[j] = 0;

    for (j=lower; j<=upper; j++)
	    class_count[data[j].class_nearest]++;

    max = -1;
    for (j=0; j<3; j++)
    {
	if (class_count[j] > max)
	{
	    max = class_count[j];
	    max_index = j;
	}
    }

    if (class_count[data[i].class_nearest] == class_count[max_index])
	return (data[i].class_nearest);
    else return max_index;
}




/* ------------------------------------------------------------ */
/* PROTEIN_SMOOTH: Set class_pp of every untrained instance to the */
/* majority class of the CONFIG.smooth_window instances around it  */
/*                                                                 */
/* An instance is smoothed only when a full window fits inside     */
/* the data and the instances half a window before and after it    */
/* carry the same numeric id (atoi of the id field).  Otherwise    */
/* its class_nearest is copied to class_pp unchanged.  That        */
/* includes instances closer than half a window to either end of   */
/* the data, so they are not left with an unassigned class_pp      */
/* that post_process() may later copy back into class_nearest.     */

void protein_smooth(void)
{
    int total = CONFIG.instances;
    int window = CONFIG.smooth_window;
    int half_window = window / 2;
    int i;

    for (i = 0; i < total; i++)
    {
        if (data[i].trained != FALSE)
            continue;

        if ((i >= half_window) && (i < total - half_window) &&
            (atoi(data[i].id) == atoi(data[i - half_window].id)) &&
            (atoi(data[i].id) == atoi(data[i + half_window].id)))
            data[i].class_pp = majority_class(i, window);
        else
            data[i].class_pp = data[i].class_nearest;
    }
}
	

/* ------------------------------------------------------------ */
/* PROTEIN_STANDARD: Remove implausibly short structure runs       */
/*                                                                 */
/* Starting at each untrained instance, measures the run of        */
/* consecutive instances that share its class_nearest and its      */
/* numeric id (atoi of the id field).  Runs of class 0 (ahelix)    */
/* shorter than 4 and runs of class 1 (bsheet) shorter than 2 get  */
/* class_pp = 2 (coil); every other run keeps class_nearest as     */
/* class_pp.  Trained instances that start no run are skipped.     */

void protein_standard(void)
{
    int i, j, instances = CONFIG.instances;
    int coil, ahelix, bsheet;
    int current_id, current_class, chain_length;

    ahelix = 0;
    bsheet = 1;
    coil =   2;

    i = 0;
    while (i < instances)
    {
        if (data[i].trained == FALSE)  /* test instance */
        {
            chain_length = 0;
            current_class = data[i].class_nearest;
            current_id = atoi(data[i].id);
            j = i;

            /* The j < instances test stops the run at the last    */
            /* loaded instance; without it the scan (and the       */
            /* class_pp writes below) could run past the data.     */
            while ((j < instances) &&
                   (data[j].class_nearest == current_class) &&
                   (atoi(data[j].id) == current_id))
            {
                chain_length++;
                j++;
            }

            if (((current_class == ahelix) &&
                 (chain_length < 4)) ||
                ((current_class == bsheet) &&
                 (chain_length < 2)))
            {
                for (j = 0; j < chain_length; j++)
                    data[i+j].class_pp = coil;
            }
            else
            {
                for (j = 0; j < chain_length; j++)
                    data[i+j].class_pp = data[i+j].class_nearest;
            }

            /* chain_length is at least 1 (instance i matches      */
            /* itself), so the outer loop always advances.         */
            i += chain_length;
        }
        else i++;
    }
}






/* ------------------------------------------------------------ */
/* USER_POSTPROC_1:  Placeholder for user-defined post-processing. */
/* The body is empty as shipped, so calling it does nothing.       */
/* post_process() calls it when CONFIG.post_processing is          */
/* USER_POSTPROC_1.                                                */

void user_postproc_1(void)
{

    /* Your function here */

}


/* ------------------------------------------------------------ */
/* USER_POSTPROC_2:  Placeholder for user-defined post-processing. */
/* The body is empty as shipped, so calling it does nothing.       */
/* post_process() calls it when CONFIG.post_processing is          */
/* USER_POSTPROC_2.                                                */

void user_postproc_2(void)
{

    /* Your function here */

}


/* ------------------------------------------------------------ */
/* USER_POSTPROC_3:  Placeholder for user-defined post-processing. */
/* The body is empty as shipped, so calling it does nothing.       */
/* post_process() calls it when CONFIG.post_processing is          */
/* USER_POSTPROC_3.                                                */

void user_postproc_3(void)
{

    /* Your function here */

}


/* ------------------------------------------------------------ */
/* POST_PROCESS:  Run the post-processing step selected by         */
/* CONFIG.post_processing.                                         */
/*   OFF                  nothing is done                          */
/*   PROTEIN_STANDARD     protein_standard()                       */
/*   PROTEIN_SMOOTH_ONLY  protein_smooth()                         */
/*   PROTEIN_SMOOTH       protein_smooth(), then class_pp is       */
/*                        copied into class_nearest for every      */
/*                        instance and protein_standard() is run   */
/*                        on the smoothed classes                  */
/*   USER_POSTPROC_1..3   the matching user_postproc_N() hook      */
/* Any other value is ignored.                                     */

void post_process(void)
{
    int idx;
    int total = CONFIG.instances;

    if (CONFIG.post_processing == OFF)
        return;

    if (CONFIG.post_processing == PROTEIN_STANDARD)
    {
        protein_standard();
    }
    else if (CONFIG.post_processing == PROTEIN_SMOOTH ||
             CONFIG.post_processing == PROTEIN_SMOOTH_ONLY)
    {
        protein_smooth();

        if (CONFIG.post_processing == PROTEIN_SMOOTH)
        {
            /* Feed the smoothed result to the standard filter */
            for (idx = 0; idx < total; idx++)
                data[idx].class_nearest = data[idx].class_pp;
            protein_standard();
        }
    }
    else if (CONFIG.post_processing == USER_POSTPROC_1)
    {
        user_postproc_1();
    }
    else if (CONFIG.post_processing == USER_POSTPROC_2)
    {
        user_postproc_2();
    }
    else if (CONFIG.post_processing == USER_POSTPROC_3)
    {
        user_postproc_3();
    }
}