/* Copyright (c) 1993 by The Johns Hopkins University */ /* READERS.C: Routines for reading input data for a variety of formats */ #include #include #include #include "config.h" #include "pebls.h" extern instance_type data[INSTANCES_MAX]; extern config_type CONFIG; /* ------------------------------------------------------------ */ /* STANDARD_READER: Loads data from file in "STANDARD" format */ /* , , .... */ void standard_reader(void) { char delim[5]; char line[LINE_MAX]; char value[2]; char *symbol; FILE *fptr; int icount = -1; int train_count = 0, test_count = 0; int fcount; int scount; int i, len; int training = FALSE, testing = FALSE; strcpy(delim, " ,\t\n"); if ((fptr = fopen(CONFIG.data_file, "r")) == NULL) error(DATAFILE_ERR, NULL); while (fgets(line, LINE_MAX, fptr) != NULL) { if (line[0] != '#') { scount = 0; fcount = 0; symbol = strtok(line, delim); if (symbol != NULL) { if (strcasecmp(symbol,"TRAIN")==0) training = TRUE; else if (strcasecmp(symbol,"TEST")==0) { training = FALSE; testing = TRUE; } else if (symbol) { icount++; if (training) train_count++; else if (testing) test_count++; } } while ((symbol != NULL) && (strcasecmp(symbol,"TRAIN") != 0) && (strcasecmp(symbol,"TEST") != 0)) { if (scount==0) data[icount].class_true = classtab_lookup(symbol); else if (scount==1) strcpy(data[icount].id, symbol); else { if (CONFIG.value_spacing) data[icount].value[fcount++] = symtab_lookup(fcount, symbol); else { len = strlen(symbol); value[1] = '\0'; for (i=0; i INSTANCES_MAX - 1) error(INSTANCES_ERR, NULL); strcpy(data[instances].id, subunit->id); data[instances].class_true = subunit->class[i]; data[instances].offset = i; for (j=i-diff,k=0; j<=i+diff; j++, k++) { if ((j<0) || (j>=length)) data[instances].value[k] = 0; else data[instances].value[k] = subunit->value[j]; } instances++; if (training) training_instances++; else test_instances++; } CONFIG.instances = instances; CONFIG.training_instances = training_instances; CONFIG.test_instances = test_instances; } /* ------------------------------------------------------------ */ /* SUBUNIT_READER: Loads data from file in "SUBUNITS" format*/ /* (See Documentation) */ void subunit_reader(void) { FILE *fptr; char line[LINE_MAX]; char delim[5]; char *token1, *token2; int length; subunit_type subunit; int training; int i; strcpy(delim, " \t\n,"); if ((fptr = fopen(CONFIG.data_file, "r")) == NULL) error(DATAFILE_ERR, NULL); while (fgets(line, LINE_MAX, fptr) != NULL) { token1 = strtok(line, delim); if (token1) { token2=strtok(NULL, delim); if (*token1 == '#') /* do nothing */ ; else if (strcasecmp(token1, "BEGIN")==0) { if (token2) strcpy(subunit.id, token2); else strcpy(subunit.id,""); length = 0; } else if (strcasecmp(token1, "END")==0) subunit_to_instances(&subunit, length, training); else if (strcasecmp(token1, "TRAIN")==0) training = TRUE; else if (strcasecmp(token1, "TEST")==0) training = FALSE; else { subunit.value[length] = symtab_lookup(length, token1); subunit.class[length] = classtab_lookup(token2); length++; } } } fclose(fptr); }