
/****************************************************************/
/* Copyright 1993 : Johns Hopkins University			*/
/*                 Department of Computer Science		*/
/****************************************************************/
/* Contact : murthy@cs.jhu.edu					*/
/****************************************************************/
/* File Name : gendata.c					*/
/* Author : Sreerama K. Murthy					*/
/* Last modified : October 1993					*/
/* Contains modules : 	main					*/
/*			gendata_help()				*/
/* Uses modules in :	oc1.h					*/
/*			util.c					*/ 
/*			classify_util.c				*/
/*			classify.c				*/
/* Is used by modules in :	none.				*/
/****************************************************************/		


#include "oc1.h"

/* Program name as invoked; main() sets it from argv[0]. */
char *pname;

/* Name of the file the generated data is written to (-o option).
   main() falls back to stdout when this file cannot be opened. */
char test_data[LINESIZE];

/* Not referenced by the code visible in this file. */
char failed_data[LINESIZE];

/* TRUE when the -u option asks for unlabeled data. */
int unlabeled = FALSE;

/* Number of attributes per generated point (-d option). */
int no_of_dimensions;

/* Number of classes used when labels are assigned randomly (-c option). */
int no_of_categories;

/* TRUE when the -v option asks for progress messages on stderr. */
int verbose = FALSE;

void srand48();

/************************************************************************/
/* Module name : main							*/
/* Functionality : 	Generates a random dataset. When a decision	*/
/*			tree file is given (-D), the points are handed	*/
/*			to classify() together with that tree and the	*/
/*			output file name. Otherwise each point gets a	*/
/*			randomly drawn category, or no label at all	*/
/*			when -u is given.				*/
/* Parameters :	argc, argv : the usual command line. Options are	*/
/*			parsed with getopt; see gendata_help for the	*/
/*			list.						*/
/* Returns :	0 on normal completion. Invalid or missing options	*/
/*		end the program through gendata_help().			*/
/* Calls modules :	read_tree (classify_util.c)			*/
/*			allocate_point_array (load_data.c)		*/
/*			classify (classify.c)				*/
/*			print_point (classify.c)			*/
/*			gendata_help					*/
/*			ivector (util.c)				*/
/*			free_ivector (util.c)				*/
/*			myrandom (util.c)				*/
/* Is called by modules :	none.					*/
/************************************************************************/
int main(argc,argv)
int argc;
char *argv[];
{
 extern char *optarg;
 int c1,i,j,category,above,below;
 int no_of_samples = 0;    /* stays 0 (=> usage message) unless -n is given */
 int *point_count = NULL;  /* only allocated for the verbose class summary */
 char decision_tree[LINESIZE];
 struct point **points_array = NULL,**allocate_point_array();
 struct tree_node *root = NULL,*read_tree();
 FILE *outfile;
 
 test_data[0] = '\0';
 decision_tree[0] = '\0';
 above = 0;
 below = 1;

 pname = argv[0];
 if (argc == 1) gendata_help();
 while ((c1 = getopt (argc, argv, "D:s:o:n:ua:b:d:c:v")) != -1)
   switch (c1)
     {
         case 'D':   /* Decision tree; mutually exclusive with -d and -c. */
                     if (no_of_dimensions > 0 || no_of_categories > 0)
                        gendata_help();
                     /* Refuse names that do not fit the fixed-size buffer. */
                     if (strlen(optarg) >= LINESIZE) gendata_help();
                     strcpy(decision_tree,optarg);
                     break;
         case 'o':   /* File into which data should be generated. */
                     if (strlen(optarg) >= LINESIZE) gendata_help();
                     strcpy(test_data,optarg);
                     break;
         case 's':   /* Seed for the random number generator. */
                     srand48(atol(optarg));
                     break;
         case 'n':   /* Number of points to generate. */
                     no_of_samples = atoi (optarg);
                     break;
         case 'd':   if (strlen(decision_tree)) gendata_help();
                     no_of_dimensions = atoi (optarg);
                     break;
         case 'c':   if (strlen(decision_tree)) gendata_help();
                     no_of_categories = atoi (optarg);
                     break;
         case 'u':   unlabeled = TRUE; break;
         case 'v':   verbose = TRUE; break;
         case 'a':   /* Lower bound handed to myrandom() for attribute values. */
                     above = atoi (optarg);
                     break;
         case 'b':   /* Upper bound handed to myrandom() for attribute values. */
                     below = atoi (optarg);
                     break;
         default:    gendata_help();
     }

 if (no_of_samples <= 0 || below <= above) gendata_help(); 

 if (strlen(decision_tree))
  {
   if (no_of_dimensions || no_of_categories) gendata_help();
   /* NOTE(review): a NULL return from read_tree() is not treated as fatal
      here; confirm that read_tree() reports its own errors. */
   root = read_tree(decision_tree);
   if (verbose && root != NULL) 
     fprintf(stderr,"Decision tree read from %s.\n",decision_tree);
  }
 else
  {
   /* Negative counts make no sense; zero means "use the default". */
   if (no_of_dimensions < 0 || no_of_categories < 0) gendata_help();
   if (!no_of_dimensions) no_of_dimensions = 2;
   if (!no_of_categories) no_of_categories = 2;
  } 
 if (verbose) 
    fprintf(stderr,"Number of dimensions = %d, Number of classes = %d\n",
            no_of_dimensions, no_of_categories);

 points_array = allocate_point_array(points_array,no_of_samples,0);
 
 /* Points and their attributes are indexed from 1. */
 /* NOTE(review): myrandom() is declared outside this file; confirm that it
    accepts the int bounds passed here. */
 for (i=1;i<=no_of_samples;i++)
   for (j=1;j<= no_of_dimensions;j++)
     points_array[i]->dimension[j] = myrandom(above,below);

 fprintf(stderr,"%d random data points generated.\n",no_of_samples);

 /* An empty or unwritable output name falls back to stdout. */
 if ((outfile = fopen(test_data,"w")) == NULL) outfile = stdout;

 if (unlabeled != TRUE)
  {
   if (root != NULL)
    {
     /* classify() is given the output file name, not this stream, so the
        stream opened above is released here instead of being leaked. */
     if (outfile != stdout) fclose(outfile);
     classify(points_array,no_of_samples,root,test_data);
    }
   else
    {
     for (i=1;i<=no_of_samples;i++)
     {
      points_array[i]->category = (int)myrandom(1,no_of_categories+1);
      print_point(outfile,points_array[i],FALSE);
     }
     fclose(outfile);
    }
  }
 else
  {
    for (i=1;i<=no_of_samples;i++)
      print_point(outfile,points_array[i],TRUE);
    fclose(outfile);
  }

 if (verbose && !unlabeled)
  {
    point_count = ivector(1,no_of_categories);
    for (i=1;i<=no_of_categories;i++) point_count[i]=0;
    for (i=1;i<=no_of_samples;i++)
     {
       /* Guard the index: a category outside 1..no_of_categories would
          otherwise be counted outside the range requested from ivector(). */
       category = points_array[i]->category;
       if (category >= 1 && category <= no_of_categories)
         point_count[category]++;
     }

    for (i=1;i<=no_of_categories;i++)
      fprintf(stderr,"\tCategory %d : %d points\n",i,point_count[i]);

    /* Free only what was actually allocated in this branch. */
    free_ivector(point_count,1,no_of_categories);
  }

 if (verbose && strlen(test_data)) 
   fprintf(stderr,"Output written to %s.\n", test_data);

 return 0;
}

/************************************************************************/
/* Module name : gendata_help						*/ 
/* Functionality : 	Writes the command line options available with	*/
/*			"gendata", with brief descriptions, to stderr	*/
/*			and then terminates the program.		*/
/* Parameters :	None.							*/
/* Returns :	Does not return; calls exit(0).				*/
/* Calls modules :	None.						*/
/* Is called by modules : main						*/
/************************************************************************/
gendata_help()
{
 /* Usage text, one fragment per entry, emitted in order. Each fragment
    starts with its own newline. */
 static char *usage_lines[] = {
   "\n\nUsage : gendata -D:s:o:un:d:c:va:b:",
   "\nOptions :",
   "\n    -D<File containing the Decision tree>",
   "\n	  (Default: None)",
   "\n    -s<integer seed for the random number generator>",
   "\n    -o<file to write the generated data> (Default=stdout)",
   "\n    -n<number of points to be generated>",
   "\n	  (Default: None)",
   "\n    -d<#dimensions> (Default=2)",
   "\n    -c<#categories> (Default=2)",
   "\n    -u : Unlabeled Data. (Default=FALSE)",
   "\n    -v : Verbose (Default=FALSE)",
   "\n    -a<all generated data are above this number>",
   "\n         (Default=0)",
   "\n    -b<all generated data are below this number>",
   "\n         (Default=1)",
   "\n\n"
 };
 int i;
 int line_count = (int)(sizeof(usage_lines) / sizeof(usage_lines[0]));

 for (i=0;i<line_count;i++)
   fprintf (stderr,"%s",usage_lines[i]);

 /* Exit status is 0 even when reached because of a bad option. */
 exit(0);

}

/************************************************************************/
/************************************************************************/

