/****************************************************************/
/* Copyright 1993 : Johns Hopkins University                    */
/*                  Department of Computer Science              */
/****************************************************************/
/* Contact : murthy@cs.jhu.edu                                  */
/****************************************************************/
/* File Name : gendata.c                                        */
/* Author : Sreerama K. Murthy                                  */
/* Last modified : October 1993                                 */
/* Contains modules : main                                      */
/*                    gendata_help()                            */
/*                    copy_option()            (file-local)     */
/*                    report_category_counts() (file-local)     */
/* Uses modules in : oc1.h                                      */
/*                   util.c                                     */
/*                   classify_util.c                            */
/*                   classify.c                                 */
/* Is used by modules in : none.                                */
/****************************************************************/
#include "oc1.h"

char *pname;
char test_data[LINESIZE],failed_data[LINESIZE];
int unlabeled=FALSE,no_of_dimensions,no_of_categories;
int verbose=FALSE;

void srand48();
/* Declared up front because main() calls it before its definition.      */
/* The int return type matches what the old implicit declaration gave.   */
int gendata_help(void);

/************************************************************************/
/* Module name : copy_option                                            */
/* Functionality : Copies a command line argument into a buffer of      */
/*                 LINESIZE bytes. An argument that does not fit is     */
/*                 rejected (message + usage text) instead of being     */
/*                 written past the end of the buffer.                  */
/* Parameters : dst : destination buffer, LINESIZE bytes long.          */
/*              src : NUL-terminated argument text.                     */
/* Returns : Nothing.                                                   */
/* Calls modules : gendata_help                                         */
/* Is called by modules : main                                          */
/************************************************************************/
static void copy_option(char *dst, const char *src)
{
  if (strlen(src) >= LINESIZE)
    {
      fprintf(stderr,"%s: argument too long (limit is %d characters).\n",
              pname,(int)(LINESIZE - 1));
      gendata_help();   /* prints the usage text and exits */
    }
  strcpy(dst,src);      /* length was checked above */
}

/************************************************************************/
/* Module name : report_category_counts                                 */
/* Functionality : Prints to stderr, for every category from 1 to       */
/*                 no_of_categories, how many of the given points       */
/*                 carry that category.                                 */
/* Parameters : points : 1-based array of points.                       */
/*              no_of_points : number of entries in "points".           */
/* Returns : Nothing.                                                   */
/* Calls modules : ivector (util.c)                                     */
/*                 free_ivector (util.c)                                */
/* Is called by modules : main                                          */
/************************************************************************/
static void report_category_counts(struct point **points, int no_of_points)
{
  int i,category;
  int *point_count;

  if (no_of_categories < 1) return;     /* nothing to tabulate */

  point_count = ivector(1,no_of_categories);
  for (i=1;i<=no_of_categories;i++) point_count[i]=0;

  for (i=1;i<=no_of_points;i++)
    {
      category = points[i]->category;
      /* Skip labels outside 1..no_of_categories so that a bad label  */
      /* cannot make us write outside the count vector.               */
      if (category >= 1 && category <= no_of_categories)
        point_count[category]++;
    }

  for (i=1;i<=no_of_categories;i++)
    fprintf(stderr,"\tCategory %d : %d points\n",i,point_count[i]);

  /* The vector is released here, in the only scope that allocates it. */
  free_ivector(point_count,1,no_of_categories);
}

/************************************************************************/
/* Module name : main                                                   */
/* Functionality : Generates a dataset of random points. When a         */
/*                 decision tree file is given (-D), the points are     */
/*                 labelled by passing them to classify() together      */
/*                 with that tree. Without a tree each point gets a     */
/*                 random category, and with -u the points are          */
/*                 written without labels.                              */
/* Parameters : argc, argv : command line, parsed with getopt().        */
/*              See gendata_help() for the list of options.             */
/* Returns : 0 on completion. Usage errors do not return; they          */
/*           leave through gendata_help().                              */
/* Calls modules : read_tree (classify_util.c)                          */
/*                 allocate_point_array (load_data.c)                   */
/*                 classify (classify.c)                                */
/*                 print_point (classify.c)                             */
/*                 myrandom (util.c)                                    */
/*                 gendata_help                                         */
/*                 copy_option                                          */
/*                 report_category_counts                               */
/* Is called by modules : none.                                         */
/************************************************************************/
int main(int argc, char *argv[])
{
  extern char *optarg;
  int c1,i,j;
  int above = 0;            /* lower bound handed to myrandom() */
  int below = 1;            /* upper bound handed to myrandom() */
  int no_of_samples = 0;    /* 0 means "-n was not given"; rejected below */
  char decision_tree[LINESIZE];
  struct point **points_array = NULL,**allocate_point_array();
  struct tree_node *root = NULL,*read_tree();
  FILE *outfile;

  test_data[0] = '\0';
  decision_tree[0] = '\0';
  pname = argv[0];

  if (argc == 1) gendata_help();

  while ((c1 = getopt (argc, argv, "D:s:o:n:ua:b:d:c:v")) != EOF)
    switch (c1)
      {
      case 'D':         /* Decision tree file; excludes -d and -c. */
        if (no_of_dimensions > 0 || no_of_categories > 0) gendata_help();
        copy_option(decision_tree,optarg);
        break;
      case 'o':         /* File into which data should be generated. */
        copy_option(test_data,optarg);
        break;
      case 's':         /* Seed for the random number generator. */
        srand48(atol(optarg));
        break;
      case 'n':         /* Number of points to generate. */
        no_of_samples = atoi (optarg);
        break;
      case 'd':         /* Number of dimensions; excludes -D. */
        if (strlen(decision_tree)) gendata_help();
        no_of_dimensions = atoi (optarg);
        break;
      case 'c':         /* Number of categories; excludes -D. */
        if (strlen(decision_tree)) gendata_help();
        no_of_categories = atoi (optarg);
        break;
      case 'u':
        unlabeled = TRUE;
        break;
      case 'v':
        verbose = TRUE;
        break;
      case 'a':         /* Random numbers generated are more than this. */
        above = atoi (optarg);
        break;
      case 'b':         /* Random numbers generated are less than this. */
        below = atoi (optarg);
        break;
      default:
        gendata_help();
      }

  if (no_of_samples <= 0 || below <= above) gendata_help();

  if (strlen(decision_tree))
    {
      if (no_of_dimensions || no_of_categories) gendata_help();
      /* NOTE(review): no_of_dimensions and no_of_categories are not set */
      /* in this branch; presumably read_tree() fills them in - confirm. */
      /* A NULL root is not treated as fatal here: the run then falls    */
      /* back to random labels, exactly as when no tree is given.        */
      root = read_tree(decision_tree);
      if (verbose && root != NULL)
        fprintf(stderr,"Decision tree read from %s.\n",decision_tree);
    }
  else
    {
      if (!no_of_dimensions) no_of_dimensions = 2;
      if (!no_of_categories) no_of_categories = 2;
    }

  if (verbose)
    fprintf(stderr,"Number of dimensions = %d, Number of classes = %d\n",
            no_of_dimensions, no_of_categories);

  points_array = allocate_point_array(points_array,no_of_samples,0);

  /* NOTE(review): myrandom() is called with int arguments here and     */
  /* further down. That is only correct if oc1.h supplies a prototype   */
  /* for it (or if it really takes ints) - confirm against util.c.      */
  for (i=1;i<=no_of_samples;i++)
    for (j=1;j<= no_of_dimensions;j++)
      points_array[i]->dimension[j] = myrandom(above,below);
  fprintf(stderr,"%d random data points generated.\n",no_of_samples);

  /* An empty or unwritable file name falls back to standard output. */
  if ((outfile = fopen(test_data,"w")) == NULL) outfile = stdout;

  if (unlabeled == TRUE)
    {
      for (i=1;i<=no_of_samples;i++)
        print_point(outfile,points_array[i],TRUE);
      fclose(outfile);
    }
  else if (root != NULL)
    {
      /* classify() is given the file NAME, not our stream, so it     */
      /* presumably opens the file itself - confirm in classify.c.    */
      /* Our handle is closed first so that it is not leaked.         */
      if (outfile != stdout) fclose(outfile);
      classify(points_array,no_of_samples,root,test_data);
    }
  else
    {
      for (i=1;i<=no_of_samples;i++)
        {
          points_array[i]->category = (int)myrandom(1,no_of_categories+1);
          print_point(outfile,points_array[i],FALSE);
        }
      fclose(outfile);
    }

  if (verbose && !unlabeled)
    report_category_counts(points_array,no_of_samples);

  if (verbose && strlen(test_data))
    fprintf(stderr,"Output written to %s.\n", test_data);

  return 0;
}

/************************************************************************/
/* Module name : gendata_help                                           */
/* Functionality : Displays the command line options available with     */
/*                 "gendata", with brief descriptions, on stderr and    */
/*                 then terminates the program with exit status 0.      */
/* Parameters : None.                                                   */
/* Returns : Does not return to the caller (calls exit).                */
/* Calls modules : None.                                                */
/* Is called by modules : main                                          */
/*                        copy_option                                   */
/************************************************************************/
int gendata_help(void)
{
  fprintf (stderr,"\n\nUsage : gendata -D:s:o:un:d:c:va:b:");
  fprintf (stderr,"\nOptions :");
  fprintf (stderr,"\n -D<decision tree file> : Tree used to label the points.");
  fprintf (stderr,"\n (Default: None. Cannot be combined with -d or -c.)");
  fprintf (stderr,"\n -s<seed> : Seed for the random number generator.");
  fprintf (stderr,"\n -o<output file> (Default=stdout)");
  fprintf (stderr,"\n -n<#samples> : Number of points to generate, at least 1.");
  fprintf (stderr,"\n (Default: None)");
  fprintf (stderr,"\n -d<#dimensions> (Default=2)");
  fprintf (stderr,"\n -c<#categories> (Default=2)");
  fprintf (stderr,"\n -u : Unlabeled Data. (Default=FALSE)");
  fprintf (stderr,"\n -v : Verbose (Default=FALSE)");
  fprintf (stderr,"\n -a<integer> : Random numbers generated are more than this.");
  fprintf (stderr,"\n (Default=0)");
  fprintf (stderr,"\n -b<integer> : Random numbers generated are less than this.");
  fprintf (stderr,"\n (Default=1)");
  fprintf (stderr,"\n\n");
  exit(0);
  return 0;     /* not reached; keeps the int return type well-formed */
}

/************************************************************************/
/************************************************************************/