/****************************************************************/ /* Copyright 1993 : Johns Hopkins University */ /* Department of Computer Science */ /****************************************************************/ /* Contact : murthy@cs.jhu.edu */ /****************************************************************/ /* File Name : compute_impurity.c */ /* Author : Sreerama K. Murthy */ /* Last modified : October 1993 */ /* Contains modules : compute_impurity */ /* set_counts */ /* reset_counts */ /* largest_element */ /* Uses modules in : oc1.h */ /* util.c */ /* Is used by modules in : mktree.c */ /* perturb.c */ /****************************************************************/ #include "oc1.h" extern int no_of_dimensions; extern int *left_count,*right_count; extern int no_of_categories; extern int coeff_modified; /************************************************************************/ /* Module name : compute_impurity */ /* Functionality : Front end to the routine to compute the */ /* impurity of a given array of points. */ /* The name of the actual impurity-computing routine */ /* is given by the hash constant IMPURITY, which is */ /* in turn defined by the user in oc1.h. */ /* Parameters : cur_no_of_points : Size of the point set whose impurity */ /* needs to be computed. /* Returns : impurity. */ /* Calls modules : IMPURITY */ /* Is called by modules : main (mktree.c) */ /* build_dt (mktree.c) */ /* oblique_split (mktree.c) */ /* cross_validate (mktree.c) */ /* suggest_perturbation (perturb.c) */ /* perturb_randomly (perturb.c) */ /* linear_split (perturb.c) */ /************************************************************************/ float compute_impurity(cur_no_of_points) int cur_no_of_points; { int i,j; float IMPURITY; j = 0; for (i=1;i<=no_of_categories;i++) j = j + left_count[i]+right_count[i]; if (j != cur_no_of_points) error ("COMPUTE_IMPURITY: Arrays Left_Count and Right_Count not set."); return(IMPURITY); } /************************************************************************/ /* Module name : set_counts */ /* Functionality : Sets the values in the integer arrays */ /* left_count and right_count, to reflect the */ /* number of points of each category on the */ /* left and right of the current hyperplane. */ /* If "flag" is zero, the values are set */ /* assuming that ALL points are on one (right) */ /* side of the hyperplane (relevant while */ /* computing initial impurity before splitting). */ /* If "flag" is not zero, it is assumed that */ /* the "val" fields of points are correctly set. */ /* Parameters : cur_points : Array of pointers to the POINT structs */ /* cur_no_of_points : Number of points */ /* flag : 0 if initial impurity is to be computed */ /* Returns : Nothing. */ /* Calls modules : reset_counts */ /* Is called by modules : main (mktree.c) */ /* build_dt (mktree.c) */ /* oblique_split (mktree.c) */ /* cross_validate (mktree.c) */ /* Remarks : There may be a better way of computing the initial */ /* impurity, than considering a hypothetical hyperplane */ /* onto one side of the point set. Suggestions ? */ /************************************************************************/ set_counts(cur_points,cur_no_of_points,flag) POINT **cur_points; int cur_no_of_points; int flag; { int i; reset_counts(); if (!flag) for (i=1;i<=cur_no_of_points;i++) right_count[cur_points[i]->category]++; else { if (coeff_modified == TRUE) { printf("Set_Counts: Values not set properly !\n"); find_values(cur_points,cur_no_of_points); } for (i=1;i<=cur_no_of_points;i++) { if (cur_points[i]->val < 0) left_count[cur_points[i]->category]++; else right_count[cur_points[i]->category]++; } } } /************************************************************************/ /* Module name : reset_counts */ /* Functionality : Resets the values in the integer arrays */ /* left_count and right_count to zero. */ /* Parameters : None. */ /* Returns : Nothing. */ /* Calls modules : None. */ /* Is called by modules : set_counts */ /* suggest_perturbation (perturb.c) */ /* perturb_randomly (perturb.c) */ /* linear_split (perturb.c) */ /* /************************************************************************/ reset_counts() { int i; for (i=1;i<=no_of_categories;i++) left_count[i] = right_count[i] = 0; } /************************************************************************/ /* Module name : largest_element */ /* Functionality : determines the index of the largest element */ /* in an array. */ /* Parameters : array : integer array */ /* count : number of elements */ /* Returns : index of the largest element in the array. */ /* count+1 if the largest element <= 0 */ /* Calls modules : error (util.c) */ /* Is called by modules : build_dt (mktree.c) */ /* maxminority (impurity_measures.c) */ /* summinority (impurity_measures.c) */ /************************************************************************/ int largest_element(array,count) int *array,count; { int i; int major; major = 1; for (i=2;i<=count;i++) if (array[i] > array[major]) major = i; if (array[major] <= 0) return(count+1); return(major); } /************************************************************************/ /************************************************************************/