;;;; This is an implementation of the basic ID3 algorithm for learning from examples, January, 1988.
;;;; Copyright (c) 1988 by Raymond Joseph Mooney.  This program may be freely copied, used, or
;;;; modified provided that this copyright notice is included in each copy of this code
;;;; and parts thereof.

;;;; This implementation of ID3 produces decision trees discriminating positive and negative
;;;; instances which are represented by simple nominal feature vectors represented by ordered lists.
;;;; WEATHER-DATA is a sample datafile for the weather example used in Quinlan's ML journal article.

(defparameter *trace-id3* nil "Produces a trace if set to T")

;;; A decision tree is either a symbol representing a leaf (+ or -) or a structure
;;; where decision-tree-feature is the number (starting from 1) of the feature
;;; being tested and decision-tree-subtrees is an assoc list of the form
;;; ((value1 subtree1)(value2 subtree2)...) representing the branches and subtrees

(defstruct (decision-tree (:print-function print-decision-tree))
  feature    ; 1-based index of the feature tested at this node
  subtrees)  ; assoc list ((value subtree) ...) of branches below this node

(defun train-id3 (examples)
  ;;; This function takes a list of examples where an example is a list consisting of
  ;;; either + or - and an instance and produces a decision tree which classifies
  ;;; instances into + or -.  With no examples at all, pick an arbitrary category.
  (if (null examples)
      (pick-one *categories*)
      (prog1 (build-decision-tree examples
                                  ;; Feature indices 1..N, one per entry of *domains*.
                                  (loop for i from 1 to (length *domains*) collect i))
        (trace-print *trace-id3* "~%"))))

(defun build-decision-tree (examples features &optional most-common)
  ;;; This function produces a decision tree for the given set of examples by choosing
  ;;; one of the given features (a feature is indicated by a number giving its 1-based
  ;;; position in the instance vector) as the root of the tree and recursively making
  ;;; trees for each of the resulting subsets.  most-common passes the most common
  ;;; class of a parent to its children, used to label leaves that receive no examples.
  (let ((p 0) (n 0))
    ;; Count positive and negative examples.
    (dolist (example examples)
      (cond ((eq (first example) '+) (incf p))
            ((eq (first example) '-) (incf n))))
    (cond ((null examples)
           ;; If there are no examples, label leaf with most common class from the parent node.
           (trace-print *trace-id3* "~%No examples, use most common class of parent: ~A" most-common)
           most-common)
          ((zerop p)
           ;; If there are no positive examples then they must all be negative,
           ;; so make a leaf indicating a negative outcome.
           (trace-print *trace-id3* "~%All examples -")
           '-)
          ((zerop n)
           ;; If there are no negative examples then they must all be positive,
           ;; so make a leaf indicating a positive outcome.
           (trace-print *trace-id3* "~%All examples +")
           '+)
          ((null features)
           ;; If there are no features left to discriminate on and all examples are not
           ;; in the same class, then the example set must have had the same instance
           ;; both positive and negative; use the majority class (ties go to -).
           (trace-print *trace-id3* "~%Inconsistent data, using most common class: ~A"
                        (if (> p n) '+ '-))
           (if (> p n) '+ '-))
          (t
           ;; Otherwise find the feature which maximizes information gain (minimizes E)
           ;; and make it the root of the decision tree (i.e. make it the "split feature").
           (let ((i (info p n))
                 (split-feature nil)
                 (e 0)
                 (min-e 1e10))
             (dolist (feature features)
               (setf e (expected-info feature examples))
               (trace-print *trace-id3* "~%Info gain for feature ~A = ~5,3F" feature (- i e))
               (if (< e min-e)
                   (progn (setf min-e e) (setf split-feature feature))))
             (trace-print *trace-id3* "~%~%Splitting on feature ~A" split-feature)
             ;; Separate instances based on their value for this feature and process each
             ;; subset of examples recursively, eliminating the splitting feature from the
             ;; set of features available for use in discriminating between examples.
             (make-decision-tree
              :feature split-feature
              :subtrees
              (mapcar #'(lambda (value)
                          (trace-print *trace-id3* "~%~%Considering value ~A of feature ~A"
                                       value split-feature)
                          (list value
                                (build-decision-tree
                                 (remove-if-not
                                  #'(lambda (ex)
                                      (eq (nth (1- split-feature) (second ex)) value))
                                  examples)
                                 (remove split-feature features)
                                 (if (> p n) '+ '-))))
                      (nth (1- split-feature) *domains*))))))))

(defun expected-info (feature examples)
  ;;; Compute the expected amount of information needed for the subtrees created by
  ;;; splitting on the given feature.  This is simply a weighted sum of the information
  ;;; needed for each subtree.
(let ((E 0) (num-examples (length examples))) (dolist (value (nth (1- feature) *domains*)) (let ((p-i 0) (n-i 0)) (dolist (example examples) (if (equal (nth (1- feature) (second example)) value) (cond ((eq (first example) '+) (incf p-i)) ((eq (first example) '-) (incf n-i))))) (incf E (* (/ (+ p-i n-i) num-examples) (info p-i n-i))))) E)) (defun info (p n) ;;; Compute the amount of information needed to distinguish the two classes ;;; given p + instances and n - instances (let ((s (+ p n))) (- (- (if (zerop p) 0 (* (/ p s) (log (/ p s) 2)))) (if (zerop n) 0 (* (/ n s) (log (/ n s) 2)))))) ;;;; ========================================================================================== ;;;; Testing and Printing functions ;;;; ========================================================================================== (defun test-id3 (example decision-tree) ;;; Determines the class of instance by using it to traverse the given decision ;;; tree till a leaf is reached. (if (symbolp decision-tree) decision-tree (let* ((value (nth (1- (decision-tree-feature decision-tree)) (second example))) (subtree (second (assoc value (decision-tree-subtrees decision-tree))))) (test-id3 example subtree)))) (defun print-decision-tree (tree stream depth &optional (indent 0)) ;;; Print decision tree in a nice indented form (if (= indent 0) (setf tree (format-decision-tree tree))) (cond ((atom tree) (format stream "~%~vTClass is: ~A" indent tree)) (t (format stream "~%~vTFeature: ~A" indent (first tree)) (dolist (value-form (rest tree)) (format stream "~%~vT ~A" indent (first value-form)) (print-decision-tree (second value-form) stream depth (+ indent 5)))))) (defun format-decision-tree (decision-tree) ;;; Format tree with feature names (if (symbolp decision-tree) decision-tree (cons (nth (1- (decision-tree-feature decision-tree)) *feature-names*) (mapcar #'(lambda (choice) (list (first choice) (format-decision-tree (second choice)))) (decision-tree-subtrees decision-tree))))) ;;;; 
;;;; ==========================================================================================
;;;; The following functions are for multiple category data
;;;; ==========================================================================================

(defun train-multi-id3 (examples)
  "ID3 for multiple categories. Returns a list of (category example-count tree)'s for each category where example-count is the number of examples in the category and tree is the ID3 tree constructed for the given category examples as + and all others as -"
  ;; File the examples onto the property list of their category symbol.
  (dolist (category *categories*)
    (setf (get category 'training-examples) nil))
  (dolist (example examples)
    (push (rest example) (get (first example) 'training-examples)))
  (mapcar #'(lambda (category)
              (format t "~%~%Category: ~A" category)
              ;; One-vs-rest: relabel this category's examples + and all others -,
              ;; then run the two-class trainer with *categories* rebound to (+ -).
              (let* ((training-examples
                      (append (label-examples category '+)
                              (mapcan #'(lambda (other-category)
                                          (label-examples other-category '-))
                                      (remove category *categories*))))
                     (*categories* '(+ -))
                     (decision-tree (train-id3 training-examples)))
                (format t "~%~A" decision-tree)
                (list category
                      (length (get category 'training-examples))
                      decision-tree)))
          *categories*))

(defun label-examples (category label)
  "Relabel the training-examples of a given category with the given class label"
  (mapcar #'(lambda (inst) (cons label inst))
          (get category 'training-examples)))

(defun test-multi-id3 (example tree-alist)
  "Uses tree-alist result of TRAIN-MULTI-ID3 to classify a new example. Finds all categories whose tree classifies as + and then picks one with the most examples in order to assign to a single class. If no category matches, then picks category with most examples"
  (let ((matching-class-counts
         (mapcan #'(lambda (alist-elt)
                     (if (eq (test-id3 example (third alist-elt)) '+)
                         (list (list (first alist-elt) (second alist-elt)))))
                 tree-alist)))
    (if matching-class-counts                     ; If there are matching categories
        (maximum-label matching-class-counts)     ; then pick one with most examples
        (maximum-label tree-alist))))             ; else pick most common category

(defun maximum-label (count-alist)
  "Returns the label in count-alist ((label count) ...) with the maximum count."
  ;; Strict > means ties keep the earlier entry; an empty alist yields NIL.
  (let (max-label
        (max-count 0))
    (dolist (label-count count-alist)
      (when (> (second label-count) max-count)
        (setf max-count (second label-count))
        (setf max-label (first label-count))))
    max-label))