;;; This is a version of the AQ algorithm for learning from examples which uses ;;; beam search to generate bounded stars. February, 1988. ;;;; Copyright (c) 1988 by Raymond Joseph Mooney. This program may be freely ;;;; copied, used, or modified provided that this copyright notice is included ;;;; in each copy of this code and parts thereof. ;;;; This version of AQ assumes that events are represented as ordered lists of ;;;; nominal feature values and that complexes are represented similarly with ;;;; ?'s for values which are not constrained. Consequently, its language does ;;;; not include negation, internal disjunction, linear or structural features. ;;;; It has a general LEF function but does not support tolerances. (proclaim '(optimize speed (safety 0))) (defparameter *trace-aq* nil) ; AQ produces a trace when set to T (defparameter *print-with-feature-names* nil) ; print out complexes with feature names ;; The LEF (Lexicographic Evaluation Functional) determines which complexes ;; are more preferable. A LEF is a list of criteria functions. A criteria ;; function takes two arguements, a complex and the current list of uncovered ;; + events and returns a value, where a lesser value indicates more preferable. ;; The current LEF function first maximizes coverage of + events in order to ;; minimize disjuncts and with in that minimizes the number of selectors. (defvar *lef* '(count-coverage count-selectors)) (defparameter *max-star* 1) ; The beam width which controls the maximum size ; stars may achieve. (defvar *domains*) ; A list specifying the domain for each feature (defmacro trace-print (test-var &rest format-form) ;; Print using the format string only if test-var is nonNIL `(if ,test-var (format t ,@format-form))) (defun aq (examples) ;;; AQ takes a list of examples where an example is a list of either + or - ;;; (indicating the class) and an event description. It returns a cover ;;; (a list of complexes) which covers all of the postive events and none ;;; of the negative ones. ;; First separate into + and - events and call AQ1 (let ((pos-instances nil)(neg-instances nil)) (dolist (example examples) (cond ((eq (first example) '+) (push (second example) pos-instances)) ((eq (first example) '-) (push (second example) neg-instances)))) (aq1 pos-instances neg-instances))) (defun aq1 (pos-instances neg-instances) ;;; AQ1 takes a lists of positive and negative events and returns a cover ;;; (a list of complexes) which covers all of the postive events and none ;;; of the negative ones. (let ((star nil)(cover nil)(seed nil)(best-complex nil)(delta 0)) ;; Use seeds which are not covered by any previous star until there ;; are no more such seeds. (do ((seed-instances pos-instances)) ((null seed-instances)) (setf seed (pop seed-instances)) ; pick a seed from the available set (setf pos-instances (remove seed pos-instances)) (trace-print *trace-aq* "~%~%Seed: ~A" seed) ;; Generate a bounded star covering this seed but not any of the - events (setf star (generate-star seed neg-instances pos-instances)) ;; Since generate-star returns a list of complexes sorted from best to ;; worst according to the LEF, the best is the first element of the star (setf best-complex (first star)) (trace-print *trace-aq* "~%Best complex: ~A" best-complex) (push best-complex cover) ; add the best complex to the cover ;; Remove from the set of possible seeds those which are covered by any ;; complex in the star (setf seed-instances (remove-if #'(lambda (instance) (dolist (complex star) (when (match complex instance) (return t)))) seed-instances)) ;; Remove from the set of positive events those covered by the chosen ;; complex (setf pos-instances (remove-if #'(lambda (instance) (match best-complex instance)) pos-instances))) ;; Pick seeds and generate bounded stars until all + events are covered (do nil ((null pos-instances)) (setf seed (pop pos-instances)) (trace-print *trace-aq* "~%~%Seed: ~A" seed) (setf star (generate-star seed neg-instances pos-instances)) (setf best-complex (first star)) (trace-print *trace-aq* "~%Best complex: ~A" best-complex) (push best-complex cover) (setf pos-instances (remove-if #'(lambda (instance) (match best-complex instance)) pos-instances)) (incf delta)) ; delta is an estimate of distance from ; the minimal # of complexes (trace-print *trace-aq* "~%~%Cover minimal within ~D complexes" delta) cover)) (defun generate-star (seed neg-instances pos-instances) ;;; Generate a star which covers seed but not any of the neg-instances ;;; If star ever gets larger than *max-star* complexes then trim it the ;;; best *max-star* complexes as judged by the LEF. The star is ;;; represented as a list of "evaled-complexes" which are cons-cells with a ;;; complex in their CAR and a list of LEF values in their CDR. This prevents ;;; unnecessary recalculating LEF values. ;; Initialize the star to an evaled-complex for the most general complex (let ((star (compute-lef-values (initialize-star) pos-instances))) ;; For each negative event specialize each complex in the star which matches ;; the negative event just enough so it doesn't cover it. (dolist (neg-instance neg-instances) ;; if |star|>*max-star* then trim it to the best complexes by sorting it ;; according to the LEF and taking the best *max-star* complexes (when (> (length star) *max-star*) (trace-print *trace-aq* "~%Trimming star to best ~D" *max-star*) (setf star (subseq (sort star #'lef-less-than :key #'rest) 0 *max-star*))) (trace-print *trace-aq* "~%Current star: ~A ~%~%Processing neg event ~A" star neg-instance) (setf star (update-star star seed neg-instance pos-instances)) ;; check if star has went empty due to limited search (can never happen if ;; the star is never trimmed (when (null star) (error "Star empty do to limited search"))) ;; Before returning the star, sort it, trim it if necessary, and return only ;; complexes, not evaled complexes (setf star (sort star #'lef-less-than :key #'rest)) (when (> (length star) *max-star*) (trace-print *trace-aq* "~%Trimming star to best ~D" *max-star*) (setf star (subseq star 0 *max-star*))) (trace-print *trace-aq* "~%Final star: ~A" star) (mapcar #'(lambda (evaled-complex) (first evaled-complex)) star))) (defun update-star (star seed neg-instance pos-instances) ;;; Specializes complexes in the star so that none match the given instance ;;; for a negative example but each still covers the seed. Calculates new ;;; LEF values for any new complexes to get evaled-complexes (setf star (mapcan #'(lambda (evaled-complex) (if (match (first evaled-complex) neg-instance) (compute-lef-values (specializations-against (first evaled-complex) neg-instance seed) pos-instances) (list evaled-complex))) star)) ;; Remove from the star those complexes which are more specific than some other (dolist (evaled-complex1 star) (dolist (evaled-complex2 (rest (member evaled-complex1 star))) (cond ((more-general? (first evaled-complex1) (first evaled-complex2)) (setf star (remove evaled-complex2 star))) ((or (more-general? (first evaled-complex2) (first evaled-complex1)) (equal (first evaled-complex1) (first evaled-complex2))) (setf star (remove evaled-complex1 star)))))) star) (defun specializations-against (complex neg-instance seed) ;;; Specialize the given complex just enought so it doesn't cover the ;;; negative event but still covers the seed. For each feature in ;;; complex which is "?" change it to the value in seed to obtain a ;;; specialization unless the value in seed and neg-instance are the same. (do ((complex-rest complex (rest complex-rest)) (neg-rest neg-instance (rest neg-rest)) (complex-bef nil (append complex-bef (list (first complex-rest)))) (seed-rest seed (rest seed-rest)) (specializations nil)) ((null complex-rest) specializations) (if (and (eq (first complex-rest) '?) (not (eq (first neg-rest) (first seed-rest)))) (push (append complex-bef (list (first seed-rest)) (rest complex-rest)) specializations)))) (defun match (generalization instance) ;;; Match function for a simple feature vector representation where "?" is a ;;; wildcard (or (equal generalization instance) (and (or (equal (first generalization)(first instance)) (eq (first generalization) '?)) (match (rest generalization) (rest instance))))) (defun more-general? (x y) ;;; Returns T iff generalization x is strictly more general than ;;; generalization y for a simple feature vector representation. ;;; For x to be more general than y, they must match and x must ;;; have a "?" where y has a specific value; however y must never ;;; have a "?" where x has a specific value (cond ((or (null x)(null y)) nil) ((and (eq (first x) '?) (not (eq (first y) '?)) (or (equal (rest x)(rest y)) (more-general? (rest x) (rest y)))) t) ((equal (first x) (first y)) (more-general? (rest x) (rest y))))) (defun initialize-star () ;;; Initialize G to a set containing the all "?" feature vector (list (mapcar #'(lambda (feature) (declare (ignore feature)) '?) *domains*))) (defun compute-lef-values (star pos-instances) ;;; Calculate LEF values for each complex and return a list of evaled ;;; complexes: (complex lef-value1 lef-value2 ...) (mapcar #'(lambda (complex) (cons complex (mapcar #'(lambda (lef-fn) (funcall lef-fn complex pos-instances)) *lef*))) star)) (defun lef-less-than (value-list1 value-list2) ;;; Returns T iff the first list of LEF values is less than the second ;;; Interprets values lexicographically (or (< (first value-list1) (first value-list2)) (and (rest value-list1) (equal (first value-list1)(first value-list2)) (lef-less-than (rest value-list1)(rest value-list2))))) (defun count-coverage (complex pos-instances) ;;; Counts the number of positive events covered by the complex ;;; and returns the negation of this value so less is better (let ((match-count 0)) (dolist (pos-instance pos-instances (- match-count)) (if (match complex pos-instance) (incf match-count))))) (defun count-selectors (complex pos-instances) ;;; Counts the number of selectors in a complex, assuming less is better (declare (ignore pos-instances)) (let ((selector-count 0)) (dolist (feature complex selector-count) (unless (eq feature '?) (incf selector-count))))) ;;;; ========================================================================================== ;;;; Functions for running and testing a single concept ;;;; ========================================================================================== (defun aq-test (examples train#) ;;; Run and test id3 on the examples by using the first train# examples ;;; to train and the remaining to test (let ((training-examples (subseq examples 0 train#)) (testing-examples (subseq examples train#)) (start-time (get-internal-run-time))) (let ((cover (aq training-examples))) (format t "~%~%Run time: ~,2Fs" (seconds-since start-time)) (format t "~%Cover:") (print-cover cover) (test-examples testing-examples cover)))) (defun test-examples (examples cover) ;;; Test cover on the given set of examples and report results (let ((correct# 0)) (dolist (example examples) (if (cond ((cover-match cover (second example)) (eq (first example) '+)) ((eq (first example) '-))) (incf correct#))) (format t "~%~%Percentage correct: ~A" (round (* 100 (/ correct# (length examples))))))) (defun cover-match (cover instance) ;;; If cover covers instance then return t (dolist (complex cover) (when (match complex instance)(return t)))) (defun print-cover (cover) ;;; Print cover in a nice format (dolist (complex cover) (format t "~%~A" (format-complex complex)) (unless (eq complex (first (last cover))) (format t " or")))) (defun format-complex (complex) ;;; Format a complex into a prettier form for output ;;; ( (feature-name value) ...) for features with constrained values (if *print-with-feature-names* (do ((complex-rest complex (rest complex-rest)) (feature-rest *feature-names* (rest feature-rest)) (formated-complex nil)) ((null complex-rest) (nreverse formated-complex)) (unless (eq (first complex-rest) '?) (push (list (first feature-rest) (first complex-rest)) formated-complex))) complex)) ;;;; ========================================================================================== ;;;; The following functions are for multiple concept (category) problems like the soybean data ;;;; ========================================================================================== (defun aq-categories (category-list) ;;; AQ for multiple concept learning problems. The argument category-list ;;; should be a list of atoms which represent names of individual categories. A list of ;;; instances for learning should be stored on the LEARN-INSTANCES property of each ;;; category name. A single concept learning trial is run for each category in which the ;;; instances of that category are positive examples and instances of all other categories ;;; are negative examples. The resulting cover for each category is stored on the ;;; COVER property of the category name (let ((start-time (get-internal-run-time))) (dolist (category-name category-list) (format t "~%~%Category: ~A" category-name) (setf (get category-name 'COVER) (aq1 (get category-name 'learn-instances) (mapcan #'(lambda (a) (copy-list (get a 'learn-instances))) (remove category-name category-list)))) (format t "~%Cover:")(print-cover (get category-name 'COVER))) (format t "~%~%Run time: ~,2Fs" (seconds-since start-time))) (aq-test-categories category-list)) (defun separate-instances (categories num-learn-instances) ;;; Separates a list of instances for a set of categories into learning and testing ;;; instances to facilitate experimentation in preparation for using VS-CATEGORIES ;;; and TEST-CATEGORIES. The variable categories should be bound to a list of ;;; category names whose values are a list of instances of that category. The first ;;; num-learn-instances of these instances are stored on the LEARN-INSTANCES property ;;; to be used for learning and the rest are stored on TEST-INSTANCES for testing. (dolist (category categories) (setf (get category 'learn-instances) (subseq (eval category) 0 num-learn-instances)) (setf (get category 'test-instances) (subseq (eval category) num-learn-instances (length (eval category)))))) (defun aq-test-categories (categories &optional use-learn-instances) ; ;;; To be used after AQ-CATEGORIES in order to test the generalizations learned on a ;;; set of new instances stored under the TEST-INSTANCES property of each category name ;;; in categories. Reports % correct for each category and overall % correct. (let ((percent-sum 0)(percent 0)) (dolist (category categories) (format t "~%~%Testing ~A instances" category) (let ((answers (mapcar #'(lambda (instance) (test-instance instance categories)) (get category (if use-learn-instances 'learn-instances 'test-instances)))) (count 0)) (format t "~%~A" answers) (dolist (answer answers) (if (and (eq (first answer) category)(null (rest answer))) (incf count))) (setf percent (* 100 (/ count (length answers)))) (incf percent-sum percent) (format t "~%Percentage correct: ~A" (round percent)))) (format t "~%~%Total percent correct: ~A" (round (/ percent-sum (length categories)))))) (defun test-instance (instance categories) ;;; Given an instance and a list of category names returns the subset of these categories ;;; which it is determined that the instance belongs to. Uses the cover for each ;;; category to determine whether or not an instance belongs to the category or not. (let ((member-categories nil)) (dolist (category categories member-categories) (if (cover-match (get category 'COVER) instance) (push category member-categories))))) ;;;; ========================================================================================== ;;;; General utility functions ;;;; ========================================================================================== (defun seconds-since (time) ;;; Return seconds elapsed since given time (initially set by get-internal-run-time) (/ (- (get-internal-run-time) time) internal-time-units-per-second))