;;;; VERSION-SPACE.LISP January 1988, An implementation of the Version Space Algorithm ;;;; for incremental learning from examples ;;;; Copyright (c) 1988 by Raymond Joseph Mooney. This program may be freely copied, used, or ;;;; modified provided that this copyright notice is included in each copy of this code ;;;; and parts thereof. ;;;; This is a basic version space learning algorithm. In order to use it for any ;;;; representation of instances and generalizations, the user must define the ;;;; following functions: ;;;; ;;;; equal-generalizations(x ,y): Returns T iff x and y are equal generalizations. ;;;; match(generalization, instance): Returns T iff generalization matches instance. ;;;; more-general?(x, y): Returns T iff generalization x is strictly more general ;;;; than generalization y. ;;;; initialize-g: Returns initial set of most general generalizations. ;;;; specialize-against(generalization, Returns a list of minimal specializations of the ;;;; instance) given generalization which do not match the instance. ;;;; generalize-to(generalization, Returns a list of minimal generalizations of the ;;;; instance) given generalization which do match the instance. ;;;; ;;;; A sample set of these functions for nominal feature vectors represented as ordered lists ;;;; is included. A sample data set using this representation is in the file: FIGURE-DATA. (defvar *s* nil "The most specific set of generalizations (S)") (defvar *g* nil "The most general set of generalizations (G)") (defparameter *trace-vs* nil "Produces trace if set to T") (defun train-version-space (examples) (version-space (positive-first examples))) (defun positive-first (examples) ;;; Put positive examples all first (nconc (remove-if-not #'(lambda (example) (eq (first example) '+)) examples) (remove-if-not #'(lambda (example) (eq (first example) '-)) examples))) (defun version-space (examples) ;;; This function takes a list of examples where an example is a list whose first element ;;; is either + or - to indicate a positive or negative example, and whose second element ;;; is a description of that example (an instance). It uses the version-space algorithm to ;;; determine the sets of most specific and most general generalizations consistent with all of ;;; the examples. Returns list (S G) (setf *g* (initialize-g)) (let ((first-pos (assoc '+ examples)) (converged nil)) (setf examples (remove first-pos examples)) (setf *s* (list (second first-pos))) ;initialize S to first positive instance (trace-print *trace-vs* "~%~%Example: ~A" first-pos) (trace-print *trace-vs* "~%S= ~A ~%G= ~A" *s* *g*) ;; process each additional example incrementally (dolist (example examples) (trace-print *trace-vs* "~%~%Example: ~A" example) (cond ((eq (first example) '-) ;; if example is a negative example, ;; then remove any members of S which match its instance and update G (setf *s* (remove (second example) *s* :test #'reverse-match)) (update-g (second example))) ((eq (first example) '+) ;; if example is a positive example ;; then remove any members of G which do not match its instance and update S (setf *g* (remove (second example) *g* :test-not #'reverse-match)) (update-s (second example)))) (trace-print *trace-vs* "~%S= ~A ~%G= ~A" *s* *g*) (cond ((or (null *g*)(null *s*)) ;; if either S or G is empty then concept cannot be described (format t "~%Langauage is insufficient to describe the concept") (return nil)) ((and (eq (length *s*) 1) (eq (length *g*) 1) (equal-generalizations (first *s*)(first *g*)) (null converged)) ;; if the version space has just converged then say so ;; but continue checking remaining examples for consistency (format t "~%~%Convergence. Concept must be: ~A" (format-generalization (first *s*))) (setf converged t)))) (unless converged (if (and *s* *g*) (format t "~%~%Did not converge~%S= ~A~%G= ~A" (mapcar #'format-generalization *s*) (mapcar #'format-generalization *g*))))) (list *s* *g*)) (defun update-g (instance) ;;; Specializes generalizations in G so that none match the given instance for a negative example. ;; For each generalization in G which matches the instance compute minimal specializations which do ;; not match and remove those which are not more general than (or equal to) some element of S. (setf *g* (mapcan #'(lambda (generalization) (if (match generalization instance) (remove-if-not #'(lambda (specialization) (member specialization *s* :test #'(lambda (a b) (or (more-general? a b) (equal-generalizations a b))))) (specializations-against generalization instance)) (list generalization))) *g*)) ;; Remove from G those elements which are more specific than some other element in G (setf *g* (clean-g *g*))) (defun clean-g (g) ;;; Remove from the list of generalizations g, any gens which are more specific or equal to some ;;; other gen in g. (dolist (generalization1 g) (dolist (generalization2 (rest (member generalization1 g))) (cond ((more-specific? generalization2 generalization1) (setf g (remove generalization2 g))) ((or (more-specific? generalization1 generalization2) (equal-generalizations generalization1 generalization2)) (setf g (remove generalization1 g)))))) g) (defun update-s (instance) ;;; Generalizes generalizations in S so that all match the given instance for a positive example ;; For each generalization in S which doesn't match the instance compute minimal generalizations ;; which do match and remove those which are not more specific than (or equal to) some element of G. (setf *s* (mapcan #'(lambda (generalization) (if (not (match generalization instance)) (remove-if-not #'(lambda (generalized) (member generalized *g* :test #'(lambda (a b) (or (more-specific? a b) (equal-generalizations a b))))) (generalizations-to generalization instance)) (list generalization))) *s*)) ;; Remove from S those elements which are more general than some other element of S (setf *s* (clean-s *s*))) (defun clean-s (s) ;;; Remove from the list of generalizations s, any gens which are more general or equal to some ;;; other gen in g. (dolist (generalization1 s) (dolist (generalization2 (rest (member generalization1 s))) (cond ((more-general? generalization2 generalization1) (setf s (remove generalization2 s))) ((or (more-general? generalization1 generalization2) (equal-generalizations generalization1 generalization2)) (setf s (remove generalization1 s)))))) s) (defun reverse-match (instance generalization) ;;; Matches instance to generalization instead of vice-versa (match generalization instance)) (defun more-specific? (a b) ;;; Returns T iff generalization a is strictly more specific than generalization b (more-general? b a)) ;;;;================================================================================================= ;;;; Functions specific to nominal feature vectors represented as ordered lists of feature values. ;;;;================================================================================================= (defparameter *print-with-feature-names* nil "Print out generalizations with feature names") (defun equal-generalizations (x y) ;;; Equivalence function for simple feature vector representation (equal x y)) (defun match (generalization instance) ;;; Match function for a simple feature vector representation where "?" is a wildcard (or (equal generalization instance) (and (or (equal (first generalization)(first instance)) (eq (first generalization) '?)) (match (rest generalization) (rest instance))))) (defun more-general? (x y) ;;; Returns T iff generalization x is strictly more general than generalization y for ;;; a simple feature vector representation. For x to be more general than y, they must match ;;; and x must have a "?" where y has a specific value; however y must never have a "?" where ;;; x has a specific value (cond ((or (null x)(null y)) nil) ((and (eq (first x) '?) (not (eq (first y) '?)) (or (equal (rest x)(rest y)) (more-general? (rest x) (rest y)))) t) ((equal (first x) (first y)) (more-general? (rest x) (rest y))))) (defun initialize-g () ;;; Initialize G to a set containing the all "?" feature vector (list (mapcar #'(lambda (feature) (declare (ignore feature)) '?) *domains*))) (defun generalizations-to (generalization instance) ;;; Generalizes the given generalization in all ways just enough to match the instance. ;;; For simple feature vectors there is only one possible least generalization in which ;;; differing feature values are changed to "?"s (list (if (or (null generalization)(null instance)) nil (cons (if (equal (first generalization)(first instance)) (first generalization) '?) (first (generalizations-to (rest generalization)(rest instance))))))) (defun specializations-against (generalization instance) ;;; Specializes the given generalization in all ways just enough so it doesn't match the instance. ;;; For simple feature vectors, for each "?" in the generalization there is a least specialization ;;; for each possible value in the domain for that feature which is different from the value ;;; in the instance. (do ((gen-rest generalization (rest gen-rest)) (inst-rest instance (rest inst-rest)) (gen-bef nil (append gen-bef (list (first gen-rest)))) (domain-rest *domains* (rest domain-rest)) (specializations nil)) ((or (null gen-rest)(null inst-rest)) specializations) (if (eq (first gen-rest) '?) (setf specializations (append (mapcar #'(lambda (value) (append gen-bef (list value) (rest gen-rest))) (remove (first inst-rest) (first domain-rest))) specializations))))) (defun format-generalization (gen) ;;; Format a generalization into a prettier form for output ;;; ( (feature-name value) ...) for features with constrained values (if *print-with-feature-names* (do ((gen-rest gen (rest gen-rest)) (feature-rest *feature-names* (rest feature-rest)) (formated-gen nil)) ((null gen-rest) (nreverse formated-gen)) (unless (eq (first gen-rest) '?) (push (list (first feature-rest) (first gen-rest)) formated-gen))) gen)) ;;;; ========================================================================================== ;;;; Test and consistency checking functions ;;;; ========================================================================================== (defun test-version-space (example s-g) ;;; Test an exmaple against results of version-space. Considers an example positive if ;;; if it matches the majority of the generalizations in S and G. As a second value ;;; returns the fraction of S and G matching as a 'match score' (let ((count 0) (s (first s-g)) (g (second s-g)) (match-fraction 0)) (dolist (generalization (append s g)) (if (match generalization (second example)) (incf count))) (if (or s g) (setf match-fraction (/ count (+ (length s) (length g))))) (values (if (> match-fraction 0.5) '+ '-) match-fraction))) (defun check-consistency (examples) ;;; Checks the consistency of each generalization in S and G with the given list of examples. ;;; Prints an error message for each error found. Can be used to check correctness after ;;; running version-space on a list of examples. (dolist (generalization (append *s* *g*)) (dolist (example examples) (if (and (eq (first example) '-)(match generalization (second example))) (format t "~%~%Error: ~A matches ~A" generalization example)) (if (and (eq (first example) '+)(not (match generalization (second example)))) (format t "~%~%Error: ~A doesn't match ~A" generalization example))))) ;;;; ========================================================================================== ;;;; The following functions are for multiple category problems ;;;; ========================================================================================== (defun train-multi-version-space (examples) "Version space for multi-category data. Return a list of (category S G)'s for for each category, where (S G) is the version space result for the given category examples as + and all others as -" (dolist (cat *categories*) (setf (get cat 'training-examples) nil)) (dolist (example examples) (push (rest example) (get (first example) 'training-examples))) (mapcar #'(lambda (category) (let ((training-examples (nconc (label-examples category '+) (mapcan #'(lambda (other-category) (label-examples other-category '-)) (remove category *categories*)))) (*categories* '(+ -))) (format t "~%~%Category: ~A" category) (cons category (train-version-space training-examples)))) *categories*)) (defun label-examples (category label) "Relabel the training-examples of a given category with the given class label" (mapcar #'(lambda (inst) (cons label inst)) (get category 'training-examples))) (defun test-multi-version-space (example result-alist) "Test example for matching S-G result of each category and return category with the mighest fraction of matching S-G elements" (let (best-category (max-score 0)) (dolist (result result-alist) (multiple-value-bind (class score) (test-version-space example (rest result)) (when (> score max-score) (setf best-category (first result)) (setf max-score score)))) best-category))