;;; FILE: FOIL.L ;;; PROGRAMMER: John M. Zelle ;;; DATE: 2/1/92 ;;; DESCRIPTION: A LISP implementation of the FOIL (First Order Inductive ;;; Learning) algorithm for learning relational concept definitions. ;;; For backgound on the algorithm, see : ;;; ;;; Quinlan, J. R., "Learning Logical Definitions from Relations," ;;; in Machine Learning, 5, 1990. ;;; ;;; The version presented here is somewhat simplified in that it ;;; uses a much weaker test to constrain recursive predicates ;;; (see the function, RECURSION-FILTER), and it does not incorporate ;;; encoding length restrictions to handle noisy data. There is also ;;; no post-processing of clauses to simplify learned definitions, ;;; although this would be relatively easy to add. ;;; MODIFIED: (2/6/92 JMZ) Cleaned up variable names and added parameter ;;; *use-negated-literals*. (proclaim '(optimize (speed 3))) ;;; REPRESENTATION OF HYPOTHESES ;;; ;;; FOIL hypotheses are represented as a set of function-free Horn ;;; clauses. Since the heads of the clauses of a definition are all ;;; identical, they are not explicitly represented in the working ;;; hypothesis. Instead, the program maintains a list of clause bodies ;;; representing the current concept definition. ;;; Each clause body is a list of antcedents. An empty body corresponds ;;; to the single antecedent, TRUE. An antecedent is a list consisitng ;;; of the predicate name of the antecedent followed by its variables. ;;; A variable is represented by a natural number. Thus, the body ;;; a clause such as: ;;; ;;; list(A) :- components(A,B,C), list(C). ;;; ;;; would be represented as: ;;; ;;; ( (components 0 1 2) (list 2) ) ;;; ;;; Background knowledge is represented as a property list of ;;; predicates with associated sets of tuples which are in ;;; the relation. Examples: (setf list-preds '( list ( (()) ((a)) ((b (a) d)) (((a) d)) ((d)) ) null ( (()) ) components ( ((a) a ()) ((b (a) d) b ((a) d)) (((a) d) (a) (d)) ((d) d ()) ((e.f) e f)))) (setf can-reach-preds '( can-reach ( (0 1) (0 2) (0 3) (0 4) (0 5) (0 6) (0 8) (1 2) (3 2) (3 4) (3 5) (3 6) (3 8) (4 5) (4 6) (4 8) (6 8) (7 6) (7 8) ) linked-to ( (0 1) (0 3) (1 2) (3 2) (3 4) (4 5) (4 6) (6 8) (7 6) (7 8) ))) ;;; FOIL returns a list of clause bodies defining the predicate, ;;; NAME, in terms of the background knowledge in PREDICATES. ;;; The resulting definition covers all of the positive examples ;;; and none of the negative. If negative examples are not ;;; provided, a closed-world assumption is used. ;;; PREDICATES is a property list where each key is the ;;; name of a relation, and the data is a list of lists where ;;; each list represents a "tuple" which is in the relation. NOTE: ;;; The set of positive tuples for the predicate to be learned must ;;; be part of PREDICATES. Try: (foil 'list list-preds) (defparameter *use-negative-literals* nil) (defun foil (target-concept extensional-defs &optional negatives) (let* ((positives (getf extensional-defs target-concept)) (universe (create-universe extensional-defs)) (background-preds (mapcan #'(lambda (x) (if (atom x) (list x))) extensional-defs)) (background-tuples (mapcan #'(lambda(x) (if (consp x) (list x))) extensional-defs)) (positives-to-cover positives) positive-tuples negative-tuples current-clause clauses info-value best-literal) (unless negatives (setf negatives (close-world positives universe))) (format t "~%SETUP COMPLETE -- STARTING INDUCTION~%") ;;; The top-level loop continues until all positive examples have ;;; been covered by some clause in the hypothesis. (loop (if (null positives-to-cover) (return clauses)) (setf current-clause nil) (setf positive-tuples positives-to-cover) (setf negative-tuples negatives) ;;; Add antecedents to the current clause until it covers no ;;; negative examples. (loop (if (null negative-tuples) (return)) (format t "~%CURRENT CLAUSE: ~A~%STILL COVERS NEGATIVE~%" current-clause) ;;; Find the literal whose addition produces most info gain (setf info-value (compute-info-value (length positive-tuples) (length negative-tuples))) (setf best-literal (find-best-literal target-concept background-preds background-tuples positive-tuples negative-tuples info-value)) (push best-literal current-clause) (format t "~%~%BEST ANTECENDENCT FOUND: ~a" best-literal) ;;; Modify the psoitive and negative tuples to reflect the ;;; newly extended current-clause. (setf positive-tuples (extend-tuples-with-literal positive-tuples extensional-defs best-literal)) (setf negative-tuples (extend-tuples-with-literal negative-tuples extensional-defs best-literal))) ;;; Add the new clause to the hypothesis, and calculate the positive ;;; examples yet to be covered. (push (nreverse current-clause) clauses) (format t "~%~%>>>>>FOUND CLAUSE: ~a" (car clauses)) (setf positives-to-cover (set-difference positives-to-cover positive-tuples :test #'subtuplep))))) ;;;--------------------------------------------------------------------------- ;;; These functions implement a "closed-world" convention for generating ;;; negative examples. ;;;--------------------------------------------------------------------------- ;;; return a list containing all terms that appear in a list of tuples (defun collect-terms-from-tuples (tuple-list) (delete-duplicates (mapcan #'(lambda (x) x) (copy-tree tuple-list)) :test #'equal)) ;;; return a list of all terms that appear in tuples of the extensional ;;; definitions, EXT-DEFS (defun create-universe (ext-defs) (delete-duplicates (mapcan #'(lambda (X) (if (consp x) (collect-terms-from-tuples x))) ext-defs) :test #'equal)) ;;; return all the tuples of LENGTH arguments which can be constructed ;;; using terms from the set CONSTANTS. (defun all-tuples (constants length) (if (zerop length) (list nil) (let ((one-less-list (all-tuples constants (1- length)))) (mapcan #'(lambda (short-tuple) (mapcar #'(lambda (c) (cons c short-tuple)) constants)) one-less-list)))) ;;; returns a list of all tuples constructable from terms in UNIVERSE ;;; which are not in TUPLES. (defun close-world (tuples universe) (set-difference (all-tuples universe (length (car tuples))) tuples :test #'equal)) ;;;--------------------------------------------------------------------------- ;;; Functions for finding a useful antecedent. These are the real heart ;;; of the hill-climbing search used by FOIL. ;;;--------------------------------------------------------------------------- ;;; FIND-BEST-LITERAL searches the space of all "reasonable" literals which ;;; can be constructed from the background predicates to find the one which ;;; yields maximum information gain. A literal is "reasonable" if it shares ;;; at least one variable with the tuples constructed so far, and does not ;;; lead to infinite or vacuous recursion. TARGET-CONCEPT is the ;;; name of the concepte whose definition is being learned. It must be known ;;; to apply special restrictions on recursive literals. ANTE-NAMES ;;; is a list of the names of the background predicates. EXT-DEFS ;;; is a corresponding list of extensional definitions (tuple lists). ;;; POSITIVE-TUPLES is a list of tuples which are extensions of positive ;;; examples which are covered by the current clause. That is, each tuple ;;; is a list of ground substitutions for the variables in the current ;;; clause. The order of the tuple is crucial, position 0 represents the ;;; value of the first variable, position 1, the second, etc. The number ;;; of "old" variables in the current clause so far is therefore the length ;;; of each tuple. NEGATIVE-TUPLES is the list of extensions of negative ;;; examples covered by the current clause. INFO-VALUE is the current ;;; information measure for the current tuples. ;;; The function returns two values, the best antecedent, and the gain. ;;; NOTE: This code is somewhat ugly, but relatively efficient. (defun find-best-literal (target-concept ante-names ext-defs positive-tuples negative-tuples info-value) (let ((max-gain 0) (old-var-count (length (first positive-tuples))) best-literal info-gain potential-gain-by-specializing) ;;; Try all possible unnegated literals (do* ((definitions ext-defs (cdr definitions)) (ext-def (car definitions) (car definitions)) (name-list ante-names (cdr name-list)) (ante-name (car name-list) (car name-list)) (arity (length (car ext-def)) (length (car ext-def))) (all-bindings (generate-variablizations old-var-count arity) (generate-variablizations old-var-count arity)) (prune-list nil)) ((null definitions)) (format t "~% TRYING PREDICATE: ~a..." ante-name) (if (eq ante-name target-concept) (setf all-bindings (recursion-filter old-var-count all-bindings))) (setf prune-list nil) ;;; Tuples whose specializations can't win. (dolist (binding all-bindings) ;;; Test if this binding is known to be futile (unless (member binding prune-list :test #'(lambda (x y) (same-old-vars x y old-var-count))) (multiple-value-setq (info-gain potential-gain-by-specializing) (compute-info-gain info-value positive-tuples negative-tuples ext-def binding)) (when (> info-gain max-gain) (setf max-gain info-gain) (setf best-literal (cons ante-name binding)) (format t "~% BEST SO FAR: ~a ~a" best-literal max-gain)) (if (< potential-gain-by-specializing max-gain) (push binding prune-list))))) ;;; This loop is just the same, for negated literals (when *use-negative-literals* (do* ((definitions ext-defs (cdr definitions)) (ext-def (car definitions) (car definitions)) (name-list ante-names (cdr name-list)) (ante-name (car name-list) (car name-list)) (arity (length (car ext-def)) (length (car ext-def))) (all-bindings (generate-variablizations old-var-count arity) (generate-variablizations old-var-count arity)) ; (prune-list nil) ) ((null definitions)) (format t "~% TRYING PREDICATE: NOT ~a..." ante-name) (if (eq ante-name target-concept) (setf all-bindings (recursion-filter old-var-count all-bindings))) ; (setf prune-list nil) (dolist (binding all-bindings) ; (unless (member binding prune-list ; :test #'(lambda (x y) ; (same-old-vars x y old-var-count))) (multiple-value-setq (info-gain potential-gain-by-specializing) (compute-negative-info-gain info-value positive-tuples negative-tuples ext-def binding)) (when (> info-gain max-gain) (setf max-gain info-gain) (setf best-literal (list 'not (cons ante-name binding))) (format t "~% BEST SO FAR: ~a ~a" best-literal max-gain)) ; (if (< potential-gain-by-specializing max-gain) ; (push binding prune-list))) ))) (values best-literal max-gain))) ;;; GENERATE-VARIABLIZATIONS is a somewhat subtle function which ;;; returns a list of possible bindings for a new antecedent of arity, SIZE ;;; given that the tuples currently have OLD-VAR-COUNT variables. Each ;;; binding is a list of numbers corresponding to a way in which vars ;;; may be assigned in the new antecedent. Recall, numbers less than ;;; old-var-count represent existing variables, and numbers equal or ;;; greater are new (unbound) vars. ;;; NOTE: The order in which bindings are returned is crucial to the ;;; pruning of the search space. Tuples having identical bindings to ;;; old variables are returned with the most general binding earliest ;;; in the list. (defun generate-variablizations (old-var-count size) (let (count possible-vars result) ;;; First generate a list containing the numbers corresponding to ;;; the existing variables and -1 for a new variable "marker" (dotimes (i (1+ old-var-count) t) (push (- old-var-count i 1) possible-vars)) ;;; Now generate all possible variations of old variables with ;;; -1 marking places for new variables. NOTE the "cdr" serves ;;; to remove the binding having all new variables. (setf result (cdr (all-tuples possible-vars size))) ;;; Replace the new variable markers with independent new ;;; variable numbers. Each binding which introduces new ;;; variables is now the most general case in that each ;;; new variable is independent. (dolist (v-ation result result) (setf count old-var-count) (do ((ptr v-ation (cdr ptr))) ((null ptr) result) (when (equal (car ptr) -1) (setf (car ptr) count) (incf count)))) ;;; Finally, expand each binding containing new variables into ;;; into a set of bindings representing all possible dependencies ;;; among the new var positions. (mapcan #'(lambda (binding) (expand-on-new-vars binding old-var-count)) result))) ;;; EXPAND-ON-NEW-VARS ;;; Given a binding (a list of naturals) and the number of the "first" ;;; new variable, this function returns the list of all bindings which ;;; can be formed by specialization which unifies some of the new vars. (defun expand-on-new-vars (binding new-var) (let* ((new-var-list (mapcan #'(lambda (x) (if (>= x new-var) (list x))) binding)) (partitions (all-partitions new-var-list))) (if new-var-list (mapcar #'(lambda (partition) (do* ((new-binding (copy-list binding)) (ptr new-binding (cdr ptr))) ((null ptr) new-binding) (unless (< (car ptr) new-var) (setf (car ptr) (find-var-number (car ptr) partition new-var))))) partitions) (list binding)))) ;;; The next three functions, EXTEND-PARTITION, ALL-PARTITIONS, and ;;; FIND-VAR-NUMBER are used to calculate all the possible paritionings ;;; among new variables. ;;; A partition is a list of lists which represent equivalence classes. ;;; Given a parition and a new element, generate a list of all the new ;;; partitions that could be constructed by making the element a singleton ;;; subset, or adding it to one of the existing equivalence classes. (defun extend-partition (value partition) (cons (cons (list value) partition) (mapcar #'(lambda (subset) (substitute (cons value subset) subset partition :test #'equal :count 1)) partition))) ;;; Given a list of numbers return a list of all possible paritionings ;;; of the list into equivalence classes. (defun all-partitions (set) (if (null (cdr set)) (list (list set)) (mapcan #'(lambda (partition) (extend-partition (car set) partition)) (all-partitions (cdr set))))) ;;; Find the class in which var occurs in partition, and return a ;;; new variable name based on start-count representing all of ;;; the variables falling in the first class, start-count + 1 the ;;; second, etc. (defun find-var-number (var partition start-count) (dolist (subset partition) (if (member var subset :test #'eql) (return start-count) (incf start-count)))) ;;;--------------------------------------------------------------------------- ;;; OPERATIONS ON TUPLES ;;;--------------------------------------------------------------------------- ;;; if EXTUPLE can extend TUPLE under the constraints of binding, return ;;; the extension, o/w NIL. e.g. (a b c) (1 3) (b f) returns (a b c f) ;;; since the 1th position of (a b c) contains "b" and the tuples are ;;; therefore compatible. Binding forces corresponding positions ;;; in exttuple to match tuple. (defun extend-tuple-with-tuple (tuple binding exttuple) (do ((var-positions binding (cdr var-positions)) (values exttuple (cdr values)) (first-new-var (length tuple)) (new-binding)) ( (null var-positions) (append tuple (reverse new-binding)) ) (if (< (car var-positions) first-new-var) (unless (equal (nth (car var-positions) tuple) (car values)) (return nil)) (push (car values) new-binding)))) ;;; return a list of all possible extensions of tuple with the tuples of ;;; EXT-DEFINITION under the constraints imposed by BINDING. (defun all-extensions (tuple binding ext-definition) (delete-duplicates (mapcan #'(lambda (x) (let ((extension (extend-tuple-with-tuple tuple binding x))) (if extension (list extension)))) ext-definition))) ;;; COMPUTE-INFO-VALUE returns the information content of a positive ;;; signal for a set containing PLUS-COUNT positives and NEG-COUNT ;;; negatives. The goofy constant is the binary log conversion. (defun compute-info-value (plus-count neg-count) (- (* (log (/ plus-count (+ plus-count neg-count))) 1.4426950408889634))) ;;; COMPUTE-INFO-GAIN ;;; returns two values, the information gained by using this binding of ;;; the relation defined by ext-definition, and the maximum gain ;;; potentially achievable by specializations of this binding. (defun compute-info-gain (current-info-value pos-tuples neg-tuples ext-definition binding) (let (new-pos-count new-neg-count pos-retained) (multiple-value-setq (new-pos-count pos-retained) (count-extensions pos-tuples ext-definition binding)) (if (zerop pos-retained) (return-from compute-info-gain (values 0 0))) (setq new-neg-count (count-extensions neg-tuples ext-definition binding)) (values (* pos-retained (- current-info-value (compute-info-value new-pos-count new-neg-count))) (* pos-retained current-info-value)))) ;;; COMPUTE-NEGATIVE-INFO-GAIN (same as above, but for negated antecedent) (defun compute-negative-info-gain (current-info-value pos-tuples neg-tuples ext-definition binding) (let (new-pos-count new-neg-count) (setf new-pos-count (count-unextendable pos-tuples ext-definition binding)) (if (zerop new-pos-count) (return-from compute-negative-info-gain (values 0 0))) (setf new-neg-count (count-unextendable neg-tuples ext-definition binding)) (values (* new-pos-count (- current-info-value (compute-info-value new-pos-count new-neg-count))) (* current-info-value new-pos-count)))) ;;; COUNT-EXTENSIONS returns two values, the number of tuples ;;; in the set which results from expanding each tuple in TUPLES ;;; with all compatible tuples from EXT-DEFINITION under the restriction ;;; imposed by binding, and the number of tuples which have any extension. (defun count-extensions (tuples ext-definition binding) (let ((sum 0) (retained 0) temp-count) (dolist (tuple tuples (values sum retained)) (setf temp-count (count-compatible-tuples tuple ext-definition binding)) (when (not (zerop temp-count)) (incf retained) (incf sum temp-count))))) ;;; COUNT-COMPATIBLE-TUPLES returns the number of tuples in EXT-DEFINITION ;;; which can extend TUPLE under the constraints of BINDING (defun count-compatible-tuples (tuple ext-definition binding) (let ((sum 0)) (dolist (cand ext-definition sum) (if (compatible-extension tuple cand binding) (incf sum))))) ;;; COMPATIBLE-EXTENSION returns TRUE if the tuples, TUPLE and ;;; EXTENDER are compatible under BINDING. (defun compatible-extension (tuple extender binding) (do* ((old-vars (length tuple)) (ext-ptr extender (cdr ext-ptr)) (bind-ptr binding (cdr bind-ptr)) (current-var (car bind-ptr) (car bind-ptr))) ((null ext-ptr) t) (if (and (< current-var old-vars) (not (equal (nth current-var tuple) (car ext-ptr)))) (return nil)))) ;;; COUNT-UNEXTENDABLE returns the number of tuples in TUPLES which ;;; have no extension in EXT-DEFINITION compatible under the restrictions ;;; imposed by BINDING (defun count-unextendable (tuples ext-definition binding) (let ((sum 0)) (dolist (tuple tuples sum) (unless (extendablep tuple ext-definition binding) (incf sum))))) ;;; EXTENABLEP returns if there is a tuple in EXT-DEFINITION compatible with ;;; tuple under BINDING. (defun extendablep (tuple ext-definition binding) (dolist (ext ext-definition nil) (if (compatible-extension tuple ext binding) (return t)))) ;;; SUBTUPLEP returns true if the tuple (PREFIX) is a prefix of TUPLE. ;;; i.e. TUPLE is an extension of prefix. (defun subtuplep (prefix tuple) (do ((pre-ptr prefix (cdr pre-ptr)) (tup-ptr tuple (cdr tup-ptr))) ((null pre-ptr) t) (unless (equal (car pre-ptr) (car tup-ptr)) (return nil)))) ;;; EXTEND-TUPLES-WITH-LITERAL returns a list of the extended ;;; tuples from TUPLES which are compatible with LITERAL given the ;;; background information in EXT-DEFINITIONS. (defun extend-tuples-with-literal (tuples ext-definitions literal) (if (eq (car literal) 'not) (let* ((pure-literal (second literal)) (extuples (getf ext-definitions (car pure-literal))) (binding (cdr pure-literal))) (remove-if #'(lambda (x) (extendablep x extuples binding)) tuples)) (let ((exttuples (getf ext-definitions (car literal))) (binding (cdr literal))) (mapcan #'(lambda(tuple) (all-extensions tuple binding exttuples)) tuples)))) ;;;--------------------------------------------------------------------------- ;;; Operations on bindings. ;;;--------------------------------------------------------------------------- ;;; RECURSION-FILTER return a list of those bindings in BINDINGS which are ;;; "safe" for a recursive antecedent. NOTE: Currently, the safety is very ;;; weak. The function only insures that a recursive call contains variables ;;; not contained in the head of the clause, and introduces no new vars. (defun recursion-filter (new-var bindings) (delete-if #'(lambda (binding) (let ((max-var (apply #'max binding))) (or (>= max-var new-var) ;;; contains brand-new vars (< max-var (length (first bindings)))))) ;;; contains only orig bindings)) ;;; SAME-OLD-VARS returns true if both bindings have all occurrences of ;;; vars numbered less than NEW-VAR in the exact same position. NOTE: ;;; this guarantees that one of the bindings is a specialization of the ;;; other since relations among new vars can only restrict the set of ;;; tuples covered by a literal. (defun same-old-vars (binding1 binding2 new-var) (do* ((ptr1 binding1 (cdr ptr1)) (ptr2 binding2 (cdr ptr2)) (var1 (car ptr1) (car ptr1)) (var2 (car ptr2) (car ptr2))) ((null ptr1) t) (if (and (not (eql var1 var2)) (or (< var1 new-var) (< var2 new-var))) (return nil))))