;;;; BACKPROP.LISP  -   A parallel distributed processing (PDP)
;;;;               feed-forward system with one layer of hidden units.
;;;;               The generalized delta rule is used for training
;;;;               and the logistic function is used for activation.
;;;;               (It is assumed that each node's output function
;;;;               is the identity operator.)  For more details,
;;;;               see chapter 8 of PDP1, especially pp. 322-335.

;;;; Copyright (C) 1988 by Jude William Shavlik and Raymond Joseph Mooney
;;;;       This program may be freely copied, used, or 
;;;;       modified provided that this copyright notice 
;;;;       is included in each copy of this code and parts thereof.


;;;; --------------------------------------------------------------------------------------
;;;;                       Global Variables
;;;; --------------------------------------------------------------------------------------

(defvar *eta*              0.25 "Parameter that determines the learning rate.")
(defvar *alpha*            0.90 "Scale factor for the momentum term.")
(defvar *initial-wt-limit* 0.3  "The maximum magnitude of the initial wts.")
(defvar *epsilon*          0.1 "Acceptable error for correct output vector.")
(defvar *output-epsilon*   0.2 "Binary output is 1 if real output is this close to 1")
(defvar *trace-backprop*    nil "Report i/o behavior periodically?")
(defvar *network*           nil "The saved learned network")
(defvar *graph-on*          nil   "Produces graph of the output")
(defvar *graph-window*      nil "The current graph window")
(defvar *most-positive-exponent* (log most-positive-long-float) 
                                "Used to prevent exponentiation overflow.")
(defvar *most-negative-exponent* (log long-float-epsilon)
                                "Used to prevent exponentiation underflow.")

(defmacro trace-print (test-var &rest format-form)
  "Conditional trace output: expand into a FORMAT call to standard
output that is executed only when TEST-VAR evaluates to non-NIL."
  `(when ,test-var
     (format t ,@format-form)))

(defstruct (network (:print-function network-printer))
  ;;; A PDP network, with one layer of hidden units.
  ;;; Input units fully connect to all of the hidden units and the hidden
  ;;; units fully connect to all of the output units.
  ;;; There are no direct connections between input and output units.
  ;;; Besides recording the network weights for inter-node links,
  ;;; the biases, activations, and error signals of nodes are recorded.
  ;;; In addition, the most recent changes (deltas) in the weights and
  ;;; biases are recorded.  These are used to compute a "momentum" term
  ;;; when adjusting weights and biases.  Empirically, networks often
  ;;; converge faster when the momentum term is used to "smooth" weight changes.
  ;;; (Arrays are built by BUILD-NETWORK; dimensions noted below.)
    input-to-hidden-wts      ; 2-D array [input x hidden] of connection weights
    hidden-to-output-wts     ; 2-D array [hidden x output] of connection weights
    input-to-hidden-deltas   ; most recent change to each input->hidden weight
    hidden-to-output-deltas  ; most recent change to each hidden->output weight
    hidden-unit-biases       ; 1-D array of hidden-unit bias terms
    output-unit-biases       ; 1-D array of output-unit bias terms
    hidden-bias-deltas       ; most recent change to each hidden bias
    output-bias-deltas       ; most recent change to each output bias
    hidden-activations       ; current activation level of each hidden unit
    output-activations       ; current activation level of each output unit
    hidden-unit-errors       ; back-propagated error signal of each hidden unit
    output-unit-errors)      ; error signal of each output unit

;;;; --------------------------------------------------------------------------------------
;;;;                      THE MAIN FUNCTIONS
;;;; --------------------------------------------------------------------------------------

(defun backprop-m (examples &optional short-encode-flag (hidden-units 4) randomize-examples? 
                               (report-freq 10) (max-cycles 1000000))
  ;;; Backprop for examples with multi-valued features.  The examples are
  ;;; first encoded as binary strings and then handed to BACKPROP.  When
  ;;; SHORT-ENCODE-FLAG is set, log(n) bits encode each feature value;
  ;;; otherwise one bit per feature value is used.  See BACKPROP for the
  ;;; meaning of the remaining arguments.
  (format t "~%Examples: ~A" examples)
  (let* ((bit-examples (convert-to-bits examples short-encode-flag))
         (formatted    (backprop-example-form bit-examples)))
    (backprop formatted hidden-units randomize-examples? report-freq max-cycles)))


(defun backprop-example-form (examples)
  ;;; Translate examples tagged with + or - into the ((output) . input)
  ;;; form needed by BACKPROP: class + becomes output (1), any other
  ;;; class becomes output (0).
  (loop for example in examples
        collect (cons (if (eq (first example) '+) '(1) '(0))
                      (second example))))


(defun backprop (examples &optional (hidden-units 4) randomize-examples? 
                               (report-freq 10) (max-cycles 1000000)
                     &aux network numb-correct (last-correctness 0)
                          (cycle 0)
                          (numb-of-examples (length examples))
                          (input-units  (length (rest  (first examples))))
                          (output-units (length (first (first examples)))))
  ;;; Train a PDP network with these example i/o pairs.  The CAR of
  ;;; each example is the desired output when the CDR is the input.  (The
  ;;; CAR must be a list, i.e. ((1) 0 1 0) is a properly formatted example.)
  ;;; The number of hidden units desired can be specified - the number of
  ;;; input and output units is determined by the form of each example.
  ;;; If RANDOMIZE-EXAMPLES? is set, during each training cycle the training
  ;;; examples are randomly mixed-up.  Training stops when the network correctly
  ;;; learns the training examples or the maximum number of cycles is reached.
  ;;; The REPORT-FREQ variable specifies how often the system reports its performance.
  ;;; Returns the trained network; it is also saved in *NETWORK*.
    (format t "~%BACKPROP program initializing ... ~%~%")
    ;; The final T asks BUILD-NETWORK for randomized initial weights.
    (setf network (build-network  input-units hidden-units output-units t))
    (setf *network* network)
    (if *graph-on*
	(setf *graph-window* (make-graph-window (format nil "% Correct vs. cycle #: eta=~,3F" *eta*))))
    (loop
     (incf cycle)
     ;; Print the cycle number; start a fresh line on reporting cycles.
     (if (zerop (mod cycle report-freq))
         (format t "~%Cycle ~A~%" cycle)
       (format t " ~A" cycle))
     (setf numb-correct 0) 
  ;;; In each cycle, go through all of the training examples, adjusting weights
  ;;; after each example is presented.  Count the number of correct classifications
  ;;; and report the percentage correct.  If requested (by the *trace-backprop* flag),
  ;;; show the results on the training examples.  To speed things up, only report
  ;;; results if the current cycle is a multiple of the reporting frequency.
     (dolist (example (if randomize-examples? (mix-up examples) examples))
       (let ((input-vector   (rest  example))
	      (desired-output (first example)))
         ;; Activate the current network, given this input.
         (activate-network network input-vector input-units hidden-units output-units)
         (let ((correct? (output-correct? desired-output network output-units)))
	   (when (and *trace-backprop* (zerop (mod cycle (* 2 report-freq))))
	     ;;; Every other time performance reporting occurs, report i/o behavior.
	     (report-current-results network input-vector 
				     desired-output output-units correct?))
           ;; Weights are adjusted only when the example was misclassified.
           (if correct?
	       (incf numb-correct)
	       (perform-back-propagation network input-vector desired-output 
					 input-units hidden-units output-units)))))
     (when (zerop (mod cycle report-freq))
         ;; If this is a "special" cycle, report the current results of the network.
         (format t "Percentage correct = ~5,3F%~%" (* 100 (/ numb-correct numb-of-examples)))
	 (when *graph-on* (plot-graph-line (* 1 (- cycle report-freq)) (* 400 last-correctness) (* 1 cycle)
				       (* 400 (/ numb-correct numb-of-examples))
				       *graph-window*)
	   (setf last-correctness (/ numb-correct numb-of-examples))))
     (if (or (= numb-correct numb-of-examples) (>= cycle max-cycles))
         (return))) ; All classifications correct or "enough" training, so stop.
    (format t "~%~%There were ~A training cycles.~%~%" cycle)
    network)

(defun activate-network (network input-vector 
                         &optional (input-units  (length input-vector))
                                   (hidden-units (length (network-hidden-unit-biases network)))
                                   (output-units (length (network-output-unit-biases network))))
  ;;; Propagate INPUT-VECTOR forward through NETWORK, storing the
  ;;; resulting hidden- and output-unit activations in the network
  ;;; structure.  The unit counts may be supplied for efficiency to
  ;;; avoid recomputing them.
  (dotimes (h hidden-units)
    ;; Each hidden unit's net input is the weighted sum over all input units.
    (let ((net-input 0))
      (dotimes (i input-units)
        (incf net-input (* (elt input-vector i)
                           (aref (network-input-to-hidden-wts network) i h))))
      (setf (aref (network-hidden-activations network) h)
            (logistic-activation-function
             net-input
             (aref (network-hidden-unit-biases network) h)))))
  (dotimes (o output-units)
    ;; Each output unit's net input is the weighted sum over all hidden units.
    (let ((net-input 0))
      (dotimes (h hidden-units)
        (incf net-input (* (aref (network-hidden-activations   network) h)
                           (aref (network-hidden-to-output-wts network) h o))))
      (setf (aref (network-output-activations network) o)
            (logistic-activation-function
             net-input
             (aref (network-output-unit-biases network) o))))))


(defun perform-back-propagation (network input-vector desired-output-vector
                         &optional (input-units  (length input-vector))
                                   (hidden-units (length (network-hidden-unit-biases network)))
                                   (output-units (length (network-output-unit-biases network))))
  ;;; Perform back-propagation to modify the network's weights in order to improve
  ;;; future performance.  It is assumed that the LOGISTIC activation function
  ;;; is used (see page 329 of PDP1).  For "smoothing", a momentum term is included.
  ;;; The momentum can be cancelled by setting *alpha* to zero.
  ;;; ASSUMES the network was just activated on INPUT-VECTOR (via
  ;;; ACTIVATE-NETWORK), since the stored activations are read below.
  ;;    First, determine the error signals.
   (dotimes (o output-units)  ; Determine the errors of the output units.
     ;; delta_o = (target - actual) * actual * (1 - actual); the last two
     ;; factors are the derivative of the logistic function.
     (setf (aref (network-output-unit-errors network) o)
        (let ((actual-output (aref (network-output-activations network) o)))
           (* (- (elt desired-output-vector o) actual-output)
              actual-output (- 1 actual-output)))))
   (dotimes (h hidden-units)  ; Determine the errors of the hidden units.
     ;; delta_h = act_h * (1 - act_h) * sum_o (delta_o * w_ho) --
     ;; output errors propagated back through the hidden-to-output weights.
     (setf (aref (network-hidden-unit-errors network) h)
        (let ((hidden-unit-activation (aref (network-hidden-activations network) h))
              (sum 0))
          (* hidden-unit-activation (- 1 hidden-unit-activation) 
              (dotimes (o output-units sum)
                 (incf sum (* (aref (network-output-unit-errors network) o)
                              (aref (network-hidden-to-output-wts  network) h o))))))))
  ;;    Reset the weights in the network.
   (dotimes (h hidden-units)
     (dotimes (o output-units) ; Update the weights from the hidden to the output units.
       (let ((delta (delta-rule (aref (network-output-unit-errors      network) o)
                                (aref (network-hidden-activations      network) h)
                                (aref (network-hidden-to-output-deltas network) h o))))
         ; Remember the weight momentum and update the weight.
         (setf (aref (network-hidden-to-output-deltas network) h o) delta)
         (incf (aref (network-hidden-to-output-wts    network) h o) delta)))
     (dotimes (i input-units) ; Update the weights from the input to the hidden units.
       (let ((delta (delta-rule (aref (network-hidden-unit-errors     network) h)
                                (elt input-vector i)
                                (aref (network-input-to-hidden-deltas network) i h))))
         (setf (aref (network-input-to-hidden-deltas network) i h) delta)
	 (incf (aref (network-input-to-hidden-wts    network) i h) delta))))
  ;;    Reset the biases in the network.
   (dotimes (o output-units) ; Update each output unit's bias
     ;; A bias acts like a weight from a unit whose activation is always 1.
     (let ((delta (delta-rule (aref (network-output-unit-errors network) o)
                              1
                              (aref (network-output-bias-deltas network) o))))
         ; Remember the bias momentum and update the bias.
       (setf (aref (network-output-bias-deltas network) o) delta)
       (incf (aref (network-output-unit-biases network) o) delta)))
   (dotimes (h hidden-units) ; Update each hidden unit's bias.
     (let ((delta (delta-rule (aref (network-hidden-unit-errors network) h)
                              1
                              (aref (network-hidden-bias-deltas network) h))))
       (setf (aref (network-hidden-bias-deltas network) h) delta)
       (incf (aref (network-hidden-unit-biases network) h) delta))))


;;;; --------------------------------------------------------------------------------------
;;;;                               UTILITY FUNCTIONS
;;;; --------------------------------------------------------------------------------------

(defun delta-rule (error activation previous-delta)
  ;;; Weight change given by the generalized delta learning rule: a
  ;;; learning-rate term *ETA* * ERROR * ACTIVATION, plus a momentum
  ;;; term *ALPHA* * PREVIOUS-DELTA to reduce the chance of oscillations.
  (let ((learning-term (* *eta* error activation))
        (momentum-term (* *alpha* previous-delta)))
    (+ learning-term momentum-term)))

(defun logistic-activation-function (total-weighted-input bias)
  ;;; The logistic (sigmoid) activation: 1 / (1 + e^-(input + bias)).
  ;;; A continuous, non-linear activation function is required: continuous
  ;;; for the generalized delta rule, non-linear for hidden units to be
  ;;; beneficial.  See equation 15 on page 329 of PDP1.  Exponentiation is
  ;;; guarded against overflow/underflow.
  (let ((net-input (+ total-weighted-input bias)))
    (/ 1 (+ 1 (guarded-exp (- net-input))))))

(defun guarded-exp (x)
  ;;; EXP with clamping: arguments beyond the representable exponent
  ;;; range yield the nearest representable extreme instead of signalling
  ;;; a floating-point overflow or underflow.
  (cond ((>= x *most-positive-exponent*) most-positive-long-float)
        ((<= x *most-negative-exponent*) long-float-epsilon)
        (t (exp x))))

(defun build-network (input-units hidden-units output-units &optional randomize?)
  ;;; Construct a "feed-forward" PDP network with this many input, hidden,
  ;;; and output units.  Each hidden unit is connected to each input unit
  ;;; and each output unit to each hidden unit; there are no direct
  ;;; input-to-output connections.  When RANDOMIZE? is set, the weights
  ;;; and biases are given small random initial values.
  (flet ((vector-of (size &optional random?)
           (build-pdp-array (list size) random?))
         (matrix-of (rows cols &optional random?)
           (build-pdp-array (list rows cols) random?)))
    (make-network
     :output-unit-biases      (vector-of output-units randomize?)
     :hidden-unit-biases      (vector-of hidden-units randomize?)
     :output-bias-deltas      (vector-of output-units)
     :hidden-bias-deltas      (vector-of hidden-units)
     :output-activations      (vector-of output-units)
     :hidden-activations      (vector-of hidden-units)
     :output-unit-errors      (vector-of output-units)
     :hidden-unit-errors      (vector-of hidden-units)
     :hidden-to-output-wts    (matrix-of hidden-units output-units randomize?)
     :input-to-hidden-wts     (matrix-of input-units  hidden-units randomize?)
     :hidden-to-output-deltas (matrix-of hidden-units output-units)
     :input-to-hidden-deltas  (matrix-of input-units  hidden-units))))


(defun build-pdp-array (dimensions &optional randomize?)
  ;;; Create a long-float array of the given DIMENSIONS.  When RANDOMIZE?
  ;;; is non-NIL the elements are filled with random initial weights;
  ;;; otherwise every element is zero.
  (apply #'make-array dimensions
         :element-type 'long-float
         (if randomize?
             (list :initial-contents (make-random-array-contents dimensions))
             (list :initial-element (coerce 0 'long-float)))))

(defun make-random-array-contents (dimensions)
  ;;; Build nested lists describing an array of the given DIMENSIONS,
  ;;; with each element a random long-float in
  ;;; [-*initial-wt-limit*, +*initial-wt-limit*].  With no dimensions
  ;;; left, produce a single random scalar (the recursion's base case).
  (if (null dimensions)
      (coerce (random-interval (- *initial-wt-limit*) *initial-wt-limit*)
              'long-float)
      (let ((contents nil))
        (dotimes (i (first dimensions))
          (push (make-random-array-contents (rest dimensions)) contents))
        contents)))


(defun output-correct? (desired-output network output-units)
  ;;; Return T iff every output activation is within *EPSILON* of its
  ;;; desired value.
  (loop for o below output-units
        always (< (abs (- (elt desired-output o)
                          (aref (network-output-activations network) o)))
                  *epsilon*)))


(defun report-current-results (network input-vector desired-output output-units correct?)
  ;;; Show the input, the desired output, and the network's actual output
  ;;; activations; incorrect classifications are marked with an X.
  (format t " Input=~A~%  Desired Output=~A~%  Actual  Output=("
          input-vector desired-output)
  (loop for o below output-units
        do (format t " ~9,8F" (aref (network-output-activations network) o)))
  (if correct?
      (format t ")~%")
      (format t ")   X~%")))


(defun compute-network-values (network instance)
  ;;; Feed INSTANCE through NETWORK and return the outputs as a list of
  ;;; binary values: 1 when an activation is within *OUTPUT-EPSILON* of 1,
  ;;; else 0.
  (activate-network network instance)
  (loop for o below (length (network-output-unit-biases network))
        collect (if (< (- 1 (aref (network-output-activations network) o))
                       *output-epsilon*)
                    1
                    0)))


(defun network-printer (network stream depth)
  ;; Print this PDP network in an understandable format: each output
  ;; unit's bias and incoming weights, then each hidden unit's bias and
  ;; incoming weights.  Installed as the structure's :PRINT-FUNCTION.
  (declare (ignore depth))
  (let ((input-units  (first (array-dimensions (network-input-to-hidden-wts network))))
        (hidden-units (length (network-hidden-unit-biases network)))
        (output-units (length (network-output-unit-biases network))))
    (format stream "~%Network Contents~%")
    (dotimes (o output-units)
      (format stream "~% Output Unit ~A  -  Bias =  ~9,5F~%"
              o (aref (network-output-unit-biases network) o))
      (dotimes (h hidden-units)
        (format stream "   Wt from hidden unit ~A = ~9,5F~%"
                h (aref (network-hidden-to-output-wts network) h o))))
    (dotimes (h hidden-units)
      (format stream "~% Hidden Unit ~A  -  Bias =  ~9,5F~%"
              h (aref (network-hidden-unit-biases network) h))
      (dotimes (i input-units)
        (format stream "   Wt from input unit ~A  = ~9,5F~%"
                i (aref (network-input-to-hidden-wts network) i h))))))


(defun test-all-instances (&optional short-encode-flag)
  ;;; For each instance in the feature space compute and report the
  ;;; classification resulting from the learned *network*.
  ;;; Each instance is bit-encoded (log(n)-bit form when SHORT-ENCODE-FLAG
  ;;; is set, 1-of-n form otherwise) before being fed to the network;
  ;;; instances classified as (1) are reported as +, all others as -.
  ;;; NOTE(review): relies on *DOMAINS* and the encoding helpers
  ;;; (SHORT-ENCODE-DOMAIN, LONG-ENCODE-DOMAIN, ENCODE-INSTANCE) defined
  ;;; elsewhere -- the encoding must match the one used during training.
  (let ((encoding-alists (mapcar #'(lambda (domain) (if short-encode-flag
							(short-encode-domain domain)
							(long-encode-domain domain)))
				 *domains*)))
    (dolist (instance (all-possible-instances))
      (format t "~%~A is ~A" instance
	      (if (equal (compute-network-values *network*
						 (encode-instance instance
								  encoding-alists))
		      '(1))
		  '+ '-)))))


(defun all-possible-instances (&optional (domains *domains*))
  ;;; Enumerate every instance in the feature space defined by DOMAINS:
  ;;; the cartesian product of the domain value lists, in order.
  (if (rest domains)
      (mapcan #'(lambda (value)
                  (mapcar #'(lambda (tail) (cons value tail))
                          (all-possible-instances (rest domains))))
              (first domains))
      (mapcar #'list (first domains))))

(defun mix-up (list)
  "Randomize the elements in this list.
Each element is paired with a random sort key, the pairs are sorted in
decreasing key order, and the elements are read back out.  Rewritten
with standard MAPCAR/SORT instead of the non-portable FOR...IN...COLLECT
iteration macro the original relied on.  (SORT is destructive but here
operates only on the freshly-consed pair list.)"
  (mapcar #'rest
          (sort (mapcar #'(lambda (item) (cons (get-random) item)) list)
                #'>
                :key #'first)))

(defun random-interval (a b)
  ;;; Randomly choose a value between A and B, with a granularity of one
  ;;; part in a million.
  (let ((fraction (/ (random 1000000) 1000000)))
    (+ a (* fraction (- b a)))))

(defun seconds-since (time)
  ;;; Seconds of run time elapsed since TIME, a value previously obtained
  ;;; from GET-INTERNAL-RUN-TIME.
  (let ((elapsed-ticks (- (get-internal-run-time) time)))
    (/ elapsed-ticks internal-time-units-per-second)))

;;;; ==========================================================================================
;;;; Functions for running and testing a single concept
;;;; ==========================================================================================

(defun backprop-test (examples train# &optional short-encode-flag (hidden-units 4) randomize-examples? 
                               (report-freq 10) (max-cycles 1000000))
  ;;; Train on the first TRAIN# examples (after bit-encoding them) and
  ;;; evaluate the resulting network on the remaining examples, reporting
  ;;; the training run time along the way.
  (let* ((bit-examples      (convert-to-bits examples short-encode-flag))
         (training-examples (subseq bit-examples 0 train#))
         (testing-examples  (subseq bit-examples train#))
         (start-time        (get-internal-run-time)))
    (backprop (backprop-example-form training-examples)
              hidden-units randomize-examples? report-freq max-cycles)
    (format t "~%~%Run time: ~,2Fs" (seconds-since start-time))
    (backprop-test-examples testing-examples)))

(defun backprop-test-examples (examples)
  ;;; Classify each test example with the saved *NETWORK* and report the
  ;;; percentage classified correctly.  An example is correct when its
  ;;; +/- tag agrees with whether the network output is (1).
  (let ((correct# (count-if
                   #'(lambda (example)
                       (let ((positive? (equal (compute-network-values
                                                *network* (second example))
                                               '(1))))
                         (or (and (eq (first example) '+) positive?)
                             (and (eq (first example) '-) (not positive?)))))
                   examples)))
    (format t "~%~%Percentage correct: ~A"
            (round (* 100 (/ correct# (length examples)))))))


;;;; ==========================================================================================
;;;; The following functions are for multiple concept (category) problems like the soybean data
;;;; ==========================================================================================

(defun make-backprop-examples (pos-instances neg-instances)
  ;;; Tag positive instances with desired output (1) and negative
  ;;; instances with (0), producing the example format BACKPROP expects.
  (flet ((tag (output instances)
           (mapcar #'(lambda (instance) (cons output instance)) instances)))
    (append (tag '(1) pos-instances)
            (tag '(0) neg-instances))))


(defun backprop-categories (category-list &optional (hidden-units 4) randomize-examples? 
                               (report-freq 10) (max-cycles 1000000))

  ;;; BACKPROP for multiple concept learning problems. The argument category-list
  ;;; should be a list of atoms which represent names of individual categories. A list of
  ;;; instances for learning should be stored on the LEARN-INSTANCES property of each
  ;;; category name. A single concept learning trial is run for each category in which the
  ;;; instances of that category are positive examples and instances of all other categories
  ;;; are negative examples.  If convergence is reached for a given category, then the
  ;;; learned network is stored on the NETWORK property of the category name.
  ;;; NOTE(review): the code below stores *NETWORK* on the NETWORK property
  ;;; unconditionally, even when MAX-CYCLES is hit without convergence --
  ;;; confirm whether that matches the comment's intent.

  (setf *instance-tester* #'backprop-test-instance)
  (let ((start-time (get-internal-run-time)))
    (dolist (category-name category-list)
      (format t "~%~%Category: ~A" category-name)
      ;; One-output network per category: this category's instances are the
      ;; positives, all other categories' instances are the negatives.
      (backprop (make-backprop-examples (get category-name 'learn-instances)
				 (mapcan #'(lambda (a) (copy-list (get a 'learn-instances)))
					 (remove category-name category-list)))
	   hidden-units randomize-examples? report-freq max-cycles)
      (setf (get category-name 'NETWORK) *network*))
    (format t "~%~%Run time: ~,3Fs" (seconds-since start-time)))
  (backprop-test-categories category-list))

(defun backprop-test-instance (instance categories)
  ;;; Return the subset of CATEGORIES whose stored per-category network
  ;;; (on the NETWORK property) classifies INSTANCE as a member, i.e.
  ;;; returns (1).  For use with one network per category
  ;;; (BACKPROP-CATEGORIES).  The result lists categories in reverse of
  ;;; their order in CATEGORIES, matching the original PUSH accumulation.
  (nreverse
   (loop for category in categories
         when (equal (compute-network-values (get category 'NETWORK) instance)
                     '(1))
           collect category)))


(defun backprop-categories1 (category-list &optional (hidden-units 4) randomize-examples? 
                               (report-freq 10) (max-cycles 1000000))

  ;;; BACKPROP for multiple concept learning problems. The argument category-list
  ;;; should be a list of atoms which represent names of individual categories. A list of
  ;;; instances for learning should be stored on the LEARN-INSTANCES property of each
  ;;; category name. Builds one multiple-output network (1 output per disease)
  ;;; for classifying examples into all of the categories and formats instances
  ;;; of all diseases in multiple output form needed for BACKPROP.
  ;;; Each training example's desired output is the long (1-of-n) encoding
  ;;; of its category, taken from LONG-ENCODE-DOMAIN's alist.

  (setf *instance-tester* #'best-guess-test)
  (let ((start-time (get-internal-run-time))
	(output-encoding-alist (long-encode-domain category-list)))
    (backprop (mix-up (mapcan #'(lambda (pair)
			     (mapcar #'(lambda (instance)
					 (cons (cdr pair) instance))
				     (get (car pair) 'learn-instances)))
			 output-encoding-alist))
	 hidden-units randomize-examples? report-freq max-cycles)
    ;; Use SECONDS-SINCE here for consistency with BACKPROP-CATEGORIES,
    ;; instead of duplicating the elapsed-time computation inline.
    (format t "~%~%Run time: ~,3Fs" (seconds-since start-time)))
  (backprop-test-categories category-list))

(defun perfect-match-test (instance categories)
  ;;; Return the subset of CATEGORIES the multi-output *NETWORK* assigns
  ;;; INSTANCE to: every category whose output activation exceeds
  ;;; 1 - *OUTPUT-EPSILON*.  The result lists categories in reverse of
  ;;; their index order, matching the original PUSH accumulation.
  (activate-network *network* instance)
  (nreverse
   (loop for o below (length (network-output-unit-biases *network*))
         when (> (aref (network-output-activations *network*) o)
                 (- 1 *output-epsilon*))
           collect (elt categories o))))


(defun best-guess-test (instance categories)
  ;;; Classify INSTANCE into the single category whose output unit of the
  ;;; multi-output *NETWORK* has the maximum activation (earliest unit
  ;;; wins ties).  Returns a one-element list containing that category.
  (activate-network *network* instance)
  (let ((best 0))
    (loop for o from 1 below (length (network-output-unit-biases *network*))
          when (> (aref (network-output-activations *network*) o)
                  (aref (network-output-activations *network*) best))
            do (setf best o))
    (list (elt categories best))))


(defun run-backprop-test (categories learn-nums test-num num-hidden)
  ;;; Experiment driver: for each training-set size LEARN-NUM in LEARN-NUMS,
  ;;; re-partition each category's instances, train one multi-output network
  ;;; over all categories (BACKPROP-CATEGORIES1, which also runs its own
  ;;; test), then re-evaluate under both the perfect-match and best-guess
  ;;; classification criteria.
  ;;; NOTE(review): SEPARATE-INSTANCES is called here with three arguments
  ;;; -- confirm its signature accepts a test-set size.
  ;;; NOTE(review): TEST-CATEGORIES is defined elsewhere; presumably it
  ;;; consults *INSTANCE-TESTER*, which is set before each call.
  (dolist (learn-num learn-nums)
    (separate-instances categories learn-num test-num)
    (format t "~%~%Test with ~A training examples/category" learn-num)
    (backprop-categories1 categories num-hidden)
    (setf *instance-tester* #'perfect-match-test)
    (format t "~%~%Perfect match test")
    (test-categories categories)
    (setf *instance-tester* #'best-guess-test)
    (format t "~%~%Best guess test")
    (test-categories categories)))

(defun backprop-test-categories (categories &optional learn-instances?)
  ;;; To be used after *-CATEGORIES to test the learned generalizations on
  ;;; each category's TEST-INSTANCES (or LEARN-INSTANCES when
  ;;; LEARN-INSTANCES? is set).  An answer counts as correct only when it
  ;;; names exactly the right category.  Reports per-category and overall
  ;;; percentage correct.
  (let ((percent-sum 0))
    (dolist (category categories)
      (format t "~%~%Testing ~A instances" category)
      (let* ((answers (mapcar #'(lambda (instance)
                                  (funcall *instance-tester* instance categories))
                              (get category (if learn-instances?
                                                'learn-instances
                                                'test-instances))))
             (correct (count-if #'(lambda (answer)
                                    (and (eq (first answer) category)
                                         (null (rest answer))))
                                answers))
             (percent (* 100 (/ correct (length answers)))))
        (format t "~%~A" answers)
        (incf percent-sum percent)
        (format t "~%Percent correct: ~,2F" percent)))
    (format t "~%~%Total percent correct: ~,2F" (/ percent-sum (length categories)))))

(defun separate-instances (categories num-learn-instances &optional num-test-instances)
  ;;; Split each category's full instance list (the global value of the
  ;;; category symbol) into learning and testing sets for use by
  ;;; *-CATEGORIES and TEST-CATEGORIES: the first NUM-LEARN-INSTANCES go
  ;;; on the LEARN-INSTANCES property; the following instances go on the
  ;;; TEST-INSTANCES property.  When NUM-TEST-INSTANCES is supplied, at
  ;;; most that many test instances are kept; otherwise all remaining
  ;;; instances are used.
  ;;; Fixes: RUN-BACKPROP-TEST calls this with three arguments but the
  ;;; definition accepted only two, so accept an optional test-set size;
  ;;; use SYMBOL-VALUE instead of EVAL (the symbol's global value is all
  ;;; that is needed); fetch the instance list once per category instead
  ;;; of three times; drop the redundant end argument to SUBSEQ.
  (dolist (category categories)
    (let* ((instances (symbol-value category))
           (remaining (subseq instances num-learn-instances)))
      (setf (get category 'learn-instances)
            (subseq instances 0 num-learn-instances))
      (setf (get category 'test-instances)
            (if num-test-instances
                (subseq remaining 0 (min num-test-instances (length remaining)))
                remaining)))))