;;; -*- Mode:Common-Lisp; Package:USER; Base:10 -*- ;;============================================================================ ;; EXTENTRON ALGORITHM ;; ;; Copyright (c) 1992 by Paul Baffes and John Zelle. This program may be ;; freely copied, used, or modified provided that this copyright notice is ;; included in each copy of this code and parts thereof. ;;============================================================================ ;;---------------------------------------------------------------------------- ;; TRACING PARAMETERS AND ROUTINES ;;---------------------------------------------------------------------------- ;; for tracing (proclaim '(optimize (speed 3) (compilation-speed 0))) (defvar *trace-extentron* nil) (defvar *trace-weights* nil) (defvar *trace-percep* nil) (defun trace-extentron (&optional (trace-percep nil) (trace-weights nil)) (setf *trace-extentron* t) (setf *trace-percep* trace-percep) (setf *trace-weights* trace-weights)) (defun untrace-extentron () (setf *trace-extentron* nil) (setf *trace-percep* nil) (setf *trace-weights* nil)) (defmacro trace-format (test-var &rest format-form) ;; When test-var (usually a *-trace variable) is set then use formated print `(if ,test-var (format t ,@format-form))) ;;---------------------------------------------------------------------------- ;; STATISTICS PARAMETERS ;;---------------------------------------------------------------------------- (defvar *num-epochs* 0) ;;---------------------------------------------------------------------------- ;; UTILITIES ;;---------------------------------------------------------------------------- (defun square (x) (* x x)) (defun mix-up (list) ;;-------------------------------------------------------------------------- ;; A simple randomizing routine for mixing up a list in place. The idea is ;; to use a "bubble sort" kind of approach. That is, we move down the list, ;; randomly swapping the two current elements. ;;-------------------------------------------------------------------------- (do ((elem list (cdr elem)) (next (cdr list) (cdr next)) temp) ((null next) list) (when (>= (random 1.0) 0.75) (setf temp (car elem)) (setf (car elem) (car next)) (setf (car next) temp))) ) ;;---------------------------------------------------------------------------- ;; ACCESS MACROS ;;---------------------------------------------------------------------------- ;;---------------------------------------------------------------------------- ;; NODES ;; ;; Each node is an integer LTU with a thrshold and list of weights ;; A node is a LIST OF NUMBERS: first the threshold then the weights. ;;---------------------------------------------------------------------------- (defmacro node-threshold (node) `(car ,node)) (defmacro node-weights (node) `(cdr ,node)) ;;---------------------------------------------------------------------------- ;; IO-PAIRS ;; ;; an io-pairs is a list of lists, each is a list of input and output (that ;; is, all the inputs are in one list, all the outputs in another. ;; Specifically, the first list is the list of outputs, the second is the list ;; of inputs). ;;---------------------------------------------------------------------------- (defmacro example-input (example) `(cadr ,example)) ;; `(car ,example) changed for Univ. Test (defmacro example-output (example) `(car ,example)) ;; `(cdr ,example) changed for Univ. Test. ;;---------------------------------------------------------------------------- ;; LAYERS ;; ;; Now that we only pass through particular node values when we propagate, ;; each layer has become more complicated (ptb 11/26). Now, a layer is a list ;; of two lists (rather than just one list as before): first is a list of ;; which nodes should pass through to the next layer, and second is a list of ;; the nodes for the layer. ;; ;; NOTE: by convention we use "layer" as a variable representing an entire ;; layer, "nodes" for just the nodes list, and "passes" for the pass-through ;; list. ;;---------------------------------------------------------------------------- (defmacro layer-pass-through (layer) `(car ,layer)) (defmacro layer-nodes (layer) `(cadr ,layer)) ;;---------------------------------------------------------------------------- ;; GLOBAL VARIABLES FOR EXTENTRON ;;---------------------------------------------------------------------------- (defvar *node-accuracy-counts* nil) ;; keep track of # correct at each node (defvar *node-purity-counts* nil) ;; total "1's" for node in io-pairs (defvar *temp-output* nil) ;; A list of output allocated only once. See the ;; routine "test-layer" (defvar *num-pairs* 0) ;; keeps track of total number of io-pairs (defvar *magic-number* 30) ;; number of cycles to tune with no improvement. ;; See the routine "train-layer-perceptron" ;;---------------------------------------------------------------------------- ;; EXTENTRON Code starts here ;;---------------------------------------------------------------------------- (defun extentron (io-pairs bit-threshold pair-threshold) ;;-------------------------------------------------------------------------- ;; Takes io-pairs to be learned, and two "stopping" criterion: the accuracy ;; for bits and the accuracy for pairs. To get "perfect" training, you set ;; the bit-threshold to 1 (ie, 100%) and the pair-threshold to 1. ;; ;; Modified 11/24/91 to set up a single global output for testing layers ;;-------------------------------------------------------------------------- (let* ((pairs (copy-tree io-pairs)) (layer-count 1) (num-outputs (if pairs (length (example-output (car pairs))) 1)) network layer bit-results pair-results ) ;; initialize global variables (init-global-variables io-pairs num-outputs) ;; create an initial layer and train it. Then test the accuracy (setf layer (create-zero-layer (car pairs))) (train-layer-perceptron layer pairs) (setf network (nconc network (list layer))) (multiple-value-setq (bit-results pair-results) (test-layer layer pairs)) ;; repeat until we get the desired accuracy (do () ((and (>= bit-results bit-threshold) (>= pair-results pair-threshold))) (incf layer-count) (trace-format *trace-extentron* "~%======================") (trace-format *trace-extentron* "~%bit accuracy: ~F~%pattern accuracy:~F" bit-results pair-results) (trace-format *trace-extentron* "~%node accuracies ~A~%" *node-accuracy-counts*) (trace-format *trace-extentron* "~%making layer ~3D" layer-count) ;; extend the io-pairs based upon the last layer (dolist (p pairs) (nconc (example-input p) (propagate-and-filter layer (example-input p)))) ;; create a new layer from the last layer, train it, test it. (setf layer (create-next-layer layer)) (train-layer-perceptron layer pairs) (nconc network (list layer)) (multiple-value-setq (bit-results pair-results) (test-layer layer pairs))) ;; print results and return network. (trace-format *trace-extentron* "~%~%~%bit accuracy: ~F~%pattern accuracy: ~F" bit-results pair-results) (trace-format *trace-extentron* "~%Total Layers ~3D" layer-count) (trace-format *trace-extentron* "~%Total Epochs: ~6,2F" (/ *num-epochs* num-outputs)) network)) (defun init-global-variables (io-pairs num-outputs) (setf *num-epochs* 0) (setf *num-pairs* (length io-pairs)) (setf *temp-output* (make-sequence 'list num-outputs :initial-element 0)) (setf *node-accuracy-counts* (make-sequence 'list num-outputs :initial-element 0)) (setf *node-purity-counts* (make-sequence 'list num-outputs :initial-element 0)) (dolist (p io-pairs) (do ((bits (example-output p) (cdr bits)) (counts *node-purity-counts* (cdr counts))) ((null counts) t) (if (= (car bits) 1) (incf (car counts)))))) (defun propagate-and-filter (layer input-vector &optional (use-threshold t)) ;;-------------------------------------------------------------------------- ;; A special propagate routine. This code does a normal propagate and then ;; consults the pass-through list of the layer to see which of the actual ;; outputs should be produced. Note that this propagate is ONLY used for ;; extending the io-pairs in the "extentron" routine (see above). ;; Returns a new list of the propagated outputs. (ptb 11/26). Note that the ;; activations are pushed onto a list, thus reversing their order (thus the ;; nreverse for the return of the do*). ;;-------------------------------------------------------------------------- (do* ((nodes (layer-nodes layer) (cdr nodes)) (node (car nodes) (car nodes)) (passes (layer-pass-through layer) (cdr passes)) (result nil)) ((null nodes) (nreverse result)) (when (car passes) (let ((activation (compute-sum node input-vector))) (push (if use-threshold (if (>= activation (node-threshold node)) 1 0) activation) result))))) (defun propagate-into (layer input-vector output-vector &optional (use-threshold t)) ;;-------------------------------------------------------------------------- ;; Destructively insert the output of propagation into output-vector ;; modified 11/27 (ptb): changed to have "use-threshold" so we can use it in ;; "test-extentron" ;;-------------------------------------------------------------------------- (do* ((nodes (layer-nodes layer) (cdr nodes)) (node (car nodes) (car nodes)) (outputs output-vector (cdr outputs))) ((null nodes) output-vector) (if use-threshold (if (>= (compute-sum node input-vector) (node-threshold node)) (setf (car outputs) 1) (setf (car outputs) 0)) (setf (car outputs) (compute-sum node input-vector))))) (defun compute-sum (node input-vector) (let ((sum 0)) (mapc #'(lambda (x y) (setf sum (+ sum (* x y)))) (node-weights node) input-vector) sum)) (defun create-zero-layer (io-pair) ;;-------------------------------------------------------------------------- ;; This routine was modified (ptb 11/26) to account for the new layer ;; structure that keeps "pass through" information (see layer macros above). ;; Each layer now has a list of two things: the pass-through vector for that ;; layer (the first element) and the list of actual nodes (the second ;; element). Here, since we are creating the first layer (layer 0. You're ;; welcome, Dijykstra) we know that all the outputs will pass through, and ;; we can just create a list of "t". ;;-------------------------------------------------------------------------- (let ((input-vector (example-input io-pair))) (list (make-sequence 'list (length (example-output io-pair)) :initial-element t) (mapcar #'(lambda (dummy) (cons 0 (mapcar #'(lambda (dummy) 0) input-vector))) (example-output io-pair))))) (defun create-next-layer (prev-layer) ;;-------------------------------------------------------------------------- ;; Modified 11/26/91 (ptb): now this routine can no longer assume that all ;; outputs are going to be passed through to this layer. In fact, the ;; io-pairs may have been extended with only a few of the outputs from the ;; previous layer (see "propagate-and-filter" above). So, this routine must ;; compute the number of weights to extend each node by. This is done by ;; looping through the pass-through section of the previous layer, counting ;; the number of outputs that were really passed through. ;; ;; In addition, this routine must update the pass-through section of the new ;; layer to reflect whether or not a particular output should be passed ;; through in the future. ;;-------------------------------------------------------------------------- (let ((layer (copy-tree prev-layer)) (num-new-weights 0) new-weights) ;; count the number of outputs passed from previous layer (dolist (pass (layer-pass-through prev-layer)) (if pass (incf num-new-weights))) ;; extend the nodes in the new layer by the number of outputs passed ;; through from the previous layer (dolist (node (layer-nodes layer)) (setf new-weights nil) (dotimes (n num-new-weights) (push 0 new-weights)) (nconc (node-weights node) new-weights)) ;; update the pass-through section of the new layer to reflect the latest ;; accuracy counts. (do ((counts *node-accuracy-counts* (cdr counts)) (passes (layer-pass-through layer) (cdr passes))) ((null counts) t) (if (= (car counts) *num-pairs*) (setf (car passes) nil))) ;; return the new layer layer)) (defun train-layer-perceptron (layer io-pairs) ;;-------------------------------------------------------------------------- ;; Modified (ptb 11/26). Changed the initialization of "temp-node" to use ;; make-sequence (see init-global-variables). Also, note that the new ;; "pass-through" section of the layers DOES NOT effect this routine. ;;-------------------------------------------------------------------------- ;; first, create some locals. The "temp-node" serves as a temporary place ;; holder for node states as cycling occurs in the "train-node" routine. (let ((node-index 0) (temp-node (make-sequence 'list (1+ (length (example-input (car io-pairs)))) :initial-element 0))) ;; train each node in turn (do ((nodes (layer-nodes layer) (cdr nodes)) (counts *node-accuracy-counts* (cdr counts))) ((null nodes) t) (trace-format *trace-percep* "~%Training node: ~D~%" node-index) ;; if the node is not already fully trained, then train it. Note that ;; the value for "(car counts)" is passed through from the layer out of ;; which the current layer in "layer" was formed. So, each time a layer ;; is tained, the number of counts is passed along. Since the ;; "train-node" routine won't quit until it finds a better count, the ;; algorithm is guaranteed to eventually converge. (unless (= (car counts) *num-pairs*) (train-node (car nodes) counts io-pairs node-index temp-node)) (incf node-index)))) ;(defvar *show-all* nil) (defun train-node (node ptr-to-count io-pairs output-index temp-node) ;;-------------------------------------------------------------------------- ;; Passing a global temp-node so as not to garbagify. ptr-to-count points to ;; the accuracy we must beat on this layer. Note that initially (ie the first ;; time this routine is called) the node has a count of 0, meaning that it ;; has not classified anything correctly. As this routine is repeatedly ;; called, the results of calling "test-node" will return a measure of how ;; many 0s and 1s for this node were accurately generated. Because we get a ;; pointer to the count here, this routine destructively modifies that count ;; each time this routine is called. Since the count is PASSED ON from one ;; layer to the next, we force subsequent layers to be more accurate for the ;; node (because we only set "better-split" to true here when we get a value ;; for "num-correct" that beats "max-correct"). ;; ;; 11/25/91: revise to use apportionment. First, we set max-correct to be ;; the initial value of the last layer's value for the node (since we know ;; we must beat it to converge). ;;-------------------------------------------------------------------------- (let ((max-correct (car ptr-to-count)) ;; init max-correct to best so far for ;; this node. (cycle-count 0) (node-purity (nth output-index *node-purity-counts*)) better-split split-p num-correct) ;; loop until get all correct, or get at least one split (ie, better-split ;; is set) and you've gone past the magic number of tries. (loop (if (or (= max-correct *num-pairs*) (and better-split (> cycle-count *magic-number*))) (return)) (if *trace-percep* (if better-split (format t "~D," cycle-count) (format t "."))) ; (trace-format *show-all* ; "~%split-p=~A num-correct=~D max-correct=~D" ; split-p num-correct max-correct) ;; train and test the node for one epoch (that is, one look at all ;; the IO pairs). We randomize here for EACH epoch. (train-node-epoch node (mix-up io-pairs) output-index) (incf *num-epochs*) ;; test the node, setting "split-p" if a split is returned that is ;; better than naive guessing. (multiple-value-setq (split-p num-correct) (test-node node io-pairs output-index node-purity)) ;; if we get a "good" split, and we get all correct or at least more ;; than what was in "max-correct" then update max correct and the ;; temp node to save the weights. Note that we reset "cycle-count" so ;; we get at least "magic-number" more cycles to find a better split ;; than the currect max. (if (or (= num-correct *num-pairs*) (and split-p (> num-correct max-correct))) (progn (setf max-correct num-correct) (setf better-split t) (do ((n node (cdr n)) (tn temp-node (cdr tn))) ((null n) t) (setf (car tn) (car n))) (setf cycle-count 0)) (incf cycle-count))) ;; update the counts value for this node (setf (car ptr-to-count) max-correct) ;; Finally, set the node to the best weights we found. (do ((n node (cdr n)) (tn temp-node (cdr tn))) ((null n) t) (setf (car n) (car tn))))) (defun train-node-epoch (node io-pairs output-index) (let (input-vector target-value) (dolist (pair io-pairs) (setf input-vector (example-input pair)) (setf target-value (nth output-index (example-output pair))) (unless (= target-value (propagate-node node input-vector)) (if (zerop target-value) (progn (incf (node-threshold node)) (do ((ws (node-weights node) (cdr ws)) (ins input-vector (cdr ins))) ((null ws) t) (unless (zerop (car ins)) (decf (car ws))))) (progn (decf (node-threshold node)) (do ((ws (node-weights node) (cdr ws)) (ins input-vector (cdr ins))) ((null ws) t) (unless (zerop (car ins)) (incf (car ws)))))))))) (defun propagate-node (node input-vector) ;; props for only one node (let ((sum 0)) (mapc #'(lambda (wt input) (incf sum (* wt input))) (node-weights node) input-vector) (if (>= sum (node-threshold node)) 1 0))) (defun test-node (node io-pairs output-index output-purity) ;;--------------------------------------------------------------------------- ;; modified 11/26 to add a notion of purity (see *node-purity-counts*) global ;; variable. Passed in output-purity to save an "nth", that is, the purity ;; value for this particular node is passed in to this routine by the caller, ;; to save doing "nth" for the same node for each epoch. ;; ;; The most difficult thing to understand about this routine is the first of ;; the two returned values (see "values" statement as return of the dolist). ;; The idea is to return "t" as the first value only if two conditions are ;; met: (1) at least one correct prediction is made and (2) the predictive ;; accuracy of the node is better than random guessing. Condition 1 is easy: ;; we simply wait until the number of correct "1" values generated for the ;; node (we assume at least one "1" is desired) is generated. Testing ;; condition (2) is more complex. ;; ;; To compute predictive accuracy, we need two measures: (a) the number of ;; predictions and (b) how many times the predictions were correct. We ;; compute this here by simply looking at the number of "1" outputs that are ;; accurately predicted (we don't need to check both "1s" and "0s" since they ;; are the only two values--ie, they mirror eachother). So, this loop ;; computes two values: the number of "1s" that were predicted and the number ;; of times those predictions were correct. That gives us the following ;; ratio: ;; ;; #-correctly-generated-1s ;; ------------------------ ;; total-#-of-predicted-1s ;; ;; which is the predictive accuracy. Now, we also know what the a priori ;; expected number of "1s" is for this node; it gets passed in as ;; "output-purity". At initialization time (see "init-global-variables") we ;; compute the number of expected "1s" for this node. Thus we have another ;; ratio we can generate: ;; ;; #-of-expected-1s-for-node ;; ------------------------- ;; total-#-of-io-pairs ;; ;; which is the expected ratio of "1s" which we can expect for the node, ;; simply by looking at the io pairs. Note that with NO TRAINING AT ALL we ;; can generate a predictive accuracy that matches this latter ratio: we ;; simply always guess "1". Thus, our number of predicted "1s" would be the ;; same as the number of io pairs, and we'd be guaranteed to generate all the ;; expected "1s". Thus we can trivially get the latter ratio by creating a ;; node that always returns "1". ;; ;; To do better, we require that the split found be MORE ACCURATE than the ;; naive "alwasys-generate-a-1" case. Specifically, we want the first ratio ;; to be closer to 1 (ie more accurate) than the second ratio (the naive ;; approach). Thus we want the following: ;; ;; #-correctly-generated-1s #-of-expected-1s-for-node ;; ------------------------ > ------------------------- ;; total-#-of-predicted-1s total-#-of-io-pairs ;; ;; which is identical to ;; ;; (#-correctly-generated-1s * total-#-of-io-pairs) > ;; (total-#-of-predicted-1s * #-of-expected-1s-for-node) ;;--------------------------------------------------------------------------- (let ((num-correct 0) (correct-1s 0) (predicted-1s 0) out-value input-vector target-value) (dolist (pair io-pairs (values (> (* correct-1s *num-pairs*) (* predicted-1s output-purity)) num-correct)) (setf input-vector (example-input pair)) (setf target-value (nth output-index (example-output pair))) (setf out-value (propagate-node node input-vector)) (when (= out-value 1) (incf predicted-1s) (if (= target-value 1) (incf correct-1s))) (if (= target-value out-value) (incf num-correct))))) (defun test-layer (layer pairs) ;;--------------------------------------------------------------------------- ;; Takes a layer and pairs, and returns two percentages: ;; the percentage of bits that are correctly output and the percentage of ;; pairs that are correctly output. ;; Modified 11/18/91: had to change this to return 100 percent accuracy in ;; the event that there are no pairs passed in for testing. ;; Modified 11/24/91: All calculated outputs are saved in the global list ;; *temp-output* which is assumed to be the appropriate length. This ;; is a cons saving hack. ;; Checked 11/26/91: note that the addition of the "pass-through" information ;; does not effect this routine (ptb). ;;--------------------------------------------------------------------------- (if (null pairs) (values 1.0 1.0) (let ((bit-count 0) (pattern-count 0) (output *temp-output*)) (dolist (p pairs) (propagate-into layer (example-input p) output) (if (equal output (example-output p)) (incf pattern-count)) (mapc #'(lambda (output-bit desired-bit) (if (= output-bit desired-bit) (incf bit-count))) output (example-output p))) (values (/ bit-count (* (length (example-output (car pairs))) *num-pairs*)) (/ pattern-count *num-pairs*))))) (setq and-pairs '(((0 0) (0 0)) ((1 1) (1 1)) ((0 1) (1 0)) ((0 1) (0 1)))) (setq xor-pairs '(((0) (0 0 )) ((0) (1 1 )) ((1) (0 1 )) ((1) (1 0 )))) (setq equiv-pairs '(((1) (0 0)) ((1) (1 1)) ((0) (1 0)) ((0) (0 1)))) (setq devil-pairs '(((0 1) (0 0)) ((0 1) (1 1)) ((1 0) (0 1)) ((1 0) (1 0))))