;;; -*- Mode: LISP; Package: AC2; Base: 10; Syntax: Common-lisp -*- ;;; ;;; ;;;***************************************************************************;;; ;;;***************************************************************************;;; ;;; ;;; ;;; RESTRICTED COPYRIGHT ;;; ;;; -------------------- ;;; ;;; ;;; ;;; Copyright (C) 1988 Research Institute for Advanced Computer Science. ;;; ;;; All rights reserved. The RIACS Software Policy contains specific ;;; ;;; terms and conditions on the use of this software. IN PARTICULAR, THIS ;;; ;;; SOFTWARE MAY NOT BE DISTRIBUTED TO ANY OTHER PARTY WITHOUT EXPLICIT ;;; ;;; PERMISSION FROM RIACS. ;;; ;;; ;;; ;;; THIS REFLECTS THE ANTICIPATED ;;; ;;; COMPLETION OF AUTOCLASS III IN THE ;;; ;;; FIRST QUARTER 1989 ;;; ;;; ;;; ;;;***************************************************************************;;; ;;;***************************************************************************;;; ;;; ;;; ; ;****************************************************************************** ; ; AUTOCLASS II - Automatic Classification of Data ; ; Authors : Peter Cheeseman - RIACS - Research Institute for Advanced ; Computer Science ; Matthew Self - Sterling Software ; ; John Stutz - RIA - Artificial Intelligence Research Branch ; ; William Taylor - Sterling Software ; ; Address : MS 244-20, NASA Ames Research Center, Moffett Field, CA 94035 ; ; Phone : (415) 694-4946 - FTS 464-4946 ; ; Arpanet : cheeseman@pluto.arc.nasa.gov ; ; Environment : Common Lisp ; ; Revision History : 11 Jan 88 - original released version - ; ;****************************************************************************** ;;;************************************** ;;; ;;; File : cha:>stutz>ac2>prog>development-30e1 ;;; ;;; Description : The AutoClass II program ;;; ;;; 30 Weighted case assignment. Allows specification of prior probabilities and weights. ;;; ** Extensive revision of nomenclature. Complete revision of Probability calculations. *** ;;; Residual statistics are eliminated. ;;; ;;; PORTING HISTORY: ;;; ;;; 30 Jul 87 : Ported to VAX/VMS 8800 & DEC Common Lisp V 2.2 ;;; added #+/- :symbolics reader macros. will ;;; ;;; 07 Aug 87 : Ported to Sequent/Dynix V 2.1.1 & Kyoto Common Lisp (16 Sept 86) ;;; added explicit init forms in LET,LET*,DO,DO* ;;; added explicit PROGN to else clauses of IF. will ;;; ;;; 25 Aug 87 : Changed COLLECT-REAL-PRIORS-FROM-DATA & READ-DB. added REAL-PT-VALIDATION ;;; & *VARIABLE-DESCRIPTIONS*. This checks for attributes with zero std. ;;; dev. and more unknown values than known values. ;;; ;;; 26 Aug 87 : Changed FIND-BEST-N & TIME-IT to print out run number. will ;;; ;;; 31 Aug 87 : Added optional keyword :header-file to READ-DB. will ;;; ;;; 01 Sept 87 : Added termination-factor to DYNAMIC-CYCLE. will ;;; ;;; 15 Sept 87 : Modifications to write.wt-set, cond-read.wt-set, find-best-n , ;;; get-best-of-weightings to allow an optional header string in ;;; -.wt-set files, which do not need to be completly filled. john ;;; ;;; 23 Sept 87 : Added COMPRESS-CLASS-WTS, EXPAND-CLASS-WTS & COMPRESS-.WT-SET-FILE, ;;; and modified COND-READ-.WT-SET, WRITE-.WT-SET, GET-BEST-OF-WEIGHTINGS, & ;;; FIND-BEST-N to make use of a compresses class weights representation john ;;; ;;; 10 Nov 87 : Modifications to COLLECT-PART-TOTAL-MML, REAL-VAR-MML, and ;;; UPDATE-REAL-MML-CONSTANTS. New RANDOM-DISTRIBUTE-DATA, renaming old ;;; version to NORMAL-DISTRIBUTE-DATA without changing calls. john ;;; 12/april/88 - Modifications to gen-disperse-list *disperse-fail-ratio* ;;; equiv-disperse-rating. ;;; ;;; April/88 - Extensive modifications to everything dealing with weight sets, ;;; to allow run-time contorl of the active number of classes. john ;;; ;;; __________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ; ; ; ;;; Function Index ; ;General Macros ; return-on-char (test-char &optional (block nil) result) ; return-on-char-list (char-list &optional (block nil) result) ; time-it (complete-function-call &key (stream t) ; (f-string "~2& Run ~A : ") (f-args-list ''("_")) ; without-floating-underflow-traps-kludge (&rest body) ; ;Data Base ; make-data-base (&optional (N-data *N-data*)) ; get-datum (data-base N-datum) ; set-datum (data-base N-datum datum) ; get-datum-var (datum N-var) ; set-datum-var (datum N-var value) ; get-variable (data-base N-datum N-var) ; set-variable (data-base N-datum N-var value) ; ;Accumulaters ; acc-var (acc variable) ; acc-disc-wt (acc variable value-index) ; acc-null-wt (acc variable) ; acc-known-wt (acc variable) ; acc-real-wt (acc variable) ; acc-real-Sum (acc variable) ; acc-real-Sum-Sq (acc variable) ; acc-disc-MML (acc variable value-index) ; acc-null-MML (acc variable) ; acc-known-MML (acc variable) ; acc-real-wt (acc variable) ; acc-real-Mean (acc variable) ; acc-real-Sigma-term (acc variable) ; acc-real-Log-Sigma (acc variable) ; make-accumulater () ; copy-accumulater (accumulater) ; replace-accumulater (to-acc from-acc) ; null-accumulater (acc &optional (null-value nil)) ; collect-discrete-wts (acc &optional (return-type 'vector)) ; set-acc-to-prior-wts (acc) ; inc-acc-var (acc var type value &optional (weight 1.0)) ; display-accumulater (acc &optional ; (stream t) (as-probability nil)( class-wt nil) (priors nil)) ; display-accumulated-wts (acc &optional (stream t)) ; ; ;Classes ;(defstruct class ; "Holds the information describing one class of a partitioning." ; (value-wts nil :type vector) ;; Accumulater: Cur Var/Val Wts. & sums (priors inc). ; (disc-prior-wts nil :type vector) ;; Discrete component Prior Wts. for unit MMLs. ; (wt 0.0 :type float) ;; Current Wt. of class for MMLs (Not Inc. priors). ; (unit-MMLs nil :type vector) ;; Accumulater: Var/Val unit MMLs, means, sigmas. ; (memb-prior-wt 0.0 :type float) ;; Weight used to calculate current Memb. MML. ; (memb-unit-MML 0.0 :type float) ;; Membership unit MML. ; (membership-MML 0.0 :type float) ;; membership MML collecter. ; (description-MML 0.0 :type float) ;; description MML collecter. ; ) ; class-empty-p (class) ; class-var-disc-wt (class N-var) ; class-total-mml (class) ; make-class&accs () ; replace-class (to-class from-class) ; set-class-to-priors (class class-prior &optional (data-wt (1+ *N-data*))) ; update-class-wts (class delta-wt datum-vector &optional) ; update-class-unit-MMLs (class data-wt) ; class-datum-prob (class datum) ; log-class-datum-prob (class datum) ; class-real-mean&sigmas (class &optional (suppress-priors nil)) ; display-class (class &optional (stream t)) ; display-class-wts (class &optional (stream t)) ; ; ;Partitionings ;(defstruct (partitioning (:conc-name part-) ) ; "Holds the information characterizing a partitioning into classes" ; (superior nil ) ;; The hierarchial superior class. ; (class-range 1 :type integer) ;; No. of classes in partitioning. ; (classes nil :type vector) ;; [range] of class: the partitioning classes. ; (memb-wt 0.0 :type float) ;; The class membership weight (less prior). ; (prior-wt 0.0 :type float) ;; The prior class membership weight. ; (total-MML 0.0 :type float) ;; partition message length. ; ) ; ; *empty-class-wt-limit* 0.9 "When weight is less than this the class is empty.") ; part-class (partition class-index) ; part-class-wt (partition N-class) ; class-prior-wt (partition N-class) ; class-wts (partition &optional all) ; class-disc-wt (partition N-class N-var) ; empty-classes-p (partition &optional all) ; discrete-wt (partition N-class N-var N-value) ; null-wt (partition N-class N-var) ; known-wt (partition N-class N-var) ; real-wt (partition N-class N-var) ; real-sum (partition N-class N-var) ; real-sum-sq (partition N-class N-var) ; real-sum-log-err (partition N-class N-var) ; class-memb-MML (partition N-class) ; collect-memb-MMLs (partition) ; class-desc-MML (partition N-class) ; collect-desc-MMLs (partition) ; make-part&classes (class-range &optional (superior nil)) ; copy-partition (from-partition) ; replace-part (to-part from-part) ; expand-partition (partition) ; remove-null-classes (partition) ; set-partition-to-priors (partition &optional (class-prior-seq nil)) ; collect-weights (partition) ; update-partition-MMLs (partition) ; collect-part-total-MML (partition) ; *ln-default-real-pt-error* (log .1)) ; real-var-MML (total-wt prior-wt total-sigma const-term) ; update-real-MML-constants () ; display-part-MMLs (partition &optional (stream t) all-classes) ; display-part-classes (partition &optional (stream t) all-classes) ; ; ;DATA I/O Section ; read-db (in-file &optional n-data) ; expand-*disc-priors* (input-priors) ; expand-*real-priors* (input-priors) ; make-*real-bases*-list () ; collect-real-priors-from-data (&key (real-prior-wt *real-prior-wt-default*) ; (fixed-sigma nil) (out-type 'short-float)) ; save-data (&optional (stream t)) ; reverse-*data* () ; scramble-*data* (&optional (moves *N-data*)) ; mod-distribute-data (class-range &optional (class-range nil)) ; block-distribute-data (block-size &optional (class-range nil)) ; random-assign-data (class-range &optional (randomize t)) ; random-select-data (class-range &optional (n-samples 1) (randomize t)) ; random-distribute-data (class-range &optional (%zeros .5) (randomize t)) ; gen-random-weights (count %zeros) ; normal-distribute-data (class-range &optional (sigma% 0.3) (randomize t)) ; gen-normal-weights (count mean sigma) ; gen-normal-list (count mean sigma ) ; display-weights (&optional (clean t) (weights *curr-wts*) (stream t)) ; max-weight-class (N-datum &optional (weights *curr-wts*)) ; get-max-weight-classes (weights) ; read-class-wts (in-file &optional (Nth-partition 0)) ; restore-class-wts (in-file &optional (Nth-partition 0) partition) ; save-class-wts (out-file &optional (weights nil)) ; get-class-wts () ; reset-class-wts (weights &optional (length-override nil)) ; reset-wts (to-weights from-weights) ; compress-class-wts (&optional (long-wts *curr-wts*) (min-weight 0.0) ) ; expand-class-wts (short-wts &optional (n-classes nil)) ; display-data (&optional (stream t) (short t)) ; display-data-list (data-Nos &optional (stream t) (short t)) ; display-data-class (partition class &optional (stream t) (short t)) ; display-data-by-class (partition &optional (stream t) (short t)) ; ;Simple tutored learning ; tutored-partition (tutor-function &optional (partition nil)) ; thyroid (datum) ; ;Experiments on Speed optimization ; base-cycle (partition) ; ;Functions for saving and accessing classifications as .wt-set files ; *current-results* nil ; write-.wt-set (file header count MMLs classes weightings ; &optional (wt-format ''short) (min-wt 1.0e-8) )) ; cond-read-.wt-set (file header count MMLs classes weightings N-classes N-held ; &optional (wt-format ''short) ) ; max-classes-from-.wt-set (input-wt-set-file) ; compress-.wt-set-file (input-wt-set-file ; &optional (output-wt-set-file nil) (min-wt 1.0e-4)) ; split-.wt-set-file (wt-set-file &key (in-format 'short) (out-format 'short) ; (min-wt 1.0e-4) ) ; write-component-of-split-.wt-set-file ; (wt-set-file N-component header count NLPs classes weightings ; &optional (out-format 'short) (min-wt 1.0e-8) ) ; read-component-of-split-.wt-set-file ; (wt-set-file N-component count-list header NLPs classes weightings ; &optional (in-format 'short) ) ; get-best-of-weightings (wt-set-file &optional (wt-format 'short) (position nil)) ; ; ;New Local Minimum Search Routines ; update-.wt-set-file (partition wt-set-file &key (update-method t) (n-cycles 5) ; (mode 'all) (position nil) (dynamic-halt-range nil) ; (in-format 'short) (out-format 'short) (header nil) ; (cache-file nil) ) ; find-best-n (N-held N-classes partition search-method N-cycles save-file ; &optional (file-header nil) (dynamic-halt-range nil) ; (in-format 'short) (out-format 'short) (min-wt 1.0e-8) ) ; find-best-n-2 (N-held partition search-method N-cycles save-file ; &key (N-classes nil) (file-header nil) ; (dynamic-halt-range nil) (in-format 'short) ; (out-format 'short) (min-wt 1.0e-8) ; (revise-prev-runs nil) ) ; simple-cycle (partition &optional (max-cycles 0) (stream t) ; (skip-initialize nil)) ; dynamic-cycle (partition &optional (N-average 5) (stream t) ; (HALT-RANGE nil) (skip-initialize nil)) ; save-last-cycle (count partition) ; get-data-weights (partition) ; get-over-relaxed-data-weights (partition &optional (relaxation-ratio 1.75) ) ; test&record (partition record-file ; &optional (data-file nil) (dispersion-ratio 0.0) ) ; *DB-cycle-count* 0 ; update-weights (partition &optional ......) ; update-datum-weights (N-datum partition ; &optional (relaxation-ratio 1.0) (delta-ratio 1.0) ) ; get-new-weight-vector (partition datum &optional (weights nil)) ; ; ;Prediction ; *test-db* nil "Source of test data." ; *N-test-data* 0 "Number of test data." ; *test-data* nil "Data for testing." ; test-weights* nil "Weights for test data." ; test-vars* nil "List of variables for testing." ; test-values* nil "Array of lists of values for variables in *test-vars*." ; read-test-data (in-file &optional n-data &key (variable-descriptions nil) ; (variable-type-check t)) ; mark-test-variables (var-list &optional (sub-symbol *test-sub-symbol*)) ; get-test-weights (partition) ; simple-errors () ; restore-test-variables () ; ;New Heuristic Search Experiments ; dynamic-disperse-cycle (partition &key (N-average 5) (stream t) (HALT-RANGE nil)) ; *max-disperse-backstep-ratio* 1.0 ; simple-disperse-cycle (partition &key (N-average 5) (stream t) (HALT-RANGE nil) ; (prev-trace ()) (cache-file nil) ) ; gen-disperse-list (partition &optional (prev-failures ()) ) ; *disperse-change-ratio* .005 ; equiv-disperse-rating (rate-1 rate-2) ; simple-disperse-class (N-class partition &optional (remove-class nil)) ; disperse-class&update (N-class partition &optional (remove-class nil)) ; smoothly-disperse-class-wt-over-all (N-class dispersion-ratio) ; random-disperse-class-wt-over-all (N-class dispersion-ratio) ; ;Old Heuristic MML search routines ; ordered-classes (partition) ; ordered-classes-* (partition key) ; split-class (partition split-class N-splits) ; disperse-class (D-class partition &optional (remove-D-class nil)) ; ;Functions For Generating And Testing Normal Distributions ; mean&sigma (number-seq) ; variable-mean&sigma (N-var) ; gen-normal (mean sigma) ; gen-normal-data (N-data mean&sigma-list) ; gen-formatted-data (data-spec-list) ; gen-data-spec-list (N-samples/pattern N-vars N-patt &optional (sigma-el 2) ; (sigma-patt 10) (var-min-limit 10) (var-max-limit 100) ) ; graph-list (list &optional (scale 1) min max (stream t)) ; graph-real-var (N-var &key (wts *curr-wts*) (data *base-data*) (scale 1) ; min max (stream t) (n-lines 40)) ; ;Timers ; timed-duration (fn &rest args) ; map-timed-duration (result-type function &rest sequences) ; *minimum-tests* 3 "Run each test at least this many times" ; *minimum-duration* 6 "Run each test for at least this many seconds" ; multiple-timed-duration (fn) ; ;Utility Routines ; display (vector) ; v-display (vector &key (stream t)) ; formatted-time-string () ; safe-exp (x) ; reset (fun seq) ; randomize-random () ; simple-normalize-vector (vector) ; normalize-active-vector (vector length) ; normalize&type-vector (vector &optional (length nil) (element-type 'short-float)) ; max+ (num-seq &key end) ; min+ (num-seq &key end) ; max+position (num-seq &key end) ; min+position (num-seq &key end) ; max+! (seq &key end) ; min+! (seq &key end) ; query-for-number (message min max) ; square (x) ; sigma-sq (N sum sum-sq) ; log-gamma (x &optional (full-precision nil)) ; ;Symbolics Specific Bit Mapped Graphical Routines ; line-graph-list (list x y &optional (x-step 1) (y-scale 1.0) ) ; line-graph-seq (seq x y &optional (x-step 1) ; (y-scale 1.0) (y-base 0.0) (base t)) ; static-line-graph-seq (seq x y &optional (x-step 1) ; (y-scale 1.0) (y-base 0.0) (base t)) ; dot-graph-list (list x y &optional (x-step 1) (y-scale 1.0) ) ; dot-graph-seq (seq x y &optional (x-step 1) (y-scale 1.0) ) ; static-dot-graph-seq (seq x y &optional (x-step 1) (y-scale 1.0) ) ; dot-graph-class (N-class x y &optional (x-step 1) (y-scale 1.0)) ; line-graph-class (N-class x y &optional (x-step 1) (y-scale 1.0)) ; bar-graph-mean&sigmas (mean&sigmas x y &optional (x-step 1) (y-scale 1.0)) ; beam-graph-mean&sigmas (mean&sigmas x y &optional (x-step 1) (y-scale 1.0)) ; ;;; __________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; System dependent fixed parameters: (when (string-search "Genera 7.2" (software-version)) (pushnew 7.2 *features*) ) (defconstant *least-positive-short-float* #-7.2 least-positive-short-float #+7.2 least-positive-normalized-short-float) (defconstant *least-positive-single-float* #-7.2 least-positive-single-float #+7.2 least-positive-normalized-single-float) (defconstant *least-positive-double-float* #-7.2 least-positive-double-float #+7.2 least-positive-normalized-double-float) (defconstant *least-positive-long-float* #-7.2 least-positive-long-float #+7.2 least-positive-normalized-long-float) (defconstant *sqrt-least-positive-short-float* (sqrt *least-positive-short-float*)) (defconstant *log-most-positive-long-float* (log most-positive-long-float)) (defconstant *log-most-positive-short-float* (log most-positive-short-float)) (defconstant *least-long-float-level* (/ *least-positive-long-float* long-float-epsilon .9999) ) (defconstant *least-short-float-level* (/ *least-positive-short-float* short-float-epsilon .9999) ) (defconstant *test-sub-symbol* '?) (defconstant *log-pi* (coerce (log pi) 'short-float)) ;;; The following special variables define the data and are set in "read-db": (defvar *current-db* nil "The name of the current DB file.") (defvar *base-data* nil "[elements x variables] data base array. See make-data-base.") (defvar *curr-wts* nil "Array of class weight vectors for each case in data") (defvar *wt-vector-length* 0 "Actual length of weight vectors, >= effective length.") (defvar *N-data* nil "The number of data elements in *base-data*.") (defvar *variable-types* () "Ordered list of variable types.") (defvar *N-variables* nil "The number of variables, from (length *variable types).") (defvar *disc-var-ranges* () "Ordered list of range limits for discrete variables. In a file these are the maximum values. In the program the file values are increased by 1 to allow for null values (index 0). REQUIRES nil place-holders for other types.") ;;; The following define various priors and functions on them: (defvar *disc-priors* () "For Discrete Variables a list of prior weights for each value (including null), or a single value to be uniformly applied, or nill when a total weight of 1 is to be uniformly distributed. For Real Variables a list of (null non-null) weights, or a number which is the non-null fraction of a total weight of 1.0, or nil for *defaults-known-prob* below. The sum of prior weights for a variable is considered to. to be the weight of prior information (in cases). Use nil for the 'ignore variable type. Use (expand-discrete-priors *disc-priors*) to get fully instaniated form.") (defvar *real-priors* () "For real variables a list of (mean sigma weight), or of (mean sigma) with weight defaulting to *real-prior-Wt-default*. REQUIRES nil place-holders for other types. Provides the information for constructing *real-bases*. To be read from DB, or generated by Collect-Real-Priors-From-Data. ") (defvar *real-bases* () "list of (weight sum sum-sq) corresponding to *real-priors*, calculated from the 2 case equivalent to a mean and sigma, and appropriatly weighted. This provides the initial weighted sums for the real variable accumulaters.") (defvar *real-prior-Wt-default* 2.0 "Default Wt. for *real-priors* and *real-bases*") (defvar *real-MML-constants* () "List of constants derived solely from the priors and in the calculation of the real ML. This list is initialized by update-real-mml-constants") (defconstant *log-root-pi* (coerce (log (sqrt pi)) 'short-float)) (defvar *default-known-prob* .9 "Default prior Prob. that a real value is known") (defvar *variable-descriptions* () "ordered list of variable descriptions - ui") ;; Memory Management: (defvar *wt-vector-store* () "Weight vector recycle storage list.") ;;; The following are used by the Heuristic search routines: (defvar *best-partition* nil "The current best partitioning") (defvar *partition* nil "A current partition") (defvar *monitor* nil "Output file for monitoring action.") (defvar *best-class-assignments* () "List of current best class assignments") (defvar *collect-partition-assignments* t "Flag for recording partion class assignments.") (defvar *partition-assignments-list* () "Records partion's class assignments.") (defvar *1st-marginals* nil "Array based structure recording the first order marginals") ;;; Debugging and monitor flags: (defvar *process-output* nil "To hold pointer to output stream, see run&record.") (defvar *debug-0* nil "Debugging flag for find-MML-partition") (defvar *debug-1* nil "Debugging flag for move-datum") (defvar *trace-0* t "Tracing flag for find-MML-partition") (defvar *trace-2* nil "Tracing flag for MML calculations") (defvar *r&r-count* 0 "Counter for run&record function") (defvar *collect-min-steps* nil "Flags collection of minimization steps") (defvar *min-steps* () " holds Simple-minimize-mml steps list") ;;; System Conditioning: (push :auto-class-30 *features*) ; Allows version dependent DB reading (push (lisp-implementation-type) *features*) #+ (and :symbolics "Symbolics-Common-Lisp") (and (push `(setf si:gc-flip-minimum-ratio ,si:gc-flip-minimum-ratio) sys:logout-list) (push `(setq *features* (remove :auto-class-30 *features*)) sys:logout-list) (push `(setq *features* (remove ,(lisp-implementation-type) *features*)) sys:logout-list)) #+ (and :symbolics #(Zetalisp)) (and (push `(setf si:gc-flip-minimum-ratio ,si:gc-flip-minimum-ratio) logout-list) (push `(setq *features* (remove :auto-class-30 *features*)) logout-list) (push `(setq *features* (remove ,(lisp-implementation-type) *features*)) logout-list)) #+ :symbolics (setf si:gc-flip-minimum-ratio 1.00) ;;; __________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; General Macros (defmacro return-on-char (test-char &optional (block nil) result) "Provides a soft abort from enclosing 'block when 'test-char is typed. Call as (return-on-char a )." (let ((input-char (gensym))) `(let (,input-char) (when (and (setq ,input-char (read-char-no-hang)) (char-equal ,(character test-char) ,input-char) ) (return-from ,block ,result) ) ) ) ) (defmacro return-on-char-list (char-list &optional (block nil) result) "Provides a soft abort from enclosing 'block when one of 'char-list is typed. Call as (return-on-char-list '(a b) )." (let ((input-char (gensym))) `(let (,input-char) (when (and (setq ,input-char (read-char-no-hang)) (member ,input-char ,char-list :test (lambda (in l-char) (char-equal in (character l-char))) ) ) (return-from ,block ,result) ) ) ) ) (defmacro time-it (complete-function-call &key (stream t) (f-string "~2& Run ~A : ") (f-args-list ''("_")) ) "Records on 'stream the time to perform 'complete-function-call. The f-string and f-args-list specify an initial format string and arguments." ; ID added 26/aug/87 Will, ID made optional 15/sept/86, returned values 25/nov/87 John. ; revised for arbitary string and args-list 1/88 John (let ((start-time (gensym)) (results (gensym)) ) `(let* ((,start-time (get-internal-real-time)) (,results (multiple-value-list ,complete-function-call)) ) (format ,stream "~? Elapsed time ~,2F minutes.~%" ,f-string ,f-args-list (/ (- (get-internal-real-time) ,start-time) internal-time-units-per-second 60.0 ) ) (values-list ,results) ) ) ) (defmacro without-floating-underflow-traps-kludge (&rest body) #+ :symbolics `(without-floating-underflow-traps ,@body) #- :symbolics `(progn ,@body) ) ;;; __________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; Data Base ;;; 'data-base - A run-time defined array of datum arrays used to store ;;; the datum-variable values. ;;; The data stored are of 3 variable types with differing representations: ;;; discrete : Integer, 0 reserved for 'no-value'. ;;; real-pt : Real, implicit error range, nil means 'no-value'. ;;; ignore: Any symbol. There are ignored in all calculations. ;;; The standard data-base is *base-data* created by "read-db" below. (defun make-data-base (&optional (N-data *N-data*)) (let ((db (make-array N-data))) (do ((N-datum 0 (1+ N-datum))) ((= N-datum N-data)) (setf (aref db N-datum) (make-array *N-variables*)) ) db ) ) (defmacro get-datum (data-base N-datum) "Gets a datum vector from a data base." `(aref ,data-base ,N-datum) ) (defmacro set-datum (data-base N-datum datum) "Sets a datum vector in a data base." `(setf (get-datum ,data-base ,N-datum) ,datum) ) (defmacro get-datum-var (datum N-var) "Gets a variable/value from a datum vector." `(aref ,datum ,N-var) ) (defmacro set-datum-var (datum N-var value) "Sets a variable/value in a datum vector." `(setf (aref ,datum ,N-var) ,value) ) (defmacro get-variable (data-base N-datum N-var) "Gets a variable/value from a data base." `(aref (aref ,data-base ,N-datum) ,N-var) ) (defmacro set-variable (data-base N-datum N-var value) "Sets a variable/value in a data base." `(setf (get-variable ,data-base ,N-datum ,N-var) ,value) ) ;;;___________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ; Accumulaters ;;; "accumulater" - refers to a run-time defined abstract data structure used to ;;; accumulate data about values of variables, relative to a class. The top level consists ;;; of an *N-variables* length array, the elements of which depend upon the variable type: ;;; discrete : Array of weights or unit-MMLs (one per variable, ;;; 0 index reserved for 'no-value'. ;;; real-pt: Array of [null-wt, known-wt, real-wt, sum, sum-sq], ;;; or [null-MML, known-MML, real-wt, mean, sigma-term, log-sigma]. ;;; ignore: 'ignore - as a placeholder and identifier. ;;; Appropriate constructer and accessor functions for getting at the data for specific ;;; variables, relative to the class, follow. These mimic the syntax for common-lisp ;;; structure operaters. The accessor macros come first at they essentially are the ;;; field definitions. ; Accessers for a weight accumulater: (defmacro acc-var (acc variable) "Gets the variable's vector." `(aref ,acc ,variable) ) (defmacro acc-disc-wt (acc variable value-index) "Discrete variable/value's weight." `(aref (aref ,acc ,variable) ,value-index) ) (defmacro acc-null-wt (acc variable) "Missing value's weight." `(aref (aref ,acc ,variable) 0) ) (defmacro acc-known-wt (acc variable) "Real Var. known weight." `(aref (aref ,acc ,variable) 1) ) (defmacro acc-real-wt (acc variable) "Real variable's weight." `(aref (aref ,acc ,variable) 2) ) (defmacro acc-real-Sum (acc variable) "Real variable's weighted sum." `(aref (aref ,acc ,variable) 3) ) (defmacro acc-real-Sum-Sq (acc variable) "Real variable's weighted sum^2." `(aref (aref ,acc ,variable) 4) ) ; Accessors for a MML/parameter accumulater: (defmacro acc-disc-MML (acc variable value-index) "Discrete variable/value's MML." `(aref (aref ,acc ,variable) ,value-index) ) (defmacro acc-null-MML (acc variable) "Missing value's MML." `(aref (aref ,acc ,variable) 0) ) (defmacro acc-known-MML (acc variable) "Real Var. 'known MML." `(aref (aref ,acc ,variable) 1) ) ;(defmacro acc-real-wt (acc variable) "Real Var. weight." ; `(aref (aref ,acc ,variable) 2) ) ; duplicate of definition above. (defmacro acc-real-Mean (acc variable) "Real variable's mean." `(aref (aref ,acc ,variable) 3) ) (defmacro acc-real-Sigma-term (acc variable) "Real Var. (/ 1 (* 2 (^ sigma 2)))." `(aref (aref ,acc ,variable) 4) ) (defmacro acc-real-Log-Sigma (acc variable) "Real Var. (Ln (* sigma (^ 2 .5)))." `(aref (aref ,acc ,variable) 5) ) (defun make-accumulater () "(make-accumulater) - builds an variable array of value arrays under control of the globals *variable-types* and *disc-var-ranges*. VALUES ARE INITIALIZED to NIL." (when (not (= (length *variable-types*) (length *disc-var-ranges*))) (break "~&*variable-types* & *disc-var-ranges* are of incompatable lengths!~%~ You may continue after correcting.~%") ) (map 'vector #'(lambda (type range) (case type (discrete (make-array range :initial-element nil)) (real-pt (make-array 6 :initial-element nil)) (ignore #(ignore)) ) ) *variable-types* *disc-var-ranges* ) ) (defun copy-accumulater (accumulater) "Creates a new accumulater with copies of the contents of the argument." (map 'vector #'copy-seq accumulater ) ) (defun replace-acc (to-acc from-acc) "Destructivly replaces values of to-acc by those of from-acc, without sharing structures. " (map nil #'replace to-acc from-acc ) ) (defun null-accumulater (acc &optional (null-value nil)) "Fill an accumulater with nils, or some null value (probably 0.0)." (map nil #'(lambda (var) (when (vectorp var) (fill var null-value))) acc) ) ;(defun test-for-neg (acc) ; (map 'list #'(lambda (x) ; (or (eq x 'ignore) (count-if-not 'plusp x))) ; acc)) (defun collect-discrete-wts (acc &optional (return-type 'vector)) "Collect the value weights for discrete Vars., and the discrete null/known weights for real Vars. The real value fields of real Vars. are ignored." (map return-type #'(lambda (type var-vector) (case type (discrete (reduce '+ var-vector)) (real-pt (reduce '+ var-vector :end 2) ) (ignore 0.0) (t (break "bad type - ~A" type)) ) ) *variable-types* acc) ) (defun set-acc-to-prior-wts (acc) "Fill a weight accumulater with prior weights from *disc-priors*, *real-bases*. It is assumed that read-db has already expanded *disc-priors*, and *real-bases*." (do* ((N-var 0 (1+ N-var)) (types *variable-types* (cdr types)) (type (car types) (car types)) (disc-priors *disc-priors* (cdr disc-priors)) (disc-prior (car disc-priors) (car disc-priors)) (bases *real-bases* (cdr bases)) (base (car bases) (car bases)) (N-elements nil) ) ((= N-var *N-variables*)) (case type (discrete (setq N-elements (length (aref acc N-var))) (cond ((and disc-prior (listp disc-prior) (= (length disc-prior) N-elements)) (do ((N-el 0 (1+ N-el)) (priors disc-prior (cdr priors)) ) ((= N-el N-elements)) (setf (acc-disc-wt acc N-var N-el) (car priors)) ) ) (t (break "~&~A is bad prior Wt. for discrete variable ~A~%" disc-prior N-var) ) ) ) (real-pt (cond ((and disc-prior (listp disc-prior) (= (length disc-prior) 2)) (setf (acc-null-wt acc N-var) (first disc-prior)) (setf (acc-known-wt acc N-var) (second disc-prior)) ) (t (break "~&~A is bad discrete prior Wt. for ~A variable ~A~%" disc-prior type N-var )) ) (cond ((and base (listp base) (= (length base) 3)) (setf (acc-real-wt acc N-var) (first base)) (setf (acc-real-sum acc N-var) (second base)) (setf (acc-real-sum-sq acc N-var) (third base)) ) (t (break "~&~A is a bad prior base Wt. for ~A variable ~A~%" base type N-var )) ) ) (ignore) (t (break "bad type - ~A" type)) ) ) ) (defun inc-acc-var (acc var type value &optional (weight 1.0)) "Incriments an accumulater variable of 'type by an appropriatly weighted 'value." (if (null value) ; Allows using nil for unknown values. (incf (acc-null-wt acc var) weight) (case type (discrete ; Value is an index over variable range. (incf (acc-disc-wt acc var value) weight)) (real-pt ; Value is a continuous number. (incf (acc-real-wt acc var) weight) (incf (acc-real-sum acc var) (* value weight)) (incf (acc-real-sum-sq acc var) (* value value weight)) ) (ignore) (t (break "bad type - ~A" type)) ) ) ) ;;; (display-accumulater acc ) - displays the accumulater variable/values stored ;;; in the accumulater acc: ;;; (defun display-accumulater (acc &optional (stream t) (as-probability nil)( class-wt nil) (priors nil)) "Displays the accumulater variable/values stored in the accumulater acc. If as-probability then the values are displayed as probabilities, that is weight/total-wt." (let ((output ()) (*print-structure-contents* nil)) (do ((N-var 0 (1+ N-var)) (types *variable-types* (cdr types)) (values nil) (tot-class-wt nil)) ((= N-var *N-variables*)) (setq values (acc-var acc N-var)) (setq tot-class-wt (+ (elt priors N-var) class-wt)) (push (format stream "~%Variable ~3D: ~8A " N-var (car types)) output) (cond ((not as-probability) (map nil #'(lambda (x) (push (format stream " ~10F" x) output)) values) ) (t (case (car types) (discrete (map nil #'(lambda (x) (push (format stream " ~10F" (/ x tot-class-wt)) output)) values)) (real-pt (push (format stream " ~10F ~10F ~10F ~10F" (/ (elt values 0) tot-class-wt) ;prob unknown (/ (elt values 1) tot-class-wt) ;prob known (/ (elt values 3) (elt values 2)) ;mean real value (sqrt (- (/ (elt values 4) (elt values 2)) ;(sqrt variance) (expt (/ (elt values 3) (elt values 2)) 2))) ) output) ) (ignore (push (format stream "ignore") output)) (t (break "bad type - ~A" (car types))) ) ) ) ) (or (eq stream t) (apply 'concatenate (cons 'string (reverse output)))) ) ) (defun display-accumulated-wts (acc &optional (stream t)) "Displays the accumulater variable/values stored in the accumulater acc." (let ((output ()) (*print-structure-contents* nil) ) (do ((N-var 0 (1+ N-var)) (types *variable-types* (cdr types)) ) ((= N-var *N-variables*)) (push (format stream "~%Variable ~3D: ~8A " N-var (car types)) output) (unless (eq (car types) 'ignore) (map nil #'(lambda (x) (push (format stream " ~10F" x) output)) (acc-var acc N-var) ) ) ) (or (eq stream t) (apply 'concatenate (cons 'string (reverse output)))) ) ) ;;;___________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; Classes ;;; A 'class structure represents a single class of a partitioning (see below). ;;; There are accumulaters (see above) for: ;;; (defstruct class "Holds the information describing one class of a partitioning." (value-wts nil :type vector) ;; Accumulater: Cur Var/Val Wts. & sums (priors inc). (disc-prior-wts nil :type vector) ;; Discrete component Prior Wts. for unit MMLs. (wt 0.0 :type float) ;; Current Wt. of class for MMLs (Not Inc. priors). (unit-MMLs nil :type vector) ;; Accumulater: Var/Val unit MMLs, means, sigmas. (memb-prior-wt 0.0 :type float) ;; Weight used to calculate current Memb. MML. (memb-unit-MML 0.0 :type float) ;; Membership unit MML. (membership-MML 0.0 :type float) ;; membership MML collecter. (description-MML 0.0 :type float) ;; description MML collecter. ) (defvar *empty-class-wt-limit* 0.9 "When weight is less than this the class is considered to be empty.") (defmacro class-empty-p (class) "A class is empty when the class weight is near zero.)" `(< (class-wt ,class) ,*empty-class-wt-limit*) ) (defmacro class-var-disc-wt (class N-var) "The total discrete weight accumulated by a variable." `(+ (class-wt ,class) (aref (class-disc-prior-wts ,class) ,N-var) ) ) (defun class-total-mml (class) (+ (class-membership-MML class) (class-description-MML class)) ) (defun make-class&accs () (make-class :value-wts (make-accumulater) :disc-prior-wts (make-array *N-variables* :initial-element 0.0) :unit-MMLs (make-accumulater) )) (defun replace-class (to-class from-class) "Destructivly replaces the contents of to-class with those of from-class, without creating any new structures." (REPLACE-ACC (class-value-wts to-class) (class-value-wts from-class)) (replace (class-disc-prior-wts to-class) (class-disc-prior-wts from-class)) (setf (class-wt to-class) (class-wt from-class)) (REPLACE-ACC (class-unit-MMLs to-class) (class-unit-MMLs from-class)) (setf (class-memb-prior-wt to-class) (class-memb-prior-wt from-class)) (setf (class-memb-unit-MML to-class) (class-memb-unit-MML from-class)) (setf (class-membership-MML to-class) (class-membership-MML from-class)) (setf (class-description-MML to-class) (class-description-MML from-class)) t ) (defun set-class-to-priors (class class-prior &optional (data-wt (1+ *N-data*))) "Sets all the class values to the priors." (SET-ACC-TO-PRIOR-WTS (class-value-wts class)) (replace (class-disc-prior-WTS class) ; Use replace for locality of refrence (COLLECT-DISCRETE-WTS (class-value-wts class) 'vector) ) (setf (class-wt class) 0.0 ) (setf (class-memb-prior-wt class) class-prior) (UPDATE-CLASS-UNIT-MMLs class data-wt) (setf (class-membership-MML class) 0.0) (setf (class-description-MML class) 0.0) class ) (defun update-class-wts (class delta-wt datum-vector &optional) "Incriments the appropriate weights and sums by delta-wt. Fields and methods are determined from *variable-types* and datum-vector. Delta-wt MUST be the difference between two normalized weight vectors of non-negative elements." (let ((value-wts (class-value-wts class)) ) ;; accumulater (incf (class-wt class) delta-wt) (do* ((N-var 0 (1+ N-var)) (types *variable-types* (cdr types)) (type (car types) (car types)) (value nil) (disc-priors nil) (real-priors nil) ) ((= N-var *N-variables*)) (setq value (get-datum-var datum-vector N-var)) (unless (eq value *test-sub-symbol*) ; Eq -> Ignore this variable (setq disc-priors (elt *disc-priors* N-var)) (case type (discrete (incf (acc-disc-wt value-wts N-var value) delta-wt) ) (real-pt (cond ((null value) (incf (acc-null-wt value-wts N-var) delta-wt) ) ((numberp value) (setq real-priors (elt *real-priors* N-var)) (incf (acc-known-wt value-wts N-var) delta-wt) (incf (acc-real-wt value-wts N-var) delta-wt) (incf (acc-real-sum value-wts N-var) (* delta-wt value) ) (incf (acc-real-sum-sq value-wts N-var) (* delta-wt value value) ) ) (t (break "bad value: ~A for real variable: ~A" value N-var)) ) ) (ignore) (t (break "bad type: ~A" type)) ) ) ) ) ) (defun update-class-unit-MMLs (class data-wt) "Updates unit-MMLs (MMLs for Descrete variables and the descrete null/known components of Real variables, and the Real variable MML computation parameters), using the data in weight, value-wts, disc-prior-wts, and memb-prior-wt." ;;; Change to keep sigma-sq no less than *sqrt-least-positive-short-float*. JCS 7/88 ; ; (if (not (= (length *variable-types*) (length (class-value-wts class)) ; (length (class-disc-prior-wts class)) (length (class-unit-MMLs class)) )) ; (break "bad length in *variable-types*, value-wts, disc-prior-wts, or unit-MMLs.") ) (let ((value-wts (class-value-wts class)) ;; accumulater (disc-prior-wts (class-disc-prior-wts class)) ;; vector (MMLs (class-unit-MMLs class)) ;; accumulater (class-wt (class-wt class)) ;; real (weight nil) (mean nil) (sigma-sq nil) (disc-wt nil)) (setf (class-memb-unit-MML class) (- (log (/ (+ class-wt (class-memb-prior-wt class)) data-wt))) ) (do* ((n-var 0 (1+ n-var)) (types *variable-types* (cdr types)) (type (car types) (car types)) ) ((= n-var *N-variables*)) (setq disc-wt (+ class-wt (aref disc-prior-wts n-var))) (case type (discrete (replace (acc-var MMLs n-var) (map 'vector #'(lambda (wt) (- (log (/ wt disc-wt))) ) (acc-var value-wts n-var) ) ) ) (real-pt (setf (acc-null-MML MMLs n-var) (- (log (/ (acc-null-wt value-wts n-var) disc-wt))) ) (setf (acc-known-MML MMLs n-var) (- (log (/ (acc-known-wt value-wts n-var) disc-wt))) ) (setq weight (acc-real-wt value-wts n-var)) (setq mean (/ (acc-real-sum value-wts n-var) weight )) (setq sigma-sq (max *sqrt-least-positive-short-float* (sigma-sq weight (acc-real-sum value-wts n-var) (acc-real-sum-sq value-wts n-var) ) )) (setf (acc-real-wt MMLs n-var) weight) (setf (acc-real-mean MMLs n-var) mean) (setf (acc-real-sigma-term MMLs n-var) (/ 0.5 sigma-sq)) (setf (acc-real-log-sigma MMLs n-var) (log (sqrt (* sigma-sq 2.0 3.1415926))) ) ) (ignore) ) ) ) ) (defun class-datum-prob (class datum) " Computes the probability that datum is described by this class. Note that log-class-datum-prob is preferred for it's far greater range." (safe-exp (log-class-datum-prob class datum)) ) (defun log-class-datum-prob (class datum) " Computes the log-probability that datum is described by this class, assuming that the variables (attributes) are conditionally independent with respect to the class. A value of *test-sub-symbol* in the data vector causes that variable to be ignored in this calculation, in order to make a prediction on the expected value." (let ((log-prob (- (class-memb-unit-MML class))) (unit-MMLs (class-unit-MMLs class)) (value nil)) ; (format t "~&~A~%" log-prob) (do ((N-var 0 (1+ N-var)) (types *variable-types* (cdr types)) ) ((= N-var *N-variables*)) (setq value (get-datum-var datum N-var)) (unless (eq value *test-sub-symbol*) ; Eq -> unknown variable for prediction (case (car types) (discrete (decf log-prob (acc-disc-MML unit-MMLs N-var value)) ) (real-pt (decf log-prob (cond ((null value) (acc-null-MML unit-MMLs N-var) ) (t (+ (acc-known-MML unit-MMLs N-var) (acc-real-Log-Sigma unit-MMLs N-var) (* (acc-real-Sigma-Term unit-MMLs N-var) (square (- value (acc-real-Mean unit-MMLs N-var))) ) )) ) ) ) (ignore) (t (break "Unknown variable type: ~A" (car types))) ) ) ; (format t "~&~A~%" log-prob) ) log-prob ) ) (defun class-real-mean&sigmas (class &optional (suppress-priors nil)) " Returns a list of '(mean std-deviation) for real variables, nil for discretes." (let ((listing ()) (MMLs (class-unit-mmls class)) ) (do* ((n-var 0 (1+ n-var)) (types *variable-types* (cdr types)) (priors *real-priors* (cdr priors)) ) ((= n-var *N-variables*)) (case (car types) (discrete (push '(0 0) listing)) (real-pt (let ((mean (acc-real-mean MMLs n-var)) (sigma (sqrt (/ .5 (acc-real-sigma-term MMLs n-var)))) ) (if (and suppress-priors (= mean (first (car priors))) (= sigma (second (car priors))) ) (push '(0 0) listing) (push (list mean sigma) listing) ) ) ) (ignore (push '(0 0) listing)) ) ) (reverse listing) ) ) (defun display-class (class &optional (stream t)) (let ((*print-structure-contents* nil) (tot-class-wt (class-wt class)) (class-priors (class-disc-prior-wts class))) (format stream "~& Class ~A has weight ~D~%~ with Membership MML ~F, Description MML ~F ~ and and accumulated probabilities:~%~A~%" class (class-wt class) (class-membership-MML class) (class-description-MML class) (display-accumulater (class-value-wts class) nil t tot-class-wt class-priors)))) (defun display-class-wts (class &optional (stream t)) (let ((*print-structure-contents* nil)) (format stream "~& Class ~A has weight ~D~%~ with Membership MML ~F, Description MML ~F ~ and and accumulated weights:~%~A~%" class (class-wt class) (class-membership-MML class) (class-description-MML class) (display-accumulated-wts (class-value-wts class) nil) ) ) ) ;;; ________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; Partitionings ;;; A 'partitioning' structure represents one level of one branch of a hierarchial ;;; classification. It has one or more 'classes', a 'residue class', and may have a ;;; superior 'class'. An ordinary 'class' is implicitly represented by an index to ;;; the arrays forming the partitioning fields. The 'classes' field is the only complex ;;; one. ;;; (defstruct (partitioning (:conc-name part-) ) "Holds the information characterizing a partitioning into classes" (superior nil ) ;; The hierarchial superior class. (class-range 1 :type integer) ;; No. of classes in partitioning. (classes nil :type vector) ;; [range] of class: the partitioning classes. (memb-wt 0.0 :type float) ;; The class membership weight (less prior). (prior-wt 0.0 :type float) ;; The prior class membership weight. (total-MML 0.0 :type float) ;; partition message length. ) ;;; Access macros for the class/accumulater fields: (defmacro part-class (partition class-index) "Gets the class-index'th class of partition." `(aref (part-classes ,partition) ,class-index) ) (defmacro part-class-wt (partition N-class) `(class-wt (part-class ,partition ,N-class)) ) (defmacro class-prior-wt (partition N-class) `(class-memb-prior-wt (part-class ,partition ,N-class)) ) (defmacro class-wts (partition &key all) `(reset #'class-wt (subseq (part-classes ,partition) 0 (if ,all nil (part-class-range ,partition))) ) ) (defmacro class-disc-wt (partition N-class N-var) "Gets the total discrete weight on a variable in a class." `(class-var-disc-wt (part-class ,partition ,N-class) ,N-var) ) (defmacro empty-classes-p (partition &optional all) `(find-if #'(lambda (cl) (class-empty-p cl)) (part-classes ,partition) :end (if ,all nil (part-class-range ,partition))) ) (defmacro discrete-wt (partition N-class N-var N-value) `(acc-disc-wt (class-value-wts (part-class ,partition ,N-class)) ,N-var ,N-value ) ) (defmacro null-wt (partition N-class N-var) `(acc-null-wt (class-value-wts (part-class ,partition ,N-class)) ,N-var) ) (defmacro known-wt (partition N-class N-var) `(acc-known-wt (class-value-wts (part-class ,partition ,N-class)) ,N-var) ) (defmacro real-wt (partition N-class N-var) `(acc-real-wt (class-value-wts (part-class ,partition ,N-class)) ,N-var) ) (defmacro real-sum (partition N-class N-var) `(acc-real-sum (class-value-wts (part-class ,partition ,N-class)) ,N-var) ) (defmacro real-sum-sq (partition N-class N-var) `(acc-real-sum-sq (class-value-wts (part-class ,partition ,N-class)) ,N-var)) (defmacro real-sum-log-err (partition N-class N-var) `(acc-real-Sum-Log-Err (class-value-wts (part-class ,partition ,N-class)) ,N-var) ) (defmacro class-memb-MML (partition N-class) `(class-membership-MML (part-class ,partition ,N-class)) ) (defmacro collect-memb-MMLs (partition) `(reduce #'(lambda (sum class) (+ sum (class-membership-MML class))) (part-classes ,partition) :end (part-class-range ,partition) :initial-value 0.0) ) (defmacro class-desc-MML (partition N-class) `(class-description-MML (part-class ,partition ,N-class)) ) (defmacro collect-desc-MMLs (partition) `(reduce #'(lambda (sum class) (+ sum (class-description-MML class))) (part-classes ,partition) :end (part-class-range ,partition) :initial-value 0.0)) (defun display-unit-MMLs (partition N-var) (let ((classes (part-classes partition)) (*print-array* t) ) (map nil #'(lambda (class) (format t "~&~2D ~A~%" (position class classes) (aref (class-unit-mmls class) N-var) ) ) (remove-if #'(lambda(wt) (< wt .5)) classes :key #'class-wt) ) ) ) ;;; ________________________________________ (defun make-part&classes (class-range &optional (superior nil)) "Generates a partition. Call set-partition-to-priors to instantiate priors." (let ((partition (make-partitioning :superior superior :class-range class-range :classes (make-array class-range) ))) (dotimes (N-class class-range) ; adds the accumulaters (setf (part-class partition N-class) (make-class&accs) ) ) partition ) ) (defun copy-partition (from-partition) "Copy (i.e. build) a new partition from the contents of another, using the origional's 'part-class-range as the new one's actual size." ;; new - WILL 03 SEPT 87 (let* ((class-range (part-class-range from-partition)) (classes (part-classes from-partition)) (partition (make-partitioning :superior nil :class-range class-range :classes (make-array class-range) :memb-wt (part-memb-wt from-partition) :prior-wt (part-prior-wt from-partition) :total-mml (part-total-mml from-partition)))) (dotimes (n-class class-range) (setf (part-class partition n-class) (make-class :value-wts (make-array *n-variables* :initial-contents (copy-accumulater (class-value-wts (aref classes n-class)))) :disc-prior-wts (make-array *n-variables* :initial-contents (class-disc-prior-wts (aref classes n-class))) :unit-mmls (make-array *n-variables* :initial-contents (copy-accumulater (class-unit-mmls (aref classes n-class)))) :wt (class-wt (aref classes n-class)) :memb-prior-wt (class-memb-prior-wt (aref classes n-class)) :memb-unit-mml (class-memb-unit-mml (aref classes n-class)) :membership-mml (class-membership-mml (aref classes n-class)) :description-mml (class-description-mml (aref classes n-class))))) partition)) (defun replace-part (to-part from-part) "Destructivly replaces the contents of to-part with those of from-part, without creating any new structure whatever." ;; new - John 12/87, revised 1/88 (let* ((class-range (part-class-range from-part))) (when (> class-range (length (part-classes to-part))) (break "To-part has too few classes (~A) to hold ~A classes." (length (part-classes to-part)) class-range ) ) (setf (part-superior to-part) (part-superior from-part)) (setf (part-class-range to-part) class-range) (setf (part-memb-wt to-part) (part-memb-wt from-part)) (setf (part-prior-wt to-part) (part-prior-wt from-part)) (setf (part-total-mml to-part) (part-total-mml from-part)) (dotimes (n-class class-range) (replace-class (part-class to-part n-class) (part-class from-part n-class) ) ) (when (> (length (part-classes to-part)) class-range) ;clear the extra classes (do ((n-class class-range (1+ n-class)) (limit (length (part-classes to-part))) ) ((>= n-class limit)) (SET-CLASS-TO-PRIORS (part-class to-part n-class) (class-memb-prior-wt (part-class to-part n-class)) ) ) ) t ) ) (defun expand-partition (partition) "Expands the class-range to the physical range." (setf (part-class-range partition) (length (part-classes partition)) ) ) (defun remove-null-classes (partition &key all-but-1 ) "Parses partition to extract any empty classes, left shifting higher number classes down to fill any resulting voids, and replacing them with the empties. The class weights in *curr-wts* are also shifted to match the new class numbers. The partition's class-range and inv-class-range are updated to reflect the non-null classes. Note that this enforces a uniform prior weight on classes." (let* ((class-range (part-class-range partition)) (null-count (+ (if all-but-1 -1 0) (count-if #'(lambda (x) (< (class-wt x) *empty-class-wt-limit*)) (part-classes partition) :end class-range ) )) (count 0) (class-store ()) (class-prior nil) (new-N-class nil) (base-prior nil) ) (when (> null-count 0) ; Note that value is used as a return flag (setf class-prior (/ 1.0 (- class-range null-count))) (setf base-prior (/ 1.0 (length (part-classes partition)))) (dotimes (N-class class-range) (cond ((< (part-class-wt partition N-class) *empty-class-wt-limit*) (incf count) (push (part-class partition N-class) ;Save this class structure class-store )) (t (setq new-N-class (- N-class count)) (setf (part-class partition new-N-class) ;Shift class structure down (part-class partition N-class) ) (setf (class-memb-prior-wt (part-class partition new-N-class)) class-prior) (dotimes (N-datum *N-data*) ;;Shift the weights down by count (setf (aref (aref *curr-wts* N-datum) new-N-class) (aref (aref *curr-wts* N-datum) N-class) ) ) ) ) (when (>= (+ N-class null-count (if all-but-1 1 0)) class-range) ;No more non-null classes (setf (part-class partition N-class) ;Replace with a saved class struct (or (pop class-store) (break "~&attempted pop on empty class-store~%") ) ) (set-class-to-priors (part-class partition N-class) base-prior) (setf (part-class-wt partition N-class) 0) (dotimes (N-datum *N-data*) ;;Null the weights (setf (aref (aref *curr-wts* N-datum) N-class) 0) ) ) ; (format t "~&N-class ~D, count ~D, null-count ~D, class-store ~A~%" ; N-class count null-count class-store) ) (and class-store (break "~&class-store not emptyied: ~A~%" class-store)) (decf (part-class-range partition) null-count) ;;Update partition size (update-partition-mmls partition) ) ) ) (defun set-partition-to-priors (partition &optional (class-prior-seq nil)) " Resets partition weights, MML, and all class weights to the prior values. The class-prior-seq defaults to a uniform weight of 1.0 over all classes." (let ((N-classes (part-class-range partition)) (classes (part-classes partition)) ) (setf (part-total-MML partition) 0.0) (setf (part-memb-wt partition) 0.0) (cond (class-prior-seq ; Use the given class prior wts. (setf (part-prior-wt partition) (reduce '+ class-prior-seq :end N-classes) ) (dotimes (n-class (part-class-range partition)) (SET-CLASS-TO-PRIORS (aref classes n-class) (aref class-prior-seq n-class) ) ) ) (t ; Use uniformly distributed prior wt of 1. (setf (part-prior-wt partition) 1.0) (let ((class-prior (/ 1.0 (part-class-range partition)))) (dotimes (n-class N-classes) (SET-CLASS-TO-PRIORS (aref classes n-class) class-prior ) ) ) ) ) ) ) (defun collect-weights (partition) " Resets the partition and class weight collecters to the priors, and adds the weights from *curr-wts*. UPDATE-PARTITION-MMLS should be called next." (when (< (length (aref *curr-wts* 0)) (part-class-range partition)) (break "*curr-wts* vectors have length ~A, but partition class-range is ~A." (length (aref *curr-wts* 0)) (part-class-range partition) ) ) (SET-PARTITION-TO-PRIORS partition) (let ((class nil) (val-wts nil) (datum nil) (weight nil)) ;; Using top-level let for debugging ID's. (dotimes (N-class (part-class-range partition)) ; For each active class in partition: (setq class (aref (part-classes partition) N-class)) (setq val-wts (class-value-wts class)) (dotimes (N-datum *N-data*) ; For each datum in *base-data*: (setq datum (get-datum *base-data* N-datum)) (setq weight (aref (aref *curr-wts* N-datum) N-class)) (incf (part-memb-wt partition) weight) (incf (class-wt class) weight) (do ((N-var 0 (1+ N-var)) ; For each variable: (types *variable-types* (cdr types)) (value nil) ) ((= N-var *N-variables*)) (setq value (get-datum-var datum N-var)) (unless (eq value *test-sub-symbol*) ; Eq -> Ignore this variable (case (car types) (discrete (incf (acc-disc-wt val-wts N-var value) weight ) ) (real-pt (without-floating-underflow-traps-kludge (cond ((null value) (incf (acc-null-wt val-wts N-var) weight)) (t (incf (acc-known-wt val-wts N-var) weight) (incf (acc-real-wt val-wts N-var) weight ) (incf (acc-real-sum val-wts N-var) (* weight value) ) (incf (acc-real-sum-sq val-wts N-var) (* weight value value) ) ) ) ) ) (ignore) ) ) ) ) ) ) t ) (defun update-partition-MMLs (partition) "Updates the class's unit-MMLs and totals. Normally called after collect-weights." (let ((classes (part-classes partition)) (data-wt (+ (part-memb-wt partition) (part-prior-wt partition) ))) (dotimes (N-class (part-class-range partition) ) (UPDATE-CLASS-UNIT-MMLs (aref classes N-class) data-wt) ) (COLLECT-PART-TOTAL-MML partition) (part-total-MML partition) ) ) (defun collect-part-total-MML (partition) " Finds total MML for given partition and puts the partition-MML, class-description-MML, and class-membership-MML into the partition and class structures. The total is calculated as a weighted sum over the unit mmls. This is normally called from update-partition-MMLs which previously updated the unit MMLs." (let ((N-classes (part-class-range partition)) (classes (part-classes partition)) (total-mml nil) ) (dotimes (n-class N-classes) (let ((class (aref classes n-class))) (setf (class-description-MML class) 0.0) (setf (class-membership-MML class) (* (+ (class-wt class) (class-memb-prior-wt class) ) (class-memb-unit-MML class) ) ) ) ) (do* ((N-var 0 (1+ N-var)) ; For each variable, and class: (types *variable-types* (cdr types)) (att-real-ML-const *real-MML-constants* (cdr att-real-ML-const)) ) ((= N-var *N-variables*)) (case (car types) (discrete (dotimes (n-class N-classes) (let ((class (aref classes n-class))) (map nil #'(lambda (weight unit-MML) (let ((delta-MML (* weight unit-MML))) (when (minusp delta-MML) (break "Negative discrete delta-MML: ~A @ class ~A & var ~A." delta-MML (position class (part-classes partition)) N-var ) ) (incf (class-description-MML class) delta-MML) ) ) (acc-var (class-value-wts class) N-var) (acc-var (class-unit-MMLs class) N-var)) ) ) ) (real-pt ;;;this can be made much more effecient (dotimes (n-class N-classes) (let* ((class (aref classes n-class)) (val-acc (class-value-wts class)) (MML-acc (class-unit-MMLs class )) (null-wt (acc-null-wt val-acc N-var)) (null-MML (acc-null-MML MML-acc N-var)) (known-wt (acc-known-wt val-acc N-var)) (known-MML (acc-known-MML MML-acc N-var)) (real-wt (acc-real-wt MML-acc N-var)) (real-sigma (sqrt (/ (* 2 (acc-real-sigma-term MML-acc N-var))))) (prior-wt (third (elt *real-priors* N-var))) ;; (prior-sigma (second (elt *real-priors* N-var))) (-log-p-unk (* null-wt null-MML)) (-log-p-known (* known-wt known-MML)) (-log-p-real (REAL-VAR-MML real-wt prior-wt real-sigma (car att-real-ML-const)) ) (delta-MML (+ -log-p-unk -log-p-known -log-p-real)) ) ; (when (and (<= -log-p-real 0.0) ; (/= real-wt (third (elt *real-priors* N-var))) ) ; (format t "~&@ class ~2D & var ~3D ~ ; (real-var-MML ~10,3F ~10,3F ~10,3F) --> ~12,11F ~12,11F~%" ; (position class (part-classes partition)) N-var ; real-wt real-sigma (car att-real-ML-const) -log-p-real delta-MML) ; ) ; (when (minusp delta-MML) ; (break "Negative real-pt delta-MML: ~A @ class ~A & var ~A." ; delta-MML (position class (part-classes partition)) N-var) ) (incf (class-description-MML class) delta-MML ) ) )) (ignore) ) ) ;;;finally, add the sum of membership-mmls and description-mml into the total MML (setf total-mml (+ (collect-desc-MMLS partition) (collect-memb-MMLs partition) ) ) (if (complexp total-mml) (let ((temp (sqrt (+ (square (realpart total-mml)) (square (imagpart total-mml))))) ) (warn "Collect-part-total-mml has generated a complex result: ~A~%~ Calculation continues with the magnitude: ~A" total-MML temp ) (setf (part-total-MML partition) temp ) ) (setf (part-total-MML partition) total-mml ) ) ) ) (defvar *ln-default-real-pt-error* (log .1)) (defun real-var-MML (total-wt prior-wt total-sigma const-term) ;JCS 6-Nov-87 ;;; ;;; Following Self's analysis, we take the probability of finding L values x1,...,xL ;;; when given a prior mean and sigma corresponding to N data to be: ;;; ;;; P(x1, x2, ...xL|L, N, mean[N], sigma[N]) = dx1 * dx2 * ... * dxL * ;;; ;;; | 1 gamma((L+N-1)/2 ) sqrt(N)^N sigma[N]^(N-1) | ;;; | ---------- . ------------------ . --------------- . ------------------| ;;; | sqrt(pi)^L gamma((N-1)/2 ) sqrt(L+N)^(L+N) sigma[L+N]^(L+N-1)| ;;; ;;; We assume that this form is equally applicable to a weight distribution, ;;; substitute the total and prior weights for L and N, the weighted standard ;;; deviations for sigma[L+N] and sigma[N], and take the negative logarithm to ;;; get the MML. As is done throughout the real variable calculations, the variable ;;; value errors dxi are assumed to be constant for all values of each variable. They ;;; are included in this calculation in order to prevent the MML going negative when ;;; sigma[L+N] becomes very small. ;;; Note that the above formula was developed under the assumption that the ;;; probability distribution function is near linear at xi over the range dxi. Which ;;; implies sigma >> dxi. When this assumption fails, the calculation will be in error. ;;; ;;; total-wt L+N ;;; total-sigma sigma[L+N] ;;; prior-wt N ;;; prior-sigma sigma[N] ;;; const-term (log (/ (* sqrt(N)^N sigma[N]^(N-1)) ;;; gamma((N-1)/2) )) ;;; (let ((MML (- (+ const-term (* (- total-wt prior-wt) *ln-default-real-pt-error*) (log-gamma (* 0.5 (- total-wt 1.0))) (- (* 0.5 (- total-wt prior-wt) *log-pi*)) (- (* 0.5 total-wt (log total-wt))) (- (* (- total-wt 1.0) (log total-sigma)) ) )) )) ; (when (< MML -.001) ; (warn "Numerical representation problem gives MML of ~A." MML) ) MML ) ) (defun update-real-MML-constants () ; JCS 6-Nov-87 "Generates list of values known from priors used in real-ML calculation. See real-var-ML for a complete description of the formula for calculating the ML for reals." (setq *real-MML-constants* (mapcar #'(lambda (type priors) (case type (discrete 'nil) (real-pt (let ((prior-sigma (second priors)) (prior-wt (third priors)) ) (+ (* 0.5 prior-wt (log prior-wt)) (* (- prior-wt 1.0) (log prior-sigma)) (- (log-gamma (* 0.5 (- prior-wt 1.0)))) ) ) ) (ignore 'nil) ) ) *variable-types* *real-priors*) ) ) ;;;; Modified to match the equation: ;(defun real-var-ML (wt sigma prior-wt const-term) ; "Calculates the message length for a given population [weight] of numbers. ; -log P(real value)" ;;;; ;;;; term-1 term-2 term-3 term-4 ;;;; ;;;; ;;;; | 1 (gamma (l+w-1)/2 ) (sqrt w)^w s[w]^(w-1) | ;;;; -log| ----------- . ------------------ . ------------------ . --------------| ;;;; | (sqrt pi)^l (gamma (w-1)/2 ) (sqrt (l+w))^(l+w) s[l+w]^(l+w-1)| ;;;; ;;;; l = wt ;;;; w = prior-wt ;;;; s[w] = prior-sigma ;;;; s[l+w] = sigma ;;;; const-term ;;;; = (log (gamma (w-1)/2 ) ) - (log (sqrt w)^w ) - (log s[w]^(w-1) ) ; ; (let ((total-wt (+ wt prior-wt)) ; (real-var-ml nil)) ; (setf real-var-ml ; (+ const-term ; (* wt *log-root-pi*) ;;; (sqrt pi)^l ; (- (log-gamma (/ (1- total-wt) 2.0))) ;;; (gamma (l+w-1)/2 ) ; (* (/ total-wt 2.0) (log total-wt)) ;;; (sqrt (l+w))^(l+w) ; (* (1- total-wt) (log sigma)))) ;;; s[l+w]^(l+w-1) ; (when (not (plusp real-var-ml)) ; (warn "Non-positive real-var-ml: ~A" real-var-ml) ) ; real-var-ml ) ) ;(defun update-real-MML-constants () ; "Generates list of values known from priors used in real-ML calculation. See real-var-ML ; for a complete description of the formula for calculating the ML for reals." ; (setq *real-MML-constants* ; (mapcar ; #'(lambda (type priors) ; (case type ; (discrete ; 'nil) ; (real-pt ;;;second is att-prior-sigma, third is attribute-prior-wt ; (- (log-gamma (/ (1- (third priors)) 2.0) ) ; (* (/ (third priors) 2.0) (log (third priors))) ; (* (1- (third priors)) (log (second priors)))) ) ; (ignore ; 'nil) ) ) ; *variable-types* *real-priors*) ) ) (defun display-part-MMLs (partition &optional (stream t) all-classes) (let* ((*print-structure-contents* nil) (output (list (format stream "~& Partition ~A has TOTAL MML ~A~% ~ with Memb. MML ~A; Descr. MML ~A" partition (part-total-MML partition) (collect-memb-MMLs partition) (collect-desc-MMLs partition) ))) (limit (if all-classes (length (part-classes partition)) (part-class-range partition) )) wt ) (do ((N-class 0 (1+ N-class))) ((= N-class limit)) (setq wt (part-class-wt partition N-class)) (push (format stream "~% Class ~2D has weight ~14,8F, and MMLs ~14,8F & ~14,8F -> ~14,8F" N-class wt (class-memb-MML partition N-class) (class-desc-MML partition N-class) (if (zerop wt) 0.0 (/ (+ (class-memb-MML partition N-class) (class-desc-MML partition N-class) ) wt ) ) ) output ) ) (push (format stream "~%") output) (or (eq stream t) (apply 'concatenate (cons 'string (reverse output)))) ) ) (defun display-part-classes (partition &optional (stream t) all-classes) (let* ((*print-structure-contents* nil) (output (list (format stream "~& Partition ~A has classes:~%" partition))) (limit (if all-classes (length (part-classes partition)) (part-class-range partition) )) ) ; (map nil #'(lambda (x) (push (format stream "~A" (display-class x nil)) output)) ; (part-classes partition) ) (do ((N-class 0 (1+ N-class))) ((= N-class limit)) (push (format stream "~%~A" (display-class (part-class partition N-class) nil)) output ) ) (push (format stream "~%") output) (or (eq stream t) (apply 'concatenate (cons 'string (reverse output)))) ) ) ;;; ________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; DATA I/O Section ;;; (read-db in-file) reads a data-base file (see >class>test3-b.db) and stores the ;;; information in *base-data*. The file must begin with a list giving the variable types ;;; followed by one giving varible ranges. The three variable types, with their value and ;;; range representations are: ;;; discrete integer integer No. of values ;;; real-pt real list of (low high), reals. ;;; ignore any symbol excess data to be ignored ;;; Then we read the *disc-priors* and *real-priors*. ;;; The next item is an integer giving the number of data in the file. ;;; The data variable/values follow, in variable order for each datum. It is usefull to ;;; format these with one datum per line and uniform value spacing, but this is not required. ;;; Discrete variable values are presumed to run from 1 to the range value, inclusive, with ;;; the value 0 reserved for "not-known". For any type a '? or 'nil is read as "not-known". ;;; Unknown discrete variables are recorded as value 0, while unknown reals are recorded as ;;; nil. ;;; Functions save-data and read-test-db must be modifed concurrently with this. ;;; ;;; NEW VERSION BY WILL 25 AUG 87 (defun read-db (in-file &optional n-data &KEY (VARIABLE-DESCRIPTIONS NIL) (HEADER-FILE NIL)) (setq *current-db* in-file) (with-open-file (stream (string in-file) :direction :input) (COND ((NOT (NULL HEADER-FILE)) ; Read header information from seperate file. (WITH-OPEN-FILE (HD-STREAM (STRING HEADER-FILE) :DIRECTION :INPUT) (setq *variable-types* (READ HD-STREAM)) (setq *N-variables* (length *variable-types*)) (IF VARIABLE-DESCRIPTIONS (SETF *VARIABLE-DESCRIPTIONS* (READ HD-STREAM)) (SETF *VARIABLE-DESCRIPTIONS* (MAKE-LIST *N-VARIABLES* :INITIAL-ELEMENT ""))) (setq *disc-var-ranges* (mapcar #'(lambda (type range) (if (eq type 'discrete) (1+ range) range)) *variable-types* (READ HD-STREAM))) (setq *disc-priors* (Expand-*Disc-Priors* (READ HD-STREAM))) (setq *real-priors* (Expand-*Real-Priors* (READ HD-STREAM))) (setq *real-bases* (Make-*Real-Bases*-List)) (Update-Real-MML-Constants) (setq *N-data* (READ HD-STREAM)))) (T ; Read header information from data file. (setq *variable-types* (READ STREAM)) (setq *N-variables* (length *variable-types*)) (IF VARIABLE-DESCRIPTIONS (SETF *VARIABLE-DESCRIPTIONS* (READ STREAM)) (SETF *VARIABLE-DESCRIPTIONS* (MAKE-LIST *N-VARIABLES* :INITIAL-ELEMENT ""))) (setq *disc-var-ranges* (mapcar #'(lambda (type range) (if (eq type 'discrete) (1+ range) range)) *variable-types* (READ STREAM))) (setq *disc-priors* (Expand-*Disc-Priors* (READ STREAM))) (setq *real-priors* (Expand-*Real-Priors* (READ STREAM))) (setq *real-bases* (Make-*Real-Bases*-List)) (Update-Real-MML-Constants) (setq *N-data* (READ STREAM)))) (setq *N-data* ; Read the number of data and set up storage. (cond ((eq n-data 'all) *N-data*) ((and (numberp n-data) (<= n-data *N-data*)) n-data ) (t (query-for-number (format nil "There are ~D data items in the file. How many ~ do you wish to use?" *N-data*) 1 *N-data* ) ) ) ) (setq *base-data* (make-data-base)) (setq *curr-wts* (make-array *N-data* :adjustable t :initial-element nil)) (setq *wt-vector-length* 0) (do ((N-datum 0 (1+ N-datum)) (value nil)) ; Read the data base. ((= N-datum *N-data*)) (do ((N-var 0 (1+ N-var)) (types *variable-types* (cdr types)) ) ((= N-var *N-variables*)) (setq value (READ STREAM)) (set-variable *base-data* N-datum N-var (cond ((or (null value) (eq '? value)) (if (eq 'discrete (car types)) 0 nil ) ) (t value) ) ) ) ) ) t ) (defun expand-*disc-priors* (input-priors) "Makes a fully expanded *disc-priors* type list from the input-priors. Must not have any side effects." (and (null input-priors) (setq input-priors (make-list (length *variable-types*) :initial-element nil)) ) (mapcar #'(lambda (prior type range) (case type (discrete (cond ((and (listp prior) (not (eq prior nil))) prior) ((and (numberp prior) (numberp range)) (make-list range :initial-element prior) ) ((and (null prior) (numberp range)) (make-list range :initial-element (/ 1.0 range)) ) (t (break "bad combination of type = ~A, prior = ~A,range = ~A" type prior range )) ) ) (real-pt (cond ((and (listp prior) (not (eq prior nil))) prior) ((numberp prior) (list (- 1.0 prior) prior) ) ((null prior) (list (- 1.0 *default-known-prob*) *default-known-prob*) ) (t (break "Bad combination of type = ~A, prior = ~A" type prior )) ) ) (ignore 'ignore) ) ) input-priors *variable-types* *disc-var-ranges* ) ) (defun expand-*real-priors* (input-priors) "Makes a fully expanded *real-priors* type list from the input-priors. Must not have any side effects." (if (null input-priors) (break "argument input-priors is nil") (mapcar #'(lambda (prior type) (case type (discrete (cond (prior (break "odd value ~A in discrete position." prior)) (t nil) ) ) (real-pt (cond ((or (null prior) (not (listp prior)) (fourth prior)) (break "bad value for real prior: ~A" prior) ) ((third prior) prior) (t (append prior (list *real-prior-Wt-default*))) ) ) (ignore (cond (prior (break "odd value ~A in ignore position." prior)) (t nil) ) ) ) ) input-priors *variable-types* ) ) ) (defun make-*real-bases*-list () "Makes a *real-bases* type list from the *real-priors* list. A real-base list consists of ( ) where the sums refer to the two values corresponding to the mean and sigma in *real-priors*, and the weight is the prior weight from *real-priors*." (mapcar #'(lambda (prior) (cond ((null prior) nil) ((and prior (listp prior) ) (let* ((mean (first prior)) (sigma (second prior)) (weight (or (third prior) *real-prior-Wt-default*)) (weight-factor (/ weight 2.0)) (x1 (+ mean sigma)) (x2 (- mean sigma)) ) (list weight (* weight-factor (+ x1 x2)) (* weight-factor (+ (* x1 x1) (* x2 x2))) ) ) ) (t (break "Bad prior value: ~A" prior)) ) ) *real-priors* ) ) (defun collect-real-priors-from-data (&key (real-prior-wt *real-prior-wt-default*) (fixed-sigma nil) (out-type 'short-float)) "Replaces the current *real-priors* and *real-bases* by the empherical priors. If no real values are present, or all are present and identical, the variable is changed to type IGNORE. If :fixed-sigma is given, that is used for all real standard deviations. :out-type controls the type of reals used in the lists and subsequent calculations." (let ((min-sigma 0.001) ; Substitute for zero sigma (n-reals-list (make-sequence 'list *N-variables* :initial-element 0)) (sums-list (make-sequence 'list *N-variables* :initial-element 0.0d0)) (sums-sq-list (make-sequence 'list *N-variables* :initial-element 0.0d0)) (value nil) sigma) (without-floating-underflow-traps-kludge (dotimes (N-datum *N-data*) ; accumulate n, ‡value, ‡value^2 (do ((N-var 0 (1+ N-var)) (types *variable-types* (cdr types)) (n-reals n-reals-list (cdr n-reals)) (sums sums-list (cdr sums)) (sums-sq sums-sq-list (cdr sums-sq)) ) ((endp types) ) (when (eq (car types) 'real-pt) (when (numberp (setq value (get-variable *base-data* N-datum N-var))) (setq value (coerce value 'long-float)) (incf (car n-reals)) (incf (car sums) value) (incf (car sums-sq) (* value value)) ) ) ) ) (do ((N-var 0 (1+ N-var)) ; calcualte prior mean and sigma (types *variable-types* (cdr types)) (n-reals n-reals-list (cdr n-reals)) (sums sums-list (cdr sums)) (sums-sq sums-sq-list (cdr sums-sq)) (real-priors *real-priors* (cdr real-priors))) ((endp types) ) (when (eq (car types) 'real-pt) (cond ((zerop (car n-reals)) ; No data values found: (setf (car real-priors) nil) (setf (car types) 'ignore) (format t "~&- REAL attr. ~3D changed to 'IGNORE a/c no real values." N-var)) ((and (zerop (setf sigma (sqrt (sigma-sq (car n-reals) (car sums) (car sums-sq))) )) (= (car n-reals) *n-data*)) (setf (car real-priors) nil) (setf (car types) 'ignore) (format t "~&- REAL attr ~3D changed to 'IGNORE a/c sigma = 0 over all data." N-var ) ) ((zerop sigma) (setf (car real-priors) (list (coerce (/ (car sums) (car n-reals)) out-type) (or fixed-sigma min-sigma) real-prior-wt ) ) ) ((not (complexp sigma)) (setf (car real-priors) (list (coerce (/ (car sums) (car n-reals)) out-type) (or fixed-sigma (coerce sigma out-type)) real-prior-wt ) ) ) (t (format t "~&- REAL attr. ~3D generated complex sigma: ~A." N-var sigma) (if (y-or-n-p "Shall I use the magnitude? Alternative is to ignore.") (progn (setf (car real-priors) (list (coerce (/ (car sums) (car n-reals)) out-type) (or fixed-sigma (coerce (sqrt (+ (expt (realpart sigma) 2) (expt (imagpart sigma) 2) )) out-type ) ) real-prior-wt ) ) ) (progn (setf (car real-priors) nil) (setf (car types) 'ignore) (format t "~&- Attr ~3D changed to 'IGNORE a/c complex sigma." N-var ) ) ) ) ) ) ) ) (setq *real-bases* (Make-*Real-Bases*-List)) (Update-Real-MML-Constants) t ) ) (defun save-data (&key (stream t) (real-priors t)) "Intended for saving out a data base that has been modified. Uses format to generate a formatted output that can be edited to the actual input form." (when real-priors (collect-real-priors-from-data)) (format stream "~2%; Variable types. ~%~A " *variable-types*) (format stream "~2%; Discrete variable ranges. ~%~A" (mapcar #'(lambda (x y) (if (eq x 'discrete) (1- y) y)) *variable-types* *disc-var-ranges*)) (format stream "~2%; Discrete priors. ~%~A" *disc-priors*) (format stream "~2%; Real variable priors (empherical priors for full DB). ~%~A" *real-priors*) (format stream "~2%; No of data items. ~%~A" *N-data*) (do ((N-datum 0 (1+ N-datum))) ((= N-datum *N-data*)) (format stream "~&") (do ((N-var 0 (1+ N-var))) ((= N-var *N-variables*)) (format stream "~3T~A" (get-variable *base-data* N-datum N-var)) ) ) ) ;;; --------------------------- Routines for Reordering Data ----------------------------- ;;; The following two are used to change the order of elements in the data base, without ;;; altering the class assignments. Any correct MML algorithm should produce an invarent ;;; total under these changes: (defun reverse-*data* () "Reverses the data order without altering class assignments." (setq *base-data* (reverse *base-data*)) (setq *curr-wts* (reverse *curr-wts*)) ) (defun scramble-*data* (&optional (moves *N-data*)) "Scrambles the data order without altering class assignments." (let ((index1 nil) (index2 nil) (temp nil)) (dotimes (N-moves moves ) (setq index1 (random *N-data*)) (setq index2 (random *N-data*)) (setq temp (get-datum *base-data* index1)) (set-datum *base-data* index1 (get-datum *base-data* index2)) (set-datum *base-data* index2 temp) (setq temp (aref *curr-wts* index1)) (setf (aref *curr-wts* index1) (aref *curr-wts* index2)) (setf (aref *curr-wts* index2) temp) ) moves ) ) ;;;----------------- Routines For Generating An Initial Weight Distribution ---------------- (defmacro put-wt-vector (vec) `(push ,vec *wt-vector-store*) ) (defmacro get-wt-vector () `(let ((temp (and *wt-vector-store* (pop *wt-vector-store*)))) (if (= (length temp) *wt-vector-length*) (fill temp 0) (make-array *wt-vector-length* :initial-element 0) ) ) ) (defun mod-distribute-data (mod-range &optional (class-range nil)) "Distributes data by file position mod mod-range." (when (< mod-range 1) (break "bad mod-range: ~D" mod-range)) (unless class-range (setq class-range mod-range)) (cond ((> class-range *wt-vector-length*) (setf *wt-vector-length* class-range) (setf *wt-vector-store* ()) (dotimes (N-datum *N-data*) (setf (aref *curr-wts* N-datum) (make-array *wt-vector-length* :initial-element 0) ) (incf (aref (aref *curr-wts* N-datum) (* 1.0 (mod N-datum mod-range)) )) ) ) (t (dotimes (N-datum *N-data*) (fill (aref *curr-wts* N-datum) 0) (incf (aref (aref *curr-wts* N-datum) (* 1.0 (mod N-datum mod-range)) )) ) ) ) class-range ) (defun block-distribute-data (block-size &optional (class-range nil)) "Distributes data by blocks from the file." (unless class-range (setq class-range (ceiling *N-data* block-size))) (cond ((> class-range *wt-vector-length*) (setf *wt-vector-length* class-range) (setf *wt-vector-store* ()) (dotimes (N-datum *N-data*) (setf (aref *curr-wts* N-datum) (make-array *wt-vector-length* :initial-element 0) ) (incf (aref (aref *curr-wts* N-datum) (floor N-datum block-size) )) ) ) (t (dotimes (N-datum *N-data*) (fill (aref *curr-wts* N-datum) 0) (incf (aref (aref *curr-wts* N-datum) (floor N-datum block-size) )) ) ) ) class-range ) (defun random-assign-data (class-range &optional (randomize t)) "Randomly assigns each datum to one class." (when randomize (randomize-random)) (cond ((> class-range *wt-vector-length*) (setf *wt-vector-length* class-range) (setf *wt-vector-store* ()) (dotimes (N-datum *N-data*) (setf (aref *curr-wts* N-datum) (make-array *wt-vector-length* :initial-element 0) ) (setf (aref (aref *curr-wts* n-datum) (random class-range)) 1.0 ) ) ) (t (dotimes (N-datum *N-data*) (fill (aref *curr-wts* N-datum) 0) (setf (aref (aref *curr-wts* n-datum) (random class-range)) 1.0 ) ) ) ) class-range ) (defun random-select-data (class-range &key (n-samples 1) (randomize t) ) " Randomly samples the data for class prototypes, taking n-samples for each class. It is desirable that (class-range * n-samples) << *N-data*. Note that *curr-wts* may be an array of 'nil." (when (>= (* 5 class-range n-samples) *n-data*) (break "Requesting too many samples: ~D for ~D data." (* class-range n-samples) *n-data* ) ) (when randomize (randomize-random)) (let ((used-list ()) (count nil) (weight (/ *N-data* n-samples class-range)) ) (when (> class-range *wt-vector-length*) (setf *wt-vector-length* class-range) (setf *wt-vector-store* ()) ) (if (> *wt-vector-length* (length (aref *curr-wts* 0))) (dotimes (N-datum *N-data*) (setf (aref *curr-wts* N-datum) (or (fill (pop *wt-vector-store*) 0) (make-array *wt-vector-length* :initial-element 0) ) ) ) (dotimes (N-datum *N-data*) (fill (aref *curr-wts* N-datum) 0) ) ) (dotimes (n-class class-range) (setf count 0) (do (n-datum) ((= count n-samples)) (setf n-datum (random *n-data*)) (when (not (member n-datum used-list)) (push n-datum used-list) (setf (aref (aref *curr-wts* n-datum) n-class) weight) (incf count) ) ) ) ) class-range ) (defun random-distribute-data (class-range &optional (%zeros .5) (randomize t)) "Generates a random distribution, of which about %zeros are set to 0.0." (when (< class-range 1) (break "bad class-range: ~D" class-range)) (when (or (< %zeros 0.0) (> %zeros 1.0)) (setq %zeros (max .01 (min .99 %zeros))) (format t "~&Random-distribute-data: %zeros set to ~D~%" %zeros) ) (when randomize (randomize-random)) (cond ((> class-range *wt-vector-length*) (setf *wt-vector-length* class-range) (setf *wt-vector-store* ()) (dotimes (N-datum *N-data*) (setf (aref *curr-wts* N-datum) (gen-random-weights class-range %zeros) ) ) ) (t (dotimes (N-datum *N-data*) (fill (aref *curr-wts* N-datum) 0 :start class-range) (replace (aref *curr-wts* N-datum) (gen-random-weights class-range %zeros) ) ) ) ) class-range ) (defun gen-random-weights (count %zeros) "Generates a vector of count random weights, in which any non-zero values sum to 1.0, and approximatly %zeros have value 0.0. When %zeros --> 1.0 the sequence often contains only zeros." (let ((random-list ()) (value nil) (scale 1.0) (sum 0.0) ) (dotimes (n count) (setq value (max 0.0 (- (random 1.0) %zeros) )) (incf sum value) (push value random-list) ) (when (> sum 0.0) (setf scale (/ 1.0 sum)) ) (map 'vector #'(lambda (x) (* scale x)) random-list) ) ) (defun normal-distribute-data (class-range &optional (sigma% 0.3) (randomize t)) "Attempts a normal distribution about mean = (/ 1 class-range) and sigma = (* sigma% mean)." (when (< class-range 1) (break "bad class-range: ~D" class-range)) (when randomize (randomize-random)) (let* ((mean (/ 1.0 class-range)) (sigma (* sigma% mean)) ) (cond ((> class-range *wt-vector-length*) (setf *wt-vector-length* class-range) (setf *wt-vector-store* ()) (dotimes (N-datum *N-data*) (setf (aref *curr-wts* N-datum) (gen-normal-weights class-range mean sigma) ) ) ) (t (dotimes (N-datum *N-data*) (fill (aref *curr-wts* N-datum) 0 :start class-range) (replace (aref *curr-wts* N-datum) (gen-normal-weights class-range mean sigma) ) )) ) class-range ) ) (defun gen-normal-weights (count mean sigma) "Generates count POSITIVE values having the specified mean. When sigma << mean these are elements of the normal distribution defined by mean and sigma. Otherwise the actual standard deviation is typically .2 to .35 of the mean." (when (< mean 0) (break "Bad inputs: ~A ~A ~A" count mean sigma ) ) (let* ((normal-list (gen-normal-list count mean sigma)) (min-value (min 0.0 (min+ normal-list))) (scale (/ mean (- (/ (reduce #'+ normal-list) count ) min-value ))) ) (map 'vector #'(lambda (x) (* scale (- x min-value))) normal-list) ) ) (defun gen-normal-list (count mean sigma ) "Generates count values from the normal distribution of mean and sigma." (let* ((steps 10) (range (* sigma (sqrt (/ 3.0 steps)))) (2*range (* range 2.0)) (list ()) (sum nil) ) (dotimes (n count) (setq sum (- mean (* steps range))) (dotimes (m steps) (incf sum (random 2*range) ) ) (push sum list) ) list ) ) (defun display-weights (&key (clean t) (weights *curr-wts*) (stream t) N-classes) "Formats the weights onto stream." (let* ((epsilon (case (type-of (* 1.0 (reduce '+ (elt weights 0)))) (single-float single-float-epsilon) (double-float double-float-epsilon) (short-float short-float-epsilon) (long-float long-float-epsilon)) ) (weights-n nil) (weight-i nil)) (unless N-classes (setf N-classes *wt-vector-length*) ) (dotimes (n *N-data*) (format stream "~&~3D " n) (setq weights-n (elt weights n)) (dotimes (i N-classes) (setq weight-i (elt weights-n i)) (if (and clean (< weight-i epsilon)) (format stream " ~A" 0.0) (format stream " ~A" weight-i) ) ) ) ) ) (defun max-weight-class (N-datum &optional (weights *curr-wts*) N-classes ) "Given a data index, finds the class with maximum weight." (max+position (aref weights N-datum) :end N-classes) ) (defun get-max-weight-classes (weights &optional N-classes) "Makes an array containing the maximum weight class for each datum, defaulting to class 0 for data where the class weights are all 0.0." (let* ((n-weights (length weights)) (maxs (make-array n-weights))) (dotimes (n n-weights) (setf (aref maxs n) (max-weight-class n weights N-classes)) ) maxs ) ) ;;;------------------------------------- ;;; Routines for maintaining the class weights array *curr-wts*: ;;; Copy-seq is used to get new copies of the weight vectors: (defun read-class-wts (in-file &optional (Nth-partition 0)) "Reads the Nth-partition *curr-wts* type sequence from a file written by save-class-wts, checking for correct number of data." (let ((N-data nil)) (with-open-file (stream (string in-file) :direction :input) (dotimes (n (* 2 Nth-partition)) (read stream) ) (setq N-data (read stream)) (if (/= N-data *N-data*) (format t "~2&Unable to proceed. This file contains ~D data, while the ~ current DB contains ~D~%Please load the correct DB and try again.~%" N-data *N-data*) (read stream) ) ) ) ) (defun restore-class-wts (in-file &optional (Nth-partition 0) partition) "Restores *curr-wts* to the Nth-partition version saved on in-file by save-class-wts, and optionally creates a corresponding size partitioning." (let* ((new-weights (read-class-wts in-file Nth-partition)) (N-wts (length (aref new-weights 0))) ) (reset-class-wts new-weights N-wts) (when (or (null partition) (< (part-class-range partition) N-WTS ) ) (setq partition (make-part&classes N-WTS))) (COLLECT-WEIGHTS partition) (UPDATE-PARTITION-MMLS partition) partition ) ) (defun save-class-wts (out-file &optional (weights *curr-wts*) (N-wts *wt-vector-length*)) "Writes a formated *class-wts* to out-file." (let ((*print-array* t) (*print-level* nil) (*print-length* nil)) (with-open-file (stream out-file :direction :output :if-exists :append :if-does-not-exist :create ) (format stream "~2%~A ; of file ~A on ~A at ~A :~%" *N-data* *current-db* (machine-instance) (cdddr (reverse (multiple-value-list (get-decoded-time)))) ) (if (< N-wts *wt-vector-length*) (format stream "~&; Class weights:~2%~A" (subseq weights 0 N-wts)) (format stream "~&; Class weights:~2%~A" weights)) ) ) ) (defun get-class-wts () "Does a 2-level copy of *curr-wts*." (map 'vector 'copy-seq *curr-wts*) ) (defun reset-class-wts (weights &optional N-wts) "Sets *curr-wts* to be a 2-level copy of weights, first checking for identical lengths." (unless N-wts (setf N-wts (length (aref weights 0)))) (reset-wts *curr-wts* weights N-wts) N-wts ) (defun reset-wts (to-weights from-weights &optional N-wts) "Performs an item by item copy of from-weights vector to to-weights vector. The two must be of *N-data* length. N-wts (<= *wt-vector-length*) is the number of active weights in the from-weights vector." (when (/= (length to-weights) *N-data*) (break "attempt to reset a weight vector of length ~A, *N-data* = ~A." (length to-weights) *N-data* ) ) (unless N-wts ; Defaults to full length. (setf N-wts (length (aref from-weights 0))) ) (when (> N-wts *wt-vector-length*) (setf *wt-vector-length* N-wts) (setf *wt-vector-store* ()) ) (dotimes (N-datum *N-data*) ;; Ensure all wt-vectors are of at least *wt-vector-length* (if (< (length (aref to-weights N-datum)) *wt-vector-length*) (setf (aref to-weights N-datum) (make-array *wt-vector-length* :initial-element 0) ) (fill (aref to-weights N-datum) 0) ) (replace (aref to-weights N-datum) (aref from-weights N-datum) :end2 (min N-wts (length (aref from-weights N-datum))) ) ) N-wts ) (defun compress-class-wts (long-wts &key (min-wt 1.0e-8) N-classes) "Compresses a class long-wts array to an A-list form. Output format is: (list N-classes (vector {alist (class . wt)} over data)). min-wt = 0 --> save_all_non-zero." (when long-wts (unless N-classes (setf N-classes *wt-vector-length*)) (let ((wt-array (make-array (length long-wts) :initial-element ())) (class-wts nil) (weight nil) ) (dotimes (n-datum (length long-wts )) (setf class-wts (elt long-wts n-datum)) (dotimes (n-class N-classes) (when (< min-wt (setf weight (elt class-wts n-class))) (push (cons n-class weight) (aref wt-array n-datum)) ) ) ) (list N-classes wt-array) ) ) ) (defun expand-class-wts (short-wts &key (n-classes nil)) "Takes a compressed class weight list and expands it into full vector of vectors form." (when short-wts ; nil --> nil (let* ((min-classes (first short-wts)) (wt-array (second short-wts)) (n-data (length wt-array)) (weights (make-array n-data)) (l-wts nil) ) (when (or (null n-classes) (< n-classes min-classes)) (setf n-classes min-classes) (format t "~&Expand-Class-Wts using ~A from short-wts for n-classes." min-classes ) ) (dotimes (n-datum n-data) (setf l-wts (make-array n-classes :initial-element 0.0)) (map nil #'(lambda (pair) (setf (aref l-wts (car pair)) (cdr pair) ) ) (aref wt-array n-datum) ) (setf (aref weights n-datum) l-wts) ) (values weights n-classes) ) ) ) ;;; ----------------------- Data Display Functions -------------------------------------- (defun display-data (&optional (stream t) (short t)) "Displays the data base. With short t, only the maximum weight class is shown with it's weight. Otherwise the weight list is shown." (let ((*print-structure-contents* nil)) (format stream "~2% ~A ; variable types." *variable-types*) (format stream "~% ~A ; variable ranges." (mapcar #'(lambda (x y) (if (eq x 'discrete) (1- y) y)) *variable-types* *disc-var-ranges*)) (format stream "~% ~A ; Real variable priors." *real-priors*) (format stream "~% ~A ; No of data items.~%" *N-data*) (do ((N-datum 0 (1+ N-datum)) (max 0.0 0.0) ) ((= N-datum *N-data*)) (format stream "~%~4D" N-datum ) (cond (short (map nil #'(lambda (x) (and (numberp x) (< max x) (setq max x))) (aref *curr-wts* N-datum)) (format stream " ~3D ~10,8F - " (position max (aref *curr-wts* N-datum)) max ) ) (t (map nil #'(lambda (x) (format stream " ~A" x)) (aref *curr-wts* N-datum)) (format stream " - ") ) ) (do ((N-var 0 (1+ N-var))) ((= N-var *N-variables*)) (format stream "~3T~A" (get-variable *base-data* N-datum N-var)) ) ) (format stream "~%") ) ) (defun display-data-list (data-Nos &optional (stream t) (short t)) "Displays data for a list of case numbers. With short nil, the full weights are given." (let ((*print-structure-contents* nil) (max nil) ) (format stream "~2% ~A ; variable types." *variable-types*) (format stream "~% ~A ; variable ranges." (mapcar #'(lambda (x y) (if (eq x 'discrete) (1- y) y)) *variable-types* *disc-var-ranges*)) (format stream "~% ~A ; Real variable priors." *real-priors*) (format stream "~% ~A ; No of data items.~%" *N-data*) (mapc #'(lambda (N-datum) (format stream "~%~4D" N-datum ) (cond (short (setq max 0.0) (map nil #'(lambda (x) (and (numberp x) (< max x) (setq max x))) (aref *curr-wts* N-datum)) (format stream " ~D ~10,8F - " (position max (aref *curr-wts* N-datum)) max ) ) (t (map nil #'(lambda (x) (format stream " ~A" x)) (aref *curr-wts* N-datum)) (format stream " - ") ) ) (do ((N-var 0 (1+ N-var))) ((= N-var *N-variables*)) (format stream "~3T~A" (get-variable *base-data* N-datum N-var)) ) ) data-Nos ) (format stream "~%") ) ) (defun display-data-class (partition class &optional (stream t) (short t)) "Displays data for those cases which have maximum weight in the specified class. With short nill, the full set of cases are shown." (let ((*print-structure-contents* nil) (N-weights (part-class-range partition)) ) (format stream "~& ~A ; Class total weight.~%" (part-class-wt partition class)) ; (format stream "~& ~A ; variable types.~%" *variable-types*) (format stream "~& ~A ; variable ranges.~%" (mapcar #'(lambda (x y) (if (eq x 'discrete) (1- y) y)) *variable-types* *disc-var-ranges*)) (format stream "~& ~A ; Real variable priors.~%" *real-priors*) (do ((N-datum 0 (1+ N-datum)) (max 0.0 0.0) ) ((= N-datum *N-data*)) (map nil #'(lambda (x) (and (numberp x)(< max x) (setq max x))) (aref *curr-wts* N-datum)) (cond ((= class (position max (aref *curr-wts* N-datum))) (format stream "~%~4D" N-datum ) (cond (short (format stream "~3D ~10,8F - " class max) ) (t (do ((n 0 (1+ n)) (weights (aref *curr-wts* N-datum)) ) ((= n N-weights)) (format stream " ~13,8F" (aref weights n)) (when (zerop (mod (1+ n) 5)) (format stream "~%~4T") ) ) (format stream " - ") ) ) (do ((N-var 0 (1+ N-var))) ((= N-var *N-variables*)) (format stream "~T~4A" (get-variable *base-data* N-datum N-var)) ) ) ) ) ) ) (defun display-data-by-class (partition &optional (stream t) (short t)) (dotimes (N-class (part-class-range partition)) (format stream "~2&Class #~D~%" N-class) (display-data-class partition N-class stream short) (format stream "~%") ) ) ;;; ________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;; Simple tutored learning (defun tutored-partition (tutor-function &optional (partition nil)) "Generates a partitioning of the data according to a simple tutoring function which returns a class number {0 <= n < range} when applied to a data vector, and returns the class range when given nil." (let* ((n-classes (funcall tutor-function nil)) (tut-part (or (and partition ; reuse the old partition? (= n-classes (part-class-range partition)) (set-partition-to-priors partition) partition ) (make-part&classes n-classes) )) (temp nil) ) (or *curr-wts* (setq *curr-wts* (make-array *N-data* :initial-element nil))) (if (= (length (aref *curr-wts* 0)) n-classes ) (map nil #'(lambda (datum weights) ; reuse the *curr-wts* vectors (fill weights 0.0) (setf (aref weights (funcall tutor-function datum)) 1.0 ) ) *base-data* *curr-wts* ) (dotimes (n *N-data*) ; replace the *curr-wts* vectors (setq temp (make-array n-classes :initial-element 0.0)) (setf (aref temp (funcall tutor-function (get-datum *base-data* n))) 1.0 ) (setf (aref *curr-wts* n) temp) ) ) (COLLECT-WEIGHTS tut-part) (UPDATE-PARTITION-MMLs tut-part) tut-part ) ) (defun thyroid (datum) "A simple tutor-function, for tutoring on a descrete variable #19, of range 3" (if (null datum) 3 ; the range (aref datum 19) ) ) ; ;;; ________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; Experiments on Speed optimization ;;; Standard test conditions: ;;; *base-data*: 2000 from 'CHA:>stutz>class>data-med-1r.db" ;;; *curr-wts*: from CHA:>stutz>class>data-med-1r-20x10%.wts" ;;; *partition*: (make-part&classes 10) ;;; 20 classes (defun base-cycle (partition) (collect-weights partition) (update-partition-MMLs partition) (get-data-weights partition) t ) ;;;; Tests for timed-duration functions (defun test0 () (base-cycle *partition*)) ;(defun test1 () (collect-weights *partition*)) ;(defun test2 () (update-partition-MMLs *partition*)) ;(defun test3 () (get-data-weights *partition*)) ; ;(defmacro make-*2d-curr-weights* () ; `(defvar *2d-curr-weights* (make-array '(,*N-data* ,*N-variables*) ; :initial-contents ,*curr-wts* )) ) ; ;(defun test4a () ; 2.209 sec on trogon with 6.2 ; (declare (optimize speed)) ; 2.206 sec on trogon ; (let ((sum 0.0)) ; (declare (short-float sum)) ; 2.204 sec on trogon ; (map nil #'(lambda (weights) ; (map nil #'(lambda (x) (incf sum x)) ; weights ) ) ; *curr-wts* ) ; sum ) ) ; ;(defun test4b () ; .625 sec on trogon ; (declare (optimize speed)) ; .624 sec on trogon ; (let ((sum 0.0)) ; (declare (short-float sum)) ; .624 sec on trogon ; (dotimes (N-data *N-data*) ; (let ((weights (aref *curr-wts* N-data))) ; (dotimes (N-var *N-variables*) ; (incf sum (aref weights N-var)) ) ) ) ; sum ) ) ; ;(defun test4c () ; .735 sec on trogon ; (declare (optimize speed)) ; .735 sec on trogon ; (let ((sum 0.0)) ; (declare (short-float sum)) ; .735 sec on trogon ; (dotimes (N-data *N-data*) ; (dotimes (N-var *N-variables*) ; (incf sum (aref (aref *curr-wts* N-data) N-var)) ) ) ; sum ) ) ; ;(defun test4d () ; 1.024 sec on trogon ; (declare (optimize speed)) ; 1.022 sec on trogon ; (let ((sum 0.0)) ; (declare (short-float sum)) ; 1.022 sec on trogon ; (dotimes (N-data *N-data*) ; (dotimes (N-var *N-variables*) ; (incf sum (aref *2d-curr-weights* N-data N-var)) ) ) ; sum ) ) ; ;(defun test4e () ; 1.006 sec on trogon ; (declare (optimize speed)) ; 1.005 sec on trogon ; (let ((sum 0.0)) ; (declare (short-float sum)) ; 1.005 sec on trogon ; (dotimes (N-var *N-variables*) ; (dotimes (N-data *N-data*) ; (incf sum (aref *2d-curr-weights* N-data N-var)) ) ) ; sum ) ) ;;; ________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;; Functions for saving and accessing classifications as .wt-set files ;;; Find-best-n and others below use a .wt-set file to checkpoint and save its results. ;;; The file format is defined by: ;;; 'header is an optional descriptive string ;;; 'count is the number of searches made at the last checkpointing. ;;; 'MMLs is an array of 'n-held MMLs ;;; 'classes is an array of 'n-held ordered class-weights arrays ;;; 'weightings is an implicit array of n-held *curr-wts* arrays (defvar *current-results* nil "the file being used for current results") (defun write-.wt-set (file header count MMLs classes weightings &optional (wt-format 'short) (min-wt 1.0e-8) ) "Writes out a complete -.wt-set file." ;; added header argument and documentation JCS 18/sept/87 ;; changed from macro to function WMT 09 JUl 88 (let ((*print-array* t) (*print-level* nil) (*print-length* nil)) (with-open-file (out-str file :direction :output :if-exists :supersede :if-does-not-exist :create) (format out-str "~2&~S" header) ; descriptive string (format out-str "~2&~A" count) ; number of searches made at last file write (format out-str "~2&~A" MMLs) ; array of MMLs for saved partitionings (format out-str "~2&~A" classes) ; array of arrays (map nil #'(lambda (elt) (format out-str "~2& ~A" (case wt-format (short (compress-class-wts elt :min-wt min-wt)) (t elt) ) )) weightings) ) ; copy of *curr-wts* for each partitioning ;; (setf *current-results* file) ; commented out WMT 09 Jul 88 ) ) (defmacro cond-read-.wt-set (file header count MMLs classes weightings N-classes N-held &optional (wt-format ''short) ) "Conditionally reads a .wt-set file. Aborts if current MMLs match the file's MMLs. Note that if the file was written with a 'min-wt' > 0.0, the MML generated by simple collection and updating will be less than that found when generating the set." ;; Added header argument JCS 18/sept/87, A non-null header arg will be appended to file's ;; value if one is present. Otherwise the current header is retained. (let ((temp (gensym)) ) `(with-open-file (in-str ,file :direction :input) (let ((,temp (read in-str))) (if (numberp ,temp) ; No header present (setf ,count ,temp) (progn (if ,header (setf ,header (string-append ,header " - { " ,temp " }")) (setf ,header ,temp) ) (setf ,count (read in-str)) ) ) (if (equal ,MMLs (setq ,temp (read in-str))) nil ;Will not reread current data (progn (setq ,MMLs ,temp) (setq ,classes (read in-str)) (when (not (= (length ,MMLs) (length ,classes))) (break "disagreement: ~A MMLs, ~A classes." (length ,MMLs) (length ,classes) ) ) (when (or (not (numberp ,N-held)) ;Set the No. of classifications held (not (= ,N-held (length ,MMLs))) ) (setf ,N-held (length ,MMLs)) (warn "File ~A has ~A partitionings held. THIS VALUE WILL BE USED." ,file ,N-held) ) (setq ,weightings (make-array ,N-held )) (dotimes (n ,N-held) (setf (aref ,weightings n) (case ,wt-format (short (expand-class-wts (read in-str) :n-classes N-classes)) (t (read in-str)) ) ) ) (when (or (not (numberp ,N-classes)) (not (= ,N-classes (length (aref (aref ,weightings 0) 0)))) ) (setq ,N-classes (length (aref (aref ,weightings 0) 0)) ) (warn "File ~A has ~A classes. THIS VALUE WILL BE USED." ,file ,N-classes ) )) ) ) ;; (setf *current-results* ,file) ; commented out WMT 09 Jul 88 ) ) ) (defun max-classes-from-.wt-set (input-wt-set-file) "This takes a .wt-set file and returns a list of lists each consisting of: the MML, a sorted list of class weights, a list of the number of data having max weight in each class, and a list giving the maximum weight class for each datum." (let ((header nil) (count nil) (mmls nil) (classes nil) (weightings nil) (n-classes nil) (n-held nil)) (cond-read-.wt-set input-wt-set-file header count mmls classes weightings n-classes n-held ) (sort (map 'list #'(lambda (mml class max-classes) (list mml (sort class '>) (mapcar #'(lambda (class) (count class max-classes)) '(0 1 2 3 4 5 6 7 8 9)) max-classes)) mmls classes (map 'list #'(lambda (wt-set) (map 'list #'(lambda (datum-wts) (position (max+ datum-wts) datum-wts)) wt-set)) weightings)) '< :key 'car) ) ) (defun compress-.wt-set-file (input-wt-set-file &optional (output-wt-set-file nil) (min-wt 1.0e-4) (in-format 'short) ) "This is used to translate a .wt-set file from the old *curr-wts* format to the compressed format. Considerable space savings are possible when the data are each weighted in only a few of the classes. Also used to reduce size by only retaining values > min-wt, rather than all values" (or output-wt-set-file (setf output-wt-set-file input-wt-set-file) ) (let ((header nil) (count nil) (MMLs nil) (classes nil) (weightings nil) (N-classes nil) (N-held nil) ) (cond-read-.wt-set input-wt-set-file header count MMLs classes weightings N-classes N-held in-format) (write-.wt-set output-wt-set-file header count MMLs classes weightings 'short min-wt) ) ) (defun split-.wt-set-file (wt-set-file &key (in-format 'short) (out-format 'short) (min-wt 1.0e-4) ) "This splits a .wt-set file into individual files for each partitioning. The new files retain the origional name with a '-n' position suffex." ;; write-component-of-split-.wt-set-file created from previous version of this function WMT 09 Jul 88 (let ((header nil) (count nil) (NLPs nil) (classes nil) (weightings nil) (N-classes nil) (N-held nil)) (cond-read-.wt-set wt-set-file header count NLPs classes weightings N-classes N-held in-format) (when (/= N-classes *wt-vector-length*) ; added WMT 26 Jul 88 (setf *wt-vector-length* N-classes) (setf *wt-vector-store* ()) ) (dotimes (n N-held) (write-component-of-split-.wt-set-file wt-set-file n header count NLPs classes weightings out-format min-wt)))) (defun write-component-of-split-.wt-set-file (wt-set-file N-component header count NLPs classes weightings &optional (out-format 'short) (min-wt 1.0e-8) ) "writes a component of a split weight set file, maintaining the weight set file as split" ;; New function WMT 09 Jul 88 (let ((m1 (make-array 1)) (c1 (make-array 1)) (w1 (make-array 1)) (rootname nil)) (setf rootname (pathname-name wt-set-file)) (setf (elt m1 0) (elt NLPs N-component)) (setf (elt c1 0) (elt classes N-component)) (setf (elt w1 0) (elt weightings N-component)) (write-.wt-set (make-pathname :name (concatenate 'string rootname "-" (format nil "~A" N-component)) :defaults wt-set-file) header count m1 c1 w1 out-format min-wt)) ) (defun read-component-of-split-.wt-set-file (wt-set-file N-component count-list NLPs classes weightings header &optional (in-format 'short) ) "reads a component of a split weight set file, maintaining the weight set file as split" ;; New function WMT 09 Jul 88 (let ((m1 (make-array 1)) (c1 (make-array 1)) (w1 (make-array 1)) (N-held nil) (count 0) (N-classes nil) (read-file (make-pathname :name (concatenate 'string (pathname-name wt-set-file) "-" (format nil "~A" N-component)) :defaults wt-set-file))) (when (probe-file read-file) (cond-read-.wt-set read-file header count m1 c1 w1 N-classes N-held in-format) (push count count-list) (setf (elt NLPs N-component) (elt m1 0)) (setf (elt classes N-component) (elt c1 0)) (setf (elt weightings N-component) (elt w1 0))) (values count-list header NLPs classes weightings))) (defun get-best-of-weightings (wt-set-file &optional (wt-format 'short) (position nil)) "Sets *curr-wts* to be that recorded for the lowest MML in the wt-set-file." ;; revised to deal with headers 31 AUG 87 john ;; allows specification of the desired weight-set's position (0 based) (let ((MMLs nil) (*PRINT-ARRAY* T) (*PRINT-LEVEL* NIL) (*PRINT-LENGTH* NIL) ) (with-open-file (in-str wt-set-file :direction :input) (if (numberp (read in-str)) nil ; no header - discard count field (read in-str) ) ; discard both header and count fields (setf MMLs (read in-str)) (read in-str) ;discard classes field (setf *curr-wts* ;skip over preceeding wt-sets & read best (dotimes (n (or position (position (min+! MMLs) MMLs)) (if (eq wt-format 'short) (expand-class-wts (read in-str)) (read in-str) ) ) (read in-str) ) ) (setf *current-results* wt-set-file) (aref MMLs (or position (position (min+! MMLs) MMLs))) ) ) ) ;;; ________________________________________________________________________________________ ;; Second generation Local Minimum Search Routines (defun update-.wt-set-file (partition wt-set-file &key (update-method t) (n-cycles 5) (mode 'all) (position nil) (dynamic-halt-range nil) (in-format 'short) (out-format 'short) (min-wt 1.0e-8) (header nil) (cache-file nil) (remove-nulls t) (split-.wt-set-file nil)) "This is used to extend or update the MMLs of old .wt-set files. It is assumed that the appropriate DB is loaded, the priors set up, and that partition is of the correct size to match the input file. mode = best will only update the best MML in the wt-set-file. mode = position & position = will only update the MML at position's index" (let ( (count nil) (MMLs nil) (classes nil) (n-min-MML nil) (weightings nil) (N-classes nil) (N-held nil) (results nil) (*PRINT-ARRAY* T) (*PRINT-LEVEL* NIL) (*PRINT-LENGTH* NIL) ) (cond (split-.wt-set-file (setf N-held 1) (setf MMLs (make-array N-held :initial-element nil)) (setf classes (make-array N-held :initial-element nil)) (setf weightings (make-array N-held :initial-element nil)) (cond-read-.wt-set wt-set-file header count MMLs classes weightings N-classes N-held in-format)) (t (cond-read-.wt-set wt-set-file header count MMLs classes weightings N-classes N-held in-format))) (when (and (eq update-method 'disperse-cycle) cache-file (equal (probe-file wt-set-file) (probe-file cache-file)) (/= N-held 1) ) (break "Caching will replace contents of wt-set-file with current classification.~ You are advised to set cache-file to some other value.") ) (when (/= (length (aref weightings 0)) *N-data*) (break "File has weights for ~D data, current data has ~D." (length (aref weightings 0)) *N-data* ) ) (when (/= (length (aref (aref weightings 0) 0)) (part-class-range partition)) (break "File weightings have ~D classes, partition has ~D." (length (aref (aref weightings 0) 0)) (part-class-range partition) ) ) (when (and (not (eql mode 'all)) (not (eql mode 'best)) (not (eql mode 'position))) (break "Mode keyword value must be ALL, BEST or POSITION")) (when (/= N-classes *wt-vector-length*) ; added WMT 09 Jul 88 (setf *wt-vector-length* N-classes) (setf *wt-vector-store* ()) ) (format t "~2% MMLs of wt-set-file:") (format t "~2&****** ~A ******~2%" MMLs) (when (eql mode 'best) (setf n-min-MML (position (min+! MMLs) MMLs)) (format t "~% Best wt-set used: MML = ~A ~2%" (min+! MMLs))) (when (eql mode 'position) (format t "~% Position ~2D wt-set used: MML = ~A ~2%" position (aref MMLs position))) (dotimes (n N-held) (when (or (eql mode 'all) (and (eql mode 'best) (= n n-min-MML)) (and (eql mode 'position) (= n position))) (setf *curr-wts* (aref weightings n)) (let ((*print-array* nil)) (push (multiple-value-list (time-it (case update-method (simple-cycle ; added WMT 15 Feb 89 (SIMPLE-CYCLE partition :max-cycles N-cycles :remove-nulls remove-nulls )) (dynamic-cycle (DYNAMIC-CYCLE partition :N-average N-cycles :remove-nulls remove-nulls :HALT-RANGE dynamic-halt-range) ) (disperse-cycle (COLLECT-WEIGHTS partition) (UPDATE-PARTITION-MMLS partition) (SIMPLE-DISPERSE-CYCLE partition :N-average N-cycles :stream t :halt-range dynamic-halt-range :cache-file cache-file :min-wt 1.0e-4 :remove-nulls remove-nulls :header header ) ) (t (COLLECT-WEIGHTS partition) (UPDATE-PARTITION-MMLS partition) (and remove-nulls (REMOVE-NULL-CLASSES partition :all-but-1 t) (GET-DATA-WEIGHTS partition) (COLLECT-WEIGHTS partition) (UPDATE-PARTITION-MMLS partition) ) (values (part-total-MML partition) 1 (sort (coerce (class-wts partition :all t) 'list) #'> ) (list (part-total-mml partition)) ) ) ) :f-args-list `(,n) ) ) results ) ) (setf (aref MMLs n) (part-total-mml partition)) (setf (aref classes n) (sort (coerce (class-wts partition :all t) 'list) #'>)) (setf (aref weightings n) *curr-wts*) (write-.wt-set wt-set-file header count MMLs classes weightings out-format min-wt ) (format t "~2&****** ~A ******~2%" MMLs) (return-on-char ~ update-.wt-set-file (progn (format nil "~&UPDATE-.WT-SET-FILE terminated during cycle ~A." n) (list MMLs classes weightings results) ) ) ) ) results ) ) (defun find-best-n (N-held N-classes partition search-method N-cycles save-file &optional (file-header nil) (dynamic-halt-range nil) (in-format 'short) (out-format 'short) (min-wt 1.0e-8) (split-.wt-set-file nil) ) (find-best-n-2 N-held partition search-method N-cycles save-file :N-classes N-classes :file-header file-header :dynamic-halt-range dynamic-halt-range :in-format in-format :out-format out-format :min-wt min-wt :split-.wt-set-file split-.wt-set-file)) (defun find-best-n-2 (N-held partition search-method N-cycles save-file &key (N-classes nil) (file-header nil) (dynamic-halt-range nil) (in-format 'short) (out-format 'short) (min-wt 1.0e-8) (remove-nulls t) (split-.wt-set-file nil)) "Uses RANDOM-DISTRIBUTE-DATA with SIMPLE-CYCLE, DYNAMIC-CYCLE, or UPDATE-WEIGHTS to find the best classification of 'N-classes. The best 'N-held are retained in 'save-file. A preexisting 'save-file will be copied and completed or modified. When 'file-header is nill, a preexisting 'save-file's header is retained. Otherwise the header is replaced." ;; Revised time-it calls 26/aug/87 Will. ;; Modified to read possibly unfilled -.wt-set file, added file-header Arg 16/sept/87 John. ;; Modified to handle split weight set file WMT 09 Jul 88 (let ((MMLs nil) (classes nil) (weightings nil) (Max-MML nil) (part-MML nil) (count nil) (count-list ()) (position nil) (results nil) (*print-array* t) (*print-level* nil) (*print-length* nil) ) (if N-classes ; Check for N-Classes compatability. (when (not (= N-classes (part-class-range partition))) (setf partition (make-part&classes N-classes)) (format t "~&Find-Best-N creating a new partitioning of range ~A" N-classes) ) (setf N-classes (part-class-range partition)) ) (when (/= N-classes *wt-vector-length*) (setf *wt-vector-length* N-classes) (setf *wt-vector-store* ()) ) (when (/= (length *curr-wts*) *N-data*) (setf *curr-wts* (make-array *N-data*)) ) ; WMT 09 Jul 88  (when (null (probe-file save-file)) (setf count 0) ; generate an empty data set (setf MMLs (make-array N-held :initial-element nil)) (setf classes (make-array N-held :initial-element nil)) (setf weightings (make-array N-held :initial-element nil)) ) (when split-.wt-set-file ; save-file does not exist for split-.wt-set-file = t (dotimes (n N-held) (multiple-value-setq (count-list file-header MMLs classes weightings) (read-component-of-split-.wt-set-file save-file n count-list MMLs classes weightings file-header in-format))) (if (not (null count-list)) (setf count (max+ count-list)))) (if (probe-file save-file) (cond-read-.wt-set ; Nil header will be replaced by file's value. save-file file-header count MMLs classes weightings N-classes N-held in-format)) ; WMT 09 Jul 88 ^^^^^ (format t "~2&****** ~A ******~2%" MMLs) (loop (incf count ) ;;(RANDOM-DISTRIBUTE-DATA N-classes .9) (RANDOM-SELECT-DATA N-classes :n-samples 1) (setf (part-class-range partition) N-classes) (let ((*print-array* nil)) (push (multiple-value-list (time-it (case search-method (simple-cycle (SIMPLE-CYCLE partition :max-cycles N-cycles :remove-nulls remove-nulls )) (dynamic-cycle (DYNAMIC-CYCLE partition :N-average N-cycles :HALT-RANGE dynamic-halt-range :remove-nulls remove-nulls )) (disperse-cycle (DYNAMIC-DISPERSE-CYCLE partition :N-average N-cycles :HALT-RANGE dynamic-halt-range :remove-nulls remove-nulls) ) (update-weights (UPDATE-WEIGHTS partition 10 N-cycles)) (t (break "Unknown search method: ~A" search-method)) ) :f-args-list (list count) ) ) results ) ) (setq part-MML (part-total-MML partition)) (setq max-MML (max+! MMLs)) ; This gives nil if no numbers in MMLs. (when (and (notany #'(lambda (x) ; Ensure part-MML not duplicated in MMLs set. (and (numberp x) (> 1.0 (abs (- x part-MML))) ) ) MMLs ) (or (some #'null MMLs) ; Fill any empty slots. (< part-MML max-MML) ) ) ; Or possibly replace the largest MML. (setf position (or (position nil MMLs) (position max-MML MMLs) )) (setf (aref MMLs position ) part-MML) (setf (aref classes position) (sort (coerce (class-wts partition :all t) 'list) #'>) ) (when (null (aref weightings position )) ; Make a new array for *curr-wts*. (setf (aref weightings position ) (make-array *N-data*)) ) (rotatef *curr-wts* (aref weightings position ) ) ; Recycle old weight array. (if split-.wt-set-file (write-component-of-split-.wt-set-file save-file position file-header count mmls classes weightings out-format min-wt) (write-.wt-set save-file file-header count MMLs classes weightings out-format min-wt)) (format t "~2&****** ~A ******~2%" MMLs) ) (return-on-char ~ find-best-n-2 (progn (format nil "~&Find-Best-N terminated during cycle ~A." count) (list MMLs classes weightings results) ) ) ) ) ) (defun simple-cycle (partition &key (max-cycles 0) (stream t) (skip-init nil) (remove-nulls nil) ) "Does a simple get_data_probabilities / collect_data_probabilities_into_partitons / update-partition-MMLs cycle. Max-cycles = 0 implies run_indefinitily." (let ((count 0) (MMLs-list ()) (test nil)) (unless skip-init (COLLECT-WEIGHTS partition) (UPDATE-PARTITION-MMLS partition) ) (format stream "~2& ~A - ~A~% ~A ~A" count (formatted-time-string) (part-total-mml partition) (coerce (class-wts partition) 'list) ) (loop (push (part-total-mml partition) MMLs-list) (when remove-nulls (REMOVE-NULL-CLASSES partition :all-but-1 t)) (GET-DATA-WEIGHTS partition) (COLLECT-WEIGHTS partition) (UPDATE-PARTITION-MMLs partition) (incf count) (format stream "~& ~A ~A ~A" count (part-total-mml partition) (coerce (class-wts partition :all t) 'list)) (when (or (>= count max-cycles) (and (setq test (read-char-no-hang)) (char-equal #\~ test) ) ) (return) ) ) (values (part-total-MML partition) count (sort (coerce (class-wts partition :all t) 'list) '> ) (nreverse (push (part-total-mml partition) MMLs-list)) ) ) ) (defvar *min-over-relax-rate* 1.0 "Allows user limiting of dynamic cycle overrelax rate.") (defvar *max-over-relax-rate* 1.5 "Allows user limiting of dynamic cycle overrelax rate.") (defun dynamic-cycle (partition &key (N-average 5) (stream t) (HALT-RANGE nil) (skip-initialize nil) (remove-nulls nil)) "Does a dynamic get_data_probabilities / collect_data_probabilities_into_partitons / update-partition-MMLs cycle. Termination depends on the average of the previous N-average MMLs. When HALT-RANGE is given, an MML within HALT-RANGE of the will terminate. Otherwise an MML within (* HALT-FACTOR MML) of the average will terminate. Increasing N-average above the default will allow searching along flatter slopes" ;; added HALT-RANGE, HALT-FACTOR, save-last-cycle calls 15 SEPT 87 WILL (let* ((MMLs-list ()) (MMLs-for-avg-store (make-array N-average)) (default-halt-range ;; Halts when average difference between MMLs is 1. (/ (let ((sum N-average)) (dotimes (n N-average sum) (incf sum n))) N-average ) ) (halt-factor ;; Halts when average difference between MMLs is (* .0001 MML) (* 0.0001 default-halt-range)) (over-relax-rate 1.0) ; subject to dynamic adjustment within limits. (count 0) (test nil) (MML nil) ) (unless skip-initialize (COLLECT-WEIGHTS partition) ; Initialize the partition: (UPDATE-PARTITION-MMLS partition) ) (setf MML (part-total-mml partition)) (format stream "~2& ~A - ~A~% ~A ~A" count (formatted-time-string) MML (coerce (class-wts partition) 'list) ) (dotimes (n N-average) ; Do sufficient runs to start the average: (push MML MMLs-list) (setf (aref MMLs-for-avg-store (mod count N-average)) MML) (when remove-nulls (REMOVE-NULL-CLASSES partition :all-but-1 t)) ; (GET-DATA-WEIGHTS partition) (GET-OVER-RELAXED-DATA-WEIGHTS partition over-relax-rate) (COLLECT-WEIGHTS partition) (UPDATE-PARTITION-MMLs partition) (incf count) (setf MML (part-total-mml partition)) (format stream "~2& ~A - ~A ratio =~5,2F~% ~A ~A" count (formatted-time-string) over-relax-rate MML (coerce (class-wts partition) 'list) ) (when (and (setq test (read-char-no-hang)) ; Soft abort option (char-equal #\~ test) ) (return-from dynamic-cycle (values MML count (sort (coerce (class-wts partition :all t) 'list) '> ) (nreverse (push (part-total-mml partition) MMLs-list)) ) ) ) (setf over-relax-rate (if (< MML (first MMLs-list)) (* .5 (+ over-relax-rate *max-over-relax-rate*)) (* .5 (+ over-relax-rate *min-over-relax-rate*)) )) ) (loop ;; Until MML is within halt-range of average: (push (part-total-mml partition) MMLs-list) (setf (aref MMLs-for-avg-store (mod count N-average)) (part-total-mml partition)) (when remove-nulls (REMOVE-NULL-CLASSES partition :all-but-1 t)) ; (GET-DATA-WEIGHTS partition) (GET-OVER-RELAXED-DATA-WEIGHTS partition over-relax-rate) (Collect-WEIGHTS partition) (UPDATE-PARTITION-MMLs partition) (incf count) (setf MML (part-total-mml partition)) (format stream "~2& ~A - ~A ratio =~5,2F~% ~A ~A" count (formatted-time-string) over-relax-rate MML (coerce (class-wts partition) 'list) ) (when (or (let ((h-r (or halt-range (max default-halt-range (* halt-factor MML) ) ) )) (and (> h-r ;; Compare MML against average of previous MMLs. (abs (- MML (/ (reduce '+ MMLs-for-avg-store) N-average) ))) (< MML ;; Ensure MMLs are decreasing or rising to a limit. (+ (* .2 h-r) ; We want to get over any hills (aref MMLs-for-avg-store (mod count N-average)) ) ) ) ) (and (setq test (read-char-no-hang)) ; Soft abort option (char-equal #\~ test) )) (return-from dynamic-cycle (values MML count (sort (coerce (class-wts partition :all t) 'list) '> ) (nreverse (push MML MMLs-list)) ) ) ) (setf over-relax-rate (if (< MML (first MMLs-list)) (* .5 (+ over-relax-rate *max-over-relax-rate*)) (* .5 (+ over-relax-rate *min-over-relax-rate*)) )) ) ) ) (defun get-data-weights (partition) "Collects the data weights (probabilities) relative to partition, reusing the *curr-wts* structure." ;; JCS 7/aug/87 (dotimes (N-datum *N-data* *curr-wts*) (setf (aref *curr-wts* N-datum) (GET-NEW-WEIGHT-VECTOR partition (aref *base-data* N-datum) (aref *curr-wts* N-datum)) ) ) ) (defun get-over-relaxed-data-weights ( partition &optional (relaxation-ratio 1.75) ) "Collects the data weights (probabilities) relative to partition, reusing the *curr-wts* structure. If relaxation-ratio /= 1.0, new-weights elements are increased relative to old-weights by that ratio, made non-negative, and renormalized. Expriments show relaxation-ratios > 2.0 will cause oscillations. Timing tests show that with a relaxation-ratio of 1.0 the time is ~5% greater than get-data-weights. Otherwise the time is ~44% greater, but ratios of 1.75 - 1.9 will lower the total search time by about 20%." ;; New JCS 17/Dec/87 using elements of get-data-weights and update-datum-weights. (setq relaxation-ratio (max relaxation-ratio 1.0)) (let ((N-classes (part-class-range partition)) (old-weights nil) (new-weights nil) ) (dotimes (N-datum *N-data*) (setf old-weights (aref *curr-wts* N-datum)) (setf new-weights (GET-NEW-WEIGHT-VECTOR partition (get-datum *base-data* N-datum))) (when (/= relaxation-ratio 1.0) ; Over-relax new-weights and renormalize (dotimes (N-class N-classes) (let ((old (aref old-weights N-class)) ) (setf (aref new-weights N-class) (max (+ old (* relaxation-ratio (- (aref new-weights N-class) old))) 0.0 ) ) ) ) ; Ensures non-negative weights (normalize-active-vector new-weights N-classes ) ) (setf (aref *curr-wts* N-datum) new-weights) (push old-weights *wt-vector-store*) ) *curr-wts* ) ) ;------------------------------------------------------------------------------------ ;; Older Local Minima Search Routines (defun test&record (partition record-file &optional (data-file nil) (dispersion-ratio 0.0) ) "A top-level for making multiple runs with conditional updating, and recording the results. The loop is terminated by entering a '{." (let ((*print-array* t) (*print-level* nil) (*print-length* nil) (count 0) (temp nil) (start-time nil) (delta-time nil) (print-time nil)) (loop (let* ((class-wts (class-wts partition :all t)) (max-size (max+ class-wts)) ) (if (or (= dispersion-ratio 0.0) (< max-size (* 2.0 (/ *N-data* (part-class-range partition)))) ) (random-distribute-data (part-class-range partition) .01) (random-disperse-class-wt-over-all (position max-size class-wts) dispersion-ratio) ) ) (setq start-time (get-internal-real-time)) ; (setq temp (UPDATE-WEIGHTS PARTITION 2 200 .0005 100 10 1.75 10 1.0e3)) ;standard (setq temp (UPDATE-WEIGHTS PARTITION 2 200 .00001 100 10 1.75 10 1.0e3)) (setq delta-time (/ (- (get-internal-real-time) start-time) internal-time-units-per-second 60.0 )) (setq print-time (cdddr (reverse (multiple-value-list (get-decoded-time))))) (with-open-file (stream record-file :direction :output :if-exists :append :if-does-not-exist :create) (format stream "~%~A MML of ~A~ ~% ~A of file ~A on ~A at ~A :~ ~% Running ~A minutes with parameters ~A~ ~% Ordered class-wts: ~A~ ~% Delta Steps: ~A~%" count (first temp) *N-data* *current-db* (machine-instance) print-time delta-time (third temp) (second temp) (fourth temp) ) ) (when data-file (save-class-wts data-file) ) ; (when (boundp '*autoclass-display-window*) ; (send *autoclass-display-window* :init-display) ) (return-on-char ~ nil) (incf count) ) ) ) (defvar *DB-cycle-count* 0 "Globally accessable cycle counter for update-weights.") (defun update-weights (partition &optional ; The partition to be updated. (min-cycles 2) ; Min. No. of DB cycles. (limit-cycles nil) ; Max No. of cycles permitted. (cutoff-ratio .0005) ; Determines Min delta for stopping. (update-mod (/ *N-data* .5)) ; MMLs are updated after this many data. (delta-mod 100.0) ; MMLs are updated after this much delta. (relaxation-ratio 2.0) ; For over-relaxation moves. (re-collect-mod 10) ; Do Collect-Weights after N cycles. (delta-ratio 1.0) ; Min change factor for update-datum-weights ) " This is the main function for matching the data's class weights with the class parameters. The dotimes loop is the core where each datum's weights are updated against the class's current unit-MMLs. The unit-MMLs are updated at least once per DB cycle, OR once per update-mod actual data updates, OR when the values returned by update-datum-weights since the last update-partition-MMLs exceeds delta-mod." ; (let* ((*print-structure-contents* nil) (min-cycle-delta (* cutoff-ratio *N-data*)) ; A smaller delta will end processing. ; (10*min-cycle-delta (* 10.0 min-cycle-delta)) ; Ends over-relaxation mode. (steps nil) (read-test nil) ) (setq *DB-cycle-count* 0) (do ((end-flag nil) (delta nil) (cycle-delta nil) (datum-updates nil) (datum-update-count nil) (delta-datum nil) (class-update-count nil) (class-updates nil) (mml-updates nil) ) (end-flag ;see END CONDITIONS below (COLLECT-WEIGHTS PARTITION) (UPDATE-PARTITION-MMLs PARTITION) (format t "~%~A" (coerce (class-wts partition :all t) 'list)) `(,(part-total-MML partition) ,(sort (coerce (class-wts partition :all t) 'list) '> ) (,*DB-cycle-count* ,cutoff-ratio ,update-mod ,delta-mod ,relaxation-ratio ,re-collect-mod ,delta-ratio) ,(length steps) ) ) (when (zerop (mod *DB-cycle-count* re-collect-mod)) (COLLECT-WEIGHTS PARTITION) (UPDATE-PARTITION-MMLs PARTITION) ) (setq delta 0.0) ;= Sum of deltas since MMLs were updated (setq cycle-delta 0.0) ;= Sum of deltas over cycle (setq datum-updates 0) ;= No. of changes since MMLs were updated (setq datum-update-count 0) ;= No. of changes over cycle (setq mml-updates 0) ;= No. of MML updates (setq class-update-count 0) (dotimes (N-datum *N-data*) ; Update weights & classes WRT each other (and ; 'AND' CONTROL FILTER (multiple-value-setq ;nil = no change (delta-datum class-updates) (UPDATE-DATUM-WEIGHTS N-DATUM PARTITION RELAXATION-RATIO DELTA-RATIO)) (incf class-update-count class-updates) (incf delta delta-datum) (incf cycle-delta delta-datum) (incf datum-updates) (incf datum-update-count) (or (> delta delta-mod) ; Test: do class MMLs need updating? (> datum-updates update-mod) ) (update-partition-MMLs partition) ; Update class MML values (incf mml-updates) (setq delta 0.0) (setq datum-updates 0) ) ) (when (plusp datum-update-count) (UPDATE-PARTITION-MMLs PARTITION) (incf mml-updates) ) (format t "~%Cycle ~3D: ~3D MML updates, ~4D data and ~6D class updates, ~ delta = ~12F, MML = ~12F." *DB-cycle-count* mml-updates datum-update-count class-update-count cycle-delta (part-total-MML partition) ) (when (zerop (mod *DB-cycle-count* 10)) (format t "~%~A" (class-wts partition :all t))) (incf *DB-cycle-count*) (push cycle-delta steps) ; (when (and (/= relaxation-ratio 1.0) ; (< cycle-delta 10*min-cycle-delta) ) ; (setq relaxation-ratio 1.0) ; (format t "~%--- Reset relaxation-ratio to 1.0 ---") ) (cond ((and ; END CONDITIONS (>= *DB-cycle-count* min-cycles) (or (zerop datum-update-count) (< cycle-delta min-cycle-delta) (and limit-cycles (>= *DB-cycle-count* limit-cycles)) (and (setq read-test (read-char-no-hang)) (char-equal (character '~) read-test) ) ) ) (setq end-flag t) ) ) ) ) ) (defun update-datum-weights (N-datum partition &optional (relaxation-ratio 1.0) (delta-ratio 1.0) ) "Generates a new-weights vector, and the update deltas. Each delta is checked for non-null effect on the old weight before use. If relaxation-ratio /= 1.0, new-weights elements are increased relative to old-weights by that ratio, made non-negative, and renormalized. Delta-ratio determines the minimum |delta| used, relative to (* old short-float-epsilon). The magnitude of the difference vector, and the number of classes changed are returned. Expriments show relaxation-ratios > 2.0 will cause oscillations. Timmings show this requires about 30% longer than get-data-weights when using defaults, and about 70% longer with relaxation-ratio \= 1.0 - but total run time is reduced with over-relaxation of ~1.75." ;; revised JCS 7/aug/87 using normalize-active-vector (setq relaxation-ratio (max relaxation-ratio 1.0)) (setq delta-ratio (max delta-ratio 1.0)) ; Ratios less than 1.0 are pointless (let* ((classes (part-classes partition)) (N-classes (part-class-range partition)) (datum (get-datum *base-data* N-datum)) (old-weights (aref *curr-wts* N-datum)) (new-weights (GET-NEW-WEIGHT-VECTOR partition datum)) (sum-delta-sq 0.0) (class-count 0) ) (when (/= relaxation-ratio 1.0) ; Over-relax new-weights and renormalize (dotimes (N-class N-classes) (let ((old (aref old-weights N-class)) ) (setf (aref new-weights N-class) (max (+ old (* relaxation-ratio (- (aref new-weights N-class) old))) 0.0 ) ) ) ) ; Ensures non-negative weights (normalize-active-vector new-weights N-classes ) ) (dotimes (N-class N-classes) ; Update classes with significant deltas (let* ((old (aref old-weights N-class)) (new (aref new-weights N-class)) (delta (- new old)) ) (when (> (abs delta) (* old short-float-epsilon delta-ratio)) (incf class-count) (incf (aref old-weights N-class) delta) (UPDATE-CLASS-WTS (aref classes N-class) delta datum) (when (> (abs delta) *sqrt-least-positive-short-float*) (incf sum-delta-sq (* delta delta)) ) ) ) ) (push new-weights *wt-vector-store*) (values (when (plusp sum-delta-sq) (sqrt sum-delta-sq) ) class-count ) ) ) (defun get-new-weight-vector (partition datum &optional (weights nil)) "Gets a normalized probability vector for datum over partition, removes the numerically insignificant weights, and rescales to a total weight of 1.0. Datum values of *test-sub-symbol* are ignored in calculating the weights. The weights arg, if given, must be an old weight-vector for recycling." ;; rewritten JCS 7/aug/87 w/o changing functionality. (let* ((N-classes (part-class-range partition)) (classes (part-classes partition)) (output-type 'short-float) (output-epsilon short-float-epsilon) (min-weight nil) (log-probs nil) (max-log-prob nil) (log> nil) ) (or (>= (length weights) *wt-vector-length*) ; Recycle weight vectors if possible (>= (length (setq weights (pop *wt-vector-store*))) *wt-vector-length* ) (setq weights (make-array *wt-vector-length* )) ) (fill weights 0) (or (>= (length (setq log-probs (pop *wt-vector-store*))) *wt-vector-length* ) (setq log-probs (make-array *wt-vector-length* )) ) (fill log-probs 0) ;; Generate normalized prob vector from raw log-prob vector using log representation. (dotimes (n N-classes) (setf (aref log-probs n) (LOG-CLASS-DATUM-PROB (aref classes n) datum) ) ) (setq max-log-prob (max+ log-probs :end N-classes)) (cond ((= max-log-prob 1.0) (push weights *wt-vector-store*) (setf weights log-probs) ) (t (setq log> (log (reduce #'(lambda (x y) (+ x (safe-exp (- y max-log-prob)))) log-probs :initial-value 0.0 :end N-classes ) ) ) (dotimes (n N-classes) (setf (aref weights n) (safe-exp (- (aref log-probs n) max-log-prob log>)) ) ) (push log-probs *wt-vector-store*) ;store for recycle ) ) (when (find-if 'minusp weights :end N-classes) (break "negative weight in ~A" weights) ) (setq min-weight (* output-epsilon ; Remove infinitesimal weights. (max (max+ weights :end N-classes) *least-long-float-level* ))) (dotimes (n N-classes) (and (< (aref weights n) min-weight) (setf (aref weights n) 0.0)) ) (NORMALIZE&TYPE-VECTOR weights N-classes output-type) weights ) ) ;;; ________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; Prediction ;; Assuming one has generated a classification (partitioning), prediction is done ;; by calling read-test-data on an appripriate file, mark-test-variables with a list of ;; one or more variables to be predicted, and get-test-weights. *test-values* will then ;; hold the predicted variables, (display-weights *test-weights*) will display the weights, ;; and (get-max-weight-classes *test-weights*) will give a vector of the maximum weight ;; classes. ;; If the classification was done by tutored-partition using a single discrete ;; variable to define the classes, and that variable is now being predicted, do a ;; (map 'vector 'car *test-values*) to get the correct classes. The two lists can be ;; directly compaired to count or identify the errors by simple-errors. (defvar *test-db* nil "Source of test data.") (defvar *N-test-data* 0 "Number of test data.") (defvar *test-data* nil "Data for testing.") (defvar *test-weights* nil "Weights for test data.") (defvar *test-vars* nil "List of variables for testing.") (defvar *test-values* nil "Array of lists of values for variables in *test-vars*.") (defun read-test-data (in-file &optional n-data &key (variable-descriptions nil) (variable-type-check t)) "Reads a standard data file, setting *test-db*, *N-test-data*, and *test-data*; and setting up *test-weights*. The file is checked for compatable *variable-types* and *disc-var-ranges*. This function must match the read pattern of read-db." (setq *test-db* in-file) (with-open-file (stream (string in-file) :direction :input) (let ((variable-types (READ STREAM)) (disc-var-ranges nil)) (if (and variable-type-check (not (equal *variable-types* variable-types))) (break "Input variable types do not match current DB.") ) (if variable-descriptions (read stream)) ; discard variable descriptions (setf disc-var-ranges (mapcar #'(lambda (type range) (if (eq type 'discrete) (1+ range) range)) variable-types (READ STREAM))) (if (and variable-type-check (not (equal *disc-var-ranges* disc-var-ranges))) (break "Input discrete variable ranges do not match current DB.") ) (READ STREAM) ; discard discrete priors (READ STREAM) ; discard real priors (setq *N-test-data* (READ STREAM)) (setq *N-test-data* ; Read the number of data and set up storage. (cond ((eq n-data 'all) *N-test-data*) ((and (numberp n-data) (<= n-data *N-test-data*)) n-data ) (t (query-for-number (format nil "There are ~D data items in the file. How many ~ do you wish to use?" *N-test-data*) 1 *N-test-data*) ) ) ) (setq *test-data* (make-data-base *N-test-data*)) (setq *test-weights* (make-array *N-test-data* :adjustable t :initial-element nil)) (do ((N-datum 0 (1+ N-datum)) (value nil)) ((= N-datum *N-test-data*)) (do ((N-var 0 (1+ N-var)) (types variable-types (cdr types)) ) ((= N-var *N-variables*)) (setq value (READ STREAM)) (set-variable *test-data* N-datum N-var (cond ((or (null value) (eq '? value)) (if (eq 'discrete (car types)) 0 nil ) ) (t value) ) ) ) ) ) ) (fill *test-values* nil) (setq *test-vars* nil) ) (defun mark-test-variables (var-list &optional (sub-symbol *test-sub-symbol*)) "Restorably marks a set of variables for testing." (unless (= *N-test-data* (length *test-values*)) (setq *test-vars* nil) (setq *test-values* (make-array *N-test-data* :initial-element '())) ) (map nil #'(lambda (var) (push var *test-vars*) (dotimes (N-datum *N-test-data*) (push (get-variable *test-data* N-datum var) (aref *test-values* N-datum) ) (set-variable *test-data* N-datum var sub-symbol) ) ) var-list ) ) (defun get-test-weights (partition) " This finds the weight vectors of *test-data* relative to the classes of partition." (setq *test-weights* (map 'vector #'(lambda (datum) (get-new-weight-vector partition datum) ) *test-data* ) ) t ) (defun simple-errors () (let ((errors '()) (values (map 'vector 'car *test-values*)) (maxs (get-max-weight-classes *test-weights*)) ) (dotimes (n *N-test-data*) (if (/= (aref values n) (aref maxs n)) (push n errors)) ) errors ) ) (defun restore-test-variables () "This will restore the *test-data* to it's origional condition and empty *test-vars* and *tst-values*." (when (= *N-test-data* (length *test-values*)) (map nil #'(lambda (datum values) (map nil #'(lambda (var value) (setf (elt datum var) value) ) *test-vars* values ) ) *test-data* *test-values* ) (fill *test-values* nil) (setq *test-vars* nil) ) ) ;;; _______________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; New Heuristic Search Experiments (defun dynamic-disperse-cycle (partition &key (N-average 5) (stream t) (halt-range nil) (remove-nulls nil)) (multiple-value-bind (MML count classes trace) (dynamic-cycle partition :N-average N-average :stream stream :HALT-RANGE halt-range :remove-nulls remove-nulls) MML count classes (simple-disperse-cycle partition :N-average N-average :stream stream :halt-range halt-range :prev-trace trace :remove-nulls remove-nulls) ) ) (defvar *max-disperse-backstep-ratio* 1.0 "Maximum MML value of a dispersed partitioning, relative to the previous partioning MML, to avoid backtracking. Increasing this allows more dispersals to be searched by dynamic-cycle.") (defun simple-disperse-cycle (partition &key (N-average 5) (stream t) (HALT-RANGE nil) (prev-trace ()) (cache-file nil) (header nil) (remove-nulls nil) (min-wt 1.0e-4)) " Alternates between Disperse-Class&Update and Dynamic-Cycle to increase the probability by removing classes. Candidate classes are ordered by Gen-Disperse-List. Dispersing a class must produce an immediate probability increase to be accepted. The prev-failures list allows defering attempts to disperse classes which have not changed since a previous failure." (let ((working-part (copy-partition partition)) (reserve-wts (get-class-wts) ) (trace (list prev-trace)) (disperse-list ()) (prev-failures ()) (disperse nil) (level 0) (result) (test) ) (multiple-value-setq (disperse-list prev-failures) (GEN-DISPERSE-LIST partition prev-failures) ) (loop (when (or (= 1 (part-class-range working-part)) (<= (length disperse-list) 0) (and (setq test (read-char-no-hang)) ; Soft abort option (char-equal #\~ test) ) ) (when (< (part-total-MML working-part) (part-total-MML partition)) (replace-part partition working-part) ) (RETURN-FROM nil (values (part-total-MML partition) level (sort (coerce (class-wts partition :all t) 'list) '>) (apply 'append (reverse trace)) )) ) (format stream "~2&~A - At level ~A, MML ~A, ~%with disperse-list: ~A ~%" (formatted-time-string) level (part-total-MML partition) disperse-list ) (setf disperse (pop disperse-list)) (DISPERSE-CLASS&UPDATE (car disperse) working-part :remove-class remove-nulls) (when (<= (part-total-MML working-part) (* *max-disperse-backstep-ratio* (part-total-MML partition)) ) (setf result (multiple-value-list (DYNAMIC-CYCLE working-part :N-average N-average :stream stream :HALT-RANGE HALT-RANGE :skip-initialize t :remove-nulls remove-nulls)) ) ) (if (<= (part-total-MML working-part) (* (min 1.0 *max-disperse-backstep-ratio*) (part-total-MML partition) ) ) (progn (reset-wts reserve-wts *curr-wts* (part-class-range working-part)) (replace-part partition working-part) (push (fourth result) trace) (incf level) (multiple-value-setq (disperse-list prev-failures) (GEN-DISPERSE-LIST working-part prev-failures) ) (when cache-file (write-.wt-set cache-file (or header "Cached from Simple-Disperse-Cycle") level (make-array 1 :initial-element (part-total-MML working-part)) (make-array 1 :initial-element (sort (coerce (class-wts partition :all t) 'list) '>)) (list *curr-wts*) 'short min-wt)) ) (progn (format stream "~&Backtracking due to MML = ~A~%" (part-total-MML working-part) ) (reset-wts *curr-wts* reserve-wts (part-class-range partition)) (replace-part working-part partition) (pushnew disperse prev-failures :test 'equal)) ) ) ) ) (defun gen-disperse-list (partition &optional (prev-failures ()) ) " Generates a dispersal priority rating list of (N-class rating)'s. Rating is the ratio of the class's total MML to class weight, for only those classes with weight > *empty-class-wt-limit*. The prev-failures list provides a memory for those classes which could not be dispersed on previous attempts, and should therefor be deferred until their ratings change sufficiently. Equiv-disperse-rating and *disperse-change-ratio* define such sufficiency." (let ((ratings nil) (class nil) (classes (part-classes partition)) ) (dotimes (n-class (part-class-range partition)) (setf class (aref classes n-class)) (when (> (class-wt class) *empty-class-wt-limit*) (push (list n-class (round (/ (+ (class-description-MML class) (class-membership-MML class) ) (class-wt class) ) ) ) ratings ) ) ) (setq prev-failures (sort (remove-if-not #'(lambda (x) (member x ratings :test 'equiv-disperse-rating )) prev-failures ) '> :key 'second) ) (setq ratings (sort (remove-if #'(lambda (x) (member x prev-failures :test 'equiv-disperse-rating )) ratings ) '> :key 'second) ) (values (append ratings prev-failures) prev-failures) ) ) (defvar *disperse-change-ratio* .005 "The fractional change required of a disperse rating before it can be removed from the prev-failures list of 'gen-disperse-list. Must be in the range of 0.0 - 1.0.") (defun equiv-disperse-rating (rate-1 rate-2) "Determines if two disperse ratings are equivalent: they belong to the same class and their ratings are not significantly different. Note that disperse ratings are integral." (and (= (first rate-1) (first rate-2)) (<= (abs (- (second rate-1) (second rate-2))) (round (* .5 *disperse-change-ratio* (+ (second rate-1) (second rate-2)) )) ) ) ) (defun simple-disperse-class (N-class partition &optional (remove-class nil)) "Disperse members of partition's N-class. This is done by setting the class to it's priors and getting new class weights. Remove-class will effectivly remove the class from further computation." (when(>= N-class (part-class-range partition)) (break "Attempt to disperse class beyond current class range")) (let* ((max-class (1- (part-class-range partition))) (classes (part-classes partition)) (D-class (aref classes N-class)) ) (SET-CLASS-TO-PRIORS D-class (class-memb-prior-wt D-class)) (when remove-class ;; Shift D-class to end without otherwise altering class (do ((n N-class (1+ n)) ) ;; order and zero the newly corresponding weights. ((>= n max-class)) (setf (aref classes n) (aref classes (1+ n))) ) (setf (aref classes max-class) D-class) (dotimes (n *n-data*) (setf (aref (aref *curr-wts* n) max-class) 0) ) (decf (part-class-range partition)) ) (GET-DATA-WEIGHTS partition) ) ) (defun disperse-class&update (N-class partition &key (remove-class nil)) "Performs a 'simple-disperse-class then updates 'partition. Remove-class will effectivly remove the class from further computation, but THIS IS NOT ACTIVATED due to changes required in update-partition-mmls and called functions." (SIMPLE-DISPERSE-CLASS N-class partition remove-class) (COLLECT-WEIGHTS partition) (UPDATE-PARTITION-MMLs partition) ) ;------------------------ (defun smoothly-disperse-class-wt-over-all (N-class dispersion-ratio) "Smoothly disperses 'dispersion-ratio fraction of the weight currently assigned to N-class, over all other classes. This requires some random variation over the dispersal" (let ((N-classes (length (aref *curr-wts* 0)))) (if (<= N-classes 1) (break "Dispersion not possible with only one class.") (progn (when (or (< dispersion-ratio 0.0) (> dispersion-ratio 1.0) ) (break "Dispersion ratio must be in range [0.0 1.0]: ~A." dispersion-ratio) ) (when (and (> dispersion-ratio 0.0) (<= dispersion-ratio 1.0) ) (let* ((delta-ratio (/ dispersion-ratio (- N-classes 1))) (cutoff (/ *least-positive-short-float* delta-ratio)) ) (map nil #'(lambda (weights) (let ((weight (aref weights N-class)) (delta nil) ) (when (>= weight cutoff) (setq delta (* delta-ratio weight)) (decf (aref weights N-class) (* N-classes delta 1.0d0)) (dotimes (n N-classes) (incf (aref weights n) delta) ) (setf (aref weights N-class) (max 0.0 (coerce (aref weights N-class) 'short-float) ) ) ) ) ) *curr-wts* ) t ) )) ) ) ) (defun random-disperse-class-wt-over-all (N-class dispersion-ratio) "Randomly disperses 'dispersion-ratio fraction of the weight currently assigned to N-class, over all other classes." (let* ((N-classes (length (aref *curr-wts* 0))) (deltas (make-array N-classes)) ) (if (<= N-classes 1) (break "Dispersion not possible with only one class.") (progn (when (or (< dispersion-ratio 0.0) (> dispersion-ratio 1.0) ) (break "Dispersion ratio must be in range [0.0 1.0]: ~A." dispersion-ratio) ) (when (and (> dispersion-ratio 0.0) (<= dispersion-ratio 1.0) ) (map nil #'(lambda (weights) (let ((weight (aref weights N-class)) (delta-weight 0.0) (delta nil) (scale nil)) (when (> weight 0.0) (dotimes (n N-classes) (setf (aref deltas n) (+ 1.0 (random 10.0)))) (setf (aref deltas N-class) 0.0) (setq scale (/ dispersion-ratio (reduce '+ deltas))) (dotimes (n N-classes) (setq delta (* scale (aref deltas n) weight)) (when (>= delta (* short-float-epsilon (aref weights n))) (incf (aref weights n) delta) (incf delta-weight delta) ) ) (decf (aref weights N-class) delta-weight) ) ) ) *curr-wts*) t )) ) ) ) ;;; _______________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; Old Heuristic MML search routines from AutoClass I ;(defvar *partition-store* nil "Collector to moniter partitioning process") ;(defun find-MML-partition (&optional (start-partition nil) (min-abort nil) ) ; "Generates a MML partitioning of 'class-range ;;; partitions: ; (let ((new-partition nil) (mode nil) (candidates nil) (N-split nil)) ; (cond (start-partition ; (setq *best-partition* start-partition ) ) ; (t (setq *best-partition* (make-part&classes 1)) ; (random-distribute-data 1) ) ) ; (simple-minimize-MML *best-partition*) ; (setq *best-class-assignments* (get-class-assignments)) ; (setq candidates (ordered-classes *best-partition*)) ; (and *trace-0* (stringp *monitor*) ; (with-open-file (str *monitor* :direction :output :if-exists :append ; :if-does-not-exist :create) ; (format str "~4%~A with ~D of ~A at ~A on run ~D - Initial partition is:" ; (machine-instance) *N-data* *current-db* ; (time:print-current-time nil) *r&r-count*) ; (display-part-MMLs *best-partition* str) ; (format str "~&Processing order will be: ~A~%" candidates) ) ) ; (do ((end-it nil) ; (success nil nil) ) ; (end-it) ; (do () ; ((or success (endp candidates) )) ; (setq N-split (max 2 (truncate (log (part-class-wt *best-partition* (car candidates)))))) ; (setq new-partition ; (make-part&classes (+ N-split -1 (part-class-range *best-partition*)))) ; (setq mode 'Split) ; (do ((count 0 (1+ count))) ; ((or success (= count 2))) ; (restore-class-assignments *best-class-assignments*) ; (expand-partition new-partition) ; (split-class new-partition (car candidates) N-split) ; (simple-minimize-MML new-partition min-abort ) ; (and (> (- (part-total-MML *best-partition*) (part-total-MML new-partition)) ; (* short-float-epsilon ; (part-class-range *best-partition*) (part-total-MML *best-partition*) ) ) ; (setq success new-partition) ) ) ; (cond ((and (not success) ; (< 1 (part-class-range *best-partition*)) ) ; (setq mode 'Wide-disperse) ; (restore-class-assignments *best-class-assignments*) ; (expand-partition new-partition) ; (disperse-class (car candidates) (part-class-range new-partition)) ; (expand-partition new-partition) ; (simple-minimize-MML new-partition min-abort ) ; (and (> (- (part-total-MML *best-partition*) (part-total-MML new-partition)) ; (* short-float-epsilon ; (part-class-range *best-partition*) ; (part-total-MML *best-partition*) ) ) ; (setq success new-partition) ) )) ; (cond ((and (not success) ; (< 1 (part-class-range *best-partition*)) ) ; (setq mode 'Short-disperse) ; (restore-class-assignments *best-class-assignments*) ; (disperse-class (car candidates) (part-class-range *best-partition*) t) ; (expand-partition new-partition) ; (simple-minimize-MML new-partition min-abort ) ; (and (> (- (part-total-MML *best-partition*) (part-total-MML new-partition)) ; (* short-float-epsilon ; (part-class-range *best-partition*) ; (part-total-MML *best-partition*) ) ) ; (setq success new-partition) ) )) ; (or success (pop candidates)) ) ; ; (cond ((not success) ; (setq end-it t)) ; (t ; (setq *best-partition* success) ; (setq *best-class-assignments* (get-class-assignments)) ; (and *trace-0* (stringp *monitor*) ; (with-open-file (str *monitor* :direction :output :if-exists :append ; :if-does-not-exist :create) ; (format str "~&~2%~A with ~D of ~A at ~A on run ~D - ~A on ~D gives:" ; (machine-instance) *N-data* *current-db* ; (time:print-current-time nil) *r&r-count* mode ; (car candidates) ) ; (display-part-MMLs *best-partition* str) ; (format str "~&Processing order will be: ~A~%" candidates) ) ) ; (setq candidates (ordered-classes *best-partition*)) ) ) ) ; (push *best-partition* *partition-store*) ; (restore-class-assignments *best-class-assignments*) ; (if *collect-partition-assignments* ; (push *best-class-assignments* *partition-assignments-list*) ) ; *best-partition* ; )) (defun ordered-classes (partition) "Orders the classes by a weighted semi-average MML, Where the weighting is slightly in favor of the larger classes. " (let ((list ()) (range (part-class-range partition)) (class-size nil)) (do ((N-class 0 (1+ N-class))) ((= N-class range)) (and (not (zerop (setq class-size (part-class-wt partition N-class)))) (push (cons N-class (* (+ (class-memb-mml partition N-class) (class-desc-mml partition N-class) ) (expt class-size -.85) )) list ) ) ) (mapcar 'car (sort list '> :key 'cdr)) ) ) ;;; The following was used to investigate various ordering functions (defun ordered-classes-* (partition key) (let ((list ()) (range (part-class-range partition)) (avg-data-mml 0) ;used to be data-size-mml (class-size nil)) (do ((N-class 0 (1+ N-class))) ((= N-class range)) (and (not (zerop (setq class-size (part-class-wt partition n-class)))) (push (cons n-class (case key (total (+ (* class-size avg-data-mml) (class-memb-mml partition n-class) (class-desc-mml partition n-class) )) (desc (class-desc-mml partition n-class)) (size class-size) (avg (/ (+ (class-memb-mml partition n-class) (class-desc-mml partition n-class) ) class-size )) (desc-avg (/ (class-desc-mml partition n-class) class-size)) (desc*sqrt (/ (class-desc-mml partition N-class) (sqrt class-size))) (avg*sqrt (/ (+ (* class-size avg-data-MML) (class-memb-mml partition N-class) (class-desc-mml partition N-class) ) (sqrt class-size) )) (t (and (numberp key) (* (+ (* class-size avg-data-MML) (class-memb-mml partition N-class) (class-desc-mml partition N-class) ) (expt class-size key) ) )) ) ) list ) ) ) (mapcar 'car (sort list '> :key 'cdr)) ) ) ;;; (splitable-classes partition) - determines which classes have average MML ;;; greater than the partition average , or size greater than average, and returns them ;;; as a sorted list. ;;; ;(defun splitable-classes (partition) ; (let ((avg-MML (/ (+ (collect-memb-MMLs partition) ; (collect-desc-MMLs partition) ) ; *N-data* )) ; (avg-size (/ *N-data* (part-class-range partition))) ; (split-list ()) ; (class-size nil) (class-avg nil) ) ; (do* ((N-class 0 (1+ N-class))) ; ((= N-class (part-class-range partition))) ; (and (not (zerop (setq class-size (part-class-wt partition N-class)))) ; (setq class-avg (/ (class-desc-MML partition N-class) ; class-size)) ; (or (<= avg-size class-size) ; (<= avg-MML class-avg) ) ; (push (cons N-class class-avg) split-list) ) ) ; (mapcar 'car ; (sort split-list '> :key 'cdr) ) ) ) (defun split-class (partition split-class N-splits) "Redistributes the data in 'split-class into N-splits classes. One of these is the origional split-class and he others are the highest numbered (1- N-splits) classes in the partition. These are: (- class-range N-splits) to (- class-range 1)." (if (< N-splits 2) (break "Must split a class into at least two classes.")) ;;;;set new rank if necessary to expand array...otherwise nil (let ((number-of-classes (part-class-range partition)) (new-rank (if (< (array-dimension (aref *curr-wts* 0) 0) (+ (part-class-range partition) (1- N-splits))) (+ (part-class-range partition) (1- N-splits)) nil)) ) ; ;;;change parameters in partition description (incf (part-class-range partition) (1- N-splits)) (do* ((N-datum 0 (1+ N-datum))) ((= N-datum *N-data*)) ; ;;;find ammount of weight to distribute onto each new class (let* ((weight-array (aref *curr-wts* N-datum)) (old-class-wt (aref weight-array split-class))) ; ;;;prepare weight array by packing (do ((n (1+ split-class) (1+ n))) ((= n number-of-classes) ) (setf (aref weight-array (1- n)) (aref weight-array n)) (setf (aref (part-classes partition) (1- n)) ;;;remember to arrange the (aref (part-classes partition) n )) ) ;;;accumulators too ; ;;;if necessary, add to the rank of the arrays (cond (new-rank (adjust-array weight-array new-rank :initial-element 0) ; (adjust-array (part-classes partition) new-rank :initial-element 0) ) ) ; ;;;add new classes with the weights distributed across a range of weights. (let* ((percent-random-weight 100) (percent-unchanged-weight (- 1000 percent-random-weight)) (rnd-list (mapcar #'random (make-list N-splits :initial-element percent-random-weight))) (rnd-list-sum (apply '+ rnd-list)) (proportion-list (mapcar #'(lambda (x) (/ (+ percent-unchanged-weight x) (+ (* N-splits percent-unchanged-weight) rnd-list-sum))) rnd-list)) ) ;;; (format t "~%~&Old array : ~A " weight-array) ;;;DEBUG (do ((new-class (1- number-of-classes) (1+ new-class)) (proportion-list proportion-list (cdr proportion-list))) ((= new-class (+ (1- number-of-classes) N-splits)) (setf (aref *curr-wts* N-datum) weight-array)) (setf (aref weight-array new-class) (* old-class-wt (car proportion-list)))) ;;; ;;;DEBUG ;;; (format t "~&Weight-list: ~A Sum:~3d" rnd-list rnd-list-sum) ;;; (format t "~&Proportions: ~A Sum:~3D" proportion-list (apply '+ proportion-list)) ;;; (format t "~&New array : ~A ~%~%" weight-array) ) ) ) ) ) ;;; (dispersable-classes partition) - determines which classes have average ;;; MML greater than partition average and size less than partition average, and ;;; returns them as a sorted list. ;;; ;(defun dispersable-classes (partition) ; (let ((avg-MML (/ (part-total-MML partition) *N-data*)) ; (avg-size (/ *N-data* (part-class-range partition))) ; (disperse-list ()) ; (class-avg nil) (class-size nil)) ; (do* ((N-class 0 (1+ N-class))) ; ((= N-class (part-class-range partition))) ; (and (not (zerop (setq class-size (part-class-wt partition N-class)))) ; (<= class-size avg-size) ; (setq class-avg (/ (+ (class-memb-MML partition N-class) ; (class-desc-MML partition N-class) ) ; class-size )) ; (< avg-MML class-avg) ; (push (cons N-class (/ class-avg class-size)) disperse-list) ) ) ; (mapcar 'car ; (sort disperse-list '> :key 'cdr) ) ) ) (defun disperse-class (D-class partition &optional (remove-D-class nil)) "Disperse members from class in partition into the remaining classes. If is t then is a recipient of some members also; otherwise it is deleted." (when(>= D-class (part-class-range partition)) (break "Attempt to disperse class beyond current class range")) (let* ((total-No-of-classes (part-class-range partition)) (class-range (if remove-D-class (1- total-No-of-classes) total-No-of-classes)) (weight-delta nil) (weight-array nil)) ; ;;;change parameters in partitition description without changing array size. (setf (part-class-range partition) class-range) (do ((N-datum 0 (1+ N-datum))) ((= N-datum *N-data*)) ; ;;;find ammount of weight to distribute upon each class (setq weight-array (aref *curr-wts* N-datum)) (setq weight-delta (/ (aref weight-array D-class) (if remove-D-class (1- total-No-of-classes) total-No-of-classes))) ; ;;;pack the classes "above" D-class down (cond (remove-D-class (do ((n (1+ D-class) (1+ n))) ((= n total-No-of-classes)) (setf (aref weight-array (1- n)) (aref weight-array n)) (setf (aref (part-classes partition) (1- n)) ;;;remember to arrange the (aref (part-classes partition) n )) ) ;;;accumulators too (setf (aref weight-array class-range) 0) ) ;;;clean up edge of array (t (setf (aref weight-array D-class) 0) ) ) ;;;zero D-class--will distrib to it. ; ;;;increment all weights by weight-delta and save result to *curr-wts* (do ((n 0 (1+ n))) ((= n class-range) (setf (aref *curr-wts* N-datum) weight-array)) (incf (aref weight-array n) weight-delta)) ) ) ) ;;; ________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; Functions For Generating And Testing Normal Distributions (defun mean&sigma (number-seq) ;calculates mean & standard-deviation over a list (let ((out-type (if (integerp (elt number-seq 0)) 'short-float (type-of (elt number-seq 0)))) (count 0) (sum 0.0d0) (sum-sq 0.0d0) (mean nil) ) (map nil #'(lambda (value) (setq value (coerce value 'long-float)) (incf count) (incf sum value) (incf sum-sq (* value value)) ) number-seq ) (list (coerce (setq mean (/ sum count)) out-type) (coerce (sqrt (- (/ sum-sq count) (* mean mean))) out-type) ) ) ) (defun variable-mean&sigma (N-var) ;Mean & Sigma for a real variable in *base-data* (let ((count 0) (sum 0.0d0) (sum-sq 0.0d0) (mean nil) (value nil)) (or (member (elt *variable-types* N-var) ; Type check '(real-pt real&err) ) (break "~& called for ~A type variable.~%" (elt *variable-types* N-var) ) ) (do* ((n 0 (1+ n))) ((= n *N-data*)) (and (setq value (aref (aref *base-data* n) N-var)) ; ignore nil's (setq value (coerce value 'long-float)) (incf count) (incf sum value) (incf sum-sq (* value value)) ) ) (list (coerce (setq mean (/ sum count)) 'short-float) (coerce (sqrt (- (/ sum-sq count) (* mean mean))) 'short-float) ) ) ) (defun gen-normal (mean sigma) ;multiple calls produce a normal distribution (let ((range (* sigma (sqrt (/ 3.0 10.0)))) ) (do ((m 0 (1+ m))) ((= m 10) mean) (incf mean (* (random range) (- (* 2 (random 2)) 1) )) ) ) ) ;;; gen-normal-data - Generates a list of lists of normaly distributed number sets. ;;; The mean&sigma-list elements are lists of (mean sigma), each of which defines a ;;; sublist element: (gen-normal-data 500 '((25 5) (380 18) (1024 64))) ;;; (defun gen-normal-data (N-data mean&sigma-list) (let ((data ()) (N-vars (length mean&sigma-list)) (datum nil) ) (do ((n 0 (1+ n))) ((= n N-data)) (setq datum ()) (do ((i 0 (1+ i)) (pairs mean&sigma-list (cdr pairs)) ) ((= i N-vars)) (push (apply 'gen-normal (car pairs)) datum ) ) (push (reverse datum) data) ) data ) ) ;;; gen-formatted-data - Generates a list which when printed in a editor buffer can ;;; be easily reformatted to that required by a data file. The data-spec-list is a list ;;; of gen-normal-data argument lists as specified above. ;;; EXAMPLE: (gen-formatted-data '((5 ((25 5) (380 18))) (7 ((20 5) (400 18))))) ;;; (defun gen-formatted-data (data-spec-list) (let ((N-vars (length (cadar data-spec-list))) (N-data (apply '+ (mapcar 'car data-spec-list))) (data (apply 'append (mapcar #'(lambda (x) (apply 'gen-normal-data x)) (REVERSE data-spec-list) ) )) (priors nil) (output nil)) (do ((n (1- N-vars) (1- n))) ((< n 0)) (push (mean&sigma (mapcar #'(lambda (x) (elt x n)) data)) priors ) ) (do ((rest data (cdr rest))) ((endp rest)) (push (format nil "~A~%" (car rest)) output) ) (push (format nil "~A~%" N-data) output) (push (format nil "~A~%" priors) output) output ) ) (defun gen-data-spec-list (N-samples/pattern N-vars N-patt &optional (sigma-el 2) (sigma-patt 10) (var-min-limit 10) (var-max-limit 100) ) (map 'list ;; reformats variable lists for gen-formatted-data #'(lambda (normals) (list N-samples/pattern (map 'list #'(lambda (mean) (list (round mean) sigma-el)) normals))) (apply 'map ;; changes N-var lists of patterns into N-patt lists of variables (append '(list list) (map 'list ; reformats with max and min means first #'(lambda (normals) (let ((max (max+ normals)) (min (min+ normals))) (append (list max min) (remove max (remove min normals :count 1) :count 1)))) (map 'list ; generates N-patt means for each variable #'(lambda (mean) (gen-normal-list N-patt mean sigma-patt)) (let ; generates a ~random list of N-vars ((l1 ()) (range (- var-max-limit var-min-limit)) ) (dotimes (i N-vars) (push (+ var-min-limit (random range)) l1) ) (sort l1 '<) ) ) ) ) ) ) ) ;;; graph-list - generates a simple bar graph from a list of numbers. ;;; (defun graph-list (list &optional (scale 1) min max (stream t)) (let ((step nil) (half-step nil)) (setq list (sort (copy-list list) '<)) (or min (setq min (car list))) (or max (setq max (car (last list)))) (setq step (/ (- max min) 40)) (setq half-step (/ step 2.0)) (do ((end (+ min step) (+ end step))) ((> end max)) (format stream "~&~6,2F |" (- end half-step)) (do ((n 0 (1+ n))) ((or (endp list) (> (car list) end))) (when (zerop (mod n scale)) (format stream "X") ) (pop list)) (format stream "~%") ) ) ) (defun graph-real-var (N-var &key (wts *curr-wts*) (data *base-data*) (scale 1) min max (stream t) (n-lines 40)) "Generates a bar graph for real variable N-var using data and wts. Unknown values (nil values) are ignored. Min & max default to the variable limits. When writing to *standard-output*, line-wraping is suppressed. Printed characters are selected from the symbols list by the datum's assigned class (currently 50 max)." ; Updated JCS 18/Dec/87 (let ((symbols (vector #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\A #\B #\C #\D #\E #\F #\G #\H #\I #\J #\K #\L #\M #\N #\O #\P #\Q #\R #\S #\T #\U #\V #\W #\X #\Y #\Z #\+ #\- #\= #\* #\! #\@ #\$ #\% #\& #\* #\{ #\} #\[ #\] ) ) (lines (make-array n-lines :initial-element ())) (print-width most-positive-fixnum) (pairs ()) (step nil) (half-step nil) ) (or (member (elt *variable-types* N-var) ; Type check '(real-pt ) ) (break "~&Graph-Real-Var called for ~A type variable.~%" (elt *variable-types* N-var) ) ) (map nil #'(lambda (x y) (and (aref x N-var) (push (list (aref x N-var) (max+position y) ) pairs))) data wts) (setq pairs (sort pairs '< :key 'car)) (or min (setq min (caar pairs))) (or max (setq max (caar (last pairs)))) (setq step (/ (- max min) n-lines)) (setq half-step (/ step 2.0)) (do ((end (+ min step) (+ end step)) ; Build up and order the lines (n-line 0 (1+ n-line)) (curr-line () () ) ) ((= n-line N-Lines)) (do () ; Fill line with class numbers ((or (endp pairs) (> (caar pairs) end) )) (push (cadar pairs) curr-line) (pop pairs) ) (setf (elt lines n-line) (sort curr-line '<)) ) (cond ((or (eq stream t) ; Line wrap suppression (eq stream *standard-output*) ) #+ :symbolics (setq print-width (- (/ (- (send *standard-output* :width) (send *standard-output* :left-margin-size) (send *standard-output* :right-margin-size) ) (send *standard-output* :char-width) ) 13 )) ; = (+ 1 (length of next format pattern)) #- :symbolics (setq print-width (- 80 13)) )) (format stream "~&Variable ~A has ~A known values.~%" N-var (length pairs)) (do ((end (+ min step) (+ end step)) ; Build graph by lines (n-line 0 (1+ n-line)) curr-line ) ((= n-line N-Lines)) (setf curr-line (elt lines n-line)) (format stream "~&~10,2F |" (- end half-step)) (do ((n 0 (1+ n))) ; Fill line with symbols ((endp curr-line) ) (when (and (zerop (mod n scale)) (< n print-width) ) (format stream "~A" (aref symbols (car curr-line))) ) (pop curr-line) ) (format stream "~%") ) )) ;;; ________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; Timers ;;; This version was created at Symbolics, Inc. ;;; It is used to produce the official Symbolics benchmark values. ;;; Translated to Common Lisp by Charlie Hornig. ;;; Timing tools by Charlie Hornig and Dan Weinreb. ;;; This file is in the public domain. ;;; Low-level timing functions ;;; Given a function of no arguments, calls the function once without ;;; interrupts, and returns the time consumed in seconds, ;;; and the time that was spent in page faults, in seconds. ;;; Modified for taking arguments by John Stutz, 4/87. (defun timed-duration (fn &rest args) #+ :symbolics (process-allow-schedule) ;; Make sure that we aren't in the middle of a GC flip. #+ :symbolics (si:gc-reclaim-oldspace) ;; Turn off the scheduler so we can get repeatable numbers on Lisp Machines. (#+(or symbolics ti lmi mit) global:without-interrupts #-(or symbolics ti lmi mit) progn ;; The rest of this is pretty straightforward. (let ((start-real (get-internal-real-time)) (start-run (get-internal-run-time)) (result nil) ) (setq result (apply fn args)) (let ((end-real (get-internal-real-time)) (end-run (get-internal-run-time))) (values (/ (- end-real start-real) internal-time-units-per-second 1.0) (/ (- end-run start-run) internal-time-units-per-second 1.0) result ))))) (defun map-timed-duration (result-type function &rest sequences) (let ((total-real-time 0.0) (total-run-time 0.0) (temp nil) ) (setq temp (apply 'map `(,result-type ,#'(lambda (&rest args) (multiple-value-bind (real-time run-time result) (apply 'timed-duration (cons function args)) (incf total-real-time real-time) (incf total-run-time run-time) result ) ) . ,sequences ) ) ) (values total-real-time total-run-time temp ) ) ) (defparameter *minimum-tests* 3 "Run each test at least this many times") (defparameter *minimum-duration* 6 "Run each test for at least this many seconds") ;;; Given a function of no arguments, run it enough times that it is run ;;; at least *minimum-tests* times, and enough times so that at least ;;; *minimum-duration* seconds are consumed. ;;; Return the following values: ;;; The total number of runs that were done. ;;; The minimum time consumed per call in seconds ;;; The minimum cpu time (subtracting page fault time) ;;; The average time consumed per call in seconds ;;; The average cpu time (subtracting page fault time) (defun multiple-timed-duration (fn) ;; Run the function once, at least. (multiple-value-bind (total-real-time total-run-time) (timed-duration fn) (let ((min-real-time total-real-time) (min-run-time total-run-time) (repeats 1)) ;; Run it some more until we've done it enough. (loop (when (and (>= repeats *minimum-tests*) (>= total-run-time *minimum-duration*)) (return)) (multiple-value-bind (time cpu) (timed-duration fn) (incf total-real-time time) (setf min-real-time (min min-real-time time)) (incf total-run-time cpu) (setf min-run-time (min min-run-time cpu)) (incf repeats))) (values repeats min-real-time min-run-time (/ total-real-time repeats 1.0) (/ total-run-time repeats 1.0) )))) ;;; ________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; Utility Routines (defun display (vector &key (stream t)) "Use to print selected vector." (let ((*print-array* t)) (format stream "~&~A~%" vector))) (defun v-display (vector &key (stream t)) "Use to print selected vector." (let ((*print-array* t)) (if (or (vectorp vector) (listp vector)) (map nil #'(lambda (el) (format t "~&~A~%" el)) vector) (format stream "~&~A~%" vector) ) ) ) (defun formatted-time-string () "Produces a string matching the Symbolics 'who line' time stamp." (multiple-value-bind (second minute hour date month year day) (get-decoded-time) (setq month (elt '("Jan" "Feb" "Mar" "Apr" "May" "Jun" "Jul" "Aug" "Sep" "Oct" "Nov" "Dec" ) (1- month))) (setq day (elt '("Mon" "Tue" "Wed" "Thr" "Fri" "Sat" "Sun") day)) (format nil "~A ~A ~A ~A ~A:~A:~A" day date month year hour minute second) ) ) (defun safe-exp (x) "Satisfies attempts to take an out-of-bounds exponent, by providing limiting values." ;;; The Symbolics compiler will fold in the constant logs and exponents. ;; JCS 7/aug/87 (case (type-of x) ((fixnum single-float) (cond ((>= x (log most-positive-single-float)) (exp (1- (log most-positive-single-float))) ) ((<= x (log *least-positive-single-float*)) 0.0 ) (t (exp x)) ) ) (double-float (cond ((>= x (log most-positive-double-float)) (exp (1- (log most-positive-double-float))) ) ((<= x (log *least-positive-double-float*)) 0.0 ) (t (exp x)) ) ) (short-float (cond ((>= x (log most-positive-short-float)) (exp (1- (log most-positive-short-float)))) ((<= x (log *least-positive-short-float*)) 0.0 ) (t (exp x)) ) ) (long-float (cond ((>= x (log most-positive-long-float)) (exp (1- (log most-positive-long-float)))) ((<= x (log *least-positive-long-float*)) 0.0 ) (t (exp x)) ) ) ) ) (defun reset (fun seq &key end) "Replaces each value of seq with (funcall fun value)." (let ((l (length seq))) (dotimes (n (if end (min end l) l)) (setf (elt seq n) (funcall fun (elt seq n))) ) seq ) ) (defun randomize-random () (let ((semi-random (1+ (mod (get-internal-real-time) 1000)))) (dotimes (n semi-random) (random semi-random)) semi-random ) ) (defun simple-normalize-vector (vector) "Normalizes a vector. The origional vector's components are altered." (let ((scale (/ 1.0 (reduce '+ vector) )) ) (dotimes (n (length vector)) (setf (aref vector n) (* scale (aref vector n)) ) ) vector ) ) (defun normalize-active-vector (vector length) "Normalizes active part of a vector. The origional vector's components are altered." (let ((scale (/ 1.0 (reduce '+ vector :end length) )) ) (dotimes (n length) (setf (aref vector n) (* scale (aref vector n)) ) ) vector ) ) (defun normalize&type-vector (vector &optional (length nil) (element-type 'short-float)) "Normalizes a vector and coerces type. The origional vector's components are altered. When 'length is given, only the first 'length elements count." (let ((scale (/ 1.0 (max *least-positive-long-float* ; In case weights are all 0. (reduce '+ vector :end length) ))) ) (or length (setq length (length vector))) (dotimes (n length) (setf (aref vector n) (coerce (* scale (aref vector n)) element-type) ) ) vector ) ) ;;; The following max+ and min+ may be called on a numerical sequenence of any length. ;;; Note that this contrasts to max and min which must be 'applied' to a list ;;; of limited length. ;;; Revised 20april88 JCS - to allow specification of end positions. (defun max+ (num-seq &key end) "Maximum value of a numerical sequence of any length." (if (not (numberp (elt num-seq 0))) (break "~&non-numbers in ~A~%" num-seq) (reduce #'(lambda (x y) (if (numberp y) (max x y) (break "~&non-number ~A in num-seq~%" y) ) ) num-seq :end end :initial-value (elt num-seq 0) ) ) ) (defun min+ (num-seq &key end) "Minimum value of a numerical sequence of any length." (if (not (numberp (elt num-seq 0))) (break "~&non-numbers in ~A~%" num-seq) (reduce #'(lambda (x y) (if (numberp y) (min x y) (break "~&non-number ~A in num-seq~%" y) ) ) num-seq :end end :initial-value (elt num-seq 0) ) ) ) (defun max+position (num-seq &key end) "Position of the maximum value of a numerical sequence of any length." (position (max+ num-seq :end end) num-seq) ) (defun min+position (num-seq &key end) "Position of the minimum value of a numerical sequence of any length." (position (min+ num-seq :end end) num-seq) ) (defun max+! (seq &key end) "Maximum numerical value of a symbol sequence of any length, or nil if no numbers found." (let ((max (find-if #'numberp seq :end end))) (when max (reduce #'(lambda (x y) (if (numberp y) (max x y) x ) ) seq :end end :initial-value max ) ) ) ) (defun min+! (seq &key end) "Minimum numerical value of a symbol sequence of any length, or nil if no numbers found." (let ((min (find-if #'numberp seq :end end))) (when min (reduce #'(lambda (x y) (if (numberp y) (min x y) x ) ) seq :end end :initial-value min ) ) ) ) (defun query-for-number (message min max) (format t "~2%~A" message) (do ((input nil)) ((and (eq (type-of input) (type-of max)) (<= min input) (<= input max) ) input) (format t "~& Enter a number in the range ~D to ~D inclusive: " min max) (setq input (read t)) ) ) (defun square (x) (* x x)) (defun ~= (n1 n2 &key (range 1.0)) (> range (abs (- n1 n2))) ) (defun sigma-sq (N sum sum-sq) "Calculates the classical varience." (let ((mean (/ sum 1.0d0 N))) (coerce (- (/ sum-sq 1.0d0 N) (* mean mean) ) (type-of sum) ) ) ) (defun log-gamma (x &optional (full-precision nil)) "Computes (log (gamma x)) for x > 1.0 by numerical approximation. Correct to minimum of 4 digits for ~.05 < x < ~.99 or ~1.05 < x < ~1.975 or x > ~2.05 ." (setq x (float x)) ;; speeds processing of integer x w/o affecting reals (cond ((> x 3.0) (+ (* (- x 0.5e0) (log x)) (- x) 0.9189385332046727e0 ;; (log (sqrt (* 2 pi))) (/ 0.08333333333333333e0 x) ;; (/ 1 12 x) (- (/ 0.002777777777777778e0 (* x x x))) ;; -(/ 1 360 x^3) (if full-precision 0.0 (+ (/ 7.936507936507937e-4 (* x x x x x)) ;; (/ 1 1260 x^5) (- (/ 5.952380952380953e-4 (expt x 7))) ) ) ;; -(/ 1 1680 x^7) ) ) ((or (= x 1.0) (= x 2.0)) 0.0 ) ((> x 0.0) (- (log-gamma (+ 3.0e0 x)) (log (* x (+ 1.0e0 x) (+ 2.0e0 x))) ) ) (t (break "Attempted to take log-gamma ~A" x) ) ) ) ;;; ________________________________________________________________________________________ ;;; ________________________________________________________________________________________ ;;; Symbolics Specific Bit Mapped Graphical Routines ;;; These were primairly written for viewing IRAS LRS spectra, and the resulting classes. ;;; The variables x & y give the screen positions of the origen, x-step is the number of ;;; pixels per unit x, y-scale scales y values to pixels, and y-base is the y value at the ;;; plotting origen. #+ :SYMBOLICS (defun line-graph-list (list x y &optional (x-step 1) (y-scale 1.0) ) "Graphs a list as a line. X & y give the screen positions of the origen, x-step is the number of pixels per unit x, y-scale scales y values to pixels." (dw:with-own-coordinates () (when (cdr list) (graphics:draw-line (round x) (round (- y (* y-scale (car list)))) (round (+ x x-step)) (round (- y (* y-scale (cadr list)))) ) (line-graph-list (cdr list) (+ x x-step) y x-step y-scale) ) (graphics:draw-point (round x) (round y)) ) ) #+ :SYMBOLICS (defun line-graph-seq (seq x y &optional (x-step 1) (y-scale 1.0) (y-base 0.0) (base t)) "Graphs a sequence as a line. X & y give the screen positions of the origen, x-step is the number of pixels per unit x, y-scale scales y values to pixels, and y-base is the y value at the plotting origen." (let ((N-el (length seq)) (xpt x) (points nil)) (dotimes (n N-el) (push (round (- y (* y-scale (- (elt seq n) y-base)))) points) (push (round xpt) points) (incf xpt x-step) ) (dw:with-own-coordinates () (graphics:draw-lines points) (and base (graphics:draw-line (round x) (round y) (round (+ x (* x-step N-el))) (round y)) ) ) ) ) #+ :SYMBOLICS (defun static-line-graph-seq (seq x y &optional (x-step 1) (y-scale 1.0) (y-base 0.0) (base t)) "Graphs a sequence as a line at an output history independent of the current screen location. X & y give the screen positions of the origen, x-step is the number of pixels per unit x, y-scale scales y values to pixels, and y-base is the y value at the plotting origen. Try: (map 'nil #'(lambda (x) (static-line-graph-seq x 20 690 5 .2 8000)) ) for plotting a sequence of sequences of about 200 data in the range 8000 - 10500." (let ((N-el (length seq)) (xpt x) (points nil)) (dotimes (n N-el) (push (round (- y (* y-scale (- (elt seq n) y-base)))) points) (push (round xpt) points) (incf xpt x-step) ) (graphics:draw-lines points) (and base (graphics:draw-line (round x) (round y) (round (+ x (* x-step N-el))) (round y)) ) ) ) #+ :SYMBOLICS (defun dot-graph-list (list x y &optional (x-step 1) (y-scale 1.0) ) "Graphs a list as a dotted line. X & y give the screen positions of the origen, x-step is the number of pixels per unit x, y-scale scales y values to pixels." (dw:with-own-coordinates () (when list (graphics:draw-point (round x) (round (- y (* y-scale (car list)))) ) (dot-graph-list (cdr list) (+ x x-step) y x-step y-scale) ) (graphics:draw-point (round x) (round y)) ) ) #+ :SYMBOLICS (defun dot-graph-seq (seq x y &optional (x-step 1) (y-scale 1.0) ) "Graphs a sequence as a dotted line. X & y give the screen positions of the origen, x-step is the number of pixels per unit x, y-scale scales y values to pixels." (setq y (round y)) (dw:with-own-coordinates () (dotimes (N-el (length seq)) (graphics:draw-point (round x) (- y (round (* y-scale (elt seq N-el)))) ) (graphics:draw-point (round x) y) (incf x x-step) ) ) ) #+ :SYMBOLICS (defun static-dot-graph-seq (seq x y &optional (x-step 1) (y-scale 1.0) ) "Graphs a sequence as a dotted line at an output history independent of the current screen location. X & y give the screen positions of the origen, x-step is the number of pixels per unit x, y-scale scales y values to pixels. Try: (map 'nil #'(lambda (x) (static-line-graph-seq x 20 690 5 .2 8000)) ) for plotting a sequence of sequences of about 200 data in the range 8000 - 10500." (setq y (round y)) (dotimes (N-el (length seq)) (graphics:draw-point (round x) (- y (round (* y-scale (elt seq N-el)))) ) (graphics:draw-point (round x) y) (incf x x-step) ) ) ;;; ________________________________________________________________________________________ #+ :SYMBOLICS (defun dot-graph-class (N-class x y &optional (x-step 1) (y-scale 1.0)) (let ((class-list (get-max-weight-classes *curr-wts*))) (dw:with-own-coordinates () (map 'nil #'(lambda (datum class) (and (= class N-class) (static-dot-graph-seq datum x y x-step y-scale))) *base-data* class-list ) ) ) ) #+ :SYMBOLICS (defun line-graph-class (N-class x y &optional (x-step 1) (y-scale 1.0)) (let ((class-list (get-max-weight-classes *curr-wts*))) (dw:with-own-coordinates () (map 'nil #'(lambda (datum class) (and (= class N-class) (static-line-graph-seq datum x y x-step y-scale nil))) *base-data* class-list ) (graphics:draw-line (round x) (round y) (round (+ x (* x-step *N-data*))) (round y)) ) ) ) ;;; ________________________________________________________________________________________ #+ :SYMBOLICS (defun bar-graph-mean&sigmas (mean&sigmas x y &optional (x-step 1) (y-scale 1.0)) "See class-real-mean&sigmas. Non-real variables have nil placeholders." (dw:with-own-coordinates () (when mean&sigmas (when (car mean&sigmas) (let ((y-mean (round (- y (* y-scale (first (car mean&sigmas)))))) (y-sigma (round (* y-scale (second (car mean&sigmas))))) ) (graphics:draw-rectangle x (- y-mean y-sigma) (+ x (max 1 (1- x-step))) (+ y-mean y-sigma) ) (graphics:draw-rectangle x (- y-mean 1) (+ x (max 1 (1- x-step))) (+ y-mean 1) :alu :flip) ) (bar-graph-mean&sigmas (cdr mean&sigmas) (+ x x-step) y x-step y-scale) ) (graphics:draw-point x y ) ) ) ) #+ :SYMBOLICS (defun beam-graph-mean&sigmas (mean&sigmas x y &optional (x-step 1) (y-scale 1.0)) "See class-real-mean&sigmas. Non-real variables have nil placeholders." (dw:with-own-coordinates () (when mean&sigmas (when (car mean&sigmas) (let ((y-mean (round (- y (* y-scale (first (car mean&sigmas)))))) (y-sigma (round (* y-scale (second (car mean&sigmas))))) ) (graphics:draw-rectangle ; mean value line x (- y-mean 1) (+ x (max 1 x-step)) (+ y-mean 2)) (graphics:draw-line ; upper sigma line x (- y-mean y-sigma) (+ x (max 1 (1- x-step))) (- y-mean y-sigma) ) (graphics:draw-line ; sigma tie line (+ x (max 1 (truncate (* .5 x-step)))) (- y-mean y-sigma) (+ x (max 1 (truncate (* .5 x-step)))) (+ y-mean y-sigma) ) (graphics:draw-line ; lower sigma line x (+ y-mean y-sigma) (+ x (max 1 (1- x-step))) (+ y-mean y-sigma) ) ) (beam-graph-mean&sigmas (cdr mean&sigmas) (+ x x-step) y x-step y-scale) ) (graphics:draw-point x y ) ) ) ) ;;; ________________________________________________________________________________________ ;;; ________________________________________________________________________________________