;; -*- Mode: LISP; Package: COMMON-LISP-USER; Base: 10; Syntax: Common-lisp -*- ; ;****************************************************************************** ; ; AUTOCLASS II - Automatic Classification of Data ; ; Authors : Peter Cheeseman - RIACS - Research Institute for Advanced ; Computer Science ; Matthew Self - Sterling Software ; ; John Stutz - RIA - Artificial Intelligence Research Branch ; ; William Taylor - Sterling Software ; ; Address : MS 244-17, NASA Ames Research Center, Moffett Field, CA 94035 ; ; Phone : (415) 694-4946 - FTS 464-4946 ; ; Arpanet : taylor%plu@io.arc.nasa.gov ; ; Environment : Common Lisp ; ; Revision History : 11 Jan 88 - original released version - ; ;****************************************************************************** ; ; File : >taylor>autoclass-x>ac>dev-will.lisp ; ; Functions: REFINE-BEST-OF-WT-SET ; REDUCE-*BASE-DATA*-FOR-CLASS ; SAVE-FULL-*BASE-DATA* ; RESTORE-FULL-*BASE-DATA* ; CREATE-LARGER-CLASS-WT.SET (not used) ; CREATE-META-*BASE-DATA* ; GET-MEAN-VALUES ; WRITE-META-*BASE-DATA* ; READ-META-*BASE-DATA* ; PRINT-META-CLASSES ; GET-META-CLASSES ; META-PAGINATE ; META-OUTPUT-PAGE-HEADERS ; RE-ORDER-CLASSES ; PRINT-*META-BASIC-CLASSES-MAPPING* ; PROCESS-DATA-RANGES ; GENERATE-*DISC-VAR-RANGES* ; GENERATE-*REAL-PRIORS* ; DATA-CHECK ; PROCESS-REAL-PT-DATA ; PROCESS-REAL-PT-DATA-DISCRETE ; FILL-BINS ; CONVERT-DATA-TO-DISCRETE ; CONVERT-*TEST-DATA*-TO-DISCRETE ; SAVE-CONVERT-ARRAY ; WRITE-*BASE-DATA* ; READ-*BASE-DATA* ; GENERATE-HEADER-FILE ; GET-CLASS-WEIGHT-ORDERING ; READ&VALIDATE-DB ; SEQUENCE-CASES ; ; Revision history : 04 Jun 87 - original version ; 05 Jan 88 - For nograph UI => remove lrs flux references ;;; ;;;********************************** ;;; (defvar *BASE-DATA-STORE* nil) (defvar *PARTITION-STORE* nil) (defvar *N-DATA-STORE* nil) (defvar *CURR-WTS-STORE* nil) (defvar *CURRENT-RESULTS-STORE* nil) (defvar *VARIABLE-RANGES* nil "list of pairs - min-range & max-range for each variable - used by PROCESS-DATA-RANGES") (defvar *DISCRETES-ADJUSTED+1* () "list of attribute indexes which have their values increased by 1") (defvar *CLASS-ASSIGNMENTS* nil) (defvar *META-BASIC-CLASSES-MAPPING* nil) (defvar *CLASS-WT-ORDERING* nil) ; used by UI & xxx-data-mods.lisp (defvar *SPLIT-CLASS-NUM* NIL "non nil if base data is of a split class") (defstruct BIN value-array cnt-array assoc-list) ;;; ;;;********************************** ;;; (defun REFINE-BEST-OF-WT-SET (partition N-held N-classes save-file &optional (N-average 5) (stream t) (refined-criterion nil) (header nil)) "runs dynamic-cycle on best classification of .wt-set and saves it back into .wt-set" (let ((MMLs nil) (classes nil) (weightings nil) (position nil) (count nil) (*print-array* t) (*print-level* nil) (*print-length* nil) ) (when (probe-file save-file) (cond-read-.wt-set save-file header count MMLs classes weightings N-classes N-held)) (setf position (position (min+! MMLs) MMLs)) (setf *curr-wts* (elt weightings position)) (dynamic-cycle partition N-average stream refined-criterion) (setf (elt weightings position) *curr-wts*) (setf (elt MMLs position ) (part-total-MML partition)) (setf (elt classes position) (sort (map 'list #'class-wt (part-classes partition)) #'>) ) (write-.wt-set save-file header (incf count) MMLs classes weightings) (format t "~2%****** ~A ******~2%" MMLs))) ;;; ;;;********************************** ;;; (defun REDUCE-*BASE-DATA*-FOR-CLASS (class) "create a new *base-data* consisting of datum from one specified class" (let ((list-of-datums ())) (save-full-*base-data*) (dotimes (index *n-data-store*) (when (equal (aref *class-assignments* index) class) (setf list-of-datums (cons index list-of-datums)))) (setf list-of-datums (nreverse list-of-datums)) (let* ((new-base-length (length list-of-datums)) (base-data (make-array new-base-length))) (do ((id 0 (1+ id))) ((>= id new-base-length)) (setf (aref base-data id) (copy-seq (aref *base-data-store* (first list-of-datums)))) (setf list-of-datums (rest list-of-datums))) (setf *base-data* base-data) (setf *n-data* new-base-length)) (collect-real-priors-from-data))) ; computes *real-priors* & *real-bases* ;;; ;;;********************************** ;;; (defun SAVE-FULL-*BASE-DATA* () "save full base data before REDUCE-*BASE-DATA*-FOR-CLASS or CREATE-META-*BASE-DATA* has been used" (when (null *base-data-store*) (setf *class-assignments* (get-max-weight-classes *curr-wts*)) (setf *base-data-store* *base-data*) (setf *n-data-store* *n-data*) (setf *partition-store* *partition*) (setf *curr-wts-store* *curr-wts*) (setf *current-results-store* *current-results*))) ;;; ;;;********************************** ;;; (defun RESTORE-FULL-*BASE-DATA* () "restore full base data after SAVE-FULL-*BASE-DATA*has been used" (setf *base-data* *base-data-store*) (setf *n-data* *n-data-store*) (setf *partition* *partition-store*) (setf *curr-wts* *curr-wts-store*) (setf *current-results* *current-results-store*) (collect-real-priors-from-data)) ; computes *real-priors* & *real-bases* ;;; ;;;********************************** ;;; **NOT USED** (defun CREATE-LARGER-CLASS-WT.SET (old-wt.set new-wt.set num-old-classes num-new-classes N-held &optional (file-header nil)) "create a new wt.set file with an initial classification for a larger number of classes exptrapolated from an old wt.set file" (get-best-of-weightings (string old-wt.set)) (let ((new-curr-wts (make-array *n-data*))) (dotimes (datum *n-data*) (set-datum new-curr-wts datum (make-array num-new-classes :initial-element 0.0)) (dotimes (class-num num-old-classes) (set-variable new-curr-wts datum class-num (get-variable *curr-wts* datum class-num)))) (make-part&classes num-new-classes) (setf *curr-wts* new-curr-wts)) (collect-weights *partition*) (update-partition-mmls *partition*) (let ((MMLs (make-array N-held :initial-element nil)) (classes (make-array N-held :initial-element nil)) (weightings (make-array N-held :initial-element nil)) (*print-array* t)(*print-level* nil) (*print-length* nil)) (setf (elt MMLs 0) (part-total-mml *partition*)) (setf (elt classes 0) (sort (map 'list #'class-wt (part-classes *partition*)) #'>)) (setf (elt weightings 0) *curr-wts*) (write-.wt-set (string new-wt.set) file-header N-held MMLs classes weightings) (format t "~2%****** ~A ******~2%" MMLs))) ;;; ;;;********************************** ;;; (defun CREATE-META-*BASE-DATA* (meta-base-save-file) "create *base-data*, n-classes in number, with attribute values being the means for the class -- assumes all IGNORE vars are at start of datum discrete var types are no opted by setting them = 0 re-use header vars 0 & 1 -- leave others alone" (save-full-*base-data*) (let* ((n-classes (part-class-range *partition-store*)) (meta-base-data (make-array n-classes :initial-element nil))) (dotimes (class-num n-classes) (let ((datum-array (make-array *n-variables* :initial-element nil))) (setf (aref datum-array 0) class-num) (setf (aref datum-array 1) (apply #'(lambda () (let ((datum-cnt 0)) (dotimes (datum-num *N-data-store*) (when (= (aref *class-assignments* datum-num) class-num) (setq datum-cnt (1+ datum-cnt)))) datum-cnt)) ())) (setf (aref meta-base-data class-num) (get-mean-values class-num datum-array)))) (setf *base-data* meta-base-data) (setf *n-data* n-classes) (collect-real-priors-from-data) ; compute *real-priors* & *real-bases* (write-meta-*base-data* meta-base-save-file) t)) ;;; ;;;********************************** ;;; (defun GET-MEAN-VALUES (class-num datum-array) "calculate mean values of data attributes" (dotimes (attribute-num *n-variables*) (let ((attribute-value-array (aref (class-value-wts (aref (part-classes *partition-store*) class-num)) attribute-num))) (case (nth attribute-num *variable-types*) (ignore ) (real-pt (setf (aref datum-array attribute-num) (/ (aref attribute-value-array 3) (aref attribute-value-array 2)))) (discrete (setf (aref datum-array attribute-num) ; (apply #'(lambda (att-num att-array) ; (let ((wt-sum 0.0) (value-wt-sum 0.0)) ; (dotimes (n (length att-array)) ; (incf wt-sum (aref att-array n)) ; (incf value-wt-sum (* n (aref att-array n)))) ; (/ value-wt-sum wt-sum))) ; (list attribute-num attribute-value-array)) 0))))) datum-array) ;;; ;;;******************************************** ;;; (defun WRITE-META-*BASE-DATA* (save-file) "write meta *base-data* to save file - suitable to be read by read-meta-*base-data*" (let ((*print-array* t) (*print-length* nil) (*print-level* nil)) (with-open-file (stream (string save-file) :direction :output) (format stream "~2& ; *variable-types* ~& ~A" *variable-types*) (format stream "~2& ; *n-variables* ~& ~A" *n-variables*) (format stream "~2& ; *variable-descriptions* ~& ~S" *variable-descriptions*) (format stream "~2& ; *disc-var-ranges* ~& ~A" *disc-var-ranges*) (format stream "~2& ; *disc-priors* ~& ~A" *disc-priors*) (format stream "~2& ; *real-priors* ~& ~A" *real-priors*) (format stream "~2& ; *N-data* ~& ~A ~&" *N-data*) (format stream "~2& ; *base-data* ~& ~A ~&" *base-data*)))) ;;; ;;;******************************************** ;;; (defun READ-META-*BASE-DATA* (save-file) "read meta *base-data*, etc from save file - does equivalent of read-db" (setf *current-db* (string save-file)) (let ((*print-array* t) (*print-length* nil) (*print-level* nil)) (with-open-file (stream (string save-file) :direction :input) (setf *variable-types* (read stream)) (setf *n-variables* (read stream)) (setf *variable-descriptions* (read stream)) (setf *disc-var-ranges* (read stream)) (setf *disc-priors* (read stream)) (setf *real-priors* (read stream)) (setf *real-bases* (Make-*Real-Bases*-List)) (Update-Real-MML-Constants) (setf *N-data* (read stream)) (setf *base-data* (read stream)) (setq *curr-wts* (make-array *N-data* :adjustable t :initial-element nil))))) ;;; ;;;********************************** ;;; (defun PRINT-META-CLASSES (file) "Sorts the elements of get-meta-classes by AutoClass meta max-class and class" (with-open-file (out-str file :direction :output) ;; (let ((out-str t)) (format out-str "~%~12@T AutoClass meta-classifications for the ~A classes ~%~12@T of ~A, ~%~12@T using the ~A MML classification ~%~12@T in ~A." *n-data* *current-db* (part-total-mml *partition*) *current-results*) (format out-str "~%~12@T SORTED BY AUTOCLASS META CLASSIFICATION.") (let ((inluded-key-p t)) (meta-paginate out-str (sort-meta-classes-by-autoclass-classes) inluded-key-p)))) ;;; ;;;********************************** ;;; (defun SORT-META-CLASSES-BY-AUTOCLASS-CLASSES (&optional (data *base-data*) (weights *curr-wts*)) "sort meta classes by autoclass classification" (let ((meta-classes nil)) (setf meta-classes ; compute sort key & insert at front (map 'list #'(lambda (elt) (let ((key (+ (* 100 (first (second elt))) ; AutoClass class (first (first elt))))) ; class num (cons key elt))) (get-meta-classes data weights))) (stable-sort meta-classes #'< :key #'first))) ;;; ;;;********************************** ;;; (defun PRINT-META-CLASSES-2 (file &optional (data *base-data*) (weights *curr-wts*)) "elements of get-meta-classes by class number (already in that order)" (with-open-file (out-str file :direction :output) ;; (let ((out-str t)) (format out-str "~%~12@T AutoClass meta-classifications for the ~A classes ~%~12@T of ~A, ~%~12@T using the ~A MML classification ~%~12@T in ~A." *n-data* *current-db* (part-total-mml *partition*) *current-results*) (format out-str "~%~12@T SORTED BY BASE CLASS NUMBER.") (let ((inluded-key-p t)) (meta-paginate out-str (get-meta-classes data weights) (not inluded-key-p))))) ;;; ;;;********************************** ;;; (defun GET-META-CLASSES (&optional (data *base-data*) (weights *curr-wts*)) "collects data from *base-data* & along with with the ordered set of (class wt) pairs for which wt > .1 as: ((h1 h2 ... h-N) (c1 w1) (c2 w2) ...)." (let* ((N-classes (length (elt weights 0))) (temp (make-array N-classes)) (out-list ())) (do* ((index 0 (1+ index))) ((= index *n-data*)) (let ((collector ()) (wt ()) (datum (aref data index)) (wts (aref weights index))) (setq temp (sort (replace temp wts) '>)) (dotimes (n N-classes) (setq wt (elt temp n)) (if (> wt .1) (push (list (position wt wts) wt) collector) (return) ) ) (setf out-list (cons (cons (list (aref datum 0) ; class-id (aref datum 1)) ; num of datum in class (nreverse collector)) out-list)))) (nreverse out-list))) ;;; ;;;********************************** ;;; (defun META-PAGINATE (out-str list-of-lines inluded-key-p) "write output with headers at top of each page" (meta-output-page-headers out-str t) (let ((cnt 0) (page-cnt 64) (format-string "~&~12@T~{ ~2D ~4D~}~{~5@T~{(~2D ~4,2F)~}~}")) (dolist (elt list-of-lines) (setf cnt (1+ cnt)) (if inluded-key-p (format out-str format-string (second elt) (rest (rest elt))) (format out-str format-string (first elt) (rest elt))) (when (= cnt page-cnt) (setf cnt 0) (setf page-cnt 70) (format out-str "~|") (meta-output-page-headers out-str nil))))) ;;; ;;;********************************** ;;; (defun META-OUTPUT-PAGE-HEADERS (out-str page-1-p) "write out page headers for sort listings" (let ((new-line "~%")) (if page-1-p (setf new-line "~2&")) (format out-str (concatenate 'string new-line "~12@T Class num # datum AutoClass (# wt) ~2%")))) ;;; ;;;********************************** ;;; (defun GENERATE-*META-BASIC-CLASSES-MAPPING* () "generate mapping from basic class ordering to meta class ordering" (let ((raw-meta-classes (sort-meta-classes-by-autoclass-classes)) (meta-classes ())) (dolist (element raw-meta-classes) (setf meta-classes (cons (list (second element) (first (third element))) meta-classes))) (nreverse meta-classes))) ;;; ;;;********************************** ;;; (defun RE-ORDER-CLASSES (N-held N-classes base-*current-results* meta-classes-mapping) "re-order classes in partition using meta classification & save to .wt-set file" (let ((MMLs nil) (classes nil) (weightings nil) (position nil) (count nil) (header nil) (*print-array* t) (*print-level* nil) (*print-length* nil)) (cond-read-.wt-set base-*current-results* header count MMLs classes weightings N-classes N-held) (setf position (position (min+! MMLs) MMLs)) (setf *curr-wts* (elt weightings position)) (collect-weights *partition*) (update-partition-mmls *partition*) (let* ((base-class-array (part-classes *partition*)) (meta-class-array (make-array N-classes :initial-element nil))) (do ((class-n 0 (1+ class-n)) ; adjust partition (meta-class meta-classes-mapping (rest meta-class))) ((null meta-class)) (setf (aref meta-class-array class-n) (aref base-class-array (first (first (first meta-class)))))) (setf (part-classes *partition*) meta-class-array) (dotimes (data-n *n-data*) ; adjust current weights (setf base-class-array (aref *curr-wts* data-n)) (setf meta-class-array (make-array N-classes :initial-element nil)) (do ((class-n 0 (1+ class-n)) (meta-class meta-classes-mapping (rest meta-class))) ((null meta-class)) (setf (aref meta-class-array class-n) (aref base-class-array (first (first (first meta-class)))))) (setf (aref *curr-wts* data-n) meta-class-array))) ;; save *curr-wts* (setf (elt weightings position) *curr-wts*) (setf (elt MMLs position ) (part-total-MML *partition*)) (setf (elt classes position) (sort (map 'list #'class-wt (part-classes *partition*)) #'>) ) (write-.wt-set base-*current-results* header count MMLs classes weightings) (format t "~2%****** ~A ******~2%" MMLs)) t) ;;; ;;;********************************** ;;; (defun PRINT-*META-BASIC-CLASSES-MAPPING* (file n-classes current-db part-total-mml current-results) "prints *meta-basic-classes-mapping* in basic classes environment use Format File to print output file - because of Greek letters in *meta-basic-classes-mapping*" (with-open-file (out-str file :direction :output) ;; (let ((out-str t)) (format out-str "~%@begin(example) ~%") (format out-str "~%~5@T AutoClass re-ordered meta-classifications for the ~A classes ~%~5@T of ~A, ~%~5@T using the ~A MML classification in ~%~5@T ~A." n-classes current-db part-total-mml current-results) (format out-str (concatenate 'string "~2&" "~15@T Class num Meta-class num Membership ~2%")) (let ((cnt 0) (line-cnt 36) (class-num 0) (format-string "~&~15@T ~2D ~@7A ~3D")) (dolist (elt *meta-basic-classes-mapping*) (format out-str format-string class-num (format nil "@g(~A~D)" (string-downcase (first elt)) (second elt)) (apply #'(lambda () (let ((datum-cnt 0)) (dotimes (datum-num *N-data*) (when (= (aref *class-assignments* datum-num) class-num) (setq datum-cnt (1+ datum-cnt)))) datum-cnt)) ())) (setf cnt (1+ cnt)) (setf class-num (1+ class-num)) (when (= cnt line-cnt) (setf cnt 0) (setf line-cnt 42) (format out-str "~%@newpage[]") (format out-str (concatenate 'string "~%" "~15@T Class num Meta-class num Membership ~2%"))))) (format out-str "~%@end(example) ~%"))) ;;; ;;;*********************************************************** ;;; (defun PROCESS-DATA-RANGES (in-file out-file &key n-variables (log-file t) (check-real-pt-also nil)) "range check a raw data base, for discrete & real-pt attributes" (let ((output-datum ()) (datum-cnt 0) (datum-id nil) (value nil) (exit-flag nil) (log t) (variable-types *variable-types*) (variable-ranges *variable-ranges*)) (when (stringp log-file) (setf log (open log-file :direction :output))) (with-open-file (in in-file :direction :input) (with-open-file (out out-file :direction :output) ;; (let ((out t)) ; debug (format log "~2% Discrete attribute indexes whose values are adjusted by +1 ~% ~A" *discretes-adjusted+1*) (loop ;; (if (= datum-cnt 4) (return t)) ; debug (incf datum-cnt) (setf output-datum ()) (dotimes (variable-index n-variables) (setf value (read in nil 'eof)) (when (equal value 'eof) (setf exit-flag t) (return t)) (if (= variable-index 0) (setf datum-id value)) (when (not (equal value '?)) (let ((var-ranges (nth variable-index variable-ranges))) (case (nth variable-index variable-types) (discrete (when (and (/= value 0) ; input unknown (or (< value (first var-ranges)) (> value (second var-ranges)))) (format log "~% datum-cnt ~A datum-id ~A var-indx ~A range ~A value ~A => set to ?" datum-cnt datum-id variable-index var-ranges value) (setf value '?)) ; out of range unknown (if (and (not (equal value '?)) (member variable-index *discretes-adjusted+1*)) (incf value))) (real-pt (when (and check-real-pt-also (or (< value (first var-ranges)) (> value (second var-ranges)))) (format log "~% datum-cnt ~A datum-id ~A var-indx ~A range ~A value ~A => set to ?" datum-cnt datum-id variable-index var-ranges value) (setf value '?)))))) ; out of range unknown (setf output-datum (cons value output-datum))) (if exit-flag (return t)) (format out "~%") (do ((variable-index 0 (1+ variable-index)) (value-list (nreverse output-datum) (rest value-list))) ((null value-list)) (format out " ~S" (first value-list)))))) (format log "~2% ~A data processed" (1- datum-cnt)) (when (stringp log-file) (close log)))) ;;; ;;;********************************** ;;; (defun GENERATE-*DISC-VAR-RANGES* (&optional (variable-types *variable-types*) (variable-ranges *variable-ranges*)) "generate *disc-var-ranges* from *variable-types* & *variable-ranges*" (let ((variable-index -1)) (mapcar #'(lambda (type range) (setf variable-index (1+ variable-index)) (case type (ignore nil) (real-pt nil) (discrete ; range always starts at 1 - ends at max range value (if (member variable-index *discretes-adjusted+1*) (1+ (second range)) (second range))))) variable-types variable-ranges))) ;;; ;;;********************************** ;;; (defun GENERATE-*REAL-PRIORS* (&optional (variable-types *variable-types*) (variable-ranges *variable-ranges*)) "generate *real-priors* from *variable-types* & *variable-ranges*" (mapcar #'(lambda (type range) (case type (ignore nil) (real-pt '(1 1)) (discrete nil))) variable-types variable-ranges)) ;;; ;;;********************************** ;;; (defun DATA-CHECK (in-file n-variables var-index) "print out values of attribute indicated by var-index from input file" (let ((value nil) (exit-flag nil)) (with-open-file (in in-file :direction :input) (format t "~%") (loop (dotimes (variable-index n-variables) (setf value (read in nil 'eof)) (when (equal value 'eof) (setf exit-flag t) (return t)) (if (= variable-index var-index) (format t " ~A" value))) (if exit-flag (return t)))))) ;;; ;;;********************************** ;;; (defun PROCESS-REAL-PT-DATA (in-file out-file &key n-data (variable-descriptions nil) (header-file nil) (log-file t) (convert-file nil) (num-unique-values 10)) "check for real-pt attributes that need to be converted to discretes, since their number of unique values is less than num-unique-values" (read-db in-file n-data :variable-descriptions variable-descriptions :header-file header-file) (let ((num-of-bins (1+ num-unique-values)) (log t) (real-pt-bin-array (make-array *n-variables* :initial-element nil))) (when (stringp log-file) (setf log (open log-file :direction :output))) (dotimes (indx *n-variables*) (setf (aref real-pt-bin-array indx) (make-bin :value-array (make-array num-of-bins :initial-element nil) :cnt-array (make-array num-of-bins :initial-element 0) :assoc-list nil))) (fill-bins n-data real-pt-bin-array num-of-bins) (dotimes (variable-index *n-variables*) (let ((variable-struct (aref real-pt-bin-array variable-index))) (when (equal (nth variable-index *variable-types*) 'real-pt) (cond ((or (= (aref (bin-cnt-array variable-struct) 1) 0) ; all values are unknown (and (= (aref (bin-cnt-array variable-struct) 0) 0) (= (aref (bin-cnt-array variable-struct) 2) 0))) ; all values are known & are the same (setf (nth variable-index *variable-types*) 'ignore) (setf (nth variable-index *disc-var-ranges*) nil) (setf (nth variable-index *real-priors*) nil) (format log "~% variable ~A : ~A *** type changed to IGNORE => all values are the same or all values are unknown" variable-index (nth variable-index *variable-descriptions*))) ((= (aref (bin-cnt-array variable-struct) num-unique-values) 0) ; not all bins used -> discrete (process-real-pt-data-discrete variable-index num-of-bins variable-struct log)))))) (when (stringp log-file) (close log)) (convert-data-to-discrete n-data real-pt-bin-array out-file convert-file))) ;;; ;;;********************************** ;;; (defun PROCESS-REAL-PT-DATA-DISCRETE (variable-index num-of-bins variable-struct log) "convert real-pt variable to discrete" (setf (nth variable-index *variable-types*) 'discrete) (setf (nth variable-index *real-priors*) nil) (let ((range 1) (assoc-list '((nil 0)))) (do ((bin-num 1 (1+ bin-num))) ((= bin-num num-of-bins)) (cond ((/= (aref (bin-cnt-array variable-struct) bin-num) 0) (setf assoc-list (cons (list (aref (bin-value-array variable-struct) bin-num) bin-num) assoc-list)) (incf range)) (t (return t)))) (format log "~% variable-index ~A " variable-index) (dotimes (bin-num num-of-bins) (format log "~% bin ~D: value ~A cnt ~3D" bin-num (aref (bin-value-array variable-struct) bin-num) (aref (bin-cnt-array variable-struct) bin-num))) (setf (bin-assoc-list variable-struct) assoc-list) (setf (nth variable-index *disc-priors*) (make-list range :initial-element (/ 1.0 range))) (setf (nth variable-index *disc-var-ranges*) range) (format log "~% variable ~A : ~A *** type changed to DISCRETE " variable-index (nth variable-index *variable-descriptions*)) (format log "~% ~6@Tmapping ~A" (reverse assoc-list)))) ;;; ;;;********************************** ;;; (defun FILL-BINS (n-data real-pt-bin-array num-of-bins) "fill bins for *base-data*" (dotimes (n n-data) (let ((variable-array (aref *base-data* n)) (value nil)) (dotimes (variable-index *n-variables*) (when (equal (nth variable-index *variable-types*) 'real-pt) (setf value (aref variable-array variable-index)) (let ((value-array (bin-value-array (aref real-pt-bin-array variable-index))) (cnt-array (bin-cnt-array (aref real-pt-bin-array variable-index)))) (cond ((null value) (incf (aref cnt-array 0))) ; unknown values (t (do ((bin-num 1 (1+ bin-num))) ((= bin-num num-of-bins)) (cond ((null (aref value-array bin-num)) (setf (aref value-array bin-num) value) (incf (aref cnt-array bin-num)) (return t)) ((= value (aref value-array bin-num)) (incf (aref cnt-array bin-num)) (return t)))))))))))) ;;; ;;;********************************** ;;; (defun CONVERT-DATA-TO-DISCRETE (n-data real-pt-bin-array out-file convert-file) "replace values for variables converted to discretes" (when (stringp convert-file) (save-convert-array convert-file real-pt-bin-array)) (dotimes (n n-data) (let ((variable-array (aref *base-data* n)) (new-value nil) (value nil)) (dotimes (variable-index *n-variables*) (when (equal (nth variable-index *variable-types*) 'discrete) (setf value (aref variable-array variable-index)) (cond ((> (length (bin-assoc-list (aref real-pt-bin-array variable-index))) 1) (setf new-value (second (assoc value (bin-assoc-list (aref real-pt-bin-array variable-index)))))) (t ; for non-converted discretes (if (null value) (setf new-value 0) (setf new-value value)))) (setf (aref variable-array variable-index) new-value))))) (collect-real-priors-from-data :fixed-sigma nil) (write-*base-data* out-file)) ;;; ;;;********************************** ;;; (defun CONVERT-*TEST-DATA*-TO-DISCRETE (&key n-data convert-file) "replace values for variables converted to discretes" (let ((*print-array* t) (*print-length* nil) (*print-level* nil) (real-pt-bin-array nil)) (with-open-file (stream (string convert-file) :direction :input) (setf real-pt-bin-array (read stream))) (dotimes (n n-data) (let ((variable-array (aref *test-data* n)) (new-value nil) (value nil)) (dotimes (variable-index *n-variables*) (when (and (equal (nth variable-index *variable-types*) 'discrete) (not (null (bin-assoc-list (aref real-pt-bin-array variable-index))))) (setf value (aref variable-array variable-index)) (cond ((null value) (setf new-value 0)) (t (let ((map-pair (assoc value (bin-assoc-list (aref real-pt-bin-array variable-index))))) (cond ((not (null map-pair)) (setf new-value (second map-pair))) (t (format t "~% value ~A not in map array for attribute index ~A" value variable-index) (setf new-value 0)))))) ; set to missing (setf (aref variable-array variable-index) new-value))))))) ;;; ;;;********************************** ;;; (defun SAVE-CONVERT-ARRAY (out-file real-pt-bin-array) "save conversion array for use on prediction data" (let ((*print-array* t) (*print-length* nil) (*print-level* nil)) (with-open-file (stream (string out-file) :direction :output) (format stream "~2& ; real-pt-bin-array ~& ~A" real-pt-bin-array)))) ;;; ;;;********************************** ;;; (defun WRITE-*BASE-DATA* (out-file) (let ((*print-array* t) (*print-length* nil) (*print-level* nil)) (with-open-file (stream (string out-file) :direction :output) (format stream "~2& ; *variable-types* ~& ~A" *variable-types*) (format stream "~2& ; *n-variables* ~& ~A" *n-variables*) (format stream "~2& ; *variable-descriptions* ~& ~S" *variable-descriptions*) ; note ~S (format stream "~2& ; *disc-var-ranges* ~& ~A" *disc-var-ranges*) (format stream "~2& ; *disc-priors* ~& ~A" *disc-priors*) (format stream "~2& ; *real-priors* ~& ~A" *real-priors*) (format stream "~2& ; *N-data* ~& ~A ~&" *N-data*) (format stream "~2& ; *base-data* ~& ~S ~&" *base-data*)))) ; note ~S ;;; ;;;********************************** ;;; (defun READ-*BASE-DATA* (save-file &key (split-class-num nil)) "read *base-data*, etc from save file - does equivalent of read-db" (setf *current-db* (string save-file)) (setf *split-class-num* split-class-num) (let ((*print-array* t) (*print-length* nil) (*print-level* nil)) (with-open-file (stream (string save-file) :direction :input) (setf *variable-types* (read stream)) (setf *n-variables* (read stream)) (setf *variable-descriptions* (read stream)) (setf *disc-var-ranges* (read stream)) (setf *disc-priors* (read stream)) (setf *real-priors* (read stream)) (setf *real-bases* (Make-*Real-Bases*-List)) (Update-Real-MML-Constants) (setf *N-data* (read stream)) (setf *base-data* (read stream)) (setq *curr-wts* (make-array *N-data* :adjustable t :initial-element nil))))) ;;; ;;;********************************** ;;; (defun GENERATE-HEADER-FILE (header-file &key N-data title) "generate header file for data base" (let ((*print-array* t) (*print-length* nil) (*print-level* nil)) (with-open-file (stream (string header-file) :direction :output) (format stream "~2& ; data base description ~& ; ~S" title) (format stream "~2& ; *variable-types* ~& ~A" *variable-types*) (if (not (null *variable-descriptions*)) (format stream "~2& ; *variable-descriptions* ~& ~S" *variable-descriptions*)) (format stream "~2& ; *disc-var-ranges* ~& ~A" (generate-*disc-var-ranges*)) (format stream "~2& ; *disc-priors* ~& ~A" nil) (format stream "~2& ; *real-priors* ~& ~A" (generate-*real-priors*)) (format stream "~2& ; *N-data* ~& ~A ~&" N-data)))) ;;; ;;;******************************************************* (nth 5 *class-wt-ordering*) ;;; (defun GET-CLASS-WEIGHT-ORDERING () "compute ordering of classes by class membership (weight)" (let* ((classes-array (part-classes *partition*)) (sort-array ())) (dotimes (N-class (length classes-array)) (setf sort-array (cons (list (class-wt (aref classes-array N-class)) N-class) sort-array))) (setf *class-wt-ordering* (mapcar #'second (sort sort-array #'> :key #'first))))) ;;; ;;;********************************** ;;; (read&validate-db ">marshall>autoclass>aids>aids.db" :n-data 529 :n-variables 259 :db-params t :variable-descriptions t) (defun READ&VALIDATE-DB (in-file &key n-data n-variables (db-params nil) (variable-descriptions nil) (header-file nil) (init-case-number 1) (case-number-discontinuities ())) "check data base for correct n-data and n-variables counts - ignores data base definition parameters, if they are present (db-params = t) case-number-discontinuities -> (1310 1183) means case 1183 follows 1310 and sequence continues" ;; assumes that the first attribute of each case is the case number (with-open-file (stream (string in-file) :direction :input) (when (and db-params (null header-file)) (read stream) ; *variable-types* (if variable-descriptions (read stream)) ; *variable-descriptions* (read stream) ; *variable-types* (read stream) ; *disc-priors* (read stream) ; *real-priors* (read stream)) ; *n-data* ;; Read the data base. (let ((value nil) (n-data-cnt init-case-number) (exit nil)) (do ((n-datum 0 (1+ n-datum))) ((= n-datum n-data)) (do ((n-var 0 (1+ n-var))) ((= n-var n-variables)) (setq value (read stream)) ; first attribute ;; check for case-number-discontinuities (when (= n-var 0) (when (and (>= (length case-number-discontinuities) 2) (= (1- n-data-cnt) (first case-number-discontinuities)) (= value (second case-number-discontinuities))) (setf n-data-cnt (second case-number-discontinuities)) (setf case-number-discontinuities (nthcdr 2 case-number-discontinuities))) (when (/= value n-data-cnt) (format t "~& Bad attribute cnt in n-data ~A" (1- n-data-cnt)) (setf exit t) (return t)))) (if exit (return t)) (incf n-data-cnt))))) ;;; ;;;********************************** ;;; (defun SEQUENCE-CASES (in-file out-file) "add new att 0 - 0 based sequence number to a data base" (with-open-file (stream (string in-file) :direction :input) (with-open-file (out-stream (string out-file) :direction :output) (let ((n-data-cnt 0) (line-string "")) (loop (setf line-string (read-line stream nil 'eof)) (if (equal line-string 'eof) (return t)) (format out-stream "~%~A" (concatenate 'string (format nil "~A " n-data-cnt) line-string)) (incf n-data-cnt)))))) ;;; ;;;********************************** ;;;