;;; -*- Mode: LISP; Package: :cl-user; BASE: 10; Syntax: ANSI-Common-Lisp; -*- ;;; ;;; Touched: Sun Nov 20 15:04:19 2005 +0530 ;;; Time-stamp: <2008-06-15 06:55:01 madhu> ;;; Bugs-To: enometh@net.meer ;;; Copyright (C) 2005 Madhu. All rights Reserved ;;; ;;; $Id: mytrie.lisp,v 1.8 2007/12/31 07:11:46 madhu Exp madhu $ ;;; (defpackage "TRIE" (:use "CL") (:export "GETTRIE" "SETTRIE" "MAPTRIE" "MAKE-TRIE" "REMTRIE" "MATCHTRIE" "TRIE-COMPLETIONS" "TRIE-RANDOM-KEY" "READ-DICT" "READ-ORDERED-DICTS" "*WORDS-PATH*" "*WORDS*" "+MAX-WORD-SIZE+" "*ORDERED-DICTIONARY*" "COUNT-CONSES" "COMPRESS-TRIE" "PARSE-IDENTIFIER" "TRIE-SET-IDENTIFIER" "SEGMENT-SENTENCE" "INTERN-STRING" "COMM-TRIES")) (in-package "TRIE") (declaim (optimize (speed 3))) ;;; ;;; consing newbie version ;;; ---------------------------------------------------------------------- ;;; TRIE: (VAL . ALIST) | CDR TRIE == (SUB-KEY . SUB-TRIE)+ ;;; ALIST: (SUB-KEY . TRIE)+ | ;;; KEY: SUB-KEY+ | ;;; (defun trie-descend (trie sub-key) "Internal. Return sub-trie associated with SUB-KEY." (let ((cons (assoc sub-key (cdr trie)))) (when cons (cdr cons)))) (defun trie-get (trie key &optional default) "Internal. Second return value is non-NIL if the node for KEY exists in TRIE. NOTES: DEFAULT is returned when KEY is not found in the TRIE, or when its value of KEY in the trie is NIL. If a key is present, calling TRIE-GET on any prefix of key will yield a non-nil second return value. The second return value cannot be used to determine if KEY is present or absent from the trie." (if (null key) (if trie (values (or (car trie) default) t) (values default nil)) (let ((sub-trie (trie-descend trie (car key)))) (if sub-trie (trie-get sub-trie (cdr key) default) (values default nil))))) (defun trie-set (trie key &optional (val T)) (if (null key) (setf (car trie) val) (let ((sub-trie (trie-descend trie (car key)))) (unless sub-trie (setq sub-trie (cons nil nil)) (push (cons (car key) sub-trie) (cdr trie))) (trie-set sub-trie (cdr key) val)))) (defun trie-delete (trie key) "Internal." (if (null key) (prog1 (car trie) (setf (car trie) nil)) (let ((sub-trie (trie-descend trie (car key)))) (when sub-trie (multiple-value-prog1 (trie-delete sub-trie (cdr key)) (unless (or (car sub-trie) (cdr sub-trie)) (setf (cdr trie) ; cmu bug in delete (delete sub-trie (cdr trie) :test #'eq :key #'cdr)))))))) (defun trie-completions (trie &optional prefix key-so-far) "Return a list of keys in TRIE with prefix PREFIX." (if prefix (let ((sub-trie (trie-descend trie (car prefix)))) (when sub-trie (trie-completions sub-trie (cdr prefix) (cons (car prefix) key-so-far)))) (nconc (if (car trie) (list (reverse key-so-far))) (loop for (sub-key . sub-trie) in (cdr trie) nconc (trie-completions sub-trie prefix (cons sub-key key-so-far)))))) (defun trie-merge (trie1 trie2) "Merge TRIE2 into TRIE1. Destroys TRIE1, copies TRIE2." (loop for x in (cdr trie2) for (sub-key2 . sub-trie2) = x for cons1 = (assoc sub-key2 (cdr trie1)) ; sub-key1 sub-trie1 if cons1 do (setf (cdr cons1) (trie-merge (cdr cons1) sub-trie2)) else do (push (copy-tree x) (cdr trie1))) trie1) (defun trie-merge* (trie1 trie2) "Merge TRIE2 into TRIE1. Destroys TRIE1 and reuses TRIE2 structure." (loop for x in (cdr trie2) for (sub-key2 . sub-trie2) = x for cons1 = (assoc sub-key2 (cdr trie1)) ; sub-key1 sub-trie1 if cons1 do (setf (cdr cons1) (trie-merge* (cdr cons1) sub-trie2)) else do (push x (cdr trie1))) trie1) (defun trie-complete (trie &optional prefix (new-trie (cons nil nil))) "Return a trie containing completions of PREFIX in TRIE." (cond ((endp prefix) (when (car trie) (unless (car new-trie) ; warn? (setf (car new-trie) (car trie)))) (values (trie-merge new-trie trie) T)) ; internal use only (T (let ((sub-trie (trie-descend trie (car prefix)))) (if (endp sub-trie) new-trie (let ((cons (assoc (car prefix) (cdr new-trie))) ret) (if cons (setq ret (nth-value 1 (trie-complete sub-trie (cdr prefix) (cdr cons)))) (let ((xtrie (cons nil nil))) (when (nth-value 1 (trie-complete sub-trie (cdr prefix) xtrie)) (push (cons (car prefix) xtrie) (cdr new-trie)) (setf ret t)))) (values new-trie ret))))))) ;;; ;;; ;;; #+nil (defun trie-match (trie &optional prefix) "Consing, no-reusing-structure." (if (endp prefix) (if (car trie) (cons (car trie) nil)) (let ((assocs (cond ((eq (car prefix) :wild) (loop for x in (cdr trie) for (sub-key . sub-trie) = x if (when sub-trie (let ((new-sub-trie (trie-match sub-trie (cdr prefix)))) (when new-sub-trie (cons sub-key new-sub-trie)))) collect it)) ((consp (car prefix)) (loop for sub-key in (car prefix) for x = (assoc sub-key (cdr trie)) for sub-trie = (cdr x) if (when sub-trie (let ((new-sub-trie (trie-match sub-trie (cdr prefix)))) (when new-sub-trie (cons sub-key new-sub-trie)))) collect it)) (t (let* ((sub-key (car prefix)) (x (assoc sub-key (cdr trie)))) (when x (destructuring-bind (sub-key . sub-trie) x (when sub-trie (let ((new-sub-trie (trie-match sub-trie (cdr prefix)))) (when new-sub-trie (list (cons sub-key new-sub-trie)))))))))))) (when assocs (cons (car trie) assocs))))) (defun trie-match (trie &optional prefix (new-trie (cons nil nil))) "Return a trie containing all keys that match wildcarded SEARCH" (if (endp prefix) (when (car trie) (setf (car new-trie) (car trie)) (values new-trie t)) ;; second value for internal use only.. (let (ret) ; whether trie has any keys below this level (if (eq (car prefix) :wild) (loop for x in (cdr trie) for (sub-key . sub-trie) = x do (if sub-trie (let ((cons (assoc sub-key (cdr new-trie)))) (if cons (trie-match sub-trie (cdr prefix) (cdr cons)) (let* ((xtrie (cons nil nil))) (when (nth-value 1 (trie-match sub-trie (cdr prefix) xtrie)) (push (cons sub-key xtrie) (cdr new-trie)) (unless ret (setf ret t)))))))) (if (consp (car prefix)) (loop for sub-key in (car prefix) for sub-trie = (cdr (assoc sub-key (cdr trie))) do (if sub-trie (let ((cons (assoc sub-key (cdr new-trie)))) (if cons (trie-match sub-trie (cdr prefix) (cdr cons)) (let* ((xtrie (cons nil nil))) (when (nth-value 1 (trie-match sub-trie (cdr prefix) xtrie)) (push (cons sub-key xtrie) (cdr new-trie)) (unless ret (setf ret t)))))))) (let ((sub-trie (trie-descend trie (car prefix)))) (if sub-trie (let ((cons (assoc (car prefix) (cdr new-trie)))) (if cons (trie-match sub-trie (cdr prefix) (cdr cons)) (let* ((xtrie (cons nil nil))) (when (nth-value 1 (trie-match sub-trie (cdr prefix) xtrie)) (push (cons (car prefix) xtrie) (cdr new-trie)) (setf ret t))))))))) (values new-trie ret)))) #+nil (defun trie-matchn (trie &optional prefix) "Destructive." (if (endp prefix) (when (car trie) (unless (endp (cdr trie)) (setf (cdr trie) nil)) trie) (cond ((eq (car prefix) :wild) ; mutate trie (let (not-empty-p) (loop with prev-cons = trie for assocs ;;; on (cdr trie) = (or (cdr trie) (loop-finish)) then (if assocs (cdr assocs) (loop-finish)) for x = (car assocs) for (sub-key . sub-trie) = x do (if (endp sub-trie) ;; remove x (setf (cdr prev-cons) (cdr assocs)) (let ((new-sub-trie (trie-matchn sub-trie (cdr prefix)))) (if (endp new-sub-trie) (setf (cdr prev-cons) (cdr assocs)) (progn (setf prev-cons assocs) (unless not-empty-p (setf not-empty-p t))))))) (if not-empty-p (when (cdr trie) trie) (setf (cdr trie) nil)))) ((consp (car prefix)) (let (not-empty-p) (loop with prev-cons = trie ;; then (cdr prev-cons) for assocs ;;; on (cdr trie) = (or (cdr trie) (loop-finish)) then (if assocs (cdr assocs) (loop-finish)) for x = (car assocs) for (sub-key . sub-trie) = x do (cond ((find sub-key (car prefix)) (if (endp sub-trie) (setf (cdr prev-cons) (cdr assocs)) (let ((new-sub-trie (trie-matchn sub-trie (cdr prefix)))) (if (endp new-sub-trie) (setf (cdr prev-cons) (cdr assocs)) (progn (setf prev-cons assocs) (unless not-empty-p (setf not-empty-p t))))))) (t (setf (cdr prev-cons) (cdr assocs))))) (if not-empty-p (when (cdr trie) trie) (setf (cdr trie) nil)))) (t (let* ((sub-key (car prefix)) (assocs (cdr trie)) (x (assoc sub-key assocs))) (if x (destructuring-bind (key-dash . sub-trie) x (declare (ignore key-dash)) (let ((new-sub-trie (trie-matchn sub-trie (cdr prefix)))) (cond (new-sub-trie (assert (eq new-sub-trie sub-trie)) (unless (eq x (cadr trie)) (setf (cadr trie) x)) (unless (endp (cddr trie)) (setf (cddr trie) nil)) (when (cdr trie) trie) ) (t (setf (cdr trie) nil))))) (setf (cdr trie) nil))))))) (defun shuffle (seq &aux (len (length seq))) (loop for i below len for j = (random len) unless (= i j) do (rotatef (elt seq i) (elt seq j))) seq) (defun trie-random-key (trie &optional shortest key-so-far) "Not uniformly distributed." (when (car trie) (return-from trie-random-key (nreverse key-so-far))) (let ((children (shuffle (copy-list (cdr trie))))) (tagbody loop (when children (let* ((length (length children)) (idx (random (if (car trie) (1+ length) length)))) (when (car trie) (when (= idx length) (return-from trie-random-key (nreverse key-so-far)))) (let* ((cons (pop children)) (sub-key (car cons)) (ret (trie-random-key (cdr cons) shortest (cons sub-key key-so-far)))) (when ret (return-from trie-random-key ret)) (go loop))))))) ;;; ---------------------------------------------------------------------- ;;; ;;; PUBLIC FUNCTIONS (FOLLOWING THE HASH TABLE INTERFACE) ;;; (defun make-trie () (cons nil nil)) (defconstant +explicit-null+ '+explicit-null+) (defun gettrie (key trie &optional default) (let* ((bogus (gensym)) (retval (trie-get trie (etypecase key (cons key) (sequence (coerce key 'list))) bogus))) (if (eq retval bogus) (values default nil) (values (if (eq retval +explicit-null+) nil retval) t)))) (define-setf-expander gettrie (key trie &optional default) (let ((key-var (gensym)) (trie-var (gensym)) (default-var (gensym)) (new-value-var (gensym))) (values `(,key-var ,trie-var ,@(if default `(,default-var))) ;Temporary variables. `(,key ,trie ,@(if default `(,default))) ;Value forms. `(,new-value-var) ;Store variables. `(prog1 ,new-value-var (trie-set ,trie-var (etypecase ,key-var ;Storing form. (cons ,key-var) (sequence (coerce ,key-var 'list))) (if (null ,new-value-var) +explicit-null+ ,new-value-var))) `(gettrie ,key-var ,trie-var ,@(if default `(,default-var)))))) (defun remtrie (key trie) (trie-delete trie (etypecase key (cons key) (sequence (coerce key 'list))))) (defun maptrie (map-function trie) (labels ((rec (trie &optional prefix key-so-far) (if (null prefix) (;;nconc progn(if (car trie) (list (funcall map-function (reverse key-so-far) (car trie)))) (if (cdr trie) (loop for (sub-key . sub-trie) in (cdr trie) ;; nconc do (rec sub-trie nil (cons sub-key key-so-far))))) (let ((sub-trie (trie-descend trie (car prefix)))) (when sub-trie (rec sub-trie (cdr prefix) (push (car prefix) key-so-far))))))) (rec trie) (values))) (defun matchtrie (key trie) (setq key (map 'list (lambda (x) (if (eql x #\.) :wild x)) key)) (trie-match trie key)) (defun trie-count (trie) "Example." (let ((sum 0)) (maptrie (lambda (k v) (incf sum)) trie) sum)) ;; madhu 061208 (defun descend-trie (string trie &key (start 0) end) "Return the sub-trie after walking key sequence STRING." (declare (type string string) (type list trie)) (loop for i of-type fixnum from start below (or end (length string)) for c = (elt string i) for sub-trie of-type list = (trie-descend trie c) then (trie-descend sub-trie c) if (endp sub-trie) return nil finally (return sub-trie))) (defun descend-trie (trie string &key (start 0) end f) "Example. Return the sub-trie after descending TRIE with keys from the sequence, STRING. F if supplied is mapped on each trie node visited during descent. F should be a function of 2 arguments: the trie node and the subkey corresponding to that trie node." (reduce (lambda (trie1 c) (if f (funcall f trie1 c)) (if trie1 (trie-descend trie1 c) (return-from descend-trie nil))) string :initial-value trie :start start :end end)) ;;; ---------------------------------------------------------------------- ;;; ;;; DICTIONARY INTF ;;; ;;; #+cmu #p"home:extern/arccc-0.1a/examples/wlist.txt" (defvar *words-path* nil #+nil #p"home:wlist.txt" "Path to dictionary") (defvar +max-word-size+ 20 "Maximum length of words in dictionary") (defvar *words* (make-trie) "Trie of dictionary words") (defun read-dict (&optional (*words-path* *words-path*)) "Sets *WORDS*" (setq *words* (make-trie)) #+cmu (ext:gc :full t) (let (#+cmu(ext:*gc-verbose* nil)) (time (with-open-file (stream *words-path* :direction :input) (loop for line = (read-line stream nil) while line do (TRIE-SET *words* (coerce line 'list) T)))))) (defvar *ordered-dictionary* (make-array +max-word-size+ :initial-element nil) "Array of tries indexed by word length") (defun read-ordered-dicts (&optional (*words* *words*)) "Sets *ORDERED-DICTIONARY*" #+cmu (ext:gc :full t) (let (#+cmu(ext:*gc-verbose* nil)) (time (MAPTRIE (lambda (k v) (declare (ignore v)) (let ((len (length k))) (unless (aref *ordered-dictionary* len) (setf (aref *ordered-dictionary* len) (MAKE-TRIE))) (setf (GETTRIE k (aref *ordered-dictionary* len)) t))) *words*)) nil)) ;;; ---------------------------------------------------------------------- ;;; ;;; COMPARING TRIES ;;; ;; ;; Mon Jan 15 13:10:10 2007 +0530 ;; (defun comm-tries (trie1 trie2 &key comm-23 comm-13 comm-12) "Compare Trie1 and trie2 with the same headaches as unix COMM(1). Call COMM-23 with arguments (KEY VAL1) on elements unique to TRIE1 Call COMM-13 with arguments (KEY VAL2) on elements unique to TRIE2. Call COMM-12 with arguments (KEY VAL1 VAL2) on elements common to TRIE1 and TRIE2." (let ((seen-keys (TRIE:MAKE-TRIE)) (NOTEXISTS (make-symbol "NOTEXISTS"))) (TRIE:MAPTRIE (lambda (k1 v1) (let ((v2 (trie:gettrie k1 trie2 notexists))) (cond ((eq v2 notexists) (when comm-23 (funcall comm-23 k1 v1))) (T (when comm-12 (funcall comm-12 k1 v1 v2)) (setf (trie:gettrie k1 seen-keys) T))))) TRIE1) (TRIE:MAPTRIE (lambda (k2 v2) (let ((seen-p (trie:gettrie k2 seen-keys NOTEXISTS))) (when (eq seen-p NOTEXISTS) (let ((v1 (trie:gettrie k2 trie1 notexists))) (when (eq v1 notexists) (when comm-13 (funcall comm-13 k2 v2))))))) TRIE2))) ;; ;; Tue Mar 22 21:18:01 2005 +0530 ;; (defun walk-trie (trie &key ;TODO make this a method combination. (pre-process (lambda (k v) (declare (ignorable k)) (warn "pre-processing ~A" v) :descend)) (post-process (lambda (k v) (declare (ignorable k)) (warn "post-processing ~A" v)))) "The :pre-process method is invoked before descending a level. If this returns nil, the level is not descended, and the post-processing method is not invoked. Otherwise the level is descended, and The :post-process method is invoked when the recursion unwinds" (let ((key-so-far (make-array 20 :adjustable t :fill-pointer 0))) (labels ((recurse-1 (trie) (vector-push-extend 0 key-so-far) (unwind-protect (loop for pair in (cdr trie) do (setf (aref key-so-far (1- (fill-pointer key-so-far))) (car pair)) (recurse (cdr pair))) (decf (fill-pointer key-so-far)))) (recurse (trie &optional obj1) (let ((descend-p t)) (when (setf obj1 (car trie)) (when pre-process (setf descend-p (funcall pre-process key-so-far obj1)))) (when descend-p (recurse-1 trie) (when obj1 (when post-process (funcall post-process key-so-far obj1))))))) (recurse trie)))) ;;; ---------------------------------------------------------------------- ;;; ;;; PARSING ;;; (defun parse-identifier (string trie &key (start 0) end shortest-p) "TRIE is a trie of character keys. Second value if non-NIL indicates the position just past where the identifier was parsed before traversing the full length of STRING." (loop with retval and retidx of-type fixnum = 0 and length of-type fixnum = (length string) for i of-type fixnum from start below (if end (if (<= end length) end length) length) for c = (elt string i) for sub-trie = (trie-descend trie c) then (trie-descend sub-trie c) while sub-trie if (car sub-trie) do (setq retval (car sub-trie) retidx (1+ i)) (if shortest-p (loop-finish)) finally (if retval (return (values retval retidx))))) ;; ;; madhu 061220 ;; (defun trie-set-identifier (key trie &key (value nil value-supplied-p) (start 0) end) "If TRIE contains KEY return its corresponding value. Otherwise set the value of KEY to VALUE. IF VALUE is not supplied, the value defaults to a copy of the specified portion of KEY." (loop for i of-type fixnum from start below (or end (length key)) for sub-key = (elt key i) for sub-trie = (or (trie::trie-descend trie sub-key) (let ((x (cons nil nil))) (push (cons sub-key x) (cdr trie)) x)) then (or (trie::trie-descend sub-trie sub-key) (let ((x (cons nil nil))) (push (cons sub-key x) (cdr sub-trie)) x)) finally (return (if (car sub-trie) (car sub-trie) (setf (car sub-trie) (if value-supplied-p value (subseq key start end))))))) ;; ;; Sun Jun 18 15:38:24 2006 +0530 ;; (defun segment-sentence (trie string &optional (start 0) (end (length string)) output) "Template code. Segment STRING into a list of strings present in TRIE, printing all solutions." (let ((p start) (sub-trie trie) resumptions) (tagbody loop (assert (< p end) nil "Start index ~A > end index ~A" start end) (setq sub-trie (trie::trie-descend sub-trie (aref string p))) (cond (sub-trie (cond ((car sub-trie) (let ((parsed-word (subseq string start (1+ p)))) (cond ((= (1+ p) end) (warn "SOLUTION: ~A" (reverse (cons parsed-word output))) (go cont)) (t (cond ((cdr sub-trie) ; choice-point (push (list (cons parsed-word output) (1+ p) end) resumptions) (incf p) (go loop)) (t (push parsed-word output) (setq sub-trie trie start (1+ p)) (incf p) (go loop))))))) (t (cond ((= (1+ p) end) ; fail at end-of-input (go cont)) (t (incf p) (go loop)))))) (t (go cont))) ;; fail at end-of-trie cont (cond ((endp resumptions) T) (t (destructuring-bind (output1 start1 end1) (car resumptions) (setq output output1 start start1 end end1 p start sub-trie trie resumptions (cdr resumptions)) (go loop))))))) ;;; ---------------------------------------------------------------------- ;;; ;;; ;;; (defvar *strings-trie* (cons nil nil) "Internal. A TRIE to intern strings in.") ;; This version trie cannot have strings as sub-keys because we use ;; eql to compare elements! workaround: intern the strings first in ;; another trie. This functionality overlaps with TRIE-SET-IDENTIFIER. (defun intern-string (string &key (start 0) end (strings-trie *strings-trie*) (copy-p t)) (declare (type string string) (type (integer 0) start) (type (or null (integer 1)) end) (type list strings-trie)) (loop for i of-type fixnum from start below (or end (length string)) for sub-key = (char string i) for trie = strings-trie then sub-trie for sub-trie = (trie-descend trie sub-key) if (endp sub-trie) do (push (cons sub-key (setq sub-trie (cons nil nil))) (cdr trie)) finally (return (cond ((null (car sub-trie)) (setf (car sub-trie) (if (and (zerop start) (or (null end) (= end (length string))) (not copy-p)) string (subseq string start end)))) (t (car sub-trie)))))) (defun trie-rationalize-subkey-strings (trie &optional (strings-trie *strings-trie*) ) "Intern the STRING sub-keys of TRIE in STRINGS-TABLE and replace them in-place." (when trie (loop for x in (cdr trie) for (sub-key . sub-trie) = x do (setf (car x) (intern-string sub-key :strings-trie strings-trie)) (trie-rationalize-subkey-strings sub-trie strings-trie)))) ;;; ---------------------------------------------------------------------- ;;; ;;; (defun traverse-top-down (trie &optional f key-so-far) "Template code: Traverse TRIE bottom up, applying F on each each node." (declare (special key-so-far)) (catch :do-not-descend ;todo tighten (if f (funcall f trie) (warn "~A" (reverse key-so-far))) (when (consp trie) (loop for x in (cdr trie) for (letter . sub-trie) = x do (traverse-top-down sub-trie f (cons letter key-so-far)))))) (defun traverse-bottom-up (trie &optional f key-so-far) "Template code: Traverse TRIE bottom up, applying F on each each node." (declare (special key-so-far)) (cond ((endp (cdr trie)) (if f (funcall f trie) (warn "~A" (reverse key-so-far)))) (t (loop for x in (cdr trie) for (letter . sub-trie) = x do (traverse-bottom-up sub-trie f (cons letter key-so-far))) (if f (funcall f trie) (warn "~A" (reverse key-so-far)))))) ;;; ;;; (defun compress-trie (trie &key (+hashmax+ 1031) (+hash0+ 1) (hash-subkey #'char-code) (hash-val #'(lambda (x) (if x 1 0)))) "Compress TRIE into a new DAG by sharing common accepting states. Returns the s-exp DAG." ; uses Huet's hashing scheme (let ((table (make-array +hashmax+ :initial-element nil))) (labels ((recall (trie-node hashkey) (values (loop for elem in (aref table hashkey) if (equalp trie-node elem) return elem finally (push trie-node (aref table hashkey)) (return trie-node)) hashkey)) (compress-trie-internal (trie) (cond ((endp (cdr trie)) (recall trie (funcall hash-val (car trie)))) (t (let ((hash +hash0+) sub-tries1) ; linear hash code. (loop for (letter . sub-trie) in (cdr trie) do (multiple-value-bind (sub-trie1 hash1) (compress-trie-internal sub-trie) (incf hash (* (funcall hash-subkey letter) hash1)) (push (cons letter sub-trie1) sub-tries1))) (recall (cons (car trie) (nreverse sub-tries1)) (mod (+ hash (funcall hash-val (car trie))) +hashmax+))))))) (compress-trie-internal trie)))) (defun count-conses (sexp &optional (table (make-hash-table :test #'eq)) (count 0)) "Return as values the number of cons cells and the number of distinct cons cells in SEXP." (values (cond ((endp sexp) count) (t (incf (gethash sexp table 0)) (if (consp (car sexp)) (count-conses (car sexp) table (+ count 1 (count-conses (cdr sexp) table))) (count-conses (cdr sexp) table (1+ count))))) (hash-table-count table))) (defun trie-count-nodes (trie) "Example." (let ((count 0) (table (make-hash-table :test #'eq))) (trie::traverse-bottom-up trie (lambda (node) (incf count) (incf (gethash node table 0)))) (values count (hash-table-count table)))) ;;; ---------------------------------------------------------------------- ;;; ;;; ITERATOR ;; ;; Unhygenic macro use ;; (defmacro enqueue-at-end (x queue queue-last-cons) ; enqueue at last position `(let ((new-cons (cons ,x nil))) (cond ((endp ,queue-last-cons) (setq ,queue-last-cons new-cons) (setq ,queue ,queue-last-cons)) (t (setf (cdr ,queue-last-cons) new-cons) (setq ,queue-last-cons new-cons))))) (defmacro queue-pop (queue queue-last-cons) ; deque at first position `(cond ((endp ,queue-last-cons) #+dbg (assert (endp ,queue))) ((eq ,queue-last-cons ,queue) (prog1 (car ,queue-last-cons) (setq ,queue-last-cons nil ,queue nil))) (t (pop ,queue)))) (defmacro enqueue-at-begining (x queue queue-last-cons) ; enqueue at first position `(let ((new-cons (cons ,x nil))) (cond ((endp ,queue-last-cons) (setq ,queue-last-cons new-cons) (setq ,queue new-cons)) (t (setf (cdr new-cons) ,queue) (setq ,queue new-cons))))) (defmacro key-push (x key last-cons prev-cons) ; push at last position `(let ((new-cons (cons ,x nil))) (cond ((endp ,last-cons) #+dbg (assert (endp ,prev-cons)) #+dbg (assert (endp ,key)) (setq ,last-cons new-cons) (setq ,key new-cons)) (t (setf (cdr ,last-cons) new-cons) (setq ,prev-cons ,last-cons) (setq ,last-cons new-cons))))) (defmacro key-pop (key last-cons prev-cons) ; pop at last position `(cond (,prev-cons #+dbg (assert (eq (cdr ,prev-cons) ,last-cons)) (prog1 (car ,last-cons) (setf (cdr ,prev-cons) nil) (setq ,last-cons ,prev-cons) (setq ,prev-cons nil))) (,last-cons (prog1 (car ,last-cons) (cond ((eq ,last-cons ,key) (setq ,key nil ,last-cons nil)) ((eq ,last-cons (cdr ,key)) (setf (cdr ,key) nil) (setq ,last-cons ,key)) (t (loop for p on ,key ; (prev . (last . LAST)) when (eq (cddr p) ,last-cons) do (setf (cddr p) nil) (setq ,prev-cons p ,last-cons (cdr p)) (return) finally (error "Sanity 1")))))) (t (error "Sanity 2")))) ;;; ---------------------------------------------------------------------- ;;; ;;; #+nil (defun iterate-trie-sequentially (trie &aux (stack-marker (gensym))) (let (key last-cons prev-cons queue queue-tail) (when (car trie) (warn "key=~A val=~A" key (car trie))) (loop for (sub-key . sub-trie) in (cdr trie) do (enqueue-at-end (cons sub-key sub-trie) queue queue-tail)) (loop until (endp queue) for x = (queue-pop queue queue-tail) do (assert x) (print queue) if (eq x stack-marker) do (key-pop key last-cons prev-cons) else do (destructuring-bind (sub-key . sub-trie) x (assert sub-key) (enqueue-at-begining stack-marker queue queue-tail) (key-push sub-key key last-cons prev-cons) (when (car sub-trie) (warn "key=~A val=~A" key (car sub-trie))) (loop for (sub-key2 . sub-trie2) in (cdr sub-trie) do (push (cons sub-key2 sub-trie2) queue)))))) ;; madhu 051113 ;; (defmacro with-trie-iterator ((function trie) &body body) "WITH-TRIE-ITERATOR ((function trie) &body body) provides a method of manually looping over the elements of a trie. FUNCTION is bound to a generator-macro that, withing the scope of the invocation, returns one or three values. The first value tells whether any objects remain in the trie. When the first value is non-NIL, the second and third values are the key and the value of the next object." (let ((n-function (gensym "WITH-TRIE-ITERATOR-"))) `(let ((,n-function (let* ((queue T) key last-cons prev-cons queue-tail (stack-marker (gensym))) (labels ((,function () (cond ((listp queue) (if (endp queue) NIL (let ((x (queue-pop queue queue-tail))) (cond ((eq x stack-marker) (key-pop key last-cons prev-cons) (,function)) (t (destructuring-bind (sub-key . sub-trie) x (assert sub-key) (enqueue-at-begining stack-marker queue queue-tail) (key-push sub-key key last-cons prev-cons) (loop for (sub-key2 . sub-trie2) in (cdr sub-trie) do (push (cons sub-key2 sub-trie2) queue)) (if (car sub-trie) (values key (car sub-trie)) (,function)))))))) (T (setq queue nil) (loop for (sub-key . sub-trie) in (cdr ,trie) do (enqueue-at-end (cons sub-key sub-trie) queue queue-tail)) (if (car ,trie) (values T key (car ,trie)) (,function)))))) #',function)))) (macrolet ((,function () '(funcall ,n-function))) ,@body)))) ;;; ---------------------------------------------------------------------- ;;; ;;; THE FOLLOWING METHODS DEAL WITH THE FOLLOWING REPRESNTATION OF ;;; TRIE-VAL ;;; ;;; TRIE-VAL = (NIL|T . PLIST) (defun annotate-trie1-with-counts (trie) "TRIE-VAL = (NIL|T . PLIST)." (traverse-bottom-up trie (lambda (trie) (setf (getf (cdr (car trie)) :count) (if (endp (cdr trie)) 1 (loop for (sub-key . sub-trie) in (cdr trie) sum (getf (cdr (car sub-trie)) :count))))))) (defun make-trie1-from-dict (dict1 &aux ($t (cons (list NIL) NIL))) "TRIE-VAL = (NIL|T . PLIST). DICT1 is a vector of words to add. Returns a new trie annotated with counts." (loop for word across DICT1 do (incf (getf (cdr (car $t)) :count 0)) (loop for trie = $t then sub-trie for sub-key across word for sub-trie = (or (trie-descend trie sub-key) (let ((x (cons (list NIL) nil))) (push (cons sub-key x) (cdr trie)) x)) do (incf (getf (cdr (car sub-trie)) :count 0)) finally (setf (car (car trie)) T))) $t) (defun trie1-purge-property (trie indicator) "TRIE-VAL = (NIL|T . PLIST)" (trie::traverse-bottom-up trie (lambda (node) (if (consp (car node)) (remf (cdar node) indicator))))) (defun sort-trie1-alist-by-count (trie) "TRIE-VAL = (NIL|T . PLIST)" (when (cdr trie) (setf (cdr trie) (sort (cdr trie) (lambda (x y) ; x y are (sub-key . sub-trie) (< (getf (cdar (cdr x)) :count) (getf (cdar (cdr y)) :count))))))) (defun trie1-some-random-word (trie &optional key-so-far) "Trie is annotated with COUNT and the alist is sorted by COUNT. TRIE-VAL = (NIL|T . PLIST)" (cond ((endp (cdr trie)) (if (caar trie) (nreverse key-so-far))) (t (let* ((count (getf (cdar trie) :count)) (needle (random count))) (reduce (lambda (x y) (assert (<= (getf (cdar (cdr x)) :count) (getf (cdar (cdr y)) :count))) y) (cdr trie)) (let ((cumulative-count 0) sub-key sub-trie (x (cdr trie)) z) (loop (if (endp x) (error "sanity: ")) (setq sub-key (car (car x)) sub-trie (cdr (car x)) x (cdr x)) (incf cumulative-count (getf (cdar sub-trie) :count)) #+nil (progn (setq z (nconc z (list cumulative-count))) (warn "cumulative-count=~D needle=~D z=~A" cumulative-count needle z)) (cond ((and (zerop needle) (caar trie)) ; XXX ; (return (nreverse key-so-far))) ((<= needle cumulative-count) (return (trie1-some-random-word sub-trie (cons sub-key key-so-far))))))))))) (defun trie1-count (trie) "TRIE-VAL = (NIL|T . PLIST)" (let ((count 0)) (trie:maptrie (lambda (k v) (declare (ignore v)) (if (car v) (incf count))) trie) count))