;;; -*- Mode: LISP; Package: :cl-user; BASE: 10; Syntax: ANSI-Common-Lisp; -*- ;;; ;;; Touched: Sun Mar 25 17:05:20 2007 +0530 ;;; Time-stamp: <2007-03-28 09:26:37 madhu> ;;; Bugs-To: ;;; Status: Experimental. Do not redistribute. ;;; Copyright (C) 2007 Madhu. All Rights Reserved. ;;; (defpackage "RCSFILE-PARSER-1-1" (:use "CL")) (in-package "RCSFILE-PARSER-1-1") (defvar *terminals* '("head" "branch" "access" "symbols" "locks" "comment" "expand" "strict" "date" "author" "state" "branches" "next" "desc" "log" "text")) (defvar *digits* '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) (defvar *specials* '(#\$ #\, #\. #\: #\; #\@)) (defvar *whitespace* '(#\Space #\Backspace #\Tab #\Newline #\Vt #\Formfeed #\Return)) (defun idchar-p (c) "Characters Permitted by RCSFILE(5)." (and (graphic-char-p c) (not (find c *specials*)) (not (find c *whitespace*)))) #+nil (defvar *idchars* (or '(#\! #\" #\# #\% #\& #\' #\( #\) #\* #\+ #\- #\/ #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\< #\= #\> #\? #\A #\B #\C #\D #\E #\F #\G #\H #\I #\J #\K #\L #\M #\N #\O #\P #\Q #\R #\S #\T #\U #\V #\W #\X #\Y #\Z #\[ #\\ #\] #\^ #\_ #\` #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\z #\{ #\| #\} #\~) #+nil (loop for i below 256 for c = (code-char i) if (idchar-p c) collect c))) #+nil (defun flatten (x &aux stack result) "RCSFILE. Flattens the list X while silently removing Null elements." (flet ((rec (item) (if (atom item) (when item (push item result)) (loop for elem in item do (push elem stack))))) (declare (inline rec)) (rec x) (loop while stack do (funcall #'rec (pop stack))) result)) (defun skip-whitespace (buffer &key (start 0) end) "RCSFILE. Returns as values T, if whitspace was skipped, and index where read terminated." (loop with found-p for i from start below (or end (length buffer)) for c = (aref buffer i) while (find c *whitespace*) unless found-p do (setf found-p t) finally (return (values found-p i)))) #+nil (skip-whitespace " 898") (defun parse-num (buffer &key (start 0) end) "RCSFILE. Returns as values a string denoting the num read and index where the read terminated. num ::= {digit | .}+ digit ::= 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9" (loop for i from start below (or end (length buffer)) for c = (char buffer i) while (or (eql c #\.) (find c *digits*)) collect c into num finally (return (values (if num (coerce num 'string)) i)))) #+nil (parse-num "03498") (defun parse-string (buffer &key (start 0) (end (length buffer))) "RCSFILE. Returns as values, a list of (BEGIN END) positions of @-escaped string in BUFFER, if found, and the index where the read terminated." (loop initially (unless (char= (char buffer start) #\@) (loop-finish)) for i = start then (1+ j) for j = (position #\@ buffer :start (1+ i) :end end) while j if (or (= (1+ j) end) (and (< (1+ j) end) (char/= (char buffer (1+ j)) #\@))) return (values (list (1+ start) j) (1+ j)) finally (return (values nil start)))) #|| (parse-string "@ foo @") (parse-string "@ @ ") (get 'common-lisp-indent-function 'tagbody) (defindent prog lisp-indent-tagbody) (intersection (cons #\. *digits*) *idchars*) ||# ;; Whitespace is tricky: it has have no significance except in ;; STRINGS. and should be implicitly skipped. However, white space ;; cannot appear within an ID, NUM, or SYM. Hack this: (DEFPACKAGE "RCSCAT" (:USE) (:EXPORT "NUM" "SYM" "ID" "STRING")) (defun parse-nonwhite (buffer &key (start 0) end) "Return three values: The category, the value and the position where read terminated. num ::= {digit | .}+ id ::= {num} idchar {idchar | num}* sym ::= {digit}* idchar {idchar | digit}*" (loop with maybe-num = nil and maybe-id = nil and maybe-digit = nil for i from start below (or (position-if (lambda (c) (member c *whitespace*)) buffer :start start :end end) end (length buffer)) for c = (char buffer i) do (cond ((digit-char-p c) (unless maybe-digit (setq maybe-digit i))) ((char= c #\.) (unless maybe-num (setq maybe-num i))) ((idchar-p c) (unless maybe-id (setq maybe-id i))) (t (loop-finish))) finally (if (< start i) (return (values (if maybe-id (if maybe-num 'RCSCAT:ID 'RCSCAT:SYM) 'RCSCAT:NUM) (subseq buffer start i) i)) (return (values nil nil start))))) #|| (parse-nonwhite "foobarf3 1.") (defmacro define-rcsfile-tokens (package-name) `(defpackage ,package-name (:use) (:export ,@(loop for x in *terminals* collect (string-upcase x)) ,@(loop for x in *specials* collect (string x)) ,@(loop for x in *digits* collect (string x))))) (define-rcsfile-tokens "RCSTOK") ||# (DEFPACKAGE "RCSTOK" (:USE) (:EXPORT "HEAD" "BRANCH" "ACCESS" "SYMBOLS" "LOCKS" "COMMENT" "EXPAND" "STRICT" "DATE" "AUTHOR" "STATE" "BRANCHES" "NEXT" "DESC" "LOG" "TEXT") (:export ":" ";")) (defparameter *rcsfile-lexer-trie* (cons nil nil) "TRIE = (VAL . ALIST) where ALIST = (SUB-KEY . SUB-TRIE)+") (defun set-identifier (string value) "RCSFILE. Sets value of key STRING to VALUE in *RCSFILE-LEXER-TRIE*." (loop for trie = *RCSFILE-LEXER-TRIE* THEN sub-trie for c across string for sub-trie = (cdr (assoc c (cdr trie))) if (endp sub-trie) do (push (cons c (setq sub-trie (cons nil nil))) (cdr trie)) finally (unless (null (car sub-trie)) (cerror "Continue: replace existing value." "String ~S has value ~S in the lexer trie." string (car sub-trie))) (setf (car sub-trie) value))) (defun parse-identifier (string &key (start 0) end shortest-p) "RCSFILE. Parse STRING to retrieve a symbol from *RCSFILE-LEXER-TRIE*. Second return value if NON-NIL indicates the position just past where the parsing stopped before traversing the full length of STRING." (loop with retval and retidx for i from start below (or end (length string)) for c = (elt string i) for sub-trie = (cdr (assoc c (cdr *RCSFILE-LEXER-TRIE*))) then (cdr (assoc c (cdr sub-trie))) if sub-trie if (car sub-trie) if shortest-p return (values (car sub-trie) (1+ i)) else do (setq retval (car sub-trie) retidx (1+ i)) finally (if retval (return (values retval retidx))))) ;; populate *RCSFILE-LEXER-TRIE* ;; (loop for x in *terminals* do (set-identifier x (find-symbol (string-upcase x) "RCSTOK"))) (defun parse-date (string &key (start 0) end) "RCSFILE. Returns a UTIME." (let* ((ypos (position #\. string :start start :end end)) (year (ecase (- ypos start) (2 (+ 1900 (parse-integer string :start start :end ypos))) (4 (parse-integer string :start start :end ypos)))) (mpos (position #\. string :start (1+ ypos) :end end)) (month (ecase (- mpos ypos) (3 (parse-integer string :start (1+ ypos) :end mpos)))) (dpos (position #\. string :start (1+ mpos) :end end)) (day (ecase (- dpos mpos) (3 (parse-integer string :start (1+ mpos) :end dpos)))) (hpos (position #\. string :start (1+ dpos) :end end)) (hour (ecase (- hpos dpos) (3 (parse-integer string :start (1+ dpos) :end hpos)))) (npos (position #\. string :start (1+ hpos) :end end)) (minute (ecase (- npos hpos) (3 (parse-integer string :start (1+ hpos) :end npos)))) (sec (parse-integer string :start (1+ npos) :end (+ 3 npos)))) (encode-universal-time sec minute hour day month year 0))) #|| (user:iso-8601-date :utime (parse-date "2006.01.07.20.07.24") :tz 0) (loop for x being each external-symbol of "RCSTOK" collect x) (load "home:cmu/Packages/fucc_0.2/sysdcl") (require 'fucc) ||# ;; ;; Nominated for the Best Product Name award since MS Word. ;; (FUCC:DEFPARSER *RCSFILE-PARSER* ;; ---INITIAL-NON-TERMINAL RCSTEXT ;; ---LIST-OF-TERMINALS ( RCSCAT:STRING RCSCAT:NUM RCSCAT:ID RCSCAT:SYM RCSTOK:HEAD RCSTOK:BRANCH RCSTOK:ACCESS RCSTOK:SYMBOLS RCSTOK:LOCKS RCSTOK:COMMENT RCSTOK:EXPAND RCSTOK:STRICT RCSTOK:DATE RCSTOK:AUTHOR RCSTOK:STATE RCSTOK:BRANCHES RCSTOK:NEXT RCSTOK:DESC RCSTOK:LOG RCSTOK:TEXT RCSTOK:|;| RCSTOK:|:| ) ;;--- LIST OF RULES ((RCSTEXT -> ADMIN (* DELTA) DESC (* DELTATEXT)) (ADMIN -> RCSTOK:HEAD (:MAYBE NUM) RCSTOK:|;| (:MAYBE RCSTOK:BRANCH (:MAYBE NUM) RCSTOK:|;|) RCSTOK:ACCESS (* ID) RCSTOK:|;| RCSTOK:SYMBOLS (* SYM RCSTOK:|:| NUM) RCSTOK:|;| RCSTOK:LOCKS (* ID RCSTOK:|:| NUM) RCSTOK:|;| (:MAYBE RCSTOK:STRICT RCSTOK:|;|) (:MAYBE RCSTOK:COMMENT (:MAYBE STRING) RCSTOK:|;|) (:MAYBE RCSTOK:EXPAND (:MAYBE STRING) RCSTOK:|;|) (* NEWPHRASE)) (DELTA -> NUM RCSTOK:DATE NUM RCSTOK:|;| RCSTOK:AUTHOR ID RCSTOK:|;| RCSTOK:STATE (:MAYBE ID) RCSTOK:|;| RCSTOK:BRANCHES (* NUM) RCSTOK:|;| RCSTOK:NEXT (:MAYBE NUM) RCSTOK:|;| (* NEWPHRASE)) (DESC -> RCSTOK:DESC STRING) (DELTATEXT -> NUM RCSTOK:LOG STRING (* NEWPHRASE) RCSTOK:TEXT STRING) (NUM -> RCSCAT:NUM) (SYM -> RCSCAT:SYM (:CALL (LAMBDA (X) (PRINT X)))) (ID -> RCSCAT:ID) (STRING -> RCSCAT:STRING) (NEWPHRASE -> ID (* WORD)) (WORD -> (:OR ID NUM STRING))) ;;---KEYWORDS :TYPE :LR :LEXER-OPTIONS (:CONTEXT)) ;; 3 shift/reduce conflicts (defvar *trace-lexer* nil) (defun rcsfile-lexer (buffer &key (start 0) (end (length buffer))) (declare (fixnum start end)) (assert (char= (aref buffer (1- end)) #\Newline) nil "RCS file must end in a newline.") (let ((i start)) (lambda (context) (prog ((retries 0)) (when *trace-lexer* (format t "POSITION: ~D CONTEXT: ~S.~%" i context)) retry (if (= i end) (return NIL)) (multiple-value-bind (skipped-p idx) (skip-whitespace buffer :start i :end end) (when skipped-p (setq i idx) (incf retries) (go retry))) (multiple-value-bind (tok idx) (parse-identifier buffer :start i :end end) (when tok (assert (find tok context) nil "POSITION ~D: Didn't find ~S in ~S" i tok context) (setq i idx) (when *trace-lexer* (format t "RETURNING LITERAL: ~S.~%" tok)) (return (values tok tok)))) (when (or (find 'RCSCAT:NUM context) (find 'RCSCAT:SYM context) (find 'RCSCAT:ID context)) (multiple-value-bind (cat val idx) (parse-nonwhite buffer :start i :end end) (when cat (setq i idx) (if (and (eq cat 'RCSCAT:SYM) (not (find cat context)) (find 'RCSCAT:ID context)) (setq cat 'RCSCAT:ID)) (when *trace-lexer* (format t "RETURNING ~S ~S.~%" cat val)) (return (values cat val))))) (when (find 'RCSCAT:STRING context) (multiple-value-bind (positions idx) (parse-string buffer :start i :end end) (when positions (setq i idx) (when *trace-lexer* (format t "FOUND STRING BETWEEN POSITIONS ~S.~%" positions)) (return (values 'RCSCAT:STRING positions))))) (when (eql (char buffer i) #\:) (incf i) (assert (find 'RCSTOK:|:| context)) (return (values 'RCSTOK:|:| 'RCSTOK:|:|))) (when (eql (char buffer i) #\;) (incf i) (assert (find 'RCSTOK:|;| context)) (return (values 'RCSTOK:|;| 'RCSTOK:|;|))) (error "LEXER FAILED. BUFFER AT POSITION ~D. CONTEXTS=~S" i context))))) #|| (defvar $rcsfile "home:cl/RCS/localtime.l,v") (defvar $buf (user:slurp-file $rcsfile nil :element-type 'character)) (parse-identifier $buf :start 0) (parse-identifier $buf :start 10) (parse-nonwhite $buf :start 70) (fucc:parser-lr (rcsfile-lexer $buf) *rcsfile-parser*) ||#