;;; -*- Mode: LISP; Package: :cl-user; BASE: 10; Syntax: ANSI-Common-Lisp; -*- ;;; ;;; Time-stamp: <2009-04-13 19:43:39 IST> ;;; Touched: Sat Jan 24 10:51:33 2009 +0530 ;;; Bugs-To: enometh@meer.net ;;; Status: Experimental. Do not redistribute ;;; Copyright (C) 2009 Madhu. All Rights Reserved. ;;; copyright (C) 1992 by Erann Gat, all rights reserved ;;; ;;; A slighly modified version of PARCIL - A Parser for C syntax In Lisp. ;;; See parcil.cl in the CMU AI REPOSITORY for the original comments section ;;; and the the bottom of this file for changes. ;;; (defpackage "PARCILSYM" (:use) (:import-from "CL" "<=" "/" "=" "*" "<" ">=" ">" "++" "/=" "+" "-") (:export ">>=" "<<=" "." "->" "*" "/" "+" "-" "<<" ">>" "<" ">" "<=" ">=" "==" "!=" "&" "^" "|" "&&" "+=" "-=" "*=" "/=" "%=" "&=" "^=" "|=" "||" "!" "~" "++" "--" "=" "%" "(" "{" "}" ";" "[" "]" ")" ",")) (defpackage "PARCIL" (:use "PARCILSYM" "CL") (:export "PARCIL" "*PARCILSYM-PACKAGE*" "*BINARY-OPS*" "*BINOP-ALIST*" "*UNARY-OP-ALIST*" "*EOL-COMMENT-DELIMS*")) (in-package "PARCIL") ;;; ---------------------------------------------------------------------- ;;; ;;; ;;; (defvar *parcilsym-package* (find-package "PARCILSYM") "Package in which to intern symbols and operators. Defaults to the PACILSYM package. If you set this to some other package, you must ensure that package inherits the external symbols of the PARCILSYM package" ) (defmacro iterate (name args &body body) "Defines a local function named NAME with body BODY. ARGS is a list of \((arg1 initform1) ... \(argn initformn)). The local function takes arguments \(arg1 .. argn\) and is initially called as \(NAME initform1 .. initformn)." `(labels ((,name ,(mapcar #'car args) ,@body)) (,name ,@(mapcar #'cadr args)))) ;;; ---------------------------------------------------------------------- ;;; ;;; Crufty pseudo-text-file interface ;;; (defvar *the-string* "") (defvar *the-pointer* 0) (defun parse-init (s) (setq *the-string* s) (setq *the-pointer* 0)) (defun eof? (&optional (offset 0)) (>= (+ *the-pointer* offset) (length *the-string*))) (defun peek (&optional (offset 0)) (if (eof? offset) nil (char *the-string* (+ *the-pointer* offset)))) (defun readc () (if (eof?) (error "READC: EOF.") (prog1 (peek) (incf *the-pointer*)))) ;;; ---------------------------------------------------------------------- ;;; ;;; TOKENIZER ;;; (defun letter? (c) (and (characterp c) (or (char<= #\a c #\z) (char<= #\A c #\Z)))) (defun digit? (c) (and (characterp c) (char<= #\0 c #\9))) (defun ident? (s) (and s (symbolp s) (let ((c (char (symbol-name s) 0))) (or (alpha-char-p c) (eql c #\_))))) (defvar *eol-comment-delims* '("//") "Comment till end of line.") (defun eat-spaces () (loop (let ((c (peek))) (let ((delim (find c *eol-comment-delims* :key (lambda (x) (elt x 0))))) (when delim (when (ecase (length delim) (1 t) (2 (eql (elt delim 1) (peek 1)))) (loop (when (eql (setq c (readc)) #\Newline) (return)))))) (if (or (eql c #\Space) (eql c #\Tab) (eql c #\Newline) (eql c #\Linefeed) (eql c #\Return) (eql c #\Page)) (readc) (return))))) (defun syntax-error () (error "Syntax error near ~S" (subseq *the-string* (max 0 (1- *the-pointer*))))) (defun parse-fixnum (&optional (base 10)) (multiple-value-bind (n cnt) (parse-integer *the-string* :start *the-pointer* :radix base :junk-allowed t) (setq *the-pointer* cnt) (if (not (numberp n)) (syntax-error)) n)) (defun parse-symbol () ; XXX interns in *package* (intern (string-upcase (with-output-to-string (s) (loop (let ((c (peek))) (if (and c (or (letter? c) (digit? c) (eql c #\_))) (princ (readc) s) (return)))) s)) *parcilsym-package*)) (defun parse-radix-integer () (readc) (parse-fixnum (ecase (readc) (#\x 16) (#\o 8) (#\b 2)))) (defun parse-number () (let* ((n1 (parse-fixnum)) (s (signum n1)) (c (peek))) (prog ((d 0.1)) (if (eql c #\.) (go decimal)) (if (or (eql c #\e) (eql c #\E)) (go expt)) (return n1) decimal (readc) (let ((c (peek))) (when (digit? c) (incf n1 (* d s (- (char-code c) (char-code #\0)))) (setf d (/ d 10)) (go decimal)) (if (or (eql c #\e) (eql c #\E)) (go expt)) (return n1)) expt (readc) (let ((e (parse-fixnum))) (return (* n1 (expt 10 e))))))) (defun parse-string (terminator) (readc) (with-output-to-string (s) (loop for old = nil then c for c = (readc) when (and (eql c terminator) (not (eql old #\\))) return s do (princ c s)))) (defun parse-operator () (let* ((c (intern (string (readc)) *parcilsym-package*)) (s (intern (format nil "~A~A" c (peek)) *parcilsym-package*))) (cond ((member s '(<< >>)) (readc) (if (eql (peek) #\=) (intern (format nil "~A~A" s (readc)) *parcilsym-package*) s)) ((member s '(++ -- << >> -> <= >= != == && += -= *= /= %= &= ^= \|= \|\|)) (readc) s) (t c)))) (defun parse-atom () (eat-spaces) (if (eof?) nil (let ((c (peek))) (cond ((letter? c) (parse-symbol)) ((eql c #\0) (if (letter? (peek 1)) (parse-radix-integer) (parse-number))) ((digit? c) (parse-number)) ((eql c #\") (parse-string c)) ((eql c #\') (character (parse-string c))) ;madhu 090124 (t (parse-operator)))))) ;;; ---------------------------------------------------------------------- ;;; ;;; TOKENIZER INTF ;;; (defvar *next*) (defun scan () (setf *next* (parse-atom))) ;;; ---------------------------------------------------------------------- ;;; ;;; ;;; (defvar *binary-ops* '((\. ->) (* / %) (+ -) (<< >>) (< > <= >=) (== !=) (&) (^) (\|) (&&) (\|\|) (= += -= *= /= %= &= ^= \|= >>= <<=)) "List of lists of symbols. Order indicates precedence. Last list doubles as a list of assignment operators.") (defvar *binop-alist* '((\. . struct-ref) (= . setf) (% . mod) (<< . ashl) (>> . ashr) (& . logand) (^ . logxor) (\| . logior) (&& . and) (\|\| . or)) "Any binary operator in this alist will be renamed in the parsed version.") (defvar *unary-op-alist* '((* . deref) (& . address-of) (- . -) (! . not) (~ . lognot) (++ . incf) (-- . decf)) "Any prefix unary operator included in this table will be renamed in the parsed version. (Postfix ++ and -- are handled specially, in PARSE-TERM.") (defvar *keywords* '(return)) (defun binop? (s) (member s *binary-ops* :test #'member)) (defun assignop? (s) (member s (car (last *binary-ops*)))) (defun priority (s) (let ((p (position s *binary-ops* :test #'member))) (and p (- 40 p)))) (defun translate-binop (op) (or (cdr (assoc op *binop-alist*)) op)) ;;; ---------------------------------------------------------------------- ;;; ;;; RECURSIVE-DESCENT PARSER ;;; (defun parse-expression (&optional (priority -1)) (iterate loop0 ((result (parse-term))) (let ((op (translate-binop *next*)) (new-priority (priority *next*))) (cond ((assignop? *next*) (scan) (list op result (loop0 (parse-term)))) ((and (binop? *next*) (> new-priority priority)) (scan) (loop0 (list op result (parse-expression new-priority)))) (t result))))) (defun parse-arglist (&optional (terminator '\)) (separator '\,)) (iterate loop0 () (cond ((null *next*) (error "Missing ~S" terminator)) ((eq *next* terminator) (scan) nil ) (t (let ((arg1 (parse-expression))) (unless (or (eq *next* separator) (eq *next* terminator)) (syntax-error)) (if (eq *next* separator) (scan)) (cons arg1 (loop0))))))) ;;; This function parses what K&R call primary expressions. These include ;;; numbers, variables, structure references, array references, and all unary ;;; operators. Parsing of curly brackets is also stuck in here, though it ;;; probably shouldn't be. The weird precedence rules make this a fairly ;;; hariy and brittle piece of code. ;;; (defun parse-term () (iterate loop0 ((term (prog1 *next* (scan)))) (cond ((numberp term) term ) ((assoc term *unary-op-alist*) (list (cdr (assoc term *unary-op-alist*)) (parse-term))) ((eq term '\( ) (cons 'progn (parse-arglist))) ((eq term '{) (list* 'let '() (parse-arglist '} '\;))) ((eq *next* '\( ) (scan) (loop0 (cons term (parse-arglist)))) ((eq *next* '\[ ) (scan) (loop0 `(aref ,term ,@(parse-arglist '\])))) ((eq *next* '\.) (loop0 `(struct-ref ,term ,(prog1 (scan) (scan))))) ((eq *next* '->) (loop0 `(-> ,term ,(prog1 (scan) (scan))))) ((eq *next* '++) (scan) (loop0 `(prog1 ,term (incf ,term)))) ((eq *next* '--) (scan) (loop0 `(prog1 ,term (decf ,term)))) (t (if (and (atom term) (not (ident? term))) (syntax-error)) term)))) ;;; ---------------------------------------------------------------------- ;;; ;;; TOP LEVEL INTF ;;; (defun parcil (s) (parse-init s) (scan) (prog1 (parse-expression) ;; If there's stuff left over something went wrong. (if *next* (syntax-error)))) #+nil (parcil "x=y*sin(pi/2.7)") ;=> (SETF X (* Y (SIN (/ PI 2.7)))) ;;; ---------------------------------------------------------------------- ;;; ;;; READER HOOK ;;; #+nil (defun |#{-reader| (stream char arg) (declare (ignore char arg)) (parcil (with-output-to-string (s) (loop (let ((c (read-char stream))) (if (eql c #\}) (return s) (princ c s))))))) #+nil (set-dispatch-macro-character #\# #\{ #'|#{-reader|) #+nil (read-from-string "#{ ( x=1,y=2,print(x+y),sin(pi/2) ) }") ;;; ---------------------------------------------------------------------- #|| CHANGELOG: * 2009-01-24 Madhu. - functions rearranged, reindented, most comments removed. - (iterate): use loop0 instead of loop. - (while): removed. - (parse-symbol): Updated to use CL:LOOP - (*eol-comment-delims*): new variable for eat-spaces - (eat-spaces): extended to skip tabs newlines, single line comments - (parse-string): coerce single quotes to characters. Fixes BUG (progn (parse-init "'c'") (scan)) - (readc): throw error on EOF. Fixes BUG (progn (parse-init "'(1)") (scan)) - handle symbols from other packages: - (parse-operator, parse-symbol): intern in *parcilsym-package* EG USAGE (in-package "FOO") (use-package "PARCILSYM") (defun foo-parcil (string) "ADDS AND OR AS BINARY OPERATORS." (let* ((parcil:*parcilsym-package* (find-package "FOO")) (parcil:*binary-ops* '((\. ->) (* / %) (+ -) (<< >>) (< > <= >=) (== !=) (&) (^) (\|) (&&) (and) (or) (\|\|) (= += -= *= /= %= &= ^= \|= >>= <<=))) (parcil:*binop-alist* '((\. . struct-ref) (= . setf) (% . mod) (<< . ashl) (>> . ashr) (== . =) (& . logand) (^ . logxor) (\| . logior) (&& . and) (\|\| . or)))) (parcil:parcil string))) - (parse-number) FIX (progn (parse-init"-100.1e2") (parse-number)) (progn (parse-init"-100.1e-2") (parse-number)) - (parse-string) FIX (progn (parse-init"\"fo\\\"o\"") (parse-string #\")) - ident? (ident? "_foo") => t ||#