;;; -*- Mode: LISP; Package: :cl-user; BASE: 10; Syntax: ANSI-Common-Lisp; -*- ;;; ;;; Touched: Mon Apr 02 14:34:55 2007 +0530 ;;; Time-stamp: <2007-04-02 23:07:56 madhu> ;;; Bugs-To: ;;; Status: Experimental. Do not redistribute. ;;; Copyright (C) 2007 Madhu. All rights reserved. ;;; ;;; Reconstructing accent in the "satapatha brahma.na ;;; (defpackage "VEDAVID-1" (:use "CL")) (in-package "VEDAVID-1") (defun parse-html-tag (string &key (start 0) end) "Scrap HTML Tags" (assert (char= (aref string start) #\<)) (loop with parsed for i from (1+ start) below (or end (length string)) unless parsed do (setq parsed :PARSED) if (char= (aref string i) #\>) do (loop-finish) finally (return (values parsed (1+ i) end)))) #+nil (parse-html-tag " bar") ;;; ;;; ;;; (defvar *known-html-special-char-translations-alist* ; "sathapatha accent (pairlis '("gt" "ntilde" "eacute" "uacute" "oacute" "amp" "aacute" "iacute") '( "$o$" "~n" "e_" "u_" "o_" "_" "a_" "i_"))) (defun fetch-known-html-special-char-translation (string &key (start 0) end) (let ((string-len (if end (- end start) (- (length string) start)))) (loop for (key . translation) in *known-html-special-char-translations-alist* for key-len = (length key) if (and (= key-len string-len) (string= key string :start2 start :end2 end)) return translation))) #+nil (fetch-known-html-special-char-translation "amp") ;;; ;;; (defvar *html-special-char-trie* (trie:make-trie)) #+nil (let (x) (trie:maptrie (lambda (k v) (push (cons (coerce k 'string) v) x)) *html-special-char-trie*) (nreverse x)) (defun parse-html-strip (string &key (start 0) end stream) (let ((pos start) (end-pos (or end (length string)))) (loop (unless (< pos end-pos) (return :EOF)) (case (aref string pos) (#\< (multiple-value-bind (ignore end2) (parse-html-tag string :start pos :end end-pos) (declare (ignore ignore)) (assert end2) (setq pos end2))) (#\& (multiple-value-bind (trans end2) (trie:parse-identifier string *html-special-char-trie* :start (1+ pos) :end end-pos) (cond (trans (assert end2) (assert (char= (aref string end2) #\;))) (t (setq end2 (position #\; string :start (1+ pos) :end end-pos)) (assert end2 nil "Invalid HTML at position ~D" pos) (setq trans (fetch-known-html-special-char-translation string :start (1+ pos) :end end2)) (assert trans nil "Unknown identifier ~S encountered for first time." (subseq string (1+ pos) end2)) (trie:trie-set-identifier string *html-special-char-trie* :start (1+ pos) :end end2 :value trans))) (write-string trans stream) (setq pos (1+ end2)))) (otherwise (write-char (aref string pos) stream) (incf pos)))))) #+nil (with-output-to-string (stream) (parse-html-strip "14.6.1.6
yaájñavalkyeti hovaaca yádidaM sárvamahoraatraábhyaamaaptaM" :stream stream)) ;;; ;;; ;;; (eval-when (load eval compile) (unless (find-package "SKTCHAR") (defpackage "SKTCHAR" (:use) (:export "a" "aa" "i" "ii" "u" "uu" ".r" ".r.r" ".l" ".l.l" ; simple-vowels "e" "ai" "o" "au" ; diphthongs "k" "kh" "g" "gh" "\"n" ; "ka.n.thya" "c" "ch" "j" "jh" "~n" ; "taalavya" ".t" ".th" ".d" ".dh" ".n" ; "murdhnya" "t" "th" "d" "dh" "n" ; "dantya" "p" "ph" "b" "bh" "m" ; "o.s.tya" "y" "r" "l" "v" "\"s" ".s" "s" "h" ".h" ".m" ".a" ".o" " ")) )) ;;; ;;; (defun parse-itrans-char (string &key (start 0) (end (length string)) (junk-allowed-p t) &aux value1 (value2 start)) (declare (type fixnum start end value2) (type string string)) (flet ((set-value (value &optional (incr 1)) (declare (type fixnum incr)) (setq value1 value value2 (+ incr value2))) (second-char-p () (< start (- end 1))) (third-char-p () (< start (- end 2))) (char () (char string start)) (second-char ()(char string (1+ start))) (third-char () (char string (+ start 2)))) (case (char) (#\a (set-value 'SKTCHAR:|a|) (when (second-char-p) (case (second-char) (#\a (set-value 'SKTCHAR:|aa|)) (#\i (set-value 'SKTCHAR:|ai|)) (#\u (set-value 'SKTCHAR:|au|))))) (#\A (set-value 'SKTCHAR:|aa|) (when (second-char-p) (case (second-char) (#\U (ecase (third-char) (#\M (set-value 'SKTCHAR:|.o| 2))))))) (#\i (set-value 'SKTCHAR:|i|) (when (second-char-p) (case (second-char) (#\i (set-value 'SKTCHAR:|ii|))))) (#\I (set-value 'SKTCHAR:|ii|)) (#\u (set-value 'SKTCHAR:|u|) (when (second-char-p) (case (second-char) (#\u (set-value 'SKTCHAR:|uu|))))) (#\U (set-value 'SKTCHAR:|uu|)) #+XXX (#\R (ecase (second-char) ((#\R #\^) (ecase (third-char) (#\i (set-value 'SKTCHAR:|.r| 3)) (#\I (set-value 'SKTCHAR:|.r.r| 3)))))) (#\R (set-value 'SKTCHAR:|.r|) (when (second-char-p) (case (second-char) (#\: (set-value 'SKTCHAR:|.r.r| 2))))) (#\L (ecase (second-char) ((#\L #\^) (ecase (third-char) (#\i (set-value 'SKTCHAR:|.l| 3)) (#\I (set-value 'SKTCHAR:|.l.l| 3)))))) (#\e (set-value 'SKTCHAR:|e|)) (#\o (set-value 'SKTCHAR:|o|)) (#\M (set-value 'SKTCHAR:|.m|)) (#\H (set-value 'SKTCHAR:|.h|)) (#\' (set-value 'SKTCHAR:|.a|)) (#\k (set-value 'SKTCHAR:|k|) (when (second-char-p) (case (second-char) (#\h (set-value 'SKTCHAR:|kh|))))) (#\g (set-value 'SKTCHAR:|g|) (when (second-char-p) (case (second-char) (#\h (set-value 'SKTCHAR:|kh|))))) (#\~ (ecase (second-char) (#\N (set-value 'SKTCHAR:|"n| 2)) (#\n (set-value 'SKTCHAR:|~n| 2)))) #+XXX (#\c (ecase (second-char) (#\h (set-value 'SKTCHAR:|c| 2)))) (#\c (set-value 'SKTCHAR:|c|) (when (second-char-p) (case (second-char) (#\h (set-value 'SKTCHAR:|ch|))))) (#\C (ecase (second-char) (#\h (set-value 'SKTCHAR:|ch| 2)))) (#\j (set-value 'SKTCHAR:|j|) (when (second-char-p) (case (second-char) (#\h (set-value 'SKTCHAR:|jh|))))) (#\T (set-value 'SKTCHAR:|.t|) (when (second-char-p) (case (second-char) (#\h (set-value 'SKTCHAR:|.th|))))) (#\D (set-value 'SKTCHAR:|.d|) (when (second-char-p) (case (second-char) (#\h (set-value 'SKTCHAR:|.dh|))))) (#\N (set-value 'SKTCHAR:|.n|) ;;; XXX (when (second-char-p) (case (second-char) (#\^ (set-value 'SKTCHAR:|"n|))))) (#\t (set-value 'SKTCHAR:|t|) (when (second-char-p) (case (second-char) (#\h (set-value 'SKTCHAR:|th|))))) (#\d (set-value 'SKTCHAR:|d|) (when (second-char-p) (case (second-char) (#\h (set-value 'SKTCHAR:|dh|))))) (#\n (set-value 'SKTCHAR:|n|)) (#\p (set-value 'SKTCHAR:|p|) (when (second-char-p) (case (second-char) (#\h (set-value 'SKTCHAR:|ph|))))) (#\b (set-value 'SKTCHAR:|b|) (when (second-char-p) (case (second-char) (#\h (set-value 'SKTCHAR:|bh|))))) (#\m (set-value 'SKTCHAR:|m|)) (#\y (set-value 'SKTCHAR:|y|)) (#\r (set-value 'SKTCHAR:|r|)) (#\l (set-value 'SKTCHAR:|l|)) ((#\v #\w) (set-value 'SKTCHAR:|v|)) (#\s (set-value 'SKTCHAR:|s|) (when (second-char-p) (case (second-char) (#\h (set-value 'SKTCHAR:|"s|) (when (third-char-p) (case (third-char) (#\h (set-value 'SKTCHAR:|.s|)))))))) #+XXX (#\S (ecase (second-char) (#\h (set-value 'SKTCHAR:|.s|)))) (#\S (set-value 'SKTCHAR:|.s|) (when (second-char-p) (case (second-char) (#\h (set-value 'SKTCHAR:|.s|))))) (#\h (set-value 'SKTCHAR:|h|) (when (second-char-p) (case (second-char) (#\- (set-value 'SKTCHAR:|.h|))))) (#\O (ecase (second-char) (#\M (set-value 'SKTCHAR:|.m|)))) (#\Space (set-value 'SKTCHAR:| |))) (cond (value1 (values value1 value2)) (junk-allowed-p (assert (= value2 start)) (values (char) (1+ start)))))) (defun parse-itrans-line (string &key (start 0) end) (let ((pos-begin start) (pos-end (or end (length string)))) (loop while (< pos-begin pos-end) collect (multiple-value-bind (sktchar end2) (parse-itrans-char string :start pos-begin :end pos-end) (prog1 sktchar (assert (> end2 pos-begin)) (setq pos-begin end2)))))) #+nil (parse-itrans-line "enometh") ;;; ;;; ;;; (defvar *write-skt-eat-characters* nil) (defvar *write-skt-allowed-characters* '(#\Newline #\Linefeed #\Return #\Tab #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)) (defvar *write-skt-translate-characters-alist* nil) (define-condition unknown-character () ((c :initarg :char :reader unknown-character-character)) (:report (lambda (condition stream) (format stream "Unknown Character ~C" (unknown-character-character condition))))) #+elisp (defindent prog lisp-indent-tagbody) (defun check-character (character) "Returns NIL or a character." (prog (return *current-condition* cons) (labels ((handle-condition (condition) (setq *current-condition* condition)) (unknown-character () (unknown-character-character *current-condition*)) (test-function (condition) #-cmu ; usual cmu+slime fuckup (eql condition *current-condition*) #+cmu t) (xlate-once (x) (setq return x) (go return)) (report-xlate-once (stream) (format stream "Return something else this once")) (xlate-always (x) (setq return x) (assert (not (assoc (unknown-character) *write-skt-translate-characters-alist*))) (setq *write-skt-translate-characters-alist* (acons (unknown-character) x *write-skt-translate-characters-alist*)) (go return)) (report-xlate-always (stream) (format stream "Return something else and permanently modify xlate table.")) (xlate-interactive () (format *terminal-io* "~&~&Enter lisp form: ") (multiple-value-list (read))) (literal-once () (setq return (unknown-character)) (go return)) (report-literal-once (stream) (format stream "Return ~C just this once." (unknown-character))) (literal-always () (setq return (unknown-character)) (assert (not (find (unknown-character) *write-skt-allowed-characters*))) (pushnew return *write-skt-allowed-characters*) (go return)) (report-literal-always (stream) (format stream "Return ~C, adding it permanently to the allowed list." (unknown-character))) (ignore-once () (go return)) (report-ignore-once (stream) (format stream "Ignore ~C just this once." (unknown-character))) (ignore-always () (assert (not (find (unknown-character) *write-skt-eat-characters*))) (pushnew (unknown-character) *write-skt-eat-characters*) (go return)) (report-ignore-always (stream) (format stream "Ignore ~C, adding it permanently to the ignored list." (unknown-character)))) (restart-bind ((literal-once #'literal-once :report-function #'report-literal-once :test-function #'test-function) (literal-always #'literal-always :report-function #'report-literal-always :test-function #'test-function) (ignore-once #'ignore-once :report-function #'report-ignore-once :test-function #'test-function) (ignore-always #'ignore-always :report-function #'report-ignore-always :test-function #'test-function) (xlate-once #'xlate-once :report-function #'report-xlate-once :test-function #'test-function :interactive-function #'xlate-interactive) (xlate-always #'xlate-always :report-function #'report-xlate-always :test-function #'test-function :interactive-function #'xlate-interactive)) (handler-bind ((unknown-character #'handle-condition)) (cond ((find character *write-skt-eat-characters*) (return nil)) ((find character *write-skt-allowed-characters*) (return character)) ((setq cons (assoc character *write-skt-translate-characters-alist*)) (return (cdr cons))) (t (error 'unknown-character :char character)))))) return (if return (return return)))) (defun write-skt-line (parsed &optional stream) (loop for elem in parsed do (etypecase elem (character (let ((checked (check-character elem))) (when checked (write-char checked stream)))) (symbol (write-string (symbol-name elem) stream))))) #+nil (with-output-to-string (stream) (let ((*write-skt-allowed-characters* *write-skt-allowed-characters*) (*write-skt-eat-characters* *write-skt-eat-characters*)) (list :WRITTEN (write-skt-line (parse-itrans-line "madhu.#.@$!") stream) :ALLOWED *write-skt-allowed-characters* :IGNORED *write-skt-eat-characters*))) (defvar *whitespace* '(#\Space #\Backspace #\Tab #\Newline #\Vt #\Formfeed #\Return)) (defun whitespace-p (character) (find character *whitespace*)) (defun number-p (character) (or (digit-char-p character) (char= character #\.))) (defvar *specials* '(#\( #\) #\[ #\])) (defun special-p (character) (find character *specials*)) #+nil (every 'number-p "14.8.15[1]") (defun write-skt-boilerplate-header (html-file output-file stream) (format stream "%%% -*-LaTeX-*- %%% %%% Touched: ~A %%% Time-stamp: <> %%% Bugs-To: %%% Copyright (C) 2007 Madhu. All rights Reserved. %%% %%% ~A: generated from ~A %%% By ~A \\documentclass[notitlepage]{article} \\usepackage[larger,iitalic,uitalic]{skt} %%%%% SKT OPTIONS {\\skt [ 52+ 45+ 50+ 61+ 64+ 136+ 144+ 131+ ]} %%%%% s.ra tha j~~na la 1 4 ddya .dhya %%%%% %% 137+ 135+ 148+ 40+ %% thna tna pta a %%%%% \\title{} \\author{} \\date{} \\parindent 0pt \\addtolength{\\parskip}{2em} \\newcommand{\\attrib}[1]{% \\nopagebreak{\\raggedleft\\hfill\\mbox{$\\bigl[$#1$\\bigr]$}}\\\\*} \\begin{document}" (user:date :stream nil) (file-namestring output-file) html-file (lisp-implementation-version))) (defun write-skt-boilerplate-footer (html-file output-file stream) (format stream "\\end{document} %%% Output ended: ~A %%% ~A: generated from ~A" (user:date :stream nil) (file-namestring output-file) html-file)) (defun convert-vedavid (html-file output-file) "Write skt output." (let ((buf (user:slurp-file html-file nil :element-type 'character)) (*write-skt-allowed-characters* (append '(#\_) *write-skt-allowed-characters*)) (*write-skt-translate-characters-alist* '((#\! . #\_))) (*write-skt-eat-characters* (append '(#\+) *write-skt-eat-characters*)) (in-sktcommand-p 0)) (with-input-from-string (input-stream (with-output-to-string (stream) (parse-html-strip buf :stream stream))) (with-open-file (output-stream output-file :direction :output :if-exists :supersede) (write-skt-boilerplate-header html-file output-file output-stream) (loop for line = (read-line input-stream nil nil) while line do (let ((parsed (parse-itrans-line line))) (cond ((or (null parsed) (every (lambda (x) (and (characterp x) (whitespace-p x))) parsed)) ;;(write-string "\\\\" output-stream) (unless (zerop in-sktcommand-p) (assert (> in-sktcommand-p 0)) (write-char #\} output-stream) (terpri output-stream) (decf in-sktcommand-p)) (terpri output-stream)) ((every (lambda (x) (and (characterp x) (or (special-p x) (whitespace-p x) (number-p x)))) parsed) (write-string "\\attrib{" output-stream) (map nil (lambda (x) (write-char x output-stream)) parsed) (write-char #\} output-stream) (terpri output-stream) (assert (zerop in-sktcommand-p)) (incf in-sktcommand-p) (write-string "{\\sktb " output-stream) (terpri output-stream)) (t (write-skt-line parsed output-stream) (terpri output-stream))))) (write-skt-boilerplate-footer html-file output-file output-stream))))) #+nil (convert-vedavid "home:outbox/vedavid/1sb/14d.html" "home:tex/1sb/14d.skt")