;;; -*- Mode: LISP; Package: :cl-user; BASE: 10; Syntax: ANSI-Common-Lisp; -*-
;;;
;;;   Touched: Sat Apr 14 19:12:48 2007 +0530
;;;   Time-stamp: <2009-07-26 23:09:11 MDT>
;;;   Bugs-To:
;;;   Status: Experimental.  Do not redistribute
;;;   Copyright (C) 2007 Madhu.  All Rights Reserved.
;;;
(defpackage "WTF-UTF-8"
  (:use "CL"))
(in-package "WTF-UTF-8")

(defun read-utf8-datum (stream)
  "Read one UTF-8 encoded datum from STREAM and return it as an integer.

STREAM must be a binary input stream; octets are fetched with READ-BYTE,
so END-OF-FILE is signalled if the stream runs out, whether before the
first octet or in the middle of a sequence.  Sequences of one to six
octets are accepted (values up to 31 bits).  Continuation octets are not
validated: only their low six bits are used.  Signals an ERROR when the
first octet cannot start a UTF-8 sequence."
  (let ((octet (read-byte stream)))
    (flet ((decode (lead-mask continuation-count)
             ;; Start from the payload bits of the lead octet and append
             ;; six bits for every continuation octet, in stream order.
             (let ((code (logand octet lead-mask)))
               (loop repeat continuation-count
                     do (setq code (logior (ash code 6)
                                           (logand (read-byte stream) #x3f))))
               code)))
      (cond ((zerop (logand octet #x80)) octet)
            ((= #xc0 (logand octet #xe0)) (decode #x1f 1))
            ((= #xe0 (logand octet #xf0)) (decode #x0f 2))
            ((= #xf0 (logand octet #xf8)) (decode #x07 3))
            ((= #xf8 (logand octet #xfc)) (decode #x03 4))
            ;; Six octets: the single lead bit lands at bit 30 (5 * 6).
            ((= #xfc (logand octet #xfe)) (decode #x01 5))
            (t (error "Unexpected value ~X at start of UTF-8 sequence."
                      octet))))))

(defun write-utf8-datum (code stream)
  "Encode the integer CODE as UTF-8 and write the octets to STREAM.

STREAM must be a binary output stream; octets are emitted with
WRITE-BYTE.  Values below #x80000000 are encoded in one to six octets.
Returns the last octet written.  Signals an ERROR for larger values."
  (flet ((emit (lead-marker continuation-count)
           ;; Lead octet: length marker plus the bits above the
           ;; continuation payload.
           (write-byte (logior lead-marker
                               (ash code (* -6 continuation-count)))
                       stream)
           ;; All continuation octets except the last one ...
           (loop for shift from (* -6 (1- continuation-count)) below 0 by 6
                 do (write-byte (logior #x80 (logand #x3f (ash code shift)))
                                stream))
           ;; ... and the last one, whose value is the return value.
           (write-byte (logior #x80 (logand #x3f code)) stream)))
    (cond ((< code #x80) (write-byte code stream))
          ((< code #x800) (emit #xc0 1))
          ((< code #x10000) (emit #xe0 2))
          ((< code #x200000) (emit #xf0 3))
          ((< code #x4000000) (emit #xf8 4))
          ((< code #x80000000) (emit #xfc 5))
          (t (error "unsupported unicode datum: ~s." code)))))

;;; ----------------------------------------------------------------------

(defun slurp-utf8 (file &optional (array (make-array 1024
                                                     :element-type '(integer 0)
                                                     :adjustable t
                                                     :fill-pointer 0)))
  "Decode the UTF-8 file FILE and return its data as a vector of integers.

Each datum read by READ-UTF8-DATUM is appended to ARRAY with
VECTOR-PUSH-EXTEND, so a caller-supplied ARRAY must be adjustable and
have a fill pointer; decoded data are added after its current contents.
Reading stops at END-OF-FILE; an empty file simply leaves ARRAY
unchanged.  A sequence truncated by the end of the file is dropped.
Returns ARRAY."
  (with-open-file (stream file :element-type '(unsigned-byte 8))
    (handler-case
        (loop (vector-push-extend (read-utf8-datum stream) array))
      ;; End of input is the normal way out of the loop.
      (end-of-file () nil)))
  array)

(defun dump-utf8 (vector filename)
  "Write every integer in VECTOR to FILENAME as UTF-8.

An existing file is superseded.  Returns the number of data written."
  (with-open-file (stream filename
                          :direction :output
                          :if-exists :supersede
                          :element-type '(unsigned-byte 8))
    (loop for code-point across vector
          do (write-utf8-datum code-point stream)
          count t)))

#||
(defvar $unicode-array
  (slurp-utf8 "/var/local/sanskrit/www.shivashakti.com/KuMa.txt"))
(array-total-size $unicode-array)
(time (progn (setf (fill-pointer $unicode-array) 0)
             (setq $unicode-array
                   (slurp-utf8 "/var/local/sanskrit/www.shivashakti.com/KuMa.txt"
                               $unicode-array))
             (length $unicode-array)))
(time (dump-utf8 $unicode-array "home:/kuma.xml"))
(progn (setf (fill-pointer $unicode-array) 0)
       (setq $unicode-array
             (time (slurp-utf8 "/var/local/sanskrit/indology.info/etexts/archive/texts/veni/venisamhara.xml"
                               $unicode-array)))
       (length $unicode-array))
(time (dump-utf8 $unicode-array "home:/veni.xml"))
(progn (setf (fill-pointer $unicode-array) 0)
       (setq $unicode-array (time (slurp-utf8 "home:hello.out" $unicode-array)))
       (time (dump-utf8 $unicode-array "/tmp/hello")))
||#

;;
;; UTF-16 distinguishes encoding order:
;; UTF-16LE is UTF16 in "little-endian" byte order. (order mark is 0xFF 0xFE)
;; UTF-16BE is UTF16 in "big-endian" byte order.
;; (UTF-16BE order mark is 0xFE 0xFF)

(defun ucs-2-char-type (byte-order-mark)
  "Return the symbol UTF-16LE or UTF-16BE for the integer BYTE-ORDER-MARK.

#xFEFF maps to UTF-16LE and #xFFFE to UTF-16BE; any other value signals
an error (ECASE).
NOTE(review): this mapping presumes the mark was assembled low octet
first (as READ-UTF16-DATUM-21 does) -- confirm against callers."
  (ecase byte-order-mark
    (#xfeff 'utf-16le)
    (#xfffe 'utf-16be)))

(defun read-utf16-datum-12 (stream)
  "Read two octets from STREAM and combine them high octet first."
  (+ (ash (read-byte stream) 8)
     (read-byte stream)))

(defun write-utf16-datum-12 (code stream)
  "Write CODE to STREAM as two octets, high octet first.

CODE must not exceed #xFFFF; like WRITE-UTF16-DATUM-21 this is asserted
rather than silently truncating the value."
  (assert (<= code #xffff) nil "Illegal code for ucs-2: ~D" code)
  (write-byte (logand #xff (ash code -8)) stream)
  (write-byte (logand #xff code) stream))

(defun read-utf16-datum-21 (stream)
  "Read two octets from STREAM and combine them low octet first."
  (+ (read-byte stream)
     (ash (read-byte stream) 8)))

(defun write-utf16-datum-21 (code stream)
  "Write CODE to STREAM as two octets, low octet first.

CODE must not exceed #xFFFF."
  (assert (<= code #xffff) nil "Illegal code for ucs-2: ~D" code)
  (write-byte (logand #xff code) stream)
  (write-byte (logand #xff (ash code -8)) stream))

;;; ----------------------------------------------------------------------
;;;
;;;
#+nil
(user:lc "home:clisp/mytrie")

(defvar *unidata-file*
  "/var/local/extern/emacs--unicode-2/admin/unidata/UnicodeData.txt")

;; Everything in this EVAL-WHEN is needed at macroexpansion time: the
;; macros below expand according to +UNIDATA-FIELDS+ and call
;; TRIE-NAME-FOR-SYMBOL / TRANSFORM-UNIDATA-ARG while expanding.
(eval-when (:compile-toplevel :load-toplevel :execute)

(defvar +unidata-fields+
  '(code unicode-name category combining-class bidi-class
    decomposition numeric-value1 numeric-value2 numeric-value3
    bidi-mirrored older-name iso-comment uppercase lowercase titlecase))

(defmacro defstruct-unidata-line ()
  "Define the list-based structure UNIDATA-LINE with one slot per field
in +UNIDATA-FIELDS+ and the positional constructor %MAKE-UNIDATA-LINE."
  `(defstruct (unidata-line
               (:type list)
               (:constructor %make-unidata-line ,+unidata-fields+))
     ,@+unidata-fields+))

(defstruct-unidata-line)

(defvar *unicode-names* (trie:make-trie))
(defvar *older-names* (trie:make-trie))
(defvar *categories* (trie:make-trie))
(defvar *iso-comments* (trie:make-trie))
(defvar *bidi-classes* (trie:make-trie))
(defvar *decomposition-names* (trie:make-trie))

;; #+NIL XXX
(defun pluralize (string)
  "Return a plural form of STRING, or NIL when none is produced.

A trailing \"y\" after a non-vowel becomes \"ies\", after a vowel gets
\"s\"; a trailing \"ies\" yields NIL; a trailing \"s\" gets \"es\" only
after a vowel or another \"s\" and otherwise yields NIL; any other string
gets \"s\" if its last character is alphabetic and NIL if not.  The
suffix is upcased when the character before it is upper case.
STRING needs at least one character before the matched suffix (and must
not be empty): PREV-CHAR indexes the character before the suffix."
  (let ((length (length string))
        suffix-length)
    (labels ((ends-with (suffix)
               ;; Records the suffix length for PREV-CHAR and RETVAL as a
               ;; side effect, even when the comparison fails.
               (if (<= (setq suffix-length (length suffix)) length)
                   (string-equal string suffix
                                 :start1 (- length suffix-length))
                   (setq suffix-length nil)))
             (prev-char ()
               (char string (- length suffix-length 1)))
             (fixcase (suffix)
               (if (upper-case-p (prev-char))
                   (string-upcase suffix)
                   suffix))
             (retval (suffix &optional replace)
               (concatenate 'string
                            (if (and replace (< suffix-length length))
                                (subseq string 0 (- length suffix-length))
                                string)
                            (fixcase suffix))))
      (cond ((ends-with "y")
             (cond ((not (find (prev-char) "aeiou" :test #'char-equal))
                    (retval "ies" t))
                   (t (retval "s"))))
            ((ends-with "ies") nil)
            ((ends-with "s")
             (cond ((find (prev-char) "aeious" :test #'char-equal)
                    (retval "es"))
                   (t nil)))
            (t
             ;; No suffix matched: PREV-CHAR now denotes the last character.
             (setq suffix-length 0)
             (cond ((alpha-char-p (prev-char)) (retval "s"))
                   (t nil)))))))

;;#+NIL XXX
(defun trie-name-for-symbol (sym)
  "Return the symbol naming the trie variable for the field SYM.

The name is the pluralized symbol name wrapped in asterisks, or the
symbol name with \"-NAMES\" appended when PLURALIZE returns NIL.  The
symbol is interned in the current package."
  (intern (concatenate 'string
                       "*"
                       (or (pluralize (symbol-name sym))
                           (concatenate 'string (symbol-name sym) "-NAMES"))
                       "*")))

;;#+NIL XXX
(defmacro defvar-tries (&optional (defvar 'defvar))
  "Define one trie variable per field in +UNIDATA-FIELDS+.

DEFVAR names the defining operator to use (DEFVAR by default)."
  `(progn
     ,@(loop for x in +unidata-fields+
             collect `(,defvar ,(trie-name-for-symbol x) (trie:make-trie)))))

(defvar-tries)
#+NIL ;;XXX
(defvar-tries defparameter)

(defun parse-decomposition (string)
  "Parse the decomposition field STRING into a list.

STRING is split on spaces with USER::STRING-SPLIT (defined elsewhere).
A first element of the form <...> is interned in *DECOMPOSITION-NAMES*;
every other element is parsed as a hexadecimal integer.  Empty elements
are rejected by assertion."
  (loop for substring in (user::string-split #\Space string)
        for i from 0
        do (assert (> (length substring) 0))
        if (and (zerop i) (char= #\< (char substring 0)))
          do (assert (char= #\> (char substring (1- (length substring)))))
          and collect (trie:intern-seq substring *decomposition-names*)
        else
          collect (parse-integer substring :radix 16)))

(defun transform-unidata-arg (symbol)
  "Return the form that converts the raw string bound to SYMBOL.

SYMBOL is one of the fields in +UNIDATA-FIELDS+ and doubles as the name
of the variable holding that field's text.  The generated form yields
NIL for an empty string; otherwise it interns the text in the field's
own trie and then produces the field-specific value."
  `(when (> (length ,symbol) 0)
     ;;#+NIL XXX
     (trie:intern-seq ,symbol ,(trie-name-for-symbol symbol))
     ,(ecase symbol
        (unicode-name `(trie:intern-seq ,symbol *unicode-names*))
        (older-name `(trie:intern-seq ,symbol *older-names*))
        (category `(trie:intern-seq ,symbol *categories*))
        (bidi-class `(trie:intern-seq ,symbol *bidi-classes*))
        (iso-comment `(trie:intern-seq ,symbol *iso-comments*))
        ((code) `(parse-integer ,symbol :radix 16))
        ((numeric-value3 numeric-value2 numeric-value1)
         ;; *READ-EVAL* is bound to NIL so that #. in the data cannot
         ;; run code.
         `(let* (*read-eval*
                 (num (read-from-string ,symbol)))
            (assert (numberp num))
            num))
        (uppercase `(parse-integer ,symbol :radix 16))
        (lowercase `(parse-integer ,symbol :radix 16))
        (titlecase `(parse-integer ,symbol :radix 16))
        (decomposition `(parse-decomposition ,symbol))
        (bidi-mirrored
         `(progn
            (assert (= (length ,symbol) 1))
            (ecase (char ,symbol 0)
              (#\Y t)
              (#\N nil))))
        (combining-class `(parse-integer ,symbol)))))

(defmacro defun-make-unidata-line ()
  "Define MAKE-UNIDATA-LINE, which splits a line on semicolons, binds the
pieces to the names in +UNIDATA-FIELDS+ and builds a UNIDATA-LINE from
the converted values."
  `(defun make-unidata-line (line)
     (destructuring-bind ,+unidata-fields+
         (user::string-split #\; line)
       (%make-unidata-line
        ,@(loop for x in +unidata-fields+
                collect (transform-unidata-arg x))))))

(defun-make-unidata-line)

) ; end of EVAL-WHEN

(defvar *all-codes* (make-hash-table))

(defun read-unidata-file (&optional (file *unidata-file*))
  "Read FILE line by line and store each parsed line in *ALL-CODES*.

Entries are keyed by their code.  A code that is already present in
*ALL-CODES* fails an assertion reporting the code and the line number."
  (with-open-file (stream file)
    (loop for line = (read-line stream nil nil)
          for lineno from 1
          while line
          do (let* ((unidata-line (make-unidata-line line))
                    (code (unidata-line-code unidata-line)))
               (multiple-value-bind (value foundp) (gethash code *all-codes*)
                 (declare (ignore value))
                 (assert (not foundp) nil
                         "Duplicate code ~D on line ~D" code lineno))
               (setf (gethash code *all-codes*) unidata-line)))))

(defun make-unidata-form (list)
  "Turn the UNIDATA-LINE list LIST into a property list.

Each non-NIL element is paired with the keyword named after the
corresponding field in +UNIDATA-FIELDS+; NIL elements are omitted."
  (loop for y in list
        for x in +unidata-fields+
        when y
          collect (intern (symbol-name x) :keyword)
          and collect y))

(defun hexfrob-unidata-form (form &optional (check t))
  "Ensure the property list FORM has a :HEX entry matching its :CODE.

:CODE must be an integer.  A missing :HEX is filled in with CODE printed
as at least four hexadecimal digits.  When CHECK is true, an existing
:HEX is parsed and compared with :CODE; a mismatch signals an ERROR with
a DELETE-HEX-AND-RETRY restart that discards :HEX and recomputes it.
Returns the resulting property list."
  (let ((hex (getf form :hex))
        (code (getf form :code)))
    (assert (integerp code))
    (unless hex
      (setf (getf form :hex)
            (setq hex (format nil "~4,'0X" code))))
    (when check
      (let ((number (parse-integer hex :radix 16)))
        (restart-case
            (unless (= code number)
              (error "CODE ~D does not match HEX ~A (~D)." code hex number))
          (delete-hex-and-retry ()
            :report "Delete Hex and retry."
            ;; REMF returns a boolean, not the property list, so strip
            ;; :HEX from FORM first and then return the retried result.
            (remf form :hex)
            (return-from hexfrob-unidata-form
              (hexfrob-unidata-form form check))))))
    form))

(defun trie-complete (trie)
  "Return the results of TRIE:TRIE-COMPLETIONS on TRIE, each coerced to a
string."
  (mapcar (lambda (x) (coerce x 'string))
          (trie:trie-completions trie)))

#||
(cd /var/local/extern/emacs/admin/unidata/ && grep DEVANAG UnicodeData.txt > /tmp/0900.txt)
(clrhash *all-codes*)
(time (read-unidata-file))
(time (read-unidata-file "/tmp/0900.txt"))
(gethash #x106 *all-codes*)
(gethash #x0906 *all-codes*)
(make-unidata-form (gethash #x106 *all-codes*))
(make-unidata-form (gethash #x0906 *all-codes*))
(trie::trie-count *unicode-names*)
(mapcar (lambda (x) (trie-name-for-symbol x)) +unidata-fields+)
(loop for x in +unidata-fields+
      for trie-name = (trie-name-for-symbol x)
      for trie = (symbol-value trie-name)
      do (format t "~%~% ~S~% ~S" trie-name (trie-complete trie)))
(pprint (trie-complete *decompositions*))
(hash-table-count *all-codes*)
(setq $list (let (x)
              (maphash (lambda (k v) (push (make-unidata-form v) x))
                       *all-codes*)
              x))
(setq $list (sort $list #'< :key #'(lambda (x) (getf x :code))))
(pprint $list)
(map-into $list 'hexfrob-unidata-form $list)
||#

;;; ----------------------------------------------------------------------
;;;
;;; SANSKRIT - UNICODE ROMAN <-> UNICODE DEVANAGARI CONVERSIONS
;;;
;;; Each entry is a list of three lists.  The first holds the roman input
;;; sequence (characters and/or code points).
;;; NOTE(review): the second list looks like the stand-alone Devanagari
;;; code points and the third like the dependent (vowel-sign) form, with
;;; the symbol CONS marking consonants -- confirm against the consumer of
;;; this table, which is not part of this section.
(defvar *convlist*
  '(
    ;; * ASCII chars
    ((#\a) (#x0905) ())
    ((#\A) (#x0905) ())
    ((#\i) (#x0907) (#x093F))
    ((#\I) (#x0907) (#x093F))
    ((#\u) (#x0909) (#x0941))
    ((#\U) (#x0909) (#x0941))
    ((#\e) (#x090F) (#x0947))
    ((#\E) (#x090F) (#x0947))
    ((#\o) (#x0913) (#x094B))
    ((#\O) (#x0913) (#x094B))
    ((#\a #\i) (#x0910) (#x0948))
    ((#\A #\i) (#x0910) (#x0948))
    ((#\a #\u) (#x0914) (#x094C))
    ((#\A #\u) (#x0914) (#x094C))
    ((#\k) (#x0915) (CONS))
    ((#\K) (#x0915) (CONS))
    ((#\q) (#x0958) (CONS))
    ((#\Q) (#x0958) (CONS))
    ((#\k #\h) (#x0916) (CONS))
    ((#\K #\h) (#x0916) (CONS))
    ((#\g) (#x0917) (CONS))
    ((#\G) (#x0917) (CONS))
    ((#\g #\h) (#x0918) (CONS))
    ((#\G #\h) (#x0918) (CONS))
    ((#\c) (#x091A) (CONS))
    ((#\C) (#x091A) (CONS))
    ((#\c #\h) (#x091B) (CONS))
    ((#\C #\h) (#x091B) (CONS))
    ((#\j) (#x091C) (CONS))
    ((#\J) (#x091C) (CONS))
    ((#\z) (#x095B) (CONS))
    ((#\Z) (#x095B) (CONS))
    ((#\j #\h) (#x091D) (CONS))
    ((#\J #\h) (#x091D) (CONS))
    ((#\t) (#x0924) (CONS))
    ((#\T) (#x0924) (CONS))
    ((#\t #\h) (#x0925) (CONS))
    ((#\T #\h) (#x0925) (CONS))
    ((#\d) (#x0926) (CONS))
    ((#\D) (#x0926) (CONS))
    ((#\d #\h) (#x0927) (CONS))
    ((#\D #\h) (#x0927) (CONS))
    ((#\n) (#x0928) (CONS))
    ((#\N) (#x0928) (CONS))
    ((#\p) (#x092A) (CONS))
    ((#\P) (#x092A) (CONS))
    ((#\p #\h) (#x092B) (CONS))
    ((#\P #\h) (#x092B) (CONS))
    ((#\f) (#x095E) (CONS))
    ((#\F) (#x095E) (CONS))
    ((#\b) (#x092C) (CONS))
    ((#\B) (#x092C) (CONS))
    ((#\b #\h) (#x092D) (CONS))
    ((#\B #\h) (#x092D) (CONS))
    ((#\m) (#x092E) (CONS))
    ((#\M) (#x092E) (CONS))
    ((#\y) (#x092F) (CONS))
    ((#\Y) (#x092F) (CONS))
    ((#\r) (#x0930) (CONS))
    ((#\R) (#x0930) (CONS))
    ((#\l) (#x0932) (CONS))
    ((#\L) (#x0932) (CONS))
    ((#\v) (#x0935) (CONS))
    ((#\V) (#x0935) (CONS))
    ((#\s) (#x0938) (CONS))
    ((#\S) (#x0938) (CONS))
    ((#\h) (#x0939) (CONS))
    ((#\H) (#x0939) (CONS))
    ;; ISO-Latin 1 chars
    ((#x00E0) (#x0905) ())              ; a grave
    ((#x00C0) (#x0905) ())              ; A grave
    ((#x00E1) (#x0905) ())              ; a acute
    ((#x00C1) (#x0905) ())              ; A acute
    ((#x00EC) (#x0907) (#x093F))        ; i grave
    ((#x00CC) (#x0907) (#x093F))        ; I grave
    ((#x00ED) (#x0907) (#x093F))        ; i acute
    ((#x00CD) (#x0907) (#x093F))        ; I acute
    ((#x00F9) (#x0909) (#x0941))        ; u grave
    ((#x00D9) (#x0909) (#x0941))        ; U grave
    ((#x00FA) (#x0909) (#x0941))        ; u acute
    ((#x00DA) (#x0909) (#x0941))        ; U acute
    ((#x00E8) (#x090F) (#x0947))        ; e grave
    ((#x00C8) (#x090F) (#x0947))        ; E grave
    ((#x00E9) (#x090F) (#x0947))        ; e acute
    ((#x00C9) (#x090F) (#x0947))        ; E acute
    ((#x00F2) (#x0913) (#x094B))        ; o grave
    ((#x00D2) (#x0913) (#x094B))        ; O grave
    ((#x00F3) (#x0913) (#x094B))        ; o acute
    ((#x00D3) (#x0913) (#x094B))        ; O acute
    ((#x00EA) (#x090D) (#x0945))        ; e circ
    ((#x00CA) (#x090D) (#x0945))        ; E circ
    ((#x00F4) (#x0911) (#x0949))        ; o circ
    ((#x00D4) (#x0911) (#x0949))        ; O circ
    ((#x00E3) (#x0905 #x0901) (#x0901)) ; a tilde
    ((#x00C3) (#x0905 #x0901) (#x0901)) ; A tilde
    ((#x00F5) (#x0913 #x0902) (#x094B #x0902)) ; o tilde
    ((#x00D5) (#x0913 #x0902) (#x094B #x0902)) ; O tilde
    ((#x00F1) (#x091E) (CONS))          ; n tilde
    ((#x00D1) (#x091E) (CONS))          ; N tilde
    ;; Other single-codepoint chars
    ((#x0101) (#x0906) (#x093E))        ; a macron
    ((#x0100) (#x0906) (#x093E))        ; A macron
    ((#x1EBD) (#x090F #x0901) (#x0947 #x0902)) ; e tilde
    ((#x1EBC) (#x090F #x0901) (#x0947 #x0902)) ; E tilde
    ((#x012B) (#x0908) (#x0940))        ; i macron
    ((#x012A) (#x0908) (#x0940))        ; I macron
    ((#x016B) (#x090A) (#x0942))        ; u macron
    ((#x016A) (#x090A) (#x0942))        ; U macron
    ((#x0129) (#x0907 #x0901) (#x093F #x0902)) ; i tilde
    ((#x0128) (#x0907 #x0901) (#x093F #x0902)) ; I tilde
    ((#x0169) (#x0909 #x0901) (#x0941 #x0901)) ; u tilde
    ((#x0168) (#x0909 #x0901) (#x0941 #x0901)) ; U tilde
    ((#x1E41) (#x0902) ())              ; m odot
    ((#x1E40) (#x0902) ())              ; M odot
    ((#x1E25) (#x0903) ())              ; h udot
    ((#x1E24) (#x0903) ())              ; H udot
    ;; madhu
    ((#x1E43) (#x0902) ())              ; m udot/
    ((#x1E42) (#x0902) ())              ; M udot
    ;; h ubar: no Unicode Devanagari char
    ;; h ubreve: no Unicode Devanagari char
    ;; H ubreve: no Unicode Devanagari char
    ((#x0121) (#x095A) (CONS))          ; g odot
    ((#x0120) (#x095A) (CONS))          ; G odot
    ((#x1E45) (#x0919) (CONS))          ; n odot
    ((#x1E44) (#x0919) (CONS))          ; N odot
    ((#x1E6D) (#x091F) (CONS))          ; t udot
    ((#x1E6C) (#x091F) (CONS))          ; T udot
    ((#x1E6D #\h) (#x0920) (CONS))      ; th udot
    ((#x1E6C #\h) (#x0920) (CONS))      ; Th udot
    ((#x1E0D) (#x0921) (CONS))          ; d udot
    ((#x1E0C) (#x0921) (CONS))          ; D udot
    ((#x1E0D #\h) (#x0922) (CONS))      ; dh udot
    ((#x1E0C #\h) (#x0922) (CONS))      ; Dh udot
    ((#x1E5B) (#x095C) (CONS))          ; r udot
    ((#x1E5A) (#x095C) (CONS))          ; R udot
    ((#x1E5B #\h) (#x095D) (CONS))      ; rh udot
    ((#x1E5A #\h) (#x095D) (CONS))      ; Rh udot
    ((#x1E47) (#x0923) (CONS))          ; n udot
    ((#x1E46) (#x0923) (CONS))          ; N udot
    ((#x1E49) (#x0929) (CONS))          ; n ubar
    ((#x1E48) (#x0929) (CONS))          ; N ubar
    ((#x1E37) (#x0933) (CONS))          ; l udot
    ((#x1E36) (#x0933) (CONS))          ; L udot
    ((#x1E3B) (#x0934) (CONS))          ; l ubar
    ((#x1E3A) (#x0934) (CONS))          ; L ubar
    ((#x015B) (#x0936) (CONS))          ; s acute
    ((#x015A) (#x0936) (CONS))          ; S acute
    ((#x1E63) (#x0937) (CONS))          ; s udot
    ((#x1E62) (#x0937) (CONS))          ; S udot
    ((#x1E8F) (#x095F) (CONS))          ; y odot
    ((#x1E8E) (#x095F) (CONS))          ; Y odot
    ;; Two-codepoint chars
    ((#\a #x0129) (#x0910 #x0902) (#x0948 #x0902)) ; ai tilde
    ((#\A #x0129) (#x0910 #x0902) (#x0948 #x0902)) ; Ai tilde
    ((#\a #x0169) (#x0914 #x0902) (#x094C #x0902)) ; au tilde
    ((#\A #x0169) (#x0914 #x0902) (#x094C #x0902)) ; Au tilde
    ((#x0101 #x0303) (#x0906 #x0901) (#x093E #x0901)) ; a mactil
    ((#x0100 #x0303) (#x0906 #x0901) (#x093E #x0901)) ; A mactil
    ((#x012B #x0303) (#x0908 #x0902) (#x0940 #x0902)) ; i mactil
    ((#x012A #x0303) (#x0908 #x0902) (#x0940 #x0902)) ; I mactil
    ((#x016B #x0303) (#x090A #x0901) (#x0942 #x0901)) ; u mactil
    ((#x016A #x0303) (#x090A #x0901) (#x0942 #x0901)) ; U mactil
    ((#x0101 #x0300) (#x0906) (#x093E)) ; a macgrv
    ((#x0100 #x0300) (#x0906) (#x093E)) ; A macgrv
    ((#x0101 #x0301) (#x0906) (#x093E)) ; a macac
    ((#x0100 #x0301) (#x0906) (#x093E)) ; A macac
    ((#x012B #x0300) (#x0908) (#x0940)) ; i macgrv
    ((#x012A #x0300) (#x0908) (#x0940)) ; I macgrv
    ((#x012B #x0301) (#x0908) (#x0940)) ; i macac
    ((#x012A #x0301) (#x0908) (#x0940)) ; I macac
    ((#x016B #x0300) (#x090A) (#x0942)) ; u macgrv
    ((#x016A #x0300) (#x090A) (#x0942)) ; U macgrv
    ((#x016B #x0301) (#x090A) (#x0942)) ; u macac
    ((#x016A #x0301) (#x090A) (#x0942)) ; U macac
    ((#\l #x0325) (#x090C) (#x0962))    ; l uring
    ((#\L #x0325) (#x090C) (#x0962))    ; L uring
    ((#\r #x0325) (#x090B) (#x0943))    ; r uring
    ((#\R #x0325) (#x090B) (#x0943))    ; R uring
    ((#\m #x0310) (#x0901) ())          ; m cand
    ((#\M #x0310) (#x0901) ())          ; M cand
    ;; H ubar: no Unicode Devanagari char
    ((#\r #x0306) (#x0931) (CONS))      ; r breve
    ((#\R #x0306) (#x0931) (CONS))      ; R breve
    ;; Three-codepoint chars
    ((#\l #x0325 #x0304) (#x0961) (#x0963)) ; l uringmac
    ((#\L #x0325 #x0304) (#x0961) (#x0963)) ; L uringmac
    ((#\r #x0325 #x0304) (#x0960) (#x0944)) ; r uringmac
    ((#\R #x0325 #x0304) (#x0960) (#x0944)) ; R uringmac
    ((#\l #x0325 #x0300) (#x090C) (#x0962)) ; l uringgrv
    ((#\L #x0325 #x0300) (#x090C) (#x0962)) ; L uringgrv
    ((#\l #x0325 #x0301) (#x090C) (#x0962)) ; l uringac
    ((#\L #x0325 #x0301) (#x090C) (#x0962)) ; L uringac
    ((#\r #x0325 #x0300) (#x090B) (#x0943)) ; r uringgrv
    ((#\R #x0325 #x0300) (#x090B) (#x0943)) ; R uringgrv
    ((#\r #x0325 #x0301) (#x090B) (#x0943)) ; r uringac
    ((#\R #x0325 #x0301) (#x090B) (#x0943)) ; R uringac
    ;; Four-codepoint chars
    ((#\l #x0325 #x0304 #x0300) (#x0961) (#x0963)) ; l uringmacgrv
    ((#\L #x0325 #x0304 #x0300) (#x0961) (#x0963)) ; L uringmacgrv
    ((#\l #x0325 #x0304 #x0301) (#x0961) (#x0963)) ; l uringmacac
    ((#\L #x0325 #x0304 #x0301) (#x0961) (#x0963)) ; L uringmacac
    ((#\r #x0325 #x0304 #x0300) (#x0960) (#x0944)) ; r uringmacgrv
    ((#\R #x0325 #x0304 #x0300) (#x0960) (#x0944)) ; R uringmacgrv
    ((#\r #x0325 #x0304 #x0301) (#x0960) (#x0944)) ; r uringmacac
    ((#\R #x0325 #x0304 #x0301) (#x0960) (#x0944)) ; R uringmacac
    ((#\k #x200D #\h #x0331) (#x0959) (CONS)) ; kh ubar
    ((#\K #x200D #\h #x0331) (#x0959) (CONS)) ; Kh ubar
    ((#\k #x200D #\H #x0331) (#x0959) (CONS)) ; kH ubar
    ((#\K #x200D #\H #x0331) (#x0959) (CONS)) ; KH ubar
    ;; Convert numerals to Devanagari
    ((#\0) (#x0966) (#x094D #x0966))    ; halant
    ((#\1) (#x0967) (#x094D #x0967))
    ((#\2) (#x0968) (#x094D #x0968))
    ((#\3) (#x0969) (#x094D #x0969))
    ((#\4) (#x096A) (#x094D #x096A))
    ((#\5) (#x096B) (#x094D #x096B))
    ((#\6) (#x096C) (#x094D #x096C))
    ((#\7) (#x096D) (#x094D #x096D))
    ((#\8) (#x096E) (#x094D #x096E))
    ((#\9) (#x096F) (#x094D #x096F))
    ;; Convert apostrophe to avagraha
    ((#\') (#x093D) ())
    ((#x2019) (#x093D) ())              ; approved apostrophe
    )
  "Table adapted from ur2ud (C) 2002 John D. Smith.")