comparison lisp/mule/devan-util.el @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents
children 2923009caf47
comparison
equal deleted inserted replaced
770:336a418893b5 771:943eaba38521
1 ;;; devan-util.el --- Support for Devanagari Script Composition -*- coding: iso-2022-7bit; -*-
2
3 ;; Copyright (C) 1996, 2001 Free Software Foundation, Inc.
4
5 ;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
6
7 ;; Keywords: multilingual, Indian, Devanagari
8
9 ;; This file is part of XEmacs.
10
11 ;; XEmacs is free software; you can redistribute it and/or modify it
12 ;; under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; XEmacs is distributed in the hope that it will be useful, but
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs; see the file COPYING. If not, write to the Free
23 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24 ;; 02111-1307, USA.
25
26 ;;; Synched up with: Emacs 21.0.103 (language/devan-util.el).
27
28 ;;; Commentary:
29
30 ;; History:
31 ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
32 ;; 1997.3.24 fixed some bugs.
33
34 ;; Future work ::
35 ;; Decompose the input characters and process them on the character basis.
36
37 ;; Devanagari script composition rules and related programs.
38
39 ;;; Code:
40
41 ;;;
42 ;;; Steps toward composition of Devanagari Characters.
43 ;;;
44
45 ;;; Basic functions.
46
47 ;;;###autoload
(defun indian-to-devanagari (char)
  "Return the Devanagari basic character for the IS 13194 character CHAR.
CHAR is decomposed with `split-char'; when its charset is
`indian-is13194', a character of charset `indian-2-column' is built
from the fixed first position code ?\\x21 and CHAR's own position code.
Any other character is returned unchanged."
  (let* ((parts (split-char char))
         (charset (car parts)))
    (cond ((eq charset 'indian-is13194)
           (make-char 'indian-2-column ?\x21 (car (cdr parts))))
          (t char))))
55
56 ;;;###autoload
(defun devanagari-to-indian (char)
  "Return the IS 13194 character for the Devanagari basic character CHAR.
CHAR counts as a Devanagari basic character when `split-char' reports
charset `indian-2-column' with first position code ?\\x21; in that case
a character of charset `indian-is13194' is built from CHAR's second
position code.  Any other character is returned unchanged."
  (let* ((parts (split-char char))
         (charset (car parts)))
    (cond ((and (eq charset 'indian-2-column)
                (= (car (cdr parts)) ?\x21))
           (make-char 'indian-is13194 (car (cdr (cdr parts)))))
          (t char))))
65
66 ;;;###autoload
(defun indian-to-devanagari-region (from to)
  "Replace IS 13194 characters between FROM and TO by Devanagari basic ones.
Interactively, operate on the region.  From a program, FROM and TO are
the positions (integers or markers) delimiting the text to convert.
Each character whose charset is `indian-is13194' is deleted and the
result of `indian-to-devanagari' is inserted in its place; all other
characters are stepped over.  Point is preserved."
  (interactive "r")
  (save-excursion
    (goto-char from)
    (while (< (point) to)
      (let ((ch (following-char)))
        (cond ((eq (char-charset ch) 'indian-is13194)
               ;; One character out, one character in: TO stays valid.
               (delete-char 1)
               (insert (indian-to-devanagari ch)))
              (t
               (forward-char 1)))))))
81
82 ;;;###autoload
(defun devanagari-to-indian-region (from to)
  "Replace Devanagari basic characters between FROM and TO by Indian ones.
Interactively, operate on the region.  From a program, FROM and TO are
the positions (integers or markers) delimiting the text to convert.
Each character whose charset is `indian-2-column' is deleted and the
result of `devanagari-to-indian' is inserted in its place; all other
characters are stepped over.  Point is preserved."
  (interactive "r")
  (save-excursion
    (goto-char from)
    (while (< (point) to)
      (let ((ch (following-char)))
        (cond ((eq (char-charset ch) 'indian-2-column)
               ;; One character out, one character in: TO stays valid.
               (delete-char 1)
               (insert (devanagari-to-indian ch)))
              (t
               (forward-char 1)))))))
97
98 ;;;###autoload
(defun indian-to-devanagari-string (string)
  "Return a new string made by mapping `indian-to-devanagari' over STRING.
Every character of STRING is converted individually; the result has
the same number of characters as STRING."
  (mapconcat (lambda (ch)
               (char-to-string (indian-to-devanagari ch)))
             string
             ""))
108
109 ;; Phase 0 - Determine whether the characters can be composed.
110 ;;
111 ;;;
112 ;;; Regular expressions to split characters for composition.
113 ;;;
114 ;;
115 ;; Indian script word contains one or more syllables.
116 ;; In BNF, it can be expressed as follows:
117 ;;
118 ;; Word ::= {Syllable} [Cons-Syllable]
119 ;; Syllable ::= Cons-Vowel-Syllable | Vowel-Syllable
120 ;; Vowel-Syllable ::= V[D]
121 ;; Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D]
122 ;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] [Pure-Cons] Pure-Cons
123 ;; Pure-Cons ::= Full-Cons H
124 ;; Full-Cons ::= C [N]
125 ;;
126 ;; {} repeat, [] optional
127 ;;
128 ;; C - Consonant ($(5!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A!B!C!D!E(B
129 ;; $(5!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X(B)
130 ;; N - Nukta ($(5!i(B)
131 ;; H - Halant($(5!h(B) or Virama
132 ;; V - Vowel ($(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2#&#'#*(B)
133 ;; ("$(5#&#'#*(B" can be obtained by IS13194 vowels with nukta.)
134 ;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu ($(5!!!"(B)
135 ;; (Visarga ($(5!#(B) is excluded.)
136 ;; M - Matra ($(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g#K#L#M(B)
137 ;; ("$(5#K#L#M(B" can be obtained by IS13194 matras with nukta.)
138 ;;
139 ;; In Emacs, one syllable of Indian language is considered to be one
140 ;; composite glyph. If we expand the above expression for
141 ;; cons-vowel-syllable, it would be:
142 ;;
143 ;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] [D]
144 ;;
145 ;; Therefore, in worst case, the one syllable may contain
146 ;; following characters.
147 ;;
148 ;; C N H C N H C N H C N H C N M D
149 ;;
150 ;; The example is a sanskrit word "kArtsnya", where five consecutive
151 ;; consonants appear.
152 ;;
153 ;; On the other hand, consonant-syllable, which appears at the end of
154 ;; the word, would have the following expression:
155 ;;
156 ;; [C [N] H] [C [N] H] [C [N] H] C [N] H
157 ;;
158 ;; This is acceptable BEFORE proper consonant-syllable is input. The
159 ;; string which doesn't match with the above expression is invalid and
160 ;; thus must be fixed.
161 ;;
162 ;; Note:
163 ;; A third case can be considered: a syllable that is acceptable but to
164 ;; which no further code can be added.
165 ;;
166 ;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] D
167 ;;
168 ;; However, to keep editing possible even in this condition, we do
169 ;; not treat this case specially.
170 ;;
171 ;; Note:
172 ;; Currently, it seems that only the following consonants can have a
173 ;; Nukta sign attached.
174 ;; ($(5!3!4!5!:!?!@!I(B)
175 ;; Therefore, [$(5!3(B-$(5!X(B]$(5!i(B? can be re-written as
176 ;; \\([$(5!3!4!5!:!?!@!I(B]$(5!i(B\\)\\|[$(5!3(B-$(5!X(B]
177
;; Full-Cons ::= C [N].  The regexp is an alternation: either one of the
;; consonants in the first bracket followed by the Nukta sign, or a
;; single character taken from the second bracket.
178 (defconst devanagari-full-cons
179 "\\(\\([$(5!3!4!5!:!?!@!I(B]$(5!i(B\\)\\|[$(5!3(B-$(5!X$.$E"%(B]\\)"
180 "Devanagari full consonant")
181
;; Pure-Cons ::= Full-Cons H.  Built by appending the Halant sign to
;; `devanagari-full-cons' and wrapping the result in a group.
182 (defconst devanagari-pure-cons
183 (concat "\\(" devanagari-full-cons "$(5!h(B\\)")
184 "Devanagari pure consonant")
185
;; M - Matra.  Either one of three matras followed by the Nukta sign, or
;; a single character from the second bracket.
186 (defconst devanagari-matra
187 "\\(\\([$(5!_![!\(B]$(5!i(B\\)\\|[$(5!Z(B-$(5!g#K#L#M(B]\\)"
188 "Devanagari Matra Signs. '$(5#K#L#M(B' can also be created from the combination
189 of '$(5!_![!\(B' and nukta sign.")
190
;; V - Vowel.  Same shape as `devanagari-matra': a vowel plus Nukta, or a
;; single character from the second bracket.
191 (defconst devanagari-vowel
192 "\\(\\([$(5!*!&!'(B]$(5!i(B\\)\\|[$(5!$(B-$(5!2#&#'#*(B]\\)"
193 "Devanagari Vowels. '$(5#&#'#*(B' can also be created from the combination
194 of '$(5!*!&!'(B' and nukta sign.")
195
;; Vowel-Syllable ::= V [D]: a vowel optionally followed by one vowel
;; modifier.
196 (defconst devanagari-vowel-syllable
197 (concat devanagari-vowel "[$(5!!!"(B]?")
198 "Devanagari vowel syllable.")
199
;; Cons-Syllable: up to three optional pure consonants followed by one
;; mandatory pure consonant.  The trailing "$" anchors the match at the
;; end of the text.
200 (defconst devanagari-cons-syllable
201 (concat devanagari-pure-cons "?" devanagari-pure-cons "?"
202 devanagari-pure-cons "?" devanagari-pure-cons "$")
203 "Devanagari consonant syllable")
204
;; Cons-Vowel-Syllable: an optional group of one to four pure consonants,
;; then a full consonant, an optional matra and an optional vowel
;; modifier.
205 (defconst devanagari-cons-vowel-syllable
206 (concat "\\("
207 devanagari-pure-cons "?" devanagari-pure-cons "?"
208 devanagari-pure-cons "?" devanagari-pure-cons "\\)?"
209 devanagari-full-cons devanagari-matra "?[$(5!!!"(B]?")
210 "Devanagari consonant vowel syllable.")
211
212 ;;
213 ;; Also, digits and virams should be processed separately from syllables.
214 ;;
215 ;; In IS 13194, Avagrah is obtained by Nukta after Viram, and
216 ;; OM is obtained by Nukta after Chandrabindu
217 ;;
218
;; A single character from one bracket expression; per the name and the
;; comment above it covers digits, viram and visarga.  No docstring.
219 (defconst devanagari-digit-viram-visarga
220 "[$(5!q(B-$(5!z!j!#(B]")
221
;; Either one of two signs followed by Nukta, or one character from the
;; second bracket.  Note the alternation is at top level, so users must
;; wrap this regexp in a group (as the composite regexp below does).
222 (defconst devanagari-other-sign
223 "\\([$(5!!!j(B]$(5!i(B\\)\\|\\([$(5#!#J(B]\\)")
224
;; Alternation of the five unit regexps above, each wrapped in its own
;; group, in this order: consonant syllable, vowel syllable,
;; consonant-vowel syllable, other sign, digit/viram/visarga.
225 (defconst devanagari-composite-glyph-unit
226 (concat "\\(" devanagari-cons-syllable
227 "\\)\\|\\(" devanagari-vowel-syllable
228 "\\)\\|\\(" devanagari-cons-vowel-syllable
229 "\\)\\|\\(" devanagari-other-sign
230 "\\)\\|\\(" devanagari-digit-viram-visarga "\\)")
231 "Regexp matching to Devanagari string to be composed form one glyph.")
232
233 ;;(put-charset-property charset-devanagari-1-column
234 ;; 'char-to-glyph 'devanagari-compose-string)
235 ;;(put-charset-property charset-devanagari-2-column
236 ;; 'char-to-glyph 'devanagari-compose-string)
237
238 ;; Sample
239 ;;
240 ;;(string-match devanagari-cons-vowel-syllable-examine "$(5!X![(B") => 0
241 ;;(string-match devanagari-cons-vowel-syllable-examine "$(5!F!h!D!\(B") => 0
242 ;;(string-match devanagari-cons-vowel-syllable-examine "$(5!X![!F!h!D!\(B") => 0
243
244 ;;
245 ;; Steps toward the composition
246 ;; Converting Character Codes to Composite Glyph.
247 ;;
248 ;; Example : $(5!X![(B/$(5!F!h!D!\(B
249 ;;
250 ;; First, convert Characters to appropriate glyphs.
251 ;;
252 ;; => $(5!X![(B/$(5"F!D!\(B
253 ;;
254 ;; Then, determine the base glyph, apply-orders and apply-rules.
255 ;;
256 ;; => $(5!X(B (ml.mr) $(5![(B / $(5!D(B (ml.mr) $(5"F(B (mr ml) $(5!\(B
257 ;;
258 ;; Finally, convert 2-column glyphs to 1-column glyph
259 ;; if such a glyph exist.
260 ;;
261 ;; => $(6!X(B (ml.mr) $(6![(B / $(6!D(B (ml.mr) $(6"F(B (mr ml) $(6!\(B
262 ;;
263 ;; Compose the glyph.
264 ;;
265 ;; => 4$(6!Xt%![0!X![1(B/4$(6!Dt%"Fv#!\0!D"F!\1(B
266 ;; => 4$(6!Xt%![0!X![14!Dt%"Fv#!\0!D"F!\1(B
267 ;;
268
269 ;;
270 ;; Phase 1: Converting Character Code to Glyph Code.
271 ;;
272 ;;
273 ;; IMPORTANT:
274 ;; There may be many rules that you may want to suppress.
275 ;; In that case, please comment out that rule.
276 ;;
277 ;; RULES WILL BE EVALUATED FROM FIRST TO LAST.
278 ;; PUT MORE SPECIFIC RULES FIRST.
279 ;;
280 ;; TO DO:
281 ;; Prepare a separate, specific list of rules for each language
282 ;; that adopts the Devanagari script.
283 ;;
284
;; Each entry has the form (REGEXP GLYPH-STRING [RULE-SPEC ...]).
;; The text matched by the \\(...\\) group of REGEXP is what gets replaced
;; by GLYPH-STRING; anything outside the group is only context.  A
;; trailing symbol such as `sanskrit' is a RULE-SPEC restricting when the
;; rule applies.  Order matters: entries are tried from first to last.
285 (defconst devanagari-char-to-glyph-rules
286 '(
287
288 ;; `r' at the top of syllable and followed by other consonants.
289 ;; ("[^$(5!h(B]\\($(5!O!h(B\\)[$(5!3(B-$(5!X(B]" "$(5"p(B")
290 ("^\\($(5!O!h(B\\)[$(5!3(B-$(5!X(B]" "$(5"p(B")
291
292 ;; Ligature Rules
293 ("\\($(5!3!h!B!h!O!h!M(B\\)" "$(5$!(B" sanskrit)
294 ("\\($(5!3!h!B!h!T(B\\)" "$(5$"(B" sanskrit)
295 ("\\($(5!3!h!B!h!M(B\\)" "$(5$#(B" sanskrit)
296 ("\\($(5!3!h!F!h!M(B\\)" "$(5$$(B")
297 ("\\($(5!3!h!O!h!M(B\\)" "$(5$%(B")
298 ("\\($(5!3!h!O(B\\)" "$(5"#(B") ; Post "r"
299 ("\\($(5!3!h!T!h!M(B\\)" "$(5$&(B" sanskrit)
300 ("\\($(5!3!h(B\\)$(5!3!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"3(B") ; Special Half Form
301 ("\\($(5!3!h!3(B\\)" "$(5$'(B")
302 ("\\($(5!3!h(B\\)$(5!B!h!O(B" "$(5"3(B") ; Special Rules for "k-tr"
303 ("\\($(5!3!h!B(B\\)" "$(5$((B")
304 ("\\($(5!3!h!F(B\\)" "$(5$)(B")
305 ("\\($(5!3!h!L(B\\)" "$(5$*(B")
306 ("\\($(5!3!h!M(B\\)" "$(5$+(B")
307 ("\\($(5!3!h!Q(B\\)" "$(5$,(B")
308 ("\\($(5!3!h!T(B\\)" "$(5$-(B")
309 ("\\($(5!3!h!V!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"l(B") ; Half Form
310 ("\\($(5$.!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"l(B") ; Half Form
311 ("\\($(5!3!h!V(B\\)" "$(5$.(B")
312 ("\\($(5!3!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"3(B") ; Half Form
313 ("\\($(5!3!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"s(B") ; Nukta Half Form
314 ("\\($(5!3!i(B\\)" "$(5#3(B") ; Nukta
315 ("\\($(5!4!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"4(B") ; Half Form
316 ("\\($(5!4!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"t(B") ; Nukta Half Form
317 ("\\($(5!4!i(B\\)" "$(5#4(B") ; Nukta
318 ("\\($(5!5!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"`(B") ; Half Form
319 ("\\($(5!5!h!O(B\\)" "$(5"$(B") ; Post "r"
320 ("\\($(5!5!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"5(B") ; Half Form
321 ("\\($(5!5!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"u(B") ; Nukta Half Form
322 ("\\($(5!5!i(B\\)" "$(5#5(B") ; Nukta
323 ("\\($(5!6!h!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"a(B") ; Half Form
324 ("\\($(5!6!h!F(B\\)" "$(5$/(B")
325 ; Slot
326 ("\\($(5!6!h!O(B\\)" "$(5!6"q(B") ; Post "r"
327 ("\\($(5!6!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"6(B") ; Half Form
328 ("\\($(5!7!h!3!h!B!h!M(B\\)" "$(5$0(B" sanskrit)
329 ("\\($(5!7!h!3!h!V!h!T(B\\)" "$(5$1(B" sanskrit)
330 ("\\($(5!7!h!3!h!B(B\\)" "$(5$2(B" sanskrit)
331 ("\\($(5!7!h!3!h!V(B\\)" "$(5$3(B" sanskrit)
332 ("\\($(5!7!h!3!h!O(B\\)" "$(5$9"q(B") ; Special Rule. May be precomposed font needed.
333 ("\\($(5!7!h!6!h!O(B\\)" "$(5$4(B" sanskrit)
334 ("\\($(5!7!h!3!h!M(B\\)" "$(5$5(B" sanskrit)
335 ("\\($(5!7!h!4!h!M(B\\)" "$(5$6(B" sanskrit)
336 ("\\($(5!7!h!5!h!M(B\\)" "$(5$7(B" sanskrit)
337 ("\\($(5!7!h!6!h!M(B\\)" "$(5$8(B" sanskrit)
338 ("\\($(5!7!h!3(B\\)" "$(5$9(B")
339 ("\\($(5!7!h!4(B\\)" "$(5$:(B")
340 ("\\($(5!7!h!5!h!O(B\\)" "$(5$;"q(B") ; Special Rule. May be precomposed font needed.
341 ("\\($(5!7!h!5(B\\)" "$(5$;(B")
342 ("\\($(5!7!h!6(B\\)" "$(5$<(B")
343 ("\\($(5!7!h!7(B\\)" "$(5$=(B")
344 ("\\($(5!7!h!F(B\\)" "$(5$>(B")
345 ("\\($(5!7!h!L(B\\)" "$(5$?(B")
346 ("\\($(5!7!h!M(B\\)" "$(5$@(B")
347 ("\\($(5!8!h(B\\)[$(5!8!<(B]$(5!h(B" "$(5"8(B") ; Half Form
348 ("\\($(5!8!h!8(B\\)" "$(5$A(B")
349 ("\\($(5!8!h!<(B\\)" "$(5$B(B")
350 ("\\($(5!8!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"8"q(B") ; Half Form Post "r"
351 ("\\($(5!8!h!O(B\\)" "$(5!8"q(B") ; Post "r"
352 ("\\($(5!8!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"8(B") ; Half Form
353 ("\\($(5!9!h!M(B\\)" "$(5$C(B")
354 ("\\($(5!:!h!O(B\\)" "$(5$D(B")
355 ("\\($(5!:!h!<!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"m(B") ; Half Form
356 ("\\($(5!:!h!<(B\\)" "$(5$E(B")
357 ("\\($(5!:!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5":(B") ; Half Form
358 ("\\($(5!:!i!h!O(B\\)" "$(5"!(B") ; Nukta Post "r"
359 ("\\($(5!:!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"z(B") ; Nukta Half Form
360 ("\\($(5!:!i(B\\)" "$(5#:(B") ; Nukta
361 ("\\($(5!;!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5";(B") ; Half Form
362 ("\\($(5!<!h(B\\)$(5!8!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"<(B") ; Special Half Form
363 ("\\($(5!<!h!8(B\\)" "$(5$F(B")
364 ("\\($(5!<!h(B\\)$(5!:!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"<(B") ; Special Half Form
365 ("\\($(5!<!h!:(B\\)" "$(5$G(B")
366 ("\\($(5!<!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"<(B") ; Half Form
367 ("\\($(5!=!h!3(B\\)" "$(5$H(B")
368 ("\\($(5!=!h!=(B\\)" "$(5$I(B")
369 ("\\($(5!=!h!>(B\\)" "$(5$J(B")
370 ("\\($(5!=!h!M(B\\)" "$(5$K(B")
371 ("\\($(5!>!h!M(B\\)" "$(5$L(B")
372 ("\\($(5!?!h!5!h!M(B\\)" "$(5$M(B" sanskrit)
373 ("\\($(5!?!h!6!h!O(B\\)" "$(5$N(B" sanskrit)
374 ("\\($(5!?!h!O!h!M(B\\)" "$(5$O(B")
375 ("\\($(5!?!h!5(B\\)" "$(5$P(B")
376 ("\\($(5!?!h!6(B\\)" "$(5$Q(B")
377 ("\\($(5!?!h!?(B\\)" "$(5$R(B")
378 ("\\($(5!?!h!L(B\\)" "$(5$S(B")
379 ("\\($(5!?!h!M(B\\)" "$(5$T(B")
380 ("\\($(5!?!i(B\\)" "$(5#?(B") ; Nukta
381 ("\\($(5!@!h!M(B\\)" "$(5$`(B")
382 ("\\($(5!@!i(B\\)" "$(5#@(B") ; Nukta
383 ("\\($(5!A!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"A(B") ; Half Form
384 ("\\($(5!B!h(B\\)$(5!B!h!O(B" "$(5"B(B") ; Special Rule for "t-tr"
385 ("\\($(5!B!h!B!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"c(B") ; Half Form
386 ("\\($(5!B!h!B(B\\)" "$(5$a(B")
387 ("\\($(5!B!h!F(B\\)" "$(5$b(B")
388 ("\\($(5!B!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"d(B") ; Half Form Post "r"
389 ("\\($(5!B!h!O(B\\)" "$(5"%(B") ; Post "r"
390 ("\\($(5!B!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"B(B") ; Half Form
391 ("\\($(5!C!h!O(B\\)" "$(5!C"q(B") ; Post "r"
392 ("\\($(5!C!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"C(B") ; Half Form
393 ("\\($(5!D!h!D!h!M(B\\)" "$(5$c(B")
394 ("\\($(5!D!h!E!h!M(B\\)" "$(5$d(B")
395 ("\\($(5!D!h!K!h!M(B\\)" "$(5$e(B")
396 ("\\($(5!D!h!K!h!O(B\\)" "$(5$r"r(B") ; Special Case for "dbhr" ; ***
397 ("\\($(5!D!h!O!h!M(B\\)" "$(5$f(B")
398 ("\\($(5!D!h!T!h!M(B\\)" "$(5$g(B")
399 ("\\($(5!D!h!5!h!O(B\\)" "$(5$h(B")
400 ("\\($(5!D!h!6!h!O(B\\)" "$(5$i(B")
401 ("\\($(5!D!h!D!h!T(B\\)" "$(5$j(B")
402 ("\\($(5!D!h!E!h!T(B\\)" "$(5$k(B")
403 ("\\($(5!D!h(B\\)$(5!E!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5!D!h(B") ; Special Half Form (for ddhra)
404 ("\\($(5!D!h!5(B\\)" "$(5$l(B")
405 ("\\($(5!D!h!6(B\\)" "$(5$m(B")
406 ("\\($(5!D!h!D(B\\)" "$(5$n(B")
407 ("\\($(5!D!h!E(B\\)" "$(5$o(B")
408 ("\\($(5!D!h!F(B\\)" "$(5$p(B")
409 ("\\($(5!D!h(B\\)$(5!J!h(B" "$(5!D!h(B") ; Suppressing "db-"
410 ("\\($(5!D!h!J(B\\)" "$(5$q(B")
411 ("\\($(5!D!h!K(B\\)" "$(5$r(B")
412 ("\\($(5!D!h!L(B\\)" "$(5$s(B")
413 ("\\($(5!D!h!M(B\\)" "$(5$t(B")
414 ("\\($(5!D!h!T(B\\)" "$(5$u(B")
415 ("\\($(5!E!h!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"e(B") ; Half Form
416 ("\\($(5!E!h!F(B\\)" "$(5$v(B")
417 ("\\($(5!E!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"f(B") ; Half Form Post "r"
418 ("\\($(5!E!h!O(B\\)" "$(5!E"q(B") ; Post "r"
419 ("\\($(5!E!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"E(B") ; Half Form
420 ("\\($(5!F!h!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"k(B") ; Half Form
421 ("\\($(5!F!h!F(B\\)" "$(5$w(B")
422 ("\\($(5!F!h!O(B\\)" "$(5!F"q(B")
423 ("\\($(5!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"F(B") ; Half Form
424 ("\\($(5!G!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"G(B") ; Nukta Half Form
425 ("\\($(5!H!h(B\\)$(5!B!h!O(B" "$(5"H(B") ; Special Rule for "p-tr"
426 ("\\($(5!H!h!B!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"g(B") ; Half Form
427 ("\\($(5!H!h!B(B\\)" "$(5$x(B")
428 ("\\($(5!H!h!F(B\\)" "$(5$y(B")
429 ("\\($(5!H!h!Q(B\\)" "$(5$z(B")
430 ("\\($(5!H!h!O(B\\)" "$(5"&(B") ; Post "r"
431 ("\\($(5!H!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"H(B") ; Half Form
432 ("\\($(5!I!h!O(B\\)" "$(5"'(B") ; Post "r"
433 ("\\($(5!I!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"I(B") ; Half Form
434 ("\\($(5!I!i!h!O(B\\)" "$(5""(B") ; Nukta Post "r"
435 ("\\($(5!I!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"y(B") ; Nukta Half Form
436 ("\\($(5!I!i(B\\)" "$(5#I(B") ; Nukta
437 ("\\($(5!J!h(B\\)$(5!F!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"J(B") ; Special Half Form
438 ("\\($(5!J!h!F(B\\)" "$(5${(B")
439 ("\\($(5!J!h(B\\)$(5!J!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"J(B") ; Special Half Form
440 ("\\($(5!J!h!J(B\\)" "$(5$|(B")
441 ("\\($(5!J!h(B\\)$(5!T!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"J(B") ; Special Half Form
442 ("\\($(5!J!h!T(B\\)" "$(5$}(B")
443 ("\\($(5!J!h!O(B\\)" "$(5!J"q(B") ; Post "r"
444 ("\\($(5!J!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"J(B") ; Half Form
445 ("\\($(5!K!h!F(B\\)" "$(5$~(B")
446 ("\\($(5!K!h!O(B\\)" "$(5!K"q(B") ; Post "r"
447 ("\\($(5!K!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"K(B") ; Half Form
448 ("\\($(5!L!h!F(B\\)" "$(5#P(B")
449 ("\\($(5!L!h!Q(B\\)" "$(5#Q(B")
450 ("\\($(5!L!h!O(B\\)" "$(5!L"q(B") ; Post "r"
451 ("\\($(5!L!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"L(B") ; Half Form
452 ("\\($(5!M!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"M(B") ; Half Form
453 ("\\($(5!N!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"N(B") ; Half Form
454 ;; special form for "ru".
455 ("\\($(5!O!](B\\)" "$(5",(B")
456 ("\\($(5!O!^(B\\)" "$(5"-(B")
457 ("\\($(5!P!](B\\)" "$(5".(B")
458 ("\\($(5!P!^(B\\)" "$(5"/(B")
459 ;;
460 ("\\($(5!Q!h!Q(B\\)" "$(5#`(B" sanskrit)
461 ("\\($(5!Q!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"Q(B") ; Half Form
462 ("\\($(5!R!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"R(B") ; Half Form
463 ("\\($(5!S!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"S(B") ; Half Form
464 ("\\($(5!T!h!F(B\\)" "$(5#a(B")
465 ("\\($(5!T!h!T(B\\)" "$(5#b(B")
466 ("\\($(5!T!h!O(B\\)" "$(5!T"q(B") ; Post "r"
467 ("\\($(5!T!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"T(B") ; Half Form
468 ("\\($(5!U!h!8!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"h(B") ; Half Form
469 ("\\($(5!U!h!8(B\\)" "$(5#c(B")
470 ("\\($(5!U!h!F(B\\)" "$(5#d(B")
471 ("\\($(5!U!h!J(B\\)" "$(5#e(B")
472 ("\\($(5!U!h!Q(B\\)" "$(5#f(B")
473 ("\\($(5!U!h(B\\)$(5!T!h!O(B" "$(5"U(B") ; Special Half Form
474 ("\\($(5!U!h!T!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"j(B") ; Half Form
475 ; ("\\($(5!U!h!T(B\\)" "$(5#g(B")
476 ("\\($(5!U!h!O!h!T(B\\)" "$(5#g(B")
477 ("\\($(5!U!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"i(B") ; Half Form
478 ("\\($(5!U!h!O(B\\)" "$(5")(B") ; Post "r"
479 ("\\($(5!U!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"U(B") ; Half Form
480 ("\\($(5!V!h!=!h!O!h!M(B\\)" "$(5#h(B")
481 ("\\($(5!V!h!=!h!M(B\\)" "$(5#i(B")
482 ("\\($(5!V!h!=!h!T(B\\)" "$(5#j(B")
483 ("\\($(5!V!h!=(B\\)" "$(5#k(B")
484 ("\\($(5!V!h!>(B\\)" "$(5#l(B")
485 ("\\($(5!V!h!O(B\\)" "$(5!V"q(B") ; Post "r"
486 ("\\($(5!V!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"V(B") ; Half Form
487 ("\\($(5!W!h!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"W"F(B") ; Special Half Form
488 ("\\($(5!W!h!F(B\\)" "$(5#m(B")
489 ("\\($(5!W!h!O(B\\)" "$(5#n(B")
490 ("\\($(5!W!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"W(B") ; Half Form
491 ("\\($(5!X!h!A(B\\)" "$(5#p(B")
492 ("\\($(5!X!h!F(B\\)" "$(5#q(B")
493 ("\\($(5!X!h!L(B\\)" "$(5#r(B")
494 ("\\($(5!X!h!M(B\\)" "$(5#s(B")
495 ("\\($(5!X!h!O(B\\)" "$(5#t(B")
496 ("\\($(5!X!h!Q(B\\)" "$(5#u(B")
497 ("\\($(5!X!h!T(B\\)" "$(5#v(B")
498 ;; Special Ligature Rules
499 ("\\($(5!X!_(B\\)" "$(5#R(B")
500
501 ;; For consonants other than listed above, glyph-composition will
502 ;; be applied. If the consonant which is preceding "$(5!O(B" does not
503 ;; have the vertical line (such as "$(5!?(B"), "$(5"r(B" is put beneath the
504 ;; consonant.
505 ;;
506 ("[$(5!7!9!=!>!?!@!D!O!P!R!S!X(B]\\($(5!h!O(B\\)" "$(5"r(B")
507 ("[$(5!6!8!C!E!F!H!J!K!L!M!T!V(B]\\($(5!h!O(B\\)" "$(5"q(B")
;; NOTE(review): the next two rules have a literal two-character prefix
;; before the group instead of a bracket expression.  The converter
;; derives its search start from `max-match-len' of that prefix, so these
;; rules only fire if literal prefix characters are counted -- confirm.
508 ("$(5!?!i(B\\($(5!h!O(B\\)" "$(5"r(B")
509 ("$(5!@!i(B\\($(5!h!O(B\\)" "$(5"r(B")
510
511 ;; Nukta with Non-Consonants
512 ("\\($(5!!!i(B\\)" "$(5#!(B")
513 ("\\($(5!&!i(B\\)" "$(5#&(B")
514 ("\\($(5!'!i(B\\)" "$(5#'(B")
515 ("\\($(5!*!i(B\\)" "$(5#*(B")
516 ("\\($(5![!i(B\\)" "$(5#L(B")
517 ("\\($(5!\!i(B\\)" "$(5#M(B")
518 ("\\($(5!_!i(B\\)" "$(5#K(B")
519 ("\\($(5!j!i(B\\)" "$(5#J(B")
520
521 ;; Special rule for "r + some vowels"
522 ("\\($(5!O!_!i(B\\)" "$(5#*"p(B")
523 ("\\($(5!O![!i(B\\)" "$(5#&"p(B")
524 ("\\($(5!O!\!i(B\\)" "$(5#'"p(B")
525 ("\\($(5!O!_(B\\)" "$(5!*"p(B")
526 ;; If everything fails, "y" will connect to the front consonant.
527 ("\\($(5!h!M(B\\)" "$(5"](B")
528 )
529 "Alist of regexps of Devanagari character sequences vs composed characters.")
530
;; Load-time registration of `devanagari-char-to-glyph-rules'.
;; For every rule, the text inside the first \\(...\\) group of its regexp
;; is extracted; the rule is appended to the `char-to-glyph' property of
;; that text's first character, so lookup by character yields the rules
;; in table order.  The first character of the replacement string is
;; additionally given a `glyph-to-char' property for the reverse mapping.
531 (let ((rules devanagari-char-to-glyph-rules))
532 (while rules
533 (let ((rule (car rules))
534 (chars) (char) (glyphs) (glyph))
535 (setq rules (cdr rules))
;; The match data set here is consumed by the next form; every rule is
;; expected to contain a group.
536 (string-match "\\\\(\\(.+\\)\\\\)" (car rule))
537 (setq chars (substring (car rule) (match-beginning 1) (match-end 1)))
538 (setq char (string-to-char chars))
;; GLYPHS is the rule without its regexp: (GLYPH-STRING [RULE-SPEC ...]).
539 (setq glyphs (cdr rule))
540 (setq glyph (string-to-char (car glyphs)))
541 (put-char-code-property
542 char 'char-to-glyph
543 ;; We don't "cons" it since priority is top to down.
544 (append (get-char-code-property char 'char-to-glyph) (list rule)))
545
546 (if (and (< ?(5z(B glyph) ; Glyphs only.
547 (null (get-char-code-property glyph 'glyph-to-char)))
548 ; One glyph may correspond to multiple characters,
549 ; e.g., surrounding vowel in Tamil, etc.
550 ; but for Devanagari, we put this restriction
551 ; to make sure that one glyph corresponds to one char.
;; Only the first rule producing a given glyph is recorded, as a
;; one-element list ((GLYPH-STRING CHARS)).
552 (put-char-code-property
553 glyph 'glyph-to-char
554 (cons (list (car glyphs) chars)
555 (get-char-code-property glyph 'glyph-to-char)
556 ))))))
557
558 ;;
559 ;; Function used in both characters-to-glyphs conversion and
560 ;; glyphs-to-characters conversion.
561 ;;
562
(defun max-match-len (regexp)
  "Return the maximum length of text that can match the pattern REGEXP.
Only [...] pattern of regexp is recognized: each bracket expression
counts as one character, and every other character of REGEXP counts as
one character, including the characters that follow the last bracket
expression (or all of REGEXP when it contains no bracket expression)."
  (let ((len 0)
        (index 0))
    (while (string-match "\\[\\([^\]]\\)+\\]" regexp index)
      ;; Literal characters preceding this bracket expression, plus one
      ;; character for the bracket expression itself.
      (setq len (+ len (- (match-beginning 0) index) 1)
            index (match-end 0)))
    ;; Literal text after the last bracket expression must be counted
    ;; too; otherwise a purely literal REGEXP would yield 0 and callers
    ;; using the result as a look-behind distance would start too late.
    (+ len (- (length regexp) index))))
572
(defun intersecting-p (list1 list2)
  "Return t if at least one element of LIST1 is also in LIST2, else nil.
Membership is tested with `memq', i.e. elements are compared with `eq'."
  (let ((rest list1))
    ;; Advance until REST starts with a shared element or is exhausted.
    (while (and rest
                (not (memq (car rest) list2)))
      (setq rest (cdr rest)))
    (if rest t nil)))
581
(defun string-conversion-by-rule (source symbol &rest specs)
  "Convert string SOURCE by rules stored in SYMBOL property of each character.
The remaining arguments form a list SPECS that restricts applicable rules.

The rules have the form ((REGEXP STR RULE-SPEC ...) ...).
Each character sequence in SOURCE that matches REGEXP is
replaced by STR.

If SPECS is nil, only rules with no RULE-SPECs are applied.  Otherwise
rules with no RULE-SPECs and rules that have at least one member of
SPECS in RULE-SPECs are applied.

Rules are tested in the order of the list, thus more specific rules
should be placed in front of less specific rules.

If a rule is given in the form of regexp '...\\(...\\)...', the character
sequence that matches the pattern inside of the parentheses is the
subject of the match.  Otherwise, the entire expression is the subject
of the match.

Return the converted string; SOURCE itself is not modified."
  (let ((pos 0)
        (len (length source))
        ;; Converted fragments, most recent first; joined once at the end
        ;; instead of re-copying the whole result on every step.
        (pieces nil))
    (while (< pos len)
      (let ((found nil)
            ;; Rules are indexed by the character at POS.
            (rules (get-char-code-property (aref source pos) symbol)))
        (while rules
          (let* ((rule (car rules))
                 (regexp (car rule))
                 (replace-str (car (cdr rule)))
                 (rule-specs (cdr (cdr rule)))
                 search-pos)
            (setq rules (cdr rules))
            (if (or (null rule-specs)
                    (intersecting-p specs rule-specs))
                (progn
                  (if (string-match "\\\\(.+\\\\)" regexp)
                      ;; The rule has context before its group: start the
                      ;; search early enough for that context to match.
                      (setq search-pos
                            (- pos (max-match-len
                                    (substring regexp
                                               (string-match "^[^\\\\]*" regexp)
                                               (match-end 0)))))
                    ;; No explicit group: the whole regexp is the subject.
                    (setq regexp (concat "\\(" regexp "\\)")
                          search-pos pos))
                  (if (< search-pos 0) (setq search-pos 0))
                  ;; The rule applies only if its subject starts exactly
                  ;; at POS; a match further on is ignored here.
                  (if (and (string-match regexp source search-pos)
                           (= (match-beginning 1) pos))
                      (setq pieces (cons replace-str pieces)
                            ;; Skip over the replaced characters.
                            pos (match-end 1)
                            found t
                            rules nil)))))) ; Get out of the loop.
        ;; No rule applied: copy one character and move on.
        (if (not found)
            (setq pieces (cons (substring source pos (1+ pos)) pieces)
                  pos (1+ pos)))))
    (mapconcat 'identity (nreverse pieces) "")))
641
642
643 ;;
644 ;; Convert Character Code to Glyph Code
645 ;;
646
647 ;;;###autoload
(defun char-to-glyph-devanagari (string &rest langs)
  "Return STRING with its Devanagari characters converted to glyphs.
The conversion is done by `string-conversion-by-rule' using the
`char-to-glyph' property of each character, so ligatures and special
rules are processed.  LANGS are passed on as the rule specs that
select which restricted rules may apply."
  (apply 'string-conversion-by-rule string 'char-to-glyph langs))
654
655 ;; Example:
656 ;;(char-to-glyph-devanagari "$(5!X![!F!h!D!\(B") => "$(5!X!["F!D!\(B"
657 ;;(char-to-glyph-devanagari "$(5!O!Z!V!h!=!h!O![!M(B") => ???
658
659 ;;
660 ;; Phase 2: Compose Glyphs to form One Glyph.
661 ;;
662
663 ;; Each list consists of glyph, application-priority and application-direction.
664 ;;
665 ;; Glyphs will be ordered from low priority number to high priority number.
666 ;; If application-priority is omitted, it is assumed to be 0.
667 ;; If application-direction is omitted, it is assumed to be '(mr . ml).
668
669 (defconst devanagari-composition-rules
670 '((?$(5!!(B 0 (tr . br))
671 (?$(5!"(B 0 (mr . mr))
672 (?$(5!#(B 0)
673 (?$(5!$(B 0)
674 (?$(5!%(B 0)
675 (?$(5!&(B 0)
676 (?$(5!'(B 0)
677 (?$(5!((B 0)
678 (?$(5!)(B 0)
679 (?$(5!*(B 0)
680 (?$(5!+(B 0)
681 (?$(5!,(B 0)
682 (?$(5!-(B 0)
683 (?$(5!.(B 0)
684 (?$(5!/(B 0)
685 (?$(5!0(B 0)
686 (?$(5!1(B 0)
687 (?$(5!2(B 0)
688 (?$(5!3(B 0)
689 (?$(5!4(B 0)
690 (?$(5!5(B 0)
691 (?$(5!6(B 0)
692 (?$(5!7(B 0)
693 (?$(5!8(B 0)
694 (?$(5!9(B 0)
695 (?$(5!:(B 0)
696 (?$(5!;(B 0)
697 (?$(5!<(B 0)
698 (?$(5!=(B 0)
699 (?$(5!>(B 0)
700 (?$(5!?(B 0)
701 (?$(5!@(B 0)
702 (?$(5!A(B 0)
703 (?$(5!B(B 0)
704 (?$(5!C(B 0)
705 (?$(5!D(B 0)
706 (?$(5!E(B 0)
707 (?$(5!F(B 0)
708 (?$(5!G(B 0)
709 (?$(5!H(B 0)
710 (?$(5!I(B 0)
711 (?$(5!J(B 0)
712 (?$(5!K(B 0)
713 (?$(5!L(B 0)
714 (?$(5!M(B 0)
715 (?$(5!N(B 0)
716 (?$(5!O(B 0)
717 (?$(5!P(B 0)
718 (?$(5!Q(B 0)
719 (?$(5!R(B 0)
720 (?$(5!S(B 0)
721 (?$(5!T(B 0)
722 (?$(5!U(B 0)
723 (?$(5!V(B 0)
724 (?$(5!W(B 0)
725 (?$(5!X(B 0)
726 (?$(5!Y(B 0)
727 (?$(5!Z(B 0)
728 (?$(5![(B 0 (ml . mr))
729 (?$(5!\(B 0)
730 (?$(5!](B 0 (br . tr))
731 (?$(5!^(B 0 (br . tr))
732 (?$(5!_(B 0 (br . tr))
733 (?$(5!`(B 0 (mr . mr)) ; (tc . bc)
734 (?$(5!a(B 0 (mr . mr))
735 (?$(5!b(B 0 (mr . mr))
736 (?$(5!c(B 0 (mr . mr))
737 (?$(5!d(B 0)
738 (?$(5!e(B 0)
739 (?$(5!f(B 0)
740 (?$(5!g(B 0)
741 (?$(5!h(B 0 (br . tr))
742 (?$(5!i(B 0 (br . tr))
743 (?$(5!j(B 0)
744 (nil 0)
745 (nil 0)
746 (nil 0)
747 (nil 0)
748 (nil 0)
749 (nil 0)
750 (?$(5!q(B 0)
751 (?$(5!r(B 0)
752 (?$(5!s(B 0)
753 (?$(5!t(B 0)
754 (?$(5!u(B 0)
755 (?$(5!v(B 0)
756 (?$(5!w(B 0)
757 (?$(5!x(B 0)
758 (?$(5!y(B 0)
759 (?$(5!z(B 0)
760 (nil 0)
761 (nil 0)
762 (nil 0)
763 (nil 0)
764 (?$(5"!(B 0)
765 (?$(5""(B 0)
766 (?$(5"#(B 0)
767 (?$(5"$(B 0)
768 (?$(5"%(B 0)
769 (?$(5"&(B 0)
770 (?$(5"'(B 0)
771 (?$(5"((B 0)
772 (?$(5")(B 0)
773 (?$(5"*(B 0)
774 (?$(5"+(B 0)
775 (?$(5",(B 0)
776 (?$(5"-(B 0)
777 (?$(5".(B 0)
778 (?$(5"/(B 0)
779 (?$(5"0(B 0)
780 (?$(5"1(B 0)
781 (?$(5"2(B 0)
782 (?$(5"3(B 0)
783 (?$(5"4(B 0)
784 (?$(5"5(B 0)
785 (?$(5"6(B 0)
786 (?$(5"7(B 0)
787 (?$(5"8(B 0)
788 (?$(5"9(B 0)
789 (?$(5":(B 0)
790 (?$(5";(B 0)
791 (?$(5"<(B 0)
792 (?$(5"=(B 0)
793 (?$(5">(B 0)
794 (?$(5"?(B 0)
795 (?$(5"@(B 0)
796 (?$(5"A(B 0)
797 (?$(5"B(B 0)
798 (?$(5"C(B 0)
799 (?$(5"D(B 0)
800 (?$(5"E(B 0)
801 (?$(5"F(B 0)
802 (?$(5"G(B 0)
803 (?$(5"H(B 0)
804 (?$(5"I(B 0)
805 (?$(5"J(B 0)
806 (?$(5"K(B 0)
807 (?$(5"L(B 0)
808 (?$(5"M(B 0)
809 (?$(5"N(B 0)
810 (?$(5"O(B 0)
811 (?$(5"P(B 0)
812 (?$(5"Q(B 0)
813 (?$(5"R(B 0)
814 (?$(5"S(B 0)
815 (?$(5"T(B 0)
816 (?$(5"U(B 0)
817 (?$(5"V(B 0)
818 (?$(5"W(B 0)
819 (?$(5"X(B 0)
820 (?$(5"Y(B 0)
821 (?$(5"Z(B 0)
822 (?$(5"[(B 0)
823 (?$(5"\(B 0)
824 (?$(5"](B 0)
825 (?$(5"^(B 0)
826 (?$(5"_(B 0)
827 (?$(5"`(B 0)
828 (?$(5"a(B 0)
829 (?$(5"b(B 0)
830 (?$(5"c(B 0)
831 (?$(5"d(B 0)
832 (?$(5"e(B 0)
833 (?$(5"f(B 0)
834 (?$(5"g(B 0)
835 (?$(5"h(B 0)
836 (?$(5"i(B 0)
837 (?$(5"j(B 0)
838 (?$(5"k(B 0)
839 (?$(5"l(B 0)
840 (?$(5"m(B 0)
841 (?$(5"n(B 0)
842 (?$(5"o(B 0)
843 (?$(5"p(B 10 (mr . mr))
844 (?$(5"q(B 0 (br . br))
845 (?$(5"r(B 0 (br . tr))
846 (?$(5"s(B 0)
847 (?$(5"t(B 0)
848 (?$(5"u(B 0)
849 (?$(5"v(B 0)
850 (?$(5"w(B 0)
851 (?$(5"x(B 0)
852 (?$(5"y(B 0)
853 (?$(5"z(B 0)
854 (?$(5"{(B 0)
855 (?$(5"|(B 0)
856 (?$(5"}(B 0)
857 (?$(5"~(B 0)
858 (?$(5#!(B 0)
859 (?$(5#"(B 0)
860 (?$(5##(B 0)
861 (?$(5#$(B 0)
862 (?$(5#%(B 0)
863 (?$(5#&(B 0)
864 (?$(5#'(B 0)
865 (?$(5#((B 0)
866 (?$(5#)(B 0)
867 (?$(5#*(B 0)
868 (?$(5#+(B 0)
869 (?$(5#,(B 0)
870 (?$(5#-(B 0)
871 (?$(5#.(B 0)
872 (?$(5#/(B 0)
873 (?$(5#0(B 0)
874 (?$(5#1(B 0)
875 (?$(5#2(B 0)
876 (?$(5#3(B 0)
877 (?$(5#4(B 0)
878 (?$(5#5(B 0)
879 (?$(5#6(B 0)
880 (?$(5#7(B 0)
881 (?$(5#8(B 0)
882 (?$(5#9(B 0)
883 (?$(5#:(B 0)
884 (?$(5#;(B 0)
885 (?$(5#<(B 0)
886 (?$(5#=(B 0)
887 (?$(5#>(B 0)
888 (?$(5#?(B 0)
889 (?$(5#@(B 0)
890 (?$(5#A(B 0)
891 (?$(5#B(B 0)
892 (?$(5#C(B 0)
893 (?$(5#D(B 0)
894 (?$(5#E(B 0)
895 (?$(5#F(B 0)
896 (?$(5#G(B 0)
897 (?$(5#H(B 0)
898 (?$(5#I(B 0)
899 (?$(5#J(B 0)
900 (?$(5#K(B 0 (br . tr))
901 (?$(5#L(B 0 (br . tr))
902 (?$(5#M(B 0 (br . tr))
903 (?$(5#N(B 0)
904 (?$(5#O(B 0)
905 (?$(5#P(B 0)
906 (?$(5#Q(B 0)
907 (?$(5#R(B 0)
908 (?$(5#S(B 0)
909 (?$(5#T(B 0)
910 (?$(5#U(B 0)
911 (?$(5#V(B 0)
912 (?$(5#W(B 0)
913 (?$(5#X(B 0)
914 (?$(5#Y(B 0)
915 (?$(5#Z(B 0)
916 (?$(5#[(B 0)
917 (?$(5#\(B 0)
918 (?$(5#](B 0)
919 (?$(5#^(B 0)
920 (?$(5#_(B 0)
921 (?$(5#`(B 0)
922 (?$(5#a(B 0)
923 (?$(5#b(B 0)
924 (?$(5#c(B 0)
925 (?$(5#d(B 0)
926 (?$(5#e(B 0)
927 (?$(5#f(B 0)
928 (?$(5#g(B 0)
929 (?$(5#h(B 0)
930 (?$(5#i(B 0)
931 (?$(5#j(B 0)
932 (?$(5#k(B 0)
933 (?$(5#l(B 0)
934 (?$(5#m(B 0)
935 (?$(5#n(B 0)
936 (?$(5#o(B 0)
937 (?$(5#p(B 0)
938 (?$(5#q(B 0)
939 (?$(5#r(B 0)
940 (?$(5#s(B 0)
941 (?$(5#t(B 0)
942 (?$(5#u(B 0)
943 (?$(5#v(B 0)
944 (?$(5#w(B 0)
945 (?$(5#x(B 0)
946 (?$(5#y(B 0)
947 (?$(5#z(B 0)
948 (?$(5#{(B 0)
949 (?$(5#|(B 0)
950 (?$(5#}(B 0)
951 (?$(5#~(B 0)
952 (?$(5$!(B 0)
953 (?$(5$"(B 0)
954 (?$(5$#(B 0)
955 (?$(5$$(B 0)
956 (?$(5$%(B 0)
957 (?$(5$&(B 0)
958 (?$(5$'(B 0)
959 (?$(5$((B 0)
960 (?$(5$)(B 0)
961 (?$(5$*(B 0)
962 (?$(5$+(B 0)
963 (?$(5$,(B 0)
964 (?$(5$-(B 0)
965 (?$(5$.(B 0)
966 (?$(5$/(B 0)
967 (?$(5$0(B 0)
968 (?$(5$1(B 0)
969 (?$(5$2(B 0)
970 (?$(5$3(B 0)
971 (?$(5$4(B 0)
972 (?$(5$5(B 0)
973 (?$(5$6(B 0)
974 (?$(5$7(B 0)
975 (?$(5$8(B 0)
976 (?$(5$9(B 0)
977 (?$(5$:(B 0)
978 (?$(5$;(B 0)
979 (?$(5$<(B 0)
980 (?$(5$=(B 0)
981 (?$(5$>(B 0)
982 (?$(5$?(B 0)
983 (?$(5$@(B 0)
984 (?$(5$A(B 0)
985 (?$(5$B(B 0)
986 (?$(5$C(B 0)
987 (?$(5$D(B 0)
988 (?$(5$E(B 0)
989 (?$(5$F(B 0)
990 (?$(5$G(B 0)
991 (?$(5$H(B 0)
992 (?$(5$I(B 0)
993 (?$(5$J(B 0)
994 (?$(5$K(B 0)
995 (?$(5$L(B 0)
996 (?$(5$M(B 0)
997 (?$(5$N(B 0)
998 (?$(5$O(B 0)
999 (?$(5$P(B 0)
1000 (?$(5$Q(B 0)
1001 (?$(5$R(B 0)
1002 (?$(5$S(B 0)
1003 (?$(5$T(B 0)
1004 (?$(5$U(B 0)
1005 (?$(5$V(B 0)
1006 (?$(5$W(B 0)
1007 (?$(5$X(B 0)
1008 (?$(5$Y(B 0)
1009 (?$(5$Z(B 0)
1010 (?$(5$[(B 0)
1011 (?$(5$\(B 0)
1012 (?$(5$](B 0)
1013 (?$(5$^(B 0)
1014 (?$(5$_(B 0)
1015 (?$(5$`(B 0)
1016 (?$(5$a(B 0)
1017 (?$(5$b(B 0)
1018 (?$(5$c(B 0)
1019 (?$(5$d(B 0)
1020 (?$(5$e(B 0)
1021 (?$(5$f(B 0)
1022 (?$(5$g(B 0)
1023 (?$(5$h(B 0)
1024 (?$(5$i(B 0)
1025 (?$(5$j(B 0)
1026 (?$(5$k(B 0)
1027 (?$(5$l(B 0)
1028 (?$(5$m(B 0)
1029 (?$(5$n(B 0)
1030 (?$(5$o(B 0)
1031 (?$(5$p(B 0)
1032 (?$(5$q(B 0)
1033 (?$(5$r(B 0)
1034 (?$(5$s(B 0)
1035 (?$(5$t(B 0)
1036 (?$(5$u(B 0)
1037 (?$(5$v(B 0)
1038 (?$(5$w(B 0)
1039 (?$(5$x(B 0)
1040 (?$(5$y(B 0)
1041 (?$(5$z(B 0)
1042 (?$(5${(B 0)
1043 (?$(5$|(B 0)
1044 (?$(5$}(B 0)
1045 (?$(5$~(B 0)
1046 ))
1047
;; Determine the composition priority and rule of each glyph in an
;; array of glyphs, and sort the glyphs by that priority.

(defun devanagari-reorder-glyphs-for-composition (string start end)
  "Return the composition-rule entries for part of STRING, sorted by priority.
Each glyph of STRING from index START (inclusive) to END (exclusive)
is looked up in `devanagari-composition-rules'.  The value is the list
of the entries found, sorted by ascending priority (the second element
of each entry).

A glyph that has no entry in the table is represented by the entry
\(GLYPH 0), i.e. priority 0 and no explicit rule, so that the sort
predicate never has to compare a nil entry."
  (let ((pos start)
        (entries nil)
        glyph)
    ;; Collect the entries in reverse with `cons' and flip the list once
    ;; at the end; appending to the tail on every iteration is quadratic.
    (while (< pos end)
      (setq glyph (aref string pos)
            pos (1+ pos))
      (setq entries
            (cons (or (assq glyph devanagari-composition-rules)
                      (list glyph 0))
                  entries)))
    (sort (nreverse entries)
          #'(lambda (x y) (< (car (cdr x)) (car (cdr y)))))))
1061
1062 ;;(devanagari-compose-to-one-glyph "$(5"5!X![(B") => "4$(6!Xv#"5t%![0!X"5![1(B"
1063
(defun devanagari-compose-to-one-glyph (devanagari-string)
  "Compose the glyphs of DEVANAGARI-STRING into a single glyph string.
The list of components is built by `devanagari-composition-component':
the glyphs sorted by priority, interleaved with composition rules and
converted to 1-column shapes where possible.  If that list consists of
a single glyph, it is returned as a one-character string; otherwise
the list is passed to `compose-chars' and its value is returned."
  ;; The component list used to be built here with a verbatim copy of
  ;; the loop in `devanagari-composition-component'; share that code
  ;; instead of keeping two copies in step.
  (let ((components (devanagari-composition-component devanagari-string)))
    (if (= (length components) 1)
        (char-to-string (car components))
      (apply 'compose-chars components))))
1084
(defun devanagari-composition-component (string &optional start end)
  "Return the list of composition components for the glyphs of STRING.
Only the glyphs from index START (default 0) up to END (default the
length of STRING) are considered.

The glyphs are first ordered by
`devanagari-reorder-glyphs-for-composition'.  The value starts with
the first glyph in that order; every following glyph is preceded by
its composition rule, which is the rule stored in its table entry or
\(mr . ml) when the entry has none.  Finally the list is passed
through `devanagari-wide-to-narrow'."
  (or start (setq start 0))
  (or end (setq end (length string)))
  (let* ((entries (devanagari-reorder-glyphs-for-composition
                   string start end))
         ;; Components are accumulated in reverse order; the first
         ;; sorted glyph is the base of the composition.
         (components (list (car (car entries))))
         entry)
    (setq entries (cdr entries))
    (while entries
      (setq entry (car entries)
            entries (cdr entries))
      (setq components
            (cons (car entry)
                  (cons (if (cdr (cdr entry))
                            ;; composition with the rule given in the entry
                            (car (cdr (cdr entry)))
                          ;; default composition
                          '(mr . ml))
                        components))))
    ;; Convert glyphs to 1-column width if possible.
    (devanagari-wide-to-narrow (nreverse components))))
1104
;; Utility functions for Phase 2.5
1106
;; Check whether GLYPH is a Devanagari vertical modifier or not.
;; Whether a vertical modifier should take its 1-column shape depends
;; on the glyph that precedes it; that decision is made by the caller.
(defun devanagari-vertical-modifier-p (glyph)
  "Return non-nil if GLYPH is one of the Devanagari vertical modifiers.
The non-nil value is the index at which GLYPH occurs in the literal
string of modifiers below."
  ;; Note the argument order: the glyph is the *pattern* and the string
  ;; of modifiers is the text being searched.  The glyph therefore has
  ;; to be quoted; unquoted, a glyph such as `.' would be read as a
  ;; regexp and match any character, and `[' would be an invalid regexp.
  (string-match (regexp-quote (char-to-string glyph))
                "[$(5!"!]!^!_!`!a!b!c!h!i"p"q"r#K#L#M(B]"))
1113
(defun devanagari-non-vertical-modifier-p (glyph)
  "Return non-nil if GLYPH is a Devanagari non-vertical modifier.
The non-nil value is the index at which GLYPH occurs in the literal
string of modifiers below."
  ;; The glyph is the pattern and the string of modifiers is the text
  ;; being searched, so quote the glyph to keep regexp-special
  ;; characters from being interpreted.
  (string-match (regexp-quote (char-to-string glyph))
                ;; Wider set, currently disabled:
                ;; "[$(5!Z![!\!d!e!f!g(B]"
                "[$(5![(B]"))
1118
(defun devanagari-wide-to-narrow-char (char)
  "Return the narrow counterpart of the Devanagari character CHAR.
The counterpart is looked up in `devanagari-1-column-char'.  When the
table has no narrow character for CHAR, CHAR itself is returned."
  (or (cdr (assq char devanagari-1-column-char))
      char))
1124
1125 ;;
1126 ;; Phase 2.5 Convert appropriate character to 1-column shape.
1127 ;;
;; This is temporary and should be removed once Emacs supports
;; variable width characters.
1130 ;;
1131 ;; This will convert the composing glyphs (2 column glyphs)
1132 ;; to narrow (1 column) glyphs if they exist.
1133 ;;
1134 ;; devanagari-wide-to-narrow-old converts glyphs simply.
1135 ;; devanagari-wide-to-narrow takes care of upper/lower apply-glyphs
1136 ;; with 2 column base-glyph.
1137 ;;
1138 ;; Execution Examples
1139 ;;(devanagari-wide-to-narrow '(?$(5!3(B (ml . ml) ?$(5!a(B))
1140 ;;(devanagari-wide-to-narrow '(?$(5!F(B (ml . ml) ?$(5!a(B))
1141
(defun devanagari-wide-to-narrow (src-list)
  "Convert the glyphs in SRC-LIST to 1-column shapes where possible.
This starts `devanagari-wide-to-narrow-iter' on SRC-LIST with its
2-column flag initially set, and returns that function's value."
  (let ((initial-2-col-flag t))
    (devanagari-wide-to-narrow-iter src-list initial-2-col-flag)))
1144
(defun devanagari-wide-to-narrow-iter (src-list 2-col-glyph)
  "Return a copy of SRC-LIST with glyphs converted to 1-column shapes.
SRC-LIST is a list of glyphs interleaved with composition rules.
2-COL-GLYPH is the initial value of the flag recording that the most
recent ordinary glyph kept its 2-column shape.

The elements are handled as follows:
- anything that is not a glyph (e.g. a rule) is copied unchanged;
- a non-vertical modifier is always converted with
  `devanagari-wide-to-narrow-char';
- a vertical modifier is converted only while the flag is nil;
- any other glyph is converted when `devanagari-1-column-char' has a
  narrow shape for it; the flag becomes nil if it was converted and
  t otherwise."
  (let ((result nil)
        glyph narrow)
    ;; Walk the list iteratively so that the Lisp evaluation depth does
    ;; not grow with the length of SRC-LIST.
    (while src-list
      (setq glyph (car src-list)
            src-list (cdr src-list))
      (setq result
            (cons
             (cond
              ;; Not a glyph code.
              ;; NOTE(review): XEmacs characters are a type of their own
              ;; and, per the XEmacs Lisp Reference, do not satisfy
              ;; `numberp'; the `characterp' test keeps glyphs from
              ;; being mistaken for rules there.  Where characters are
              ;; integers the `numberp' test alone already suffices.
              ((not (or (numberp glyph)
                        (and (fboundp 'characterp) (characterp glyph))))
               glyph)
              ;; Glyphs processed regardless of the value of 2-COL-GLYPH.
              ((devanagari-non-vertical-modifier-p glyph)
               (devanagari-wide-to-narrow-char glyph))
              ;; Glyphs whose shape depends on the value of 2-COL-GLYPH.
              ((devanagari-vertical-modifier-p glyph)
               (if 2-col-glyph
                   glyph
                 (devanagari-wide-to-narrow-char glyph)))
              ;; Normal glyph: it decides the flag for what follows.
              (t
               (setq narrow (cdr (assq glyph devanagari-1-column-char)))
               (setq 2-col-glyph (null narrow))
               (or narrow glyph)))
             result)))
    (nreverse result)))
1171
1172
1173 ;;
1174 ;; Summary
1175 ;;
1176
1177 ;;
1178 ;; Decomposition of composite sequence.
1179 ;;
1180
1181 ;;;###autoload
(defun devanagari-decompose-string (str)
  "Apply `decompose-string' to a copy of the Devanagari string STR.
The value of that call is returned; STR itself is not passed to
`decompose-string'."
  (let ((copy (copy-sequence str)))
    (decompose-string copy)))
1185
1186 ;;;###autoload
(defun devanagari-decompose-region (from to)
  "Decompose the text between FROM and TO with `decompose-region'.
When called interactively, FROM and TO are the bounds of the region."
  (interactive "r")
  (decompose-region from to))
1190
1191 ;;;
1192 ;;; Composition
1193 ;;;
1194
1195 ;;;###autoload
(defun devanagari-compose-string (str &rest langs)
  "Return a copy of STR with its Devanagari glyph units composed.
Each match of `devanagari-composite-glyph-unit' in the copy is
converted by `char-to-glyph-devanagari' (which also receives LANGS),
turned into a component list by `devanagari-composition-component',
and composed in the copy with `compose-string'.  STR itself is not
passed to `compose-string'."
  (setq str (copy-sequence str))
  (let ((idx 0))
    (while (string-match devanagari-composite-glyph-unit str idx)
      ;; Record the bounds of the match before calling anything else, in
      ;; case the helpers below alter the match data.
      (let* ((match-b (match-beginning 0))
             (match-e (match-end 0))
             (cmps (devanagari-composition-component
                    (apply 'char-to-glyph-devanagari
                           (substring str match-b match-e)
                           langs))))
        (compose-string str match-b match-e cmps)
        ;; Resume the search right after the unit just composed.
        (setq idx match-e))))
  str)
1210
1211 ;;;###autoload
(defun devanagari-compose-region (from to &rest langs)
  "Compose the Devanagari glyph units found between FROM and TO.
Each match of `devanagari-composite-glyph-unit' in that part of the
buffer is converted by `char-to-glyph-devanagari' (which also receives
LANGS), turned into a component list by
`devanagari-composition-component', and composed with
`compose-region'.  When called interactively, FROM and TO are the
bounds of the region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (while (re-search-forward devanagari-composite-glyph-unit nil t)
        (let* ((unit-beg (match-beginning 0))
               (unit-end (match-end 0))
               (glyphs (apply 'char-to-glyph-devanagari
                              (buffer-substring unit-beg unit-end)
                              langs)))
          (compose-region unit-beg unit-end
                          (devanagari-composition-component glyphs)))))))
1225
1226 ;; For pre-write and post-read conversion
1227
1228 ;;;###autoload
(defun devanagari-compose-from-is13194-region (from to)
  "Compose IS 13194 characters in the region to Devanagari characters.
The text between FROM and TO is first processed by
`indian-to-devanagari-region' and then by `devanagari-compose-region'.
The value is the length of the resulting text."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      ;; The bounds are re-read for each step because a step may change
      ;; the size of the narrowed text.
      (indian-to-devanagari-region (point-min) (point-max))
      (devanagari-compose-region (point-min) (point-max))
      (let ((new-end (point-max))
            (new-beg (point-min)))
        (- new-end new-beg)))))
1238
1239 ;;;###autoload
(defun in-is13194-devanagari-post-read-conversion (len)
  "Compose the LEN characters of IS 13194 text that start at point.
The work is done by `devanagari-compose-from-is13194-region', whose
value is returned."
  (let* ((start (point))
         (end (+ start len)))
    (devanagari-compose-from-is13194-region start end)))
1243
1244 ;;;###autoload
(defun devanagari-decompose-to-is13194-region (from to)
  "Decompose Devanagari characters in the region to IS 13194 characters.
The text between FROM and TO is first processed by
`devanagari-decompose-region' and then by
`devanagari-to-indian-region'."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      ;; The bounds are re-read for the second step because the first
      ;; one may change the size of the narrowed text.
      (progn
        (devanagari-decompose-region (point-min) (point-max))
        (devanagari-to-indian-region (point-min) (point-max))))))
1253
1254 ;;;###autoload
(defun in-is13194-devanagari-pre-write-conversion (from to)
  "Prepare Devanagari text for writing as IS 13194.
A new buffer is generated from the name \" *temp*\" and made current.
It receives FROM if FROM is a string, and otherwise the text between
FROM and TO of the buffer that was current on entry.  Its contents are
then converted by `devanagari-decompose-to-is13194-region'.  The new
buffer is left current and the value is nil."
  (let ((source (current-buffer))
        (work (generate-new-buffer " *temp*")))
    (set-buffer work)
    (cond ((stringp from)
           (insert from))
          (t
           (insert-buffer-substring source from to)))
    (devanagari-decompose-to-is13194-region (point-min) (point-max))
    ;; Should return nil as annotations.
    nil))
1264
1265 ;; For input/output of ITRANS
1266
1267 ;;;###autoload
(defun devanagari-encode-itrans-region (from to)
  "Encode the Devanagari text between FROM and TO for ITRANS output.
The text is first processed by
`devanagari-decompose-to-is13194-region' and then by
`indian-encode-itrans-region', whose value is returned.  When called
interactively, FROM and TO are the bounds of the region."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; The bounds are re-read for the second step because the first one
    ;; may change the size of the narrowed text.
    (progn
      (devanagari-decompose-to-is13194-region (point-min) (point-max))
      (indian-encode-itrans-region (point-min) (point-max)))))
1274
1275 ;;;###autoload
(defun devanagari-decode-itrans-region (from to)
  "Decode the ITRANS text between FROM and TO into Devanagari.
The text is first processed by `indian-decode-itrans-region' and then
by `devanagari-compose-from-is13194-region', whose value is returned.
When called interactively, FROM and TO are the bounds of the region."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; The bounds are re-read for the second step because the first one
    ;; may change the size of the narrowed text.
    (progn
      (indian-decode-itrans-region (point-min) (point-max))
      (devanagari-compose-from-is13194-region (point-min) (point-max)))))
1282
1283 ;;
1284 (provide 'devan-util)
1285
;;; devan-util.el ends here
1287
1288 ;; Local Variables:
1289 ;; coding: iso-2022-7bit
1290 ;; End: