778
|
1 ;;; devan-util.el --- support for Devanagari Script Composition -*- coding: iso-2022-7bit; -*-
|
771
|
2
|
|
3 ;; Copyright (C) 1996, 2001 Free Software Foundation, Inc.
|
|
4
|
|
5 ;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
|
|
6
|
|
7 ;; Keywords: multilingual, Indian, Devanagari
|
|
8
|
|
9 ;; This file is part of XEmacs.
|
|
10
|
|
11 ;; XEmacs is free software; you can redistribute it and/or modify it
|
|
12 ;; under the terms of the GNU General Public License as published by
|
|
13 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
14 ;; any later version.
|
|
15
|
|
16 ;; XEmacs is distributed in the hope that it will be useful, but
|
|
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
19 ;; General Public License for more details.
|
|
20
|
|
21 ;; You should have received a copy of the GNU General Public License
|
|
22 ;; along with XEmacs; see the file COPYING. If not, write to the Free
|
|
23 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
|
24 ;; 02111-1307, USA.
|
|
25
|
778
|
26 ;;; Synched up with: Emacs 21.1 (language/devan-util.el).
|
771
|
27
|
|
28 ;;; Commentary:
|
|
29
|
|
30 ;; History:
|
|
31 ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
|
|
32 ;; 1997.3.24 fixed some bugs.
|
|
33
|
|
34 ;; Future work ::
|
|
35 ;; Decompose the input characters and process them on the character basis.
|
|
36
|
|
37 ;; Devanagari script composition rules and related programs.
|
|
38
|
|
39 ;;; Code:
|
|
40
|
|
41 ;;;
|
|
42 ;;; Steps toward composition of Devanagari Characters.
|
|
43 ;;;
|
|
44
|
|
45 ;;; Basic functions.
|
|
46
|
|
47 ;;;###autoload
|
|
(defun indian-to-devanagari (char)
  "Return the Devanagari basic character for the IS 13194 character CHAR.
CHAR is returned unchanged when its charset is not `indian-is13194'."
  (let* ((parts (split-char char))
         (charset (car parts))
         (code (car (cdr parts))))
    (cond ((eq charset 'indian-is13194)
           ;; Same code point, placed in row ?\x21 of `indian-2-column'.
           (make-char 'indian-2-column ?\x21 code))
          (t char))))
|
|
55
|
|
56 ;;;###autoload
|
|
(defun devanagari-to-indian (char)
  "Return the IS 13194 character for the Devanagari basic character CHAR.
A Devanagari basic character is one in row ?\\x21 of the `indian-2-column'
charset.  Any other CHAR is returned unchanged."
  (let* ((parts (split-char char))
         (basic-p (and (eq (car parts) 'indian-2-column)
                       (= (nth 1 parts) ?\x21))))
    (if (not basic-p)
        char
      ;; The second position code is the IS 13194 code point.
      (make-char 'indian-is13194 (nth 2 parts)))))
|
|
65
|
|
66 ;;;###autoload
|
|
(defun indian-to-devanagari-region (from to)
  "Replace IS 13194 characters between FROM and TO by Devanagari basic ones.
Interactively, operate on the region.  From a program, FROM and TO are
positions (integers or markers) delimiting the text to convert.
Characters of any other charset are left untouched."
  (interactive "r")
  (save-excursion
    (goto-char from)
    (while (< (point) to)
      (let ((here (following-char)))
        (cond ((not (eq (char-charset here) 'indian-is13194))
               ;; Not ours: step over it.
               (forward-char 1))
              (t
               ;; One character out, one character in: point ends up just
               ;; past the replacement and TO stays valid.
               (delete-char 1)
               (insert (indian-to-devanagari here))))))))
|
|
81
|
|
82 ;;;###autoload
|
|
(defun devanagari-to-indian-region (from to)
  "Replace Devanagari basic characters between FROM and TO by IS 13194 ones.
Interactively, operate on the region.  From a program, FROM and TO are
positions (integers or markers) delimiting the text to convert.
Only characters of the `indian-2-column' charset are passed through
`devanagari-to-indian'; everything else is skipped."
  (interactive "r")
  (save-excursion
    (goto-char from)
    (while (< (point) to)
      (let ((here (following-char)))
        (if (eq (char-charset here) 'indian-2-column)
            (let ((converted (devanagari-to-indian here)))
              ;; Swap the character in place; point moves past the new one.
              (delete-char 1)
              (insert converted))
          (forward-char 1))))))
|
|
97
|
|
98 ;;;###autoload
|
|
(defun indian-to-devanagari-string (string)
  "Return a copy of STRING with IS 13194 characters made Devanagari basic.
Each character is passed through `indian-to-devanagari'; characters of
other charsets are copied as they are."
  ;; Map over the characters and glue the results back into a string.
  (concat (mapcar 'indian-to-devanagari string)))
|
|
108
|
|
109 ;; Phase 0 - Determine whether the characters can be composed.
|
|
110 ;;
|
|
111 ;;;
|
|
112 ;;; Regular expressions to split characters for composition.
|
|
113 ;;;
|
|
114 ;;
|
|
115 ;; Indian script word contains one or more syllables.
|
|
116 ;; In BNF, it can be expressed as follows:
|
|
117 ;;
|
|
118 ;; Word ::= {Syllable} [Cons-Syllable]
|
|
119 ;; Syllable ::= Cons-Vowel-Syllable | Vowel-Syllable
|
|
120 ;; Vowel-Syllable ::= V[D]
|
|
121 ;; Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D]
|
|
122 ;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] [Pure-Cons] Pure-Cons
|
|
123 ;; Pure-Cons ::= Full-Cons H
|
|
124 ;; Full-Cons ::= C [N]
|
|
125 ;;
|
|
126 ;; {} repeat, [] optional
|
|
127 ;;
|
|
128 ;; C - Consonant ($(5!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A!B!C!D!E(B
|
|
129 ;; $(5!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X(B)
|
|
130 ;; N - Nukta ($(5!i(B)
|
|
131 ;; H - Halant($(5!h(B) or Virama
|
|
132 ;; V - Vowel ($(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2#&#'#*(B)
|
|
133 ;; ("$(5#&#'#*(B" can be obtained by IS13194 vowels with nukta.)
|
|
134 ;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu ($(5!!!"(B)
|
|
;;     (Visarga ($(5!#(B) is excluded.)
|
|
136 ;; M - Matra ($(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g#K#L#M(B)
|
|
137 ;; ("$(5#K#L#M(B" can be obtained by IS13194 matras with nukta.)
|
|
138 ;;
|
|
139 ;; In Emacs, one syllable of Indian language is considered to be one
|
|
140 ;; composite glyph. If we expand the above expression for
|
|
141 ;; cons-vowel-syllable, it would be:
|
|
142 ;;
|
|
143 ;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] [D]
|
|
144 ;;
|
|
145 ;; Therefore, in worst case, the one syllable may contain
|
|
146 ;; following characters.
|
|
147 ;;
|
|
148 ;; C N H C N H C N H C N H C N M D
|
|
149 ;;
|
|
150 ;; The example is a sanskrit word "kArtsnya", where five consecutive
|
|
151 ;; consonants appear.
|
|
152 ;;
|
|
153 ;; On the other hand, consonant-syllable, which appears at the end of
|
|
154 ;; the word, would have the following expression:
|
|
155 ;;
|
|
156 ;; [C [N] H] [C [N] H] [C [N] H] C [N] H
|
|
157 ;;
|
|
158 ;; This is acceptable BEFORE proper consonant-syllable is input. The
|
|
159 ;; string which doesn't match with the above expression is invalid and
|
|
160 ;; thus must be fixed.
|
|
161 ;;
|
|
162 ;; Note:
|
|
;; A third case can be considered: a syllable that is already complete,
;; so that no further code can be appended to it.
|
|
165 ;;
|
|
166 ;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] D
|
|
167 ;;
|
|
168 ;; However, to make editing possible even in this condition, we will
|
|
169 ;; not consider about this case.
|
|
170 ;;
|
|
171 ;; Note:
|
|
;; Currently, it seems that only the following consonants can have
;; a Nukta sign attached.
|
|
174 ;; ($(5!3!4!5!:!?!@!I(B)
|
|
175 ;; Therefore, [$(5!3(B-$(5!X(B]$(5!i(B? can be re-written as
|
|
176 ;; \\([$(5!3!4!5!:!?!@!I(B]$(5!i(B\\)\\|[$(5!3(B-$(5!X(B]
|
|
177
|
|
(defconst devanagari-full-cons
  "\\(\\([$(5!3!4!5!:!?!@!I(B]$(5!i(B\\)\\|[$(5!3(B-$(5!X$.$E"%(B]\\)"
  "Regexp matching one Devanagari full consonant.
In the grammar used by this file, Full-Cons ::= C [N].  The first
alternative is one of the consonants that can take a nukta sign,
followed by that sign; the second alternative is a single character
from the bracketed set.")
|
|
181
|
|
(defconst devanagari-pure-cons
  (concat "\\(" devanagari-full-cons "$(5!h(B\\)")
  "Regexp matching one Devanagari pure consonant.
That is a full consonant (`devanagari-full-cons') followed by the
halant sign: Pure-Cons ::= Full-Cons H.  The whole match is wrapped in
one group so that callers can append `?' to make it optional.")
|
|
185
|
|
(defconst devanagari-matra
  "\\(\\([$(5!_![!\(B]$(5!i(B\\)\\|[$(5!Z(B-$(5!g#K#L#M(B]\\)"
  "Regexp matching one Devanagari Matra sign.
'$(5#K#L#M(B' can also be created from the combination of '$(5!_![!\(B'
and the nukta sign, so the first alternative accepts that two-character
spelling and the second accepts a single matra character.")
|
|
190
|
|
(defconst devanagari-vowel
  "\\(\\([$(5!*!&!'(B]$(5!i(B\\)\\|[$(5!$(B-$(5!2#&#'#*(B]\\)"
  "Regexp matching one Devanagari vowel.
'$(5#&#'#*(B' can also be created from the combination of '$(5!*!&!'(B'
and the nukta sign, so the first alternative accepts that two-character
spelling and the second accepts a single vowel character.")
|
|
195
|
|
(defconst devanagari-vowel-syllable
  (concat devanagari-vowel "[$(5!!!"(B]?")
  "Regexp matching a Devanagari vowel syllable.
A vowel (`devanagari-vowel') optionally followed by one vowel modifier:
Vowel-Syllable ::= V [D].")
|
|
199
|
|
(defconst devanagari-cons-syllable
  ;; Three optional pure consonants followed by one mandatory one,
  ;; i.e. one to four pure consonants in total.
  (concat devanagari-pure-cons "?" devanagari-pure-cons "?"
          devanagari-pure-cons "?" devanagari-pure-cons "$")
  "Regexp matching a Devanagari consonant syllable.
Cons-Syllable ::= [Pure-Cons] [Pure-Cons] [Pure-Cons] Pure-Cons.
The trailing `$' anchors the match at the end of the string (or of a
line), since such a syllable is only acceptable at the end of a word.")
|
|
204
|
|
(defconst devanagari-cons-vowel-syllable
  (concat "\\("
          ;; Optional leading cluster of one to four pure consonants.
          devanagari-pure-cons "?" devanagari-pure-cons "?"
          devanagari-pure-cons "?" devanagari-pure-cons "\\)?"
          ;; Mandatory full consonant, optional matra, optional modifier.
          devanagari-full-cons devanagari-matra "?[$(5!!!"(B]?")
  "Regexp matching a Devanagari consonant-vowel syllable.
Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D]: up to four
pure consonants, then one full consonant, an optional matra and an
optional vowel modifier.")
|
|
211
|
|
212 ;;
|
|
;; Digits and virams must also be handled, separately from syllables.
|
|
214 ;;
|
|
215 ;; In IS 13194, Avagrah is obtained by Nukta after Viram, and
|
|
216 ;; OM is obtained by Nukta after Chandrabindu
|
|
217 ;;
|
|
218
|
|
(defconst devanagari-digit-viram-visarga
  "[$(5!q(B-$(5!z!j!#(B]"
  "Regexp matching one Devanagari digit, viram or visarga character.
These characters are handled on their own rather than as part of a
syllable; see `devanagari-composite-glyph-unit'.")
|
|
221
|
|
(defconst devanagari-other-sign
  "\\([$(5!!!j(B]$(5!i(B\\)\\|\\([$(5#!#J(B]\\)"
  "Regexp matching the Devanagari signs that are spelled with a nukta.
The first alternative is chandrabindu or viram followed by the nukta
sign; in IS 13194 these sequences give OM and Avagrah respectively.
The second alternative is a single character from the bracketed set,
presumably the one-character forms of the same two signs.")
|
|
224
|
|
(defconst devanagari-composite-glyph-unit
  ;; Each alternative sits in its own group.
  (concat "\\(" devanagari-cons-syllable
          "\\)\\|\\(" devanagari-vowel-syllable
          "\\)\\|\\(" devanagari-cons-vowel-syllable
          "\\)\\|\\(" devanagari-other-sign
          "\\)\\|\\(" devanagari-digit-viram-visarga "\\)")
  "Regexp matching Devanagari string to be composed from one glyph.
The alternatives are, in order: a consonant syllable, a vowel syllable,
a consonant-vowel syllable, one of the other signs, and a single digit,
viram or visarga.")
|
771
|
232
|
|
233 ;;(put-charset-property charset-devanagari-1-column
|
|
234 ;; 'char-to-glyph 'devanagari-compose-string)
|
|
235 ;;(put-charset-property charset-devanagari-2-column
|
|
236 ;; 'char-to-glyph 'devanagari-compose-string)
|
|
237
|
|
238 ;; Sample
|
|
239 ;;
|
|
240 ;;(string-match devanagari-cons-vowel-syllable-examine "$(5!X![(B") => 0
|
|
241 ;;(string-match devanagari-cons-vowel-syllable-examine "$(5!F!h!D!\(B") => 0
|
|
242 ;;(string-match devanagari-cons-vowel-syllable-examine "$(5!X![!F!h!D!\(B") => 0
|
|
243
|
|
244 ;;
|
|
245 ;; Steps toward the composition
|
|
246 ;; Converting Character Codes to Composite Glyph.
|
|
247 ;;
|
|
248 ;; Example : $(5!X![(B/$(5!F!h!D!\(B
|
|
249 ;;
|
|
250 ;; First, convert Characters to appropriate glyphs.
|
|
251 ;;
|
|
252 ;; => $(5!X![(B/$(5"F!D!\(B
|
|
253 ;;
|
|
254 ;; Then, determine the base glyph, apply-orders and apply-rules.
|
|
255 ;;
|
|
256 ;; => $(5!X(B (ml.mr) $(5![(B / $(5!D(B (ml.mr) $(5"F(B (mr ml) $(5!\(B
|
|
257 ;;
|
|
258 ;; Finally, convert 2-column glyphs to 1-column glyph
|
|
259 ;; if such a glyph exist.
|
|
260 ;;
|
|
261 ;; => $(6!X(B (ml.mr) $(6![(B / $(6!D(B (ml.mr) $(6"F(B (mr ml) $(6!\(B
|
|
262 ;;
|
|
263 ;; Compose the glyph.
|
|
264 ;;
|
|
265 ;; => 4$(6!Xt%![0!X![1(B/4$(6!Dt%"Fv#!\0!D"F!\1(B
|
|
266 ;; => 4$(6!Xt%![0!X![14!Dt%"Fv#!\0!D"F!\1(B
|
|
267 ;;
|
|
268
|
|
269 ;;
|
|
270 ;; Phase 1: Converting Character Code to Glyph Code.
|
|
271 ;;
|
|
272 ;;
|
|
273 ;; IMPORTANT:
|
|
;; There may be many rules that you may want to suppress.
|
|
275 ;; In that case, please comment out that rule.
|
|
276 ;;
|
|
277 ;; RULES WILL BE EVALUATED FROM FIRST TO LAST.
|
|
278 ;; PUT MORE SPECIFIC RULES FIRST.
|
|
279 ;;
|
|
280 ;; TO DO:
|
|
281 ;; Prepare multiple specific list of rules for each languages
|
|
282 ;; that adopt Devanagari script.
|
|
283 ;;
|
|
284
|
|
285 (defconst devanagari-char-to-glyph-rules
|
|
286 '(
|
|
287
|
|
288 ;; `r' at the top of syllable and followed by other consonants.
|
|
289 ;; ("[^$(5!h(B]\\($(5!O!h(B\\)[$(5!3(B-$(5!X(B]" "$(5"p(B")
|
|
290 ("^\\($(5!O!h(B\\)[$(5!3(B-$(5!X(B]" "$(5"p(B")
|
|
291
|
|
292 ;; Ligature Rules
|
|
293 ("\\($(5!3!h!B!h!O!h!M(B\\)" "$(5$!(B" sanskrit)
|
|
294 ("\\($(5!3!h!B!h!T(B\\)" "$(5$"(B" sanskrit)
|
|
295 ("\\($(5!3!h!B!h!M(B\\)" "$(5$#(B" sanskrit)
|
|
296 ("\\($(5!3!h!F!h!M(B\\)" "$(5$$(B")
|
|
297 ("\\($(5!3!h!O!h!M(B\\)" "$(5$%(B")
|
|
298 ("\\($(5!3!h!O(B\\)" "$(5"#(B") ; Post "r"
|
|
299 ("\\($(5!3!h!T!h!M(B\\)" "$(5$&(B" sanskrit)
|
|
300 ("\\($(5!3!h(B\\)$(5!3!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"3(B") ; Special Half Form
|
|
301 ("\\($(5!3!h!3(B\\)" "$(5$'(B")
|
|
302 ("\\($(5!3!h(B\\)$(5!B!h!O(B" "$(5"3(B") ; Special Rules for "k-tr"
|
|
303 ("\\($(5!3!h!B(B\\)" "$(5$((B")
|
|
304 ("\\($(5!3!h!F(B\\)" "$(5$)(B")
|
|
305 ("\\($(5!3!h!L(B\\)" "$(5$*(B")
|
|
306 ("\\($(5!3!h!M(B\\)" "$(5$+(B")
|
|
307 ("\\($(5!3!h!Q(B\\)" "$(5$,(B")
|
|
308 ("\\($(5!3!h!T(B\\)" "$(5$-(B")
|
|
309 ("\\($(5!3!h!V!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"l(B") ; Half Form
|
|
310 ("\\($(5$.!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"l(B") ; Half Form
|
|
311 ("\\($(5!3!h!V(B\\)" "$(5$.(B")
|
|
312 ("\\($(5!3!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"3(B") ; Half Form
|
|
313 ("\\($(5!3!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"s(B") ; Nukta Half Form
|
|
314 ("\\($(5!3!i(B\\)" "$(5#3(B") ; Nukta
|
|
315 ("\\($(5!4!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"4(B") ; Half Form
|
|
316 ("\\($(5!4!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"t(B") ; Nukta Half Form
|
|
317 ("\\($(5!4!i(B\\)" "$(5#4(B") ; Nukta
|
|
318 ("\\($(5!5!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"`(B") ; Half Form
|
|
319 ("\\($(5!5!h!O(B\\)" "$(5"$(B") ; Post "r"
|
|
320 ("\\($(5!5!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"5(B") ; Half Form
|
|
321 ("\\($(5!5!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"u(B") ; Nukta Half Form
|
|
322 ("\\($(5!5!i(B\\)" "$(5#5(B") ; Nukta
|
|
323 ("\\($(5!6!h!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"a(B") ; Half Form
|
|
324 ("\\($(5!6!h!F(B\\)" "$(5$/(B")
|
|
325 ; Slot
|
|
326 ("\\($(5!6!h!O(B\\)" "$(5!6"q(B") ; Post "r"
|
|
327 ("\\($(5!6!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"6(B") ; Half Form
|
|
328 ("\\($(5!7!h!3!h!B!h!M(B\\)" "$(5$0(B" sanskrit)
|
|
329 ("\\($(5!7!h!3!h!V!h!T(B\\)" "$(5$1(B" sanskrit)
|
|
330 ("\\($(5!7!h!3!h!B(B\\)" "$(5$2(B" sanskrit)
|
|
331 ("\\($(5!7!h!3!h!V(B\\)" "$(5$3(B" sanskrit)
|
|
332 ("\\($(5!7!h!3!h!O(B\\)" "$(5$9"q(B") ; Special Rule. May be precomposed font needed.
|
|
333 ("\\($(5!7!h!6!h!O(B\\)" "$(5$4(B" sanskrit)
|
|
334 ("\\($(5!7!h!3!h!M(B\\)" "$(5$5(B" sanskrit)
|
|
335 ("\\($(5!7!h!4!h!M(B\\)" "$(5$6(B" sanskrit)
|
|
336 ("\\($(5!7!h!5!h!M(B\\)" "$(5$7(B" sanskrit)
|
|
337 ("\\($(5!7!h!6!h!M(B\\)" "$(5$8(B" sanskrit)
|
|
338 ("\\($(5!7!h!3(B\\)" "$(5$9(B")
|
|
339 ("\\($(5!7!h!4(B\\)" "$(5$:(B")
|
|
340 ("\\($(5!7!h!5!h!O(B\\)" "$(5$;"q(B") ; Special Rule. May be precomposed font needed.
|
|
341 ("\\($(5!7!h!5(B\\)" "$(5$;(B")
|
|
342 ("\\($(5!7!h!6(B\\)" "$(5$<(B")
|
|
343 ("\\($(5!7!h!7(B\\)" "$(5$=(B")
|
|
344 ("\\($(5!7!h!F(B\\)" "$(5$>(B")
|
|
345 ("\\($(5!7!h!L(B\\)" "$(5$?(B")
|
|
346 ("\\($(5!7!h!M(B\\)" "$(5$@(B")
|
|
347 ("\\($(5!8!h(B\\)[$(5!8!<(B]$(5!h(B" "$(5"8(B") ; Half Form
|
|
348 ("\\($(5!8!h!8(B\\)" "$(5$A(B")
|
|
349 ("\\($(5!8!h!<(B\\)" "$(5$B(B")
|
|
350 ("\\($(5!8!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"8"q(B") ; Half Form Post "r"
|
|
351 ("\\($(5!8!h!O(B\\)" "$(5!8"q(B") ; Post "r"
|
|
352 ("\\($(5!8!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"8(B") ; Half Form
|
|
353 ("\\($(5!9!h!M(B\\)" "$(5$C(B")
|
|
354 ("\\($(5!:!h!O(B\\)" "$(5$D(B")
|
|
355 ("\\($(5!:!h!<!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"m(B") ; Half Form
|
|
356 ("\\($(5!:!h!<(B\\)" "$(5$E(B")
|
|
357 ("\\($(5!:!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5":(B") ; Half Form
|
|
358 ("\\($(5!:!i!h!O(B\\)" "$(5"!(B") ; Nukta Post "r"
|
|
359 ("\\($(5!:!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"z(B") ; Nukta Half Form
|
|
360 ("\\($(5!:!i(B\\)" "$(5#:(B") ; Nukta
|
|
361 ("\\($(5!;!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5";(B") ; Half Form
|
|
362 ("\\($(5!<!h(B\\)$(5!8!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"<(B") ; Special Half Form
|
|
363 ("\\($(5!<!h!8(B\\)" "$(5$F(B")
|
|
364 ("\\($(5!<!h(B\\)$(5!:!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"<(B") ; Special Half Form
|
|
365 ("\\($(5!<!h!:(B\\)" "$(5$G(B")
|
|
366 ("\\($(5!<!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"<(B") ; Half Form
|
|
367 ("\\($(5!=!h!3(B\\)" "$(5$H(B")
|
|
368 ("\\($(5!=!h!=(B\\)" "$(5$I(B")
|
|
369 ("\\($(5!=!h!>(B\\)" "$(5$J(B")
|
|
370 ("\\($(5!=!h!M(B\\)" "$(5$K(B")
|
|
371 ("\\($(5!>!h!M(B\\)" "$(5$L(B")
|
|
372 ("\\($(5!?!h!5!h!M(B\\)" "$(5$M(B" sanskrit)
|
|
373 ("\\($(5!?!h!6!h!O(B\\)" "$(5$N(B" sanskrit)
|
|
374 ("\\($(5!?!h!O!h!M(B\\)" "$(5$O(B")
|
|
375 ("\\($(5!?!h!5(B\\)" "$(5$P(B")
|
|
376 ("\\($(5!?!h!6(B\\)" "$(5$Q(B")
|
|
377 ("\\($(5!?!h!?(B\\)" "$(5$R(B")
|
|
378 ("\\($(5!?!h!L(B\\)" "$(5$S(B")
|
|
379 ("\\($(5!?!h!M(B\\)" "$(5$T(B")
|
|
380 ("\\($(5!?!i(B\\)" "$(5#?(B") ; Nukta
|
|
381 ("\\($(5!@!h!M(B\\)" "$(5$`(B")
|
|
382 ("\\($(5!@!i(B\\)" "$(5#@(B") ; Nukta
|
|
383 ("\\($(5!A!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"A(B") ; Half Form
|
|
384 ("\\($(5!B!h(B\\)$(5!B!h!O(B" "$(5"B(B") ; Special Rule for "t-tr"
|
|
385 ("\\($(5!B!h!B!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"c(B") ; Half Form
|
|
386 ("\\($(5!B!h!B(B\\)" "$(5$a(B")
|
|
387 ("\\($(5!B!h!F(B\\)" "$(5$b(B")
|
|
388 ("\\($(5!B!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"d(B") ; Half Form Post "r"
|
|
389 ("\\($(5!B!h!O(B\\)" "$(5"%(B") ; Post "r"
|
|
390 ("\\($(5!B!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"B(B") ; Half Form
|
|
391 ("\\($(5!C!h!O(B\\)" "$(5!C"q(B") ; Post "r"
|
|
392 ("\\($(5!C!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"C(B") ; Half Form
|
|
393 ("\\($(5!D!h!D!h!M(B\\)" "$(5$c(B")
|
|
394 ("\\($(5!D!h!E!h!M(B\\)" "$(5$d(B")
|
|
395 ("\\($(5!D!h!K!h!M(B\\)" "$(5$e(B")
|
|
396 ("\\($(5!D!h!K!h!O(B\\)" "$(5$r"r(B") ; Special Case for "dbhr" ; ***
|
|
397 ("\\($(5!D!h!O!h!M(B\\)" "$(5$f(B")
|
|
398 ("\\($(5!D!h!T!h!M(B\\)" "$(5$g(B")
|
|
399 ("\\($(5!D!h!5!h!O(B\\)" "$(5$h(B")
|
|
400 ("\\($(5!D!h!6!h!O(B\\)" "$(5$i(B")
|
|
401 ("\\($(5!D!h!D!h!T(B\\)" "$(5$j(B")
|
|
402 ("\\($(5!D!h!E!h!T(B\\)" "$(5$k(B")
|
|
403 ("\\($(5!D!h(B\\)$(5!E!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5!D!h(B") ; Special Half Form (for ddhra)
|
|
404 ("\\($(5!D!h!5(B\\)" "$(5$l(B")
|
|
405 ("\\($(5!D!h!6(B\\)" "$(5$m(B")
|
|
406 ("\\($(5!D!h!D(B\\)" "$(5$n(B")
|
|
407 ("\\($(5!D!h!E(B\\)" "$(5$o(B")
|
|
408 ("\\($(5!D!h!F(B\\)" "$(5$p(B")
|
|
409 ("\\($(5!D!h(B\\)$(5!J!h(B" "$(5!D!h(B") ; Suppressing "db-"
|
|
410 ("\\($(5!D!h!J(B\\)" "$(5$q(B")
|
|
411 ("\\($(5!D!h!K(B\\)" "$(5$r(B")
|
|
412 ("\\($(5!D!h!L(B\\)" "$(5$s(B")
|
|
413 ("\\($(5!D!h!M(B\\)" "$(5$t(B")
|
|
414 ("\\($(5!D!h!T(B\\)" "$(5$u(B")
|
|
415 ("\\($(5!E!h!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"e(B") ; Half Form
|
|
416 ("\\($(5!E!h!F(B\\)" "$(5$v(B")
|
|
417 ("\\($(5!E!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"f(B") ; Half Form Post "r"
|
|
418 ("\\($(5!E!h!O(B\\)" "$(5!E"q(B") ; Post "r"
|
|
419 ("\\($(5!E!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"E(B") ; Half Form
|
|
420 ("\\($(5!F!h!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"k(B") ; Half Form
|
|
421 ("\\($(5!F!h!F(B\\)" "$(5$w(B")
|
|
422 ("\\($(5!F!h!O(B\\)" "$(5!F"q(B")
|
|
423 ("\\($(5!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"F(B") ; Half Form
|
|
424 ("\\($(5!G!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"G(B") ; Nukta Half Form
|
|
425 ("\\($(5!H!h(B\\)$(5!B!h!O(B" "$(5"H(B") ; Special Rule for "p-tr"
|
|
426 ("\\($(5!H!h!B!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"g(B") ; Half Form
|
|
427 ("\\($(5!H!h!B(B\\)" "$(5$x(B")
|
|
428 ("\\($(5!H!h!F(B\\)" "$(5$y(B")
|
|
429 ("\\($(5!H!h!Q(B\\)" "$(5$z(B")
|
|
430 ("\\($(5!H!h!O(B\\)" "$(5"&(B") ; Post "r"
|
|
431 ("\\($(5!H!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"H(B") ; Half Form
|
|
432 ("\\($(5!I!h!O(B\\)" "$(5"'(B") ; Post "r"
|
|
433 ("\\($(5!I!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"I(B") ; Half Form
|
|
434 ("\\($(5!I!i!h!O(B\\)" "$(5""(B") ; Nukta Post "r"
|
|
435 ("\\($(5!I!i!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"y(B") ; Nukta Half Form
|
|
436 ("\\($(5!I!i(B\\)" "$(5#I(B") ; Nukta
|
|
437 ("\\($(5!J!h(B\\)$(5!F!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"J(B") ; Special Half Form
|
|
438 ("\\($(5!J!h!F(B\\)" "$(5${(B")
|
|
439 ("\\($(5!J!h(B\\)$(5!J!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"J(B") ; Special Half Form
|
|
440 ("\\($(5!J!h!J(B\\)" "$(5$|(B")
|
|
441 ("\\($(5!J!h(B\\)$(5!T!h(B[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"J(B") ; Special Half Form
|
|
442 ("\\($(5!J!h!T(B\\)" "$(5$}(B")
|
|
443 ("\\($(5!J!h!O(B\\)" "$(5!J"q(B") ; Post "r"
|
|
444 ("\\($(5!J!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"J(B") ; Half Form
|
|
445 ("\\($(5!K!h!F(B\\)" "$(5$~(B")
|
|
446 ("\\($(5!K!h!O(B\\)" "$(5!K"q(B") ; Post "r"
|
|
447 ("\\($(5!K!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"K(B") ; Half Form
|
|
448 ("\\($(5!L!h!F(B\\)" "$(5#P(B")
|
|
449 ("\\($(5!L!h!Q(B\\)" "$(5#Q(B")
|
|
450 ("\\($(5!L!h!O(B\\)" "$(5!L"q(B") ; Post "r"
|
|
451 ("\\($(5!L!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"L(B") ; Half Form
|
|
452 ("\\($(5!M!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"M(B") ; Half Form
|
|
453 ("\\($(5!N!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"N(B") ; Half Form
|
|
454 ;; special form for "ru".
|
|
455 ("\\($(5!O!](B\\)" "$(5",(B")
|
|
456 ("\\($(5!O!^(B\\)" "$(5"-(B")
|
|
457 ("\\($(5!P!](B\\)" "$(5".(B")
|
|
458 ("\\($(5!P!^(B\\)" "$(5"/(B")
|
|
459 ;;
|
|
460 ("\\($(5!Q!h!Q(B\\)" "$(5#`(B" sanskrit)
|
|
461 ("\\($(5!Q!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"Q(B") ; Half Form
|
|
462 ("\\($(5!R!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"R(B") ; Half Form
|
|
463 ("\\($(5!S!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"S(B") ; Half Form
|
|
464 ("\\($(5!T!h!F(B\\)" "$(5#a(B")
|
|
465 ("\\($(5!T!h!T(B\\)" "$(5#b(B")
|
|
466 ("\\($(5!T!h!O(B\\)" "$(5!T"q(B") ; Post "r"
|
|
467 ("\\($(5!T!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"T(B") ; Half Form
|
|
468 ("\\($(5!U!h!8!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"h(B") ; Half Form
|
|
469 ("\\($(5!U!h!8(B\\)" "$(5#c(B")
|
|
470 ("\\($(5!U!h!F(B\\)" "$(5#d(B")
|
|
471 ("\\($(5!U!h!J(B\\)" "$(5#e(B")
|
|
472 ("\\($(5!U!h!Q(B\\)" "$(5#f(B")
|
|
473 ("\\($(5!U!h(B\\)$(5!T!h!O(B" "$(5"U(B") ; Special Half Form
|
|
474 ("\\($(5!U!h!T!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"j(B") ; Half Form
|
|
475 ; ("\\($(5!U!h!T(B\\)" "$(5#g(B")
|
|
476 ("\\($(5!U!h!O!h!T(B\\)" "$(5#g(B")
|
|
477 ("\\($(5!U!h!O!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"i(B") ; Half Form
|
|
478 ("\\($(5!U!h!O(B\\)" "$(5")(B") ; Post "r"
|
|
479 ("\\($(5!U!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"U(B") ; Half Form
|
|
480 ("\\($(5!V!h!=!h!O!h!M(B\\)" "$(5#h(B")
|
|
481 ("\\($(5!V!h!=!h!M(B\\)" "$(5#i(B")
|
|
482 ("\\($(5!V!h!=!h!T(B\\)" "$(5#j(B")
|
|
483 ("\\($(5!V!h!=(B\\)" "$(5#k(B")
|
|
484 ("\\($(5!V!h!>(B\\)" "$(5#l(B")
|
|
485 ("\\($(5!V!h!O(B\\)" "$(5!V"q(B") ; Post "r"
|
|
486 ("\\($(5!V!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"V(B") ; Half Form
|
|
487 ("\\($(5!W!h!F!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"W"F(B") ; Special Half Form
|
|
488 ("\\($(5!W!h!F(B\\)" "$(5#m(B")
|
|
489 ("\\($(5!W!h!O(B\\)" "$(5#n(B")
|
|
490 ("\\($(5!W!h(B\\)[$(5!3(B-$(5!N!P(B-$(5!X(B]" "$(5"W(B") ; Half Form
|
|
491 ("\\($(5!X!h!A(B\\)" "$(5#p(B")
|
|
492 ("\\($(5!X!h!F(B\\)" "$(5#q(B")
|
|
493 ("\\($(5!X!h!L(B\\)" "$(5#r(B")
|
|
494 ("\\($(5!X!h!M(B\\)" "$(5#s(B")
|
|
495 ("\\($(5!X!h!O(B\\)" "$(5#t(B")
|
|
496 ("\\($(5!X!h!Q(B\\)" "$(5#u(B")
|
|
497 ("\\($(5!X!h!T(B\\)" "$(5#v(B")
|
|
498 ;; Special Ligature Rules
|
|
499 ("\\($(5!X!_(B\\)" "$(5#R(B")
|
|
500
|
|
501 ;; For consonants other than listed above, glyph-composition will
|
|
502 ;; be applied. If the consonant which is preceding "$(5!O(B" does not
|
|
503 ;; have the vertical line (such as "$(5!?(B"), "$(5"r(B" is put beneath the
|
|
504 ;; consonant.
|
|
505 ;;
|
|
506 ("[$(5!7!9!=!>!?!@!D!O!P!R!S!X(B]\\($(5!h!O(B\\)" "$(5"r(B")
|
|
507 ("[$(5!6!8!C!E!F!H!J!K!L!M!T!V(B]\\($(5!h!O(B\\)" "$(5"q(B")
|
|
508 ("$(5!?!i(B\\($(5!h!O(B\\)" "$(5"r(B")
|
|
509 ("$(5!@!i(B\\($(5!h!O(B\\)" "$(5"r(B")
|
|
510
|
|
511 ;; Nukta with Non-Consonants
|
|
512 ("\\($(5!!!i(B\\)" "$(5#!(B")
|
|
513 ("\\($(5!&!i(B\\)" "$(5#&(B")
|
|
514 ("\\($(5!'!i(B\\)" "$(5#'(B")
|
|
515 ("\\($(5!*!i(B\\)" "$(5#*(B")
|
|
516 ("\\($(5![!i(B\\)" "$(5#L(B")
|
|
517 ("\\($(5!\!i(B\\)" "$(5#M(B")
|
|
518 ("\\($(5!_!i(B\\)" "$(5#K(B")
|
|
519 ("\\($(5!j!i(B\\)" "$(5#J(B")
|
|
520
|
|
521 ;; Special rule for "r + some vowels"
|
|
522 ("\\($(5!O!_!i(B\\)" "$(5#*"p(B")
|
|
523 ("\\($(5!O![!i(B\\)" "$(5#&"p(B")
|
|
524 ("\\($(5!O!\!i(B\\)" "$(5#'"p(B")
|
|
525 ("\\($(5!O!_(B\\)" "$(5!*"p(B")
|
|
526 ;; If everything fails, "y" will connect to the front consonant.
|
|
527 ("\\($(5!h!M(B\\)" "$(5"](B")
|
|
528 )
|
|
529 "Alist of regexps of Devanagari character sequences vs composed characters.")
|
|
530
|
|
;; Index `devanagari-char-to-glyph-rules' by character at load time:
;;  - every rule is appended to the `char-to-glyph' property of the first
;;    character of its subject (the text inside \\(...\\)), so conversion
;;    can fetch the candidate rules for the character it is looking at;
;;  - the first rule producing a given glyph is recorded in that glyph's
;;    `glyph-to-char' property for the reverse conversion.
;; Evaluating this form again (e.g. when the file is reloaded) adds nothing
;; new: a rule already present in a character's list is not appended twice.
(let ((rules devanagari-char-to-glyph-rules))
  (while rules
    (let* ((rule (car rules))
           (regexp (car rule))
           (glyphs (cdr rule))
           (chars
            ;; The subject is the text between \\( and \\).  A rule without
            ;; a group has the whole expression as its subject, which is the
            ;; convention `string-conversion-by-rule' documents; never read
            ;; match data left over from a failed match.
            (if (string-match "\\\\(\\(.+\\)\\\\)" regexp)
                (substring regexp (match-beginning 1) (match-end 1))
              regexp))
           (char (string-to-char chars))
           (glyph (string-to-char (car glyphs)))
           (known (get-char-code-property char 'char-to-glyph)))
      (setq rules (cdr rules))
      ;; We append rather than cons because priority is top to bottom.
      (if (not (member rule known))
          (put-char-code-property
           char 'char-to-glyph (append known (list rule))))
      (if (and (< ?(5z(B glyph)		; Glyphs only.
               (null (get-char-code-property glyph 'glyph-to-char)))
          ;; One glyph may correspond to multiple characters, e.g. a
          ;; surrounding vowel in Tamil, but for Devanagari we keep only
          ;; the first rule so that one glyph corresponds to one char.
          ;; The property is known to be empty here, so the new value is
          ;; a one-element list.
          (put-char-code-property
           glyph 'glyph-to-char
           (list (list (car glyphs) chars)))))))
|
|
557
|
|
558 ;;
|
|
559 ;; Function used in both characters-to-glyphs conversion and
|
|
560 ;; glyphs-to-characters conversion.
|
|
561 ;;
|
|
562
|
|
(defun max-match-len (regexp)
  "Return the maximum length of text that can match the pattern REGEXP.
Only the [...] construct of regexps is recognized: each bracket
expression counts as one character.  A leading `^' counts as zero
characters, and every other character of REGEXP is taken as a literal
that counts as one character.

`string-conversion-by-rule' uses this value to decide how far before
the current position a rule's leading context can start."
  (let ((len 0)
        ;; Skip a leading anchor: it consumes no text.
        (index (if (string-match "\\`\\^" regexp) 1 0)))
    (while (string-match "\\[[^]]+\\]" regexp index)
      ;; Literal characters before the bracket, plus one for the bracket.
      (setq len (+ len (- (match-beginning 0) index) 1)
            index (match-end 0)))
    ;; Count the literals after the last bracket expression too (or all of
    ;; them when REGEXP has no bracket expression).  Leaving them out made
    ;; a purely literal context report a length of 0.
    (+ len (- (length regexp) index))))
|
|
572
|
|
573 ;; Return t iff at least one member appears in both LIST1 and LIST2.
|
|
(defun intersecting-p (list1 list2)
  "Return t if some element of LIST1 is also in LIST2, else nil.
Elements are compared with `eq', as `memq' does."
  (let ((rest list1))
    ;; Walk LIST1 until it is exhausted or its head is found in LIST2.
    (while (and rest (not (memq (car rest) list2)))
      (setq rest (cdr rest)))
    ;; A non-empty remainder means the walk stopped on a shared element.
    (if rest t nil)))
|
|
581
|
|
(defun string-conversion-by-rule (source symbol &rest specs)
  "Return string SOURCE rewritten by the rules attached to its characters.
The rules for a character are the value of its SYMBOL property, as
returned by `get-char-code-property'.  The remaining arguments form the
list SPECS, which restricts the applicable rules.

Rules have the form ((REGEXP STR RULE-SPEC ...) ...).  Scanning SOURCE
from left to right, the rules of the character at the current position
are tried in list order; the first applicable rule whose subject matches
exactly at that position has its subject replaced by STR, and scanning
resumes after the subject.  A character for which no rule matches is
copied unchanged.  More specific rules should therefore come before
less specific ones.

A rule with no RULE-SPECs is always applicable.  A rule with RULE-SPECs
is applicable only if at least one of them is a member of SPECS.

If REGEXP has the form '...\\(...\\)...', the subject is the text matched
inside the parentheses and the part before them is context that must
precede it.  Otherwise the entire expression is the subject."
  (let ((cursor 0)
        (pieces nil))                   ; output fragments, newest first
    (while (< cursor (length source))
      (let ((candidates (get-char-code-property (aref source cursor) symbol))
            (replaced nil))
        (while candidates
          (let* ((rule (car candidates))
                 (pattern (car rule))
                 (replacement (nth 1 rule))
                 (rule-specs (nthcdr 2 rule))
                 (start cursor))
            (setq candidates (cdr candidates))
            (if (or (null rule-specs)
                    (intersecting-p specs rule-specs))
                (progn
                  (if (string-match "\\\\(.+\\\\)" pattern)
                      ;; Explicit subject: begin searching early enough
                      ;; for the context in front of the group to fit.
                      (progn
                        (string-match "^[^\\\\]*" pattern)
                        (setq start
                              (- cursor
                                 (max-match-len
                                  (substring pattern 0 (match-end 0))))))
                    ;; No group: the whole pattern becomes group 1.
                    (setq pattern (concat "\\(" pattern "\\)")))
                  (if (< start 0)
                      (setq start 0))
                  ;; The rule fires only if its subject starts at CURSOR.
                  (if (and (string-match pattern source start)
                           (= (match-beginning 1) cursor))
                      (setq pieces (cons replacement pieces)
                            candidates nil ; leave the rule loop
                            replaced t
                            ;; Skip the characters that were replaced.
                            cursor (match-end 1)))))))
        ;; Nothing applied here: copy one character and move on.
        (if (not replaced)
            (setq pieces (cons (substring source cursor (1+ cursor)) pieces)
                  cursor (1+ cursor)))))
    (mapconcat 'identity (nreverse pieces) "")))
|
|
641
|
|
642
|
|
643 ;;
|
|
644 ;; Convert Character Code to Glyph Code
|
|
645 ;;
|
|
646
|
|
647 ;;;###autoload
|
|
(defun char-to-glyph-devanagari (string &rest langs)
  "Return STRING with Devanagari characters converted to Devanagari glyphs.
Ligatures and special rules are processed by applying the `char-to-glyph'
rules through `string-conversion-by-rule'.  LANGS are passed on as the
rule specs that select language-specific rules."
  ;; `apply' spreads LANGS as the trailing arguments itself.
  (apply 'string-conversion-by-rule string 'char-to-glyph langs))
|
|
654
|
|
655 ;; Example:
|
|
656 ;;(char-to-glyph-devanagari "$(5!X![!F!h!D!\(B") => "$(5!X!["F!D!\(B"
|
|
657 ;;(char-to-glyph-devanagari "$(5!O!Z!V!h!=!h!O![!M(B") => ???
|
|
658
|
|
659 ;;
|
|
660 ;; Phase 2: Compose Glyphs to form One Glyph.
|
|
661 ;;
|
|
662
|
|
663 ;; Each list consists of glyph, application-priority and application-direction.
|
|
664 ;;
|
|
665 ;; Glyphs will be ordered from low priority number to high priority number.
|
|
666 ;; If application-priority is omitted, it is assumed to be 0.
|
|
;; If application-direction is omitted, it is assumed to be '(mr . ml).
|
|
668
|
|
669 (defconst devanagari-composition-rules
|
|
670 '((?$(5!!(B 0 (tr . br))
|
|
671 (?$(5!"(B 0 (mr . mr))
|
|
672 (?$(5!#(B 0)
|
|
673 (?$(5!$(B 0)
|
|
674 (?$(5!%(B 0)
|
|
675 (?$(5!&(B 0)
|
|
676 (?$(5!'(B 0)
|
|
677 (?$(5!((B 0)
|
|
678 (?$(5!)(B 0)
|
|
679 (?$(5!*(B 0)
|
|
680 (?$(5!+(B 0)
|
|
681 (?$(5!,(B 0)
|
|
682 (?$(5!-(B 0)
|
|
683 (?$(5!.(B 0)
|
|
684 (?$(5!/(B 0)
|
|
685 (?$(5!0(B 0)
|
|
686 (?$(5!1(B 0)
|
|
687 (?$(5!2(B 0)
|
|
688 (?$(5!3(B 0)
|
|
689 (?$(5!4(B 0)
|
|
690 (?$(5!5(B 0)
|
|
691 (?$(5!6(B 0)
|
|
692 (?$(5!7(B 0)
|
|
693 (?$(5!8(B 0)
|
|
694 (?$(5!9(B 0)
|
|
695 (?$(5!:(B 0)
|
|
696 (?$(5!;(B 0)
|
|
697 (?$(5!<(B 0)
|
|
698 (?$(5!=(B 0)
|
|
699 (?$(5!>(B 0)
|
|
700 (?$(5!?(B 0)
|
|
701 (?$(5!@(B 0)
|
|
702 (?$(5!A(B 0)
|
|
703 (?$(5!B(B 0)
|
|
704 (?$(5!C(B 0)
|
|
705 (?$(5!D(B 0)
|
|
706 (?$(5!E(B 0)
|
|
707 (?$(5!F(B 0)
|
|
708 (?$(5!G(B 0)
|
|
709 (?$(5!H(B 0)
|
|
710 (?$(5!I(B 0)
|
|
711 (?$(5!J(B 0)
|
|
712 (?$(5!K(B 0)
|
|
713 (?$(5!L(B 0)
|
|
714 (?$(5!M(B 0)
|
|
715 (?$(5!N(B 0)
|
|
716 (?$(5!O(B 0)
|
|
717 (?$(5!P(B 0)
|
|
718 (?$(5!Q(B 0)
|
|
719 (?$(5!R(B 0)
|
|
720 (?$(5!S(B 0)
|
|
721 (?$(5!T(B 0)
|
|
722 (?$(5!U(B 0)
|
|
723 (?$(5!V(B 0)
|
|
724 (?$(5!W(B 0)
|
|
725 (?$(5!X(B 0)
|
|
726 (?$(5!Y(B 0)
|
|
727 (?$(5!Z(B 0)
|
|
728 (?$(5![(B 0 (ml . mr))
|
|
729 (?$(5!\(B 0)
|
|
730 (?$(5!](B 0 (br . tr))
|
|
731 (?$(5!^(B 0 (br . tr))
|
|
732 (?$(5!_(B 0 (br . tr))
|
|
733 (?$(5!`(B 0 (mr . mr)) ; (tc . bc)
|
|
734 (?$(5!a(B 0 (mr . mr))
|
|
735 (?$(5!b(B 0 (mr . mr))
|
|
736 (?$(5!c(B 0 (mr . mr))
|
|
737 (?$(5!d(B 0)
|
|
738 (?$(5!e(B 0)
|
|
739 (?$(5!f(B 0)
|
|
740 (?$(5!g(B 0)
|
|
741 (?$(5!h(B 0 (br . tr))
|
|
742 (?$(5!i(B 0 (br . tr))
|
|
743 (?$(5!j(B 0)
|
|
744 (nil 0)
|
|
745 (nil 0)
|
|
746 (nil 0)
|
|
747 (nil 0)
|
|
748 (nil 0)
|
|
749 (nil 0)
|
|
750 (?$(5!q(B 0)
|
|
751 (?$(5!r(B 0)
|
|
752 (?$(5!s(B 0)
|
|
753 (?$(5!t(B 0)
|
|
754 (?$(5!u(B 0)
|
|
755 (?$(5!v(B 0)
|
|
756 (?$(5!w(B 0)
|
|
757 (?$(5!x(B 0)
|
|
758 (?$(5!y(B 0)
|
|
759 (?$(5!z(B 0)
|
|
760 (nil 0)
|
|
761 (nil 0)
|
|
762 (nil 0)
|
|
763 (nil 0)
|
|
764 (?$(5"!(B 0)
|
|
765 (?$(5""(B 0)
|
|
766 (?$(5"#(B 0)
|
|
767 (?$(5"$(B 0)
|
|
768 (?$(5"%(B 0)
|
|
769 (?$(5"&(B 0)
|
|
770 (?$(5"'(B 0)
|
|
771 (?$(5"((B 0)
|
|
772 (?$(5")(B 0)
|
|
773 (?$(5"*(B 0)
|
|
774 (?$(5"+(B 0)
|
|
775 (?$(5",(B 0)
|
|
776 (?$(5"-(B 0)
|
|
777 (?$(5".(B 0)
|
|
778 (?$(5"/(B 0)
|
|
779 (?$(5"0(B 0)
|
|
780 (?$(5"1(B 0)
|
|
781 (?$(5"2(B 0)
|
|
782 (?$(5"3(B 0)
|
|
783 (?$(5"4(B 0)
|
|
784 (?$(5"5(B 0)
|
|
785 (?$(5"6(B 0)
|
|
786 (?$(5"7(B 0)
|
|
787 (?$(5"8(B 0)
|
|
788 (?$(5"9(B 0)
|
|
789 (?$(5":(B 0)
|
|
790 (?$(5";(B 0)
|
|
791 (?$(5"<(B 0)
|
|
792 (?$(5"=(B 0)
|
|
793 (?$(5">(B 0)
|
|
794 (?$(5"?(B 0)
|
|
795 (?$(5"@(B 0)
|
|
796 (?$(5"A(B 0)
|
|
797 (?$(5"B(B 0)
|
|
798 (?$(5"C(B 0)
|
|
799 (?$(5"D(B 0)
|
|
800 (?$(5"E(B 0)
|
|
801 (?$(5"F(B 0)
|
|
802 (?$(5"G(B 0)
|
|
803 (?$(5"H(B 0)
|
|
804 (?$(5"I(B 0)
|
|
805 (?$(5"J(B 0)
|
|
806 (?$(5"K(B 0)
|
|
807 (?$(5"L(B 0)
|
|
808 (?$(5"M(B 0)
|
|
809 (?$(5"N(B 0)
|
|
810 (?$(5"O(B 0)
|
|
811 (?$(5"P(B 0)
|
|
812 (?$(5"Q(B 0)
|
|
813 (?$(5"R(B 0)
|
|
814 (?$(5"S(B 0)
|
|
815 (?$(5"T(B 0)
|
|
816 (?$(5"U(B 0)
|
|
817 (?$(5"V(B 0)
|
|
818 (?$(5"W(B 0)
|
|
819 (?$(5"X(B 0)
|
|
820 (?$(5"Y(B 0)
|
|
821 (?$(5"Z(B 0)
|
|
822 (?$(5"[(B 0)
|
|
823 (?$(5"\(B 0)
|
|
824 (?$(5"](B 0)
|
|
825 (?$(5"^(B 0)
|
|
826 (?$(5"_(B 0)
|
|
827 (?$(5"`(B 0)
|
|
828 (?$(5"a(B 0)
|
|
829 (?$(5"b(B 0)
|
|
830 (?$(5"c(B 0)
|
|
831 (?$(5"d(B 0)
|
|
832 (?$(5"e(B 0)
|
|
833 (?$(5"f(B 0)
|
|
834 (?$(5"g(B 0)
|
|
835 (?$(5"h(B 0)
|
|
836 (?$(5"i(B 0)
|
|
837 (?$(5"j(B 0)
|
|
838 (?$(5"k(B 0)
|
|
839 (?$(5"l(B 0)
|
|
840 (?$(5"m(B 0)
|
|
841 (?$(5"n(B 0)
|
|
842 (?$(5"o(B 0)
|
|
843 (?$(5"p(B 10 (mr . mr))
|
|
844 (?$(5"q(B 0 (br . br))
|
|
845 (?$(5"r(B 0 (br . tr))
|
|
846 (?$(5"s(B 0)
|
|
847 (?$(5"t(B 0)
|
|
848 (?$(5"u(B 0)
|
|
849 (?$(5"v(B 0)
|
|
850 (?$(5"w(B 0)
|
|
851 (?$(5"x(B 0)
|
|
852 (?$(5"y(B 0)
|
|
853 (?$(5"z(B 0)
|
|
854 (?$(5"{(B 0)
|
|
855 (?$(5"|(B 0)
|
|
856 (?$(5"}(B 0)
|
|
857 (?$(5"~(B 0)
|
|
858 (?$(5#!(B 0)
|
|
859 (?$(5#"(B 0)
|
|
860 (?$(5##(B 0)
|
|
861 (?$(5#$(B 0)
|
|
862 (?$(5#%(B 0)
|
|
863 (?$(5#&(B 0)
|
|
864 (?$(5#'(B 0)
|
|
865 (?$(5#((B 0)
|
|
866 (?$(5#)(B 0)
|
|
867 (?$(5#*(B 0)
|
|
868 (?$(5#+(B 0)
|
|
869 (?$(5#,(B 0)
|
|
870 (?$(5#-(B 0)
|
|
871 (?$(5#.(B 0)
|
|
872 (?$(5#/(B 0)
|
|
873 (?$(5#0(B 0)
|
|
874 (?$(5#1(B 0)
|
|
875 (?$(5#2(B 0)
|
|
876 (?$(5#3(B 0)
|
|
877 (?$(5#4(B 0)
|
|
878 (?$(5#5(B 0)
|
|
879 (?$(5#6(B 0)
|
|
880 (?$(5#7(B 0)
|
|
881 (?$(5#8(B 0)
|
|
882 (?$(5#9(B 0)
|
|
883 (?$(5#:(B 0)
|
|
884 (?$(5#;(B 0)
|
|
885 (?$(5#<(B 0)
|
|
886 (?$(5#=(B 0)
|
|
887 (?$(5#>(B 0)
|
|
888 (?$(5#?(B 0)
|
|
889 (?$(5#@(B 0)
|
|
890 (?$(5#A(B 0)
|
|
891 (?$(5#B(B 0)
|
|
892 (?$(5#C(B 0)
|
|
893 (?$(5#D(B 0)
|
|
894 (?$(5#E(B 0)
|
|
895 (?$(5#F(B 0)
|
|
896 (?$(5#G(B 0)
|
|
897 (?$(5#H(B 0)
|
|
898 (?$(5#I(B 0)
|
|
899 (?$(5#J(B 0)
|
|
900 (?$(5#K(B 0 (br . tr))
|
|
901 (?$(5#L(B 0 (br . tr))
|
|
902 (?$(5#M(B 0 (br . tr))
|
|
903 (?$(5#N(B 0)
|
|
904 (?$(5#O(B 0)
|
|
905 (?$(5#P(B 0)
|
|
906 (?$(5#Q(B 0)
|
|
907 (?$(5#R(B 0)
|
|
908 (?$(5#S(B 0)
|
|
909 (?$(5#T(B 0)
|
|
910 (?$(5#U(B 0)
|
|
911 (?$(5#V(B 0)
|
|
912 (?$(5#W(B 0)
|
|
913 (?$(5#X(B 0)
|
|
914 (?$(5#Y(B 0)
|
|
915 (?$(5#Z(B 0)
|
|
916 (?$(5#[(B 0)
|
|
917 (?$(5#\(B 0)
|
|
918 (?$(5#](B 0)
|
|
919 (?$(5#^(B 0)
|
|
920 (?$(5#_(B 0)
|
|
921 (?$(5#`(B 0)
|
|
922 (?$(5#a(B 0)
|
|
923 (?$(5#b(B 0)
|
|
924 (?$(5#c(B 0)
|
|
925 (?$(5#d(B 0)
|
|
926 (?$(5#e(B 0)
|
|
927 (?$(5#f(B 0)
|
|
928 (?$(5#g(B 0)
|
|
929 (?$(5#h(B 0)
|
|
930 (?$(5#i(B 0)
|
|
931 (?$(5#j(B 0)
|
|
932 (?$(5#k(B 0)
|
|
933 (?$(5#l(B 0)
|
|
934 (?$(5#m(B 0)
|
|
935 (?$(5#n(B 0)
|
|
936 (?$(5#o(B 0)
|
|
937 (?$(5#p(B 0)
|
|
938 (?$(5#q(B 0)
|
|
939 (?$(5#r(B 0)
|
|
940 (?$(5#s(B 0)
|
|
941 (?$(5#t(B 0)
|
|
942 (?$(5#u(B 0)
|
|
943 (?$(5#v(B 0)
|
|
944 (?$(5#w(B 0)
|
|
945 (?$(5#x(B 0)
|
|
946 (?$(5#y(B 0)
|
|
947 (?$(5#z(B 0)
|
|
948 (?$(5#{(B 0)
|
|
949 (?$(5#|(B 0)
|
|
950 (?$(5#}(B 0)
|
|
951 (?$(5#~(B 0)
|
|
952 (?$(5$!(B 0)
|
|
953 (?$(5$"(B 0)
|
|
954 (?$(5$#(B 0)
|
|
955 (?$(5$$(B 0)
|
|
956 (?$(5$%(B 0)
|
|
957 (?$(5$&(B 0)
|
|
958 (?$(5$'(B 0)
|
|
959 (?$(5$((B 0)
|
|
960 (?$(5$)(B 0)
|
|
961 (?$(5$*(B 0)
|
|
962 (?$(5$+(B 0)
|
|
963 (?$(5$,(B 0)
|
|
964 (?$(5$-(B 0)
|
|
965 (?$(5$.(B 0)
|
|
966 (?$(5$/(B 0)
|
|
967 (?$(5$0(B 0)
|
|
968 (?$(5$1(B 0)
|
|
969 (?$(5$2(B 0)
|
|
970 (?$(5$3(B 0)
|
|
971 (?$(5$4(B 0)
|
|
972 (?$(5$5(B 0)
|
|
973 (?$(5$6(B 0)
|
|
974 (?$(5$7(B 0)
|
|
975 (?$(5$8(B 0)
|
|
976 (?$(5$9(B 0)
|
|
977 (?$(5$:(B 0)
|
|
978 (?$(5$;(B 0)
|
|
979 (?$(5$<(B 0)
|
|
980 (?$(5$=(B 0)
|
|
981 (?$(5$>(B 0)
|
|
982 (?$(5$?(B 0)
|
|
983 (?$(5$@(B 0)
|
|
984 (?$(5$A(B 0)
|
|
985 (?$(5$B(B 0)
|
|
986 (?$(5$C(B 0)
|
|
987 (?$(5$D(B 0)
|
|
988 (?$(5$E(B 0)
|
|
989 (?$(5$F(B 0)
|
|
990 (?$(5$G(B 0)
|
|
991 (?$(5$H(B 0)
|
|
992 (?$(5$I(B 0)
|
|
993 (?$(5$J(B 0)
|
|
994 (?$(5$K(B 0)
|
|
995 (?$(5$L(B 0)
|
|
996 (?$(5$M(B 0)
|
|
997 (?$(5$N(B 0)
|
|
998 (?$(5$O(B 0)
|
|
999 (?$(5$P(B 0)
|
|
1000 (?$(5$Q(B 0)
|
|
1001 (?$(5$R(B 0)
|
|
1002 (?$(5$S(B 0)
|
|
1003 (?$(5$T(B 0)
|
|
1004 (?$(5$U(B 0)
|
|
1005 (?$(5$V(B 0)
|
|
1006 (?$(5$W(B 0)
|
|
1007 (?$(5$X(B 0)
|
|
1008 (?$(5$Y(B 0)
|
|
1009 (?$(5$Z(B 0)
|
|
1010 (?$(5$[(B 0)
|
|
1011 (?$(5$\(B 0)
|
|
1012 (?$(5$](B 0)
|
|
1013 (?$(5$^(B 0)
|
|
1014 (?$(5$_(B 0)
|
|
1015 (?$(5$`(B 0)
|
|
1016 (?$(5$a(B 0)
|
|
1017 (?$(5$b(B 0)
|
|
1018 (?$(5$c(B 0)
|
|
1019 (?$(5$d(B 0)
|
|
1020 (?$(5$e(B 0)
|
|
1021 (?$(5$f(B 0)
|
|
1022 (?$(5$g(B 0)
|
|
1023 (?$(5$h(B 0)
|
|
1024 (?$(5$i(B 0)
|
|
1025 (?$(5$j(B 0)
|
|
1026 (?$(5$k(B 0)
|
|
1027 (?$(5$l(B 0)
|
|
1028 (?$(5$m(B 0)
|
|
1029 (?$(5$n(B 0)
|
|
1030 (?$(5$o(B 0)
|
|
1031 (?$(5$p(B 0)
|
|
1032 (?$(5$q(B 0)
|
|
1033 (?$(5$r(B 0)
|
|
1034 (?$(5$s(B 0)
|
|
1035 (?$(5$t(B 0)
|
|
1036 (?$(5$u(B 0)
|
|
1037 (?$(5$v(B 0)
|
|
1038 (?$(5$w(B 0)
|
|
1039 (?$(5$x(B 0)
|
|
1040 (?$(5$y(B 0)
|
|
1041 (?$(5$z(B 0)
|
|
1042 (?$(5${(B 0)
|
|
1043 (?$(5$|(B 0)
|
|
1044 (?$(5$}(B 0)
|
|
1045 (?$(5$~(B 0)
|
|
1046 ))
|
|
1047
|
|
1048 ;; Determine composition priority and rule of the array of Glyphs.
|
|
1049 ;; Sort the glyphs with their priority.
|
|
1050
|
|
(defun devanagari-reorder-glyphs-for-composition (string start end)
  "Return the composition-rule entries for part of STRING, sorted by priority.
Each glyph of STRING from index START (inclusive) to END (exclusive) is
looked up with `assq' in `devanagari-composition-rules'.  The value is
the list of those entries, sorted so that the second element of each
entry (its priority) is in ascending order.

A glyph that has no entry in the table is represented by (GLYPH 0)
rather than by nil, so that the priority comparison always receives
numbers and callers always find the glyph in the car of the entry."
  (let ((pos start)
        (entries nil))
    (while (< pos end)
      (let ((glyph (aref string pos)))
        ;; Cons onto the front and reverse once at the end; appending to
        ;; the tail on every iteration copied the whole list each time.
        (setq entries
              (cons (or (assq glyph devanagari-composition-rules)
                        (list glyph 0))
                    entries)))
      (setq pos (1+ pos)))
    (sort (nreverse entries)
          #'(lambda (x y) (< (car (cdr x)) (car (cdr y)))))))
|
771
|
1061
|
|
1062 ;;(devanagari-compose-to-one-glyph "$(5"5!X![(B") => "4$(6!Xv#"5t%![0!X"5![1(B"
|
|
1063
|
|
(defun devanagari-compose-to-one-glyph (devanagari-string)
  "Compose the glyphs of DEVANAGARI-STRING into one composite glyph.
The alternating glyph/rule sequence for the whole string is obtained
from `devanagari-composition-component', which also converts glyphs to
their 1-column shapes where possible.  If that sequence has exactly one
element, return it converted with `char-to-string'; otherwise return
the result of applying `compose-chars' to the sequence."
  ;; The sequence used to be built here by a verbatim copy of the loop in
  ;; `devanagari-composition-component'; delegate to it so the two
  ;; cannot drift apart.
  (let ((components (devanagari-composition-component devanagari-string)))
    (if (= (length components) 1)
        (char-to-string (car components))
      (apply 'compose-chars components))))
|
|
1084
|
|
(defun devanagari-composition-component (string &optional start end)
  "Return the composition components for the glyphs of STRING.
START and END delimit the part of STRING to use; they default to 0 and
the length of STRING.

The glyphs are first ordered by
`devanagari-reorder-glyphs-for-composition'.  The value is a list that
starts with the first glyph and continues with a RULE GLYPH pair for
every following glyph.  RULE is the third element of the glyph's rule
entry when the entry has one, and (mr . ml) otherwise.  The finished
list is passed through `devanagari-wide-to-narrow' before it is
returned."
  (or start (setq start 0))
  (or end (setq end (length string)))
  (let* ((entries (devanagari-reorder-glyphs-for-composition
                   string start end))
         ;; Components are collected in reverse order and flipped once at
         ;; the end, instead of re-copying the list with `append' for
         ;; every glyph.
         (reversed (list (car (car entries))))
         entry)
    (setq entries (cdr entries))
    (while entries
      (setq entry (car entries)
            entries (cdr entries))
      (setq reversed
            (cons (car entry)
                  (cons (if (< 2 (length entry))
                            ;; composition with the rule given in the entry
                            (car (cdr (cdr entry)))
                          ;; default composition rule
                          '(mr . ml))
                        reversed))))
    ;; Convert glyphs to 1-column width if possible.
    (devanagari-wide-to-narrow (nreverse reversed))))
|
|
1104
|
|
1105 ;; Utility function for Phase 2.5
|
|
1106
|
|
1107 ;; Check whether GLYPH is a Devanagari vertical modifier or not.
|
|
1108 ;; If it is a vertical modifier, whether it should be 1-column shape or not
|
|
1109 ;; depends on previous non-vertical modifier.
|
|
(defun devanagari-vertical-modifier-p (glyph)
  "Return non-nil if GLYPH is one of the Devanagari vertical modifiers.
The modifiers are the characters listed in the character alternative
below.  The value is 0 when GLYPH is in the set and nil otherwise."
  ;; The character set is the regexp and GLYPH is the text being matched.
  ;; With the arguments the other way round GLYPH itself was compiled as
  ;; a regexp, so a glyph such as `.' matched anything and `[' signalled
  ;; an invalid-regexp error.
  (string-match "[$(5!"!]!^!_!`!a!b!c!h!i"p"q"r#K#L#M(B]"
                (char-to-string glyph)))
|
|
1113
|
|
(defun devanagari-non-vertical-modifier-p (glyph)
  "Return non-nil if GLYPH is a Devanagari non-vertical modifier.
The modifiers are the characters listed in the character alternative
below.  The value is 0 when GLYPH is in the set and nil otherwise."
  ;; The character set is the regexp and GLYPH is the text being matched.
  ;; With the arguments the other way round GLYPH itself was compiled as
  ;; a regexp, so a glyph such as `.' matched anything and `[' signalled
  ;; an invalid-regexp error.
  ;;
  ;; A wider set that is kept here disabled, as in the original:
  ;;   "[$(5!Z![!\!d!e!f!g(B]"
  (string-match "[$(5![(B]"
                (char-to-string glyph)))
|
|
1118
|
|
(defun devanagari-wide-to-narrow-char (char)
  "Return the narrow counterpart of Devanagari character CHAR.
The counterpart is the cdr of CHAR's entry in `devanagari-1-column-char'.
If CHAR has no entry there, or the cdr of its entry is nil, return CHAR
unchanged."
  (or (cdr (assq char devanagari-1-column-char))
      char))
|
|
1124
|
|
1125 ;;
|
|
1126 ;; Phase 2.5 Convert appropriate character to 1-column shape.
|
|
1127 ;;
|
|
1128 ;; This is temporary and should be removed when Emacs supports
|
|
1129 ;; variable width characters.
|
|
1130 ;;
|
|
1131 ;; This will convert the composing glyphs (2 column glyphs)
|
|
1132 ;; to narrow (1 column) glyphs if they exist.
|
|
1133 ;;
|
|
1134 ;; devanagari-wide-to-narrow-old converts glyphs simply.
|
|
1135 ;; devanagari-wide-to-narrow takes care of upper/lower apply-glyphs
|
|
1136 ;; with 2 column base-glyph.
|
|
1137 ;;
|
|
1138 ;; Execution Examples
|
|
1139 ;;(devanagari-wide-to-narrow '(?$(5!3(B (ml . ml) ?$(5!a(B))
|
|
1140 ;;(devanagari-wide-to-narrow '(?$(5!F(B (ml . ml) ?$(5!a(B))
|
|
1141
|
|
(defun devanagari-wide-to-narrow (src-list)
  "Convert the glyphs in SRC-LIST to their narrow shapes where possible.
This is the entry point for `devanagari-wide-to-narrow-iter': it runs
that function over SRC-LIST with the 2-column flag initially set to t
and returns its result."
  (devanagari-wide-to-narrow-iter src-list t))
|
|
1144
|
|
(defun devanagari-wide-to-narrow-iter (src-list 2-col-glyph)
  "Return a copy of SRC-LIST with glyphs narrowed where appropriate.
SRC-LIST is walked from left to right.  2-COL-GLYPH is the initial value
of a flag telling whether the most recent normal glyph stayed wide.

Each element is handled as follows:
- an element that is not a number is copied unchanged;
- a glyph accepted by `devanagari-non-vertical-modifier-p' is always
  passed through `devanagari-wide-to-narrow-char';
- a glyph accepted by `devanagari-vertical-modifier-p' is kept as is
  while the flag is non-nil, and narrowed otherwise;
- any other glyph is narrowed if `devanagari-1-column-char' gives it a
  narrow form, which clears the flag; otherwise it is kept and the flag
  is set."
  ;; NOTE(review): glyphs are recognized with `numberp'; this assumes
  ;; characters are integers on the Emacs/XEmacs version in use -- confirm.
  (let ((wide-base 2-col-glyph)
        (remaining src-list)
        (converted nil)
        item narrow)
    (while remaining
      (setq item (car remaining)
            remaining (cdr remaining))
      (cond
       ;; not a glyph code (e.g. a composition rule)
       ((not (numberp item))
        (setq converted (cons item converted)))
       ;; glyphs narrowed regardless of the flag
       ((devanagari-non-vertical-modifier-p item)
        (setq converted
              (cons (devanagari-wide-to-narrow-char item) converted)))
       ;; glyphs whose shape depends on the flag
       ((devanagari-vertical-modifier-p item)
        (setq converted
              (cons (if wide-base
                        item
                      (devanagari-wide-to-narrow-char item))
                    converted)))
       ;; normal glyph: it decides the flag for what follows
       (t
        (setq narrow (cdr (assq item devanagari-1-column-char)))
        (setq wide-base (not narrow))
        (setq converted (cons (or narrow item) converted)))))
    (nreverse converted)))
|
|
1171
|
|
1172
|
|
1173 ;;
|
|
1174 ;; Summary
|
|
1175 ;;
|
|
1176
|
|
1177 ;;
|
|
1178 ;; Decomposition of composite sequence.
|
|
1179 ;;
|
|
1180
|
|
1181 ;;;###autoload
|
|
(defun devanagari-decompose-string (str)
  "Return a decomposed copy of the Devanagari string STR.
STR itself is not modified: it is duplicated with `copy-sequence' and
the duplicate is handed to `decompose-string'."
  (let ((duplicate (copy-sequence str)))
    (decompose-string duplicate)))
|
|
1185
|
|
1186 ;;;###autoload
|
|
(defun devanagari-decompose-region (from to)
  "Decompose the text between FROM and TO with `decompose-region'.
When called interactively, FROM and TO are the bounds of the region."
  (interactive "r")
  (decompose-region from to))
|
|
1190
|
|
1191 ;;;
|
|
1192 ;;; Composition
|
|
1193 ;;;
|
|
1194
|
|
1195 ;;;###autoload
|
|
(defun devanagari-compose-string (str &rest langs)
  "Return a copy of STR in which Devanagari glyph units are composed.
The copy is searched repeatedly for `devanagari-composite-glyph-unit'.
Each match is converted by `char-to-glyph-devanagari', which receives
the matched text followed by LANGS.  The result is turned into
components by `devanagari-composition-component', and the matched range
of the copy is composed with `compose-string'.  STR itself is left
untouched."
  (let ((result (copy-sequence str))
        (search-from 0)
        unit-beg unit-end components)
    (while (string-match devanagari-composite-glyph-unit result search-from)
      ;; Record the match bounds before calling anything else that may
      ;; run its own searches.
      (setq unit-beg (match-beginning 0)
            unit-end (match-end 0)
            components (devanagari-composition-component
                        (apply 'char-to-glyph-devanagari
                               (substring result unit-beg unit-end)
                               langs)))
      (compose-string result unit-beg unit-end components)
      (setq search-from unit-end))
    result))
|
|
1211
|
|
1212 ;;;###autoload
|
|
(defun devanagari-compose-region (from to &rest langs)
  "Compose the Devanagari glyph units found between FROM and TO.
The buffer is narrowed to FROM..TO and searched forward for
`devanagari-composite-glyph-unit'.  Each match is converted by
`char-to-glyph-devanagari', which receives the matched text followed by
LANGS.  The result is turned into components by
`devanagari-composition-component', and the match is composed with
`compose-region'.  Point and the narrowing are restored afterwards.
When called interactively, FROM and TO are the bounds of the region."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (goto-char (point-min))
      (let (unit-beg unit-end)
        (while (re-search-forward devanagari-composite-glyph-unit nil t)
          ;; Record the match bounds before calling anything else that
          ;; may run its own searches.
          (setq unit-beg (match-beginning 0)
                unit-end (match-end 0))
          (compose-region
           unit-beg unit-end
           (devanagari-composition-component
            (apply 'char-to-glyph-devanagari
                   (buffer-substring unit-beg unit-end)
                   langs))))))))
|
|
1226
|
|
1227 ;; For pre-write and post-read conversion
|
|
1228
|
|
1229 ;;;###autoload
|
|
(defun devanagari-compose-from-is13194-region (from to)
  "Compose IS 13194 characters in the region to Devanagari characters.
With the buffer narrowed to FROM..TO, run `indian-to-devanagari-region'
and then `devanagari-compose-region' over the whole narrowed part.
Return the size of the narrowed part after both steps.  Point and the
narrowing are restored on exit."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      (indian-to-devanagari-region (point-min) (point-max))
      (devanagari-compose-region (point-min) (point-max))
      ;; The bounds are read again because the steps above may have
      ;; changed the amount of text.
      (let ((converted-size (- (point-max) (point-min))))
        converted-size))))
|
|
1239
|
|
1240 ;;;###autoload
|
|
(defun in-is13194-devanagari-post-read-conversion (len)
  "Compose the LEN characters that follow point from IS 13194.
Calls `devanagari-compose-from-is13194-region' on the text from point
to point plus LEN and returns its value."
  (let* ((beg (point))
         (end (+ beg len)))
    (devanagari-compose-from-is13194-region beg end)))
|
|
1244
|
|
1245 ;;;###autoload
|
|
(defun devanagari-decompose-to-is13194-region (from to)
  "Decompose Devanagari characters in the region to IS 13194 characters.
With the buffer narrowed to FROM..TO, run `devanagari-decompose-region'
and then `devanagari-to-indian-region' over the whole narrowed part.
The value is whatever `devanagari-to-indian-region' returns.  Point and
the narrowing are restored on exit."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region from to)
      ;; First undo the compositions ...
      (devanagari-decompose-region (point-min) (point-max))
      ;; ... then map the characters; the bounds are read again here.
      (devanagari-to-indian-region (point-min) (point-max)))))
|
|
1254
|
|
1255 ;;;###autoload
|
|
(defun in-is13194-devanagari-pre-write-conversion (from to)
  "Prepare text for writing by decomposing it to IS 13194 in a new buffer.
A new buffer named \" *temp*\" is generated and made current.  If FROM
is a string it is inserted there; otherwise the text between FROM and
TO of the previously current buffer is inserted.  The whole new buffer
is then converted with `devanagari-decompose-to-is13194-region'.

The new buffer is left current on return, and the value is always nil."
  (let ((source-buffer (current-buffer))
        (work-buffer (generate-new-buffer " *temp*")))
    (set-buffer work-buffer)
    (if (stringp from)
        (insert from)
      (insert-buffer-substring source-buffer from to))
    (devanagari-decompose-to-is13194-region (point-min) (point-max))
    ;; Should return nil as annotations.
    nil))
|
|
1265
|
|
1266 ;; For input/output of ITRANS
|
|
1267
|
|
1268 ;;;###autoload
|
|
(defun devanagari-encode-itrans-region (from to)
  "Encode the Devanagari text between FROM and TO as ITRANS.
With the buffer narrowed to FROM..TO, first run
`devanagari-decompose-to-is13194-region' and then
`indian-encode-itrans-region' over the whole narrowed part.  The value
is whatever `indian-encode-itrans-region' returns.  The narrowing is
restored on exit.  When called interactively, FROM and TO are the
bounds of the region."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; Step 1: composed Devanagari -> IS 13194.
    (devanagari-decompose-to-is13194-region (point-min) (point-max))
    ;; Step 2: IS 13194 -> ITRANS; the bounds are read again here.
    (indian-encode-itrans-region (point-min) (point-max))))
|
|
1275
|
|
1276 ;;;###autoload
|
|
(defun devanagari-decode-itrans-region (from to)
  "Decode the ITRANS text between FROM and TO into composed Devanagari.
With the buffer narrowed to FROM..TO, first run
`indian-decode-itrans-region' and then
`devanagari-compose-from-is13194-region' over the whole narrowed part.
The value is whatever `devanagari-compose-from-is13194-region' returns.
The narrowing is restored on exit.  When called interactively, FROM and
TO are the bounds of the region."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; Step 1: ITRANS -> IS 13194.
    (indian-decode-itrans-region (point-min) (point-max))
    ;; Step 2: IS 13194 -> composed Devanagari; the bounds are read
    ;; again here.
    (devanagari-compose-from-is13194-region (point-min) (point-max))))
|
|
1283
|
|
1284 ;;
|
|
;; Announce the `devan-util' feature so that `require' can find it.
(provide 'devan-util)
|
|
1286
|
|
1287 ;; Local Variables:
|
|
1288 ;; coding: iso-2022-7bit
|
|
1289 ;; End:
|
778
|
1290
|
|
1291 ;;; devan-util.el ends here
|