771
|
1 ;;; lao-util.el --- utilities for Lao -*- coding: iso-2022-7bit; -*-
|
|
2
|
|
3 ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN.
|
|
4 ;; Licensed to the Free Software Foundation.
|
|
5
|
|
6 ;; Keywords: multilingual, Lao
|
|
7
|
|
8 ;; This file is part of XEmacs.
|
|
9
|
|
10 ;; XEmacs is free software; you can redistribute it and/or modify it
|
|
11 ;; under the terms of the GNU General Public License as published by
|
|
12 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
13 ;; any later version.
|
|
14
|
|
15 ;; XEmacs is distributed in the hope that it will be useful, but
|
|
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
18 ;; General Public License for more details.
|
|
19
|
|
20 ;; You should have received a copy of the GNU General Public License
|
|
21 ;; along with XEmacs; see the file COPYING. If not, write to the Free
|
|
22 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
|
23 ;; 02111-1307, USA.
|
|
24
|
778
|
25 ;;; Synched up with: Emacs 21.1 (language/lao-util.el).
|
|
26
|
|
27 ;;; Commentary:
|
771
|
28
|
|
29 ;;; Code:
|
|
30
|
|
;; Setting information of Lao characters.
|
|
32
|
|
;; Category table private to Lao text processing.  Four categories are
;; registered in it:
;;   ?c consonant, ?s semi-vowel, ?v upper/lower vowel, ?t tone.
(defconst lao-category-table (make-category-table))
(mapcar (function
         (lambda (spec)
           ;; SPEC is (CATEGORY-CHAR . DESCRIPTION).
           (define-category (car spec) (cdr spec) lao-category-table)))
        '((?c . "Lao consonant")
          (?s . "Lao semi-vowel")
          (?v . "Lao upper/lower vowel")
          (?t . "Lao tone")))
|
|
38
|
|
;; Describe every position of the Lao charset, one entry per code point, in
;; code order (94 entries, from `!' to `~').  Each entry has the form
;;   (CHAR PHONETIC-TYPE [NAME [MEANING]])
;; The loop below records PHONETIC-TYPE, NAME and MEANING as char-code
;; properties of CHAR and, for consonants, upper/lower vowels, the lower
;; semivowel and tones, also enters CHAR in `lao-category-table'.
;;
;; NOTE(review): per the coding cookie these literals are ISO-2022-7bit
;; designations; confirm the escape bytes are intact in the checked-in file.
(let ((l '((?(1!(B consonant "LETTER KOR KAI'" "CHICKEN")
           (?(1"(B consonant "LETTER KHOR KHAI'" "EGG")
           (?(1#(B invalid nil)
           (?(1$(B consonant "LETTER QHOR QHWARGN" "BUFFALO")
           (?(1%(B invalid nil)
           ;; This entry had lost its character, which made it describe
           ;; the space character instead of the position after `%'.
           (?(1&(B invalid nil)
           (?(1'(B consonant "LETTER NGOR NGUU" "SNAKE")
           (?(1((B consonant "LETTER JOR JUA" "BUDDHIST NOVICE")
           (?(1)(B invalid nil)
           (?(1*(B consonant "LETTER XOR X\"ARNG" "ELEPHANT")
           (?(1+(B invalid nil)
           (?(1,(B invalid nil)
           (?(1-(B consonant "LETTER YOR YUNG" "MOSQUITO")
           ;; The six unassigned positions `.' through `3'.  They used to
           ;; be six copies of the `.' entry.
           (?(1.(B invalid nil)
           (?(1/(B invalid nil)
           (?(10(B invalid nil)
           (?(11(B invalid nil)
           (?(12(B invalid nil)
           (?(13(B invalid nil)
           (?(14(B consonant "LETTER DOR DANG" "NOSE")
           (?(15(B consonant "LETTER TOR TAR" "EYE")
           (?(16(B consonant "LETTER THOR THUNG" "TO ASK,QUESTION")
           (?(17(B consonant "LETTER DHOR DHARM" "FLAG")
           (?(18(B invalid nil)
           (?(19(B consonant "LETTER NOR NOK" "BIRD")
           (?(1:(B consonant "LETTER BOR BED" "FISHHOOK")
           (?(1;(B consonant "LETTER POR PAR" "FISH")
           (?(1<(B consonant "LETTER HPOR HPER\"" "BEE")
           (?(1=(B consonant "LETTER FHOR FHAR" "WALL")
           (?(1>(B consonant "LETTER PHOR PHUU" "MOUNTAIN")
           (?(1?(B consonant "LETTER FOR FAI" "FIRE")
           (?(1@(B invalid nil)
           (?(1A(B consonant "LETTER MOR MAR\"" "HORSE")
           (?(1B(B consonant "LETTER GNOR GNAR" "MEDICINE")
           (?(1C(B consonant "LETTER ROR ROD" "CAR")
           (?(1D(B invalid nil)
           (?(1E(B consonant "LETTER LOR LIING" "MONKEY")
           (?(1F(B invalid nil)
           (?(1G(B consonant "LETTER WOR WII" "HAND FAN")
           (?(1H(B invalid nil)
           (?(1I(B invalid nil)
           (?(1J(B consonant "LETTER SOR SEA" "TIGER")
           (?(1K(B consonant "LETTER HHOR HHAI" "JAR")
           (?(1L(B invalid nil)
           (?(1M(B consonant "LETTER OR OOW" "TAKE")
           (?(1N(B consonant "LETTER HOR HEA" "BOAT")
           (?(1O(B special "ELLIPSIS")
           (?(1P(B vowel-base "VOWEL SIGN SARA A")
           (?(1Q(B vowel-upper "VOWEL SIGN MAI KAN")
           (?(1R(B vowel-base "VOWEL SIGN SARA AR")
           (?(1S(B vowel-base "VOWEL SIGN SARA AM")
           (?(1T(B vowel-upper "VOWEL SIGN SARA I")
           (?(1U(B vowel-upper "VOWEL SIGN SARA II")
           (?(1V(B vowel-upper "VOWEL SIGN SARA EU")
           (?(1W(B vowel-upper "VOWEL SIGN SARA UR")
           (?(1X(B vowel-lower "VOWEL SIGN SARA U")
           (?(1Y(B vowel-lower "VOWEL SIGN SARA UU")
           (?(1Z(B invalid nil)
           (?(1[(B vowel-upper "VOWEL SIGN MAI KONG")
           (?(1\(B semivowel-lower "SEMIVOWEL SIGN LO")
           (?(1](B vowel-base "SEMIVOWEL SIGN SARA IA")
           (?(1^(B invalid nil)
           (?(1_(B invalid nil)
           (?(1`(B vowel-base "VOWEL SIGN SARA EE")
           (?(1a(B vowel-base "VOWEL SIGN SARA AA")
           (?(1b(B vowel-base "VOWEL SIGN SARA OO")
           (?(1c(B vowel-base "VOWEL SIGN SARA EI MAI MUAN\"")
           (?(1d(B vowel-base "VOWEL SIGN SARA AI MAI MAY")
           (?(1e(B invalid nil)
           (?(1f(B special "KO LA (REPETITION)")
           (?(1g(B invalid nil)
           (?(1h(B tone "TONE MAI EK")
           (?(1i(B tone "TONE MAI THO")
           (?(1j(B tone "TONE MAI TI")
           (?(1k(B tone "TONE MAI JADTAWAR")
           (?(1l(B tone "CANCELLATION MARK")
           (?(1m(B vowel-upper "VOWEL SIGN SARA OR")
           (?(1n(B invalid nil)
           (?(1o(B invalid nil)
           (?(1p(B special "DIGIT ZERO")
           (?(1q(B special "DIGIT ONE")
           (?(1r(B special "DIGIT TWO")
           (?(1s(B special "DIGIT THREE")
           (?(1t(B special "DIGIT FOUR")
           (?(1u(B special "DIGIT FIVE")
           (?(1v(B special "DIGIT SIX")
           (?(1w(B special "DIGIT SEVEN")
           (?(1x(B special "DIGIT EIGHT")
           (?(1y(B special "DIGIT NINE")
           (?(1z(B invalid nil)
           (?(1{(B invalid nil)
           (?(1|(B consonant "LETTER NHOR NHUU" "MOUSE")
           (?(1}(B consonant "LETTER MHOR MHAR" "DOG")
           (?(1~(B invalid nil)
           ))
      elm)
  (while l
    (setq elm (car l) l (cdr l))
    (let ((char (car elm))
          (ptype (nth 1 elm)))
      ;; Only the types that take part in `lao-composition-pattern' get a
      ;; category; vowel-base, special and invalid characters get none.
      (cond ((eq ptype 'consonant)
             (modify-category-entry char ?c lao-category-table))
            ((memq ptype '(vowel-upper vowel-lower))
             (modify-category-entry char ?v lao-category-table))
            ((eq ptype 'semivowel-lower)
             (modify-category-entry char ?s lao-category-table))
            ((eq ptype 'tone)
             (modify-category-entry char ?t lao-category-table)))
      ;; NAME and MEANING are nil for entries that do not supply them.
      (put-char-code-property char 'phonetic-type ptype)
      (put-char-code-property char 'name (nth 2 elm))
      (put-char-code-property char 'meaning (nth 3 elm)))))
|
|
150
|
|
151 ;; The general composing rules are as follows:
|
|
152 ;;
|
|
;;                          T
;;       V        T         V                  T
;; CV -> C, CT -> C, CVT -> C, Cv -> C, CvT -> C
;;                                   v         v
;;                             T
;;        V         T          V                     T
;; CsV -> C, CsT -> C, CsVT -> C, Csv -> C, CsvT -> C
;;        s         s          s         s          s
;;                                       v          v
|
|
162
|
|
163
|
|
164 ;; where C: consonant, V: vowel upper, v: vowel lower,
|
|
165 ;; T: tone mark, s: semivowel lower
|
|
166
|
|
;; The regexp relies on the categories ?c, ?t, ?v and ?s, so it must be used
;; while `lao-category-table' is the current category table.  It matches a
;; consonant followed by one of:
;;   - a tone,
;;   - a vowel and an optional tone,
;;   - a semivowel, optionally followed by a tone or by a vowel and an
;;     optional tone.
(defvar lao-composition-pattern
  "\\cc\\(\\ct\\|\\cv\\ct?\\|\\cs\\(\\ct\\|\\cv\\ct?\\)?\\)"
  "Regular expression matching a Lao composite sequence.")
|
|
170
|
|
171 ;;;###autoload
|
|
(defun lao-compose-string (str)
  "Compose every Lao composite sequence found in string STR.
Sequences are located with `lao-composition-pattern' while
`lao-category-table' is in effect, and each one is handed to
`compose-string'.  STR itself is returned."
  (with-category-table lao-category-table
    (let ((pos (string-match lao-composition-pattern str 0)))
      (while pos
        (compose-string str pos (match-end 0))
        ;; Resume the search right after the sequence just composed.
        (setq pos (string-match lao-composition-pattern str (match-end 0))))))
  str)
|
|
179
|
|
180 ;;; LRT: Lao <-> Roman Transcription
|
|
181
|
|
182 ;; Upper vowels and tone-marks are put on the letter.
|
|
183 ;; Semi-vowel-sign-lo and lower vowels are put under the letter.
|
|
184
|
|
;; Alist of Roman transcriptions of initial consonants vs. Lao text.
;; A value is either a string, or a one-element vector holding a string for
;; consonants written with more than one Lao character.  The list is ordered
;; by decreasing key length.
;;
;; `sort' is destructive, so it is given a fresh copy of the list rather
;; than the quoted literal itself.
(defconst lao-transcription-consonant-alist
  (sort (copy-sequence
         '(;; single consonants
           ("k" . "(1!(B")
           ("kh" . "(1"(B")
           ("qh" . "(1$(B")
           ("ng" . "(1'(B")
           ("j" . "(1((B")
           ("s" . "(1J(B")
           ("x" . "(1*(B")
           ("y" . "(1-(B")
           ("d" . "(14(B")
           ("t" . "(15(B")
           ("th" . "(16(B")
           ("dh" . "(17(B")
           ("n" . "(19(B")
           ("b" . "(1:(B")
           ("p" . "(1;(B")
           ("hp" . "(1<(B")
           ("fh" . "(1=(B")
           ("ph" . "(1>(B")
           ("f" . "(1?(B")
           ("m" . "(1A(B")
           ("gn" . "(1B(B")
           ("l" . "(1E(B")
           ("r" . "(1C(B")
           ("v" . "(1G(B")
           ("w" . "(1G(B")
           ("hh" . "(1K(B")
           ("O" . "(1M(B")
           ("h" . "(1N(B")
           ("nh" . "(1|(B")
           ("mh" . "(1}(B")
           ("lh" . ["(1K\(B"])
           ;; double consonants
           ("ngh" . ["(1K'(B"])
           ("yh" . ["(1K](B"])
           ("wh" . ["(1KG(B"])
           ("hl" . ["(1KE(B"])
           ("hy" . ["(1K-(B"])
           ("hn" . ["(1K9(B"])
           ("hm" . ["(1KA(B"])
           ))
        (function (lambda (x y) (> (length (car x)) (length (car y)))))))
|
|
228
|
|
;; Alist of Roman transcription vs. Lao string for the semivowel that may
;; follow an initial consonant.  The only entry maps "r" to the character
;; described as SEMIVOWEL SIGN LO in the character property table.
(defconst lao-transcription-semi-vowel-alist
  '(("r" . "(1\(B")))
|
|
231
|
|
;; Alist of Roman transcriptions of vowels vs. Lao text.
;; A value is either a string, or a one-element vector holding a string for
;; vowels written with more than one Lao character.  The value (after
;; unwrapping the vector) is the key used to look up
;; `lao-vowel-reordering-rule'.  The list is ordered by decreasing key
;; length.
;;
;; `sort' is destructive, so it is given a fresh copy of the list rather
;; than the quoted literal itself.
(defconst lao-transcription-vowel-alist
  (sort (copy-sequence
         '(("a" . "(1P(B")
           ("ar" . "(1R(B")
           ("i" . "(1T(B")
           ("ii" . "(1U(B")
           ("eu" . "(1V(B")
           ("ur" . "(1W(B")
           ("u" . "(1X(B")
           ("uu" . "(1Y(B")
           ("e" . ["(1`P(B"])
           ("ee" . "(1`(B")
           ("ae" . ["(1aP(B"])
           ("aa" . "(1a(B")
           ("o" . ["(1bP(B"])
           ("oo" . "(1b(B")
           ("oe" . ["(1`RP(B"])
           ("or" . "(1m(B")
           ("er" . ["(1`T(B"])
           ("ir" . ["(1`U(B"])
           ("ua" . ["(1[GP(B"])
           ("uaa" . ["(1[G(B"])
           ("ie" . ["(1`Q]P(B"])
           ("ia" . ["(1`Q](B"])
           ("ea" . ["(1`VM(B"])
           ("eaa" . ["(1`WM(B"])
           ("ai" . "(1d(B")
           ("ei" . "(1c(B")
           ("ao" . ["(1`[R(B"])
           ("aM" . "(1S(B")))
        (function (lambda (x y) (> (length (car x)) (length (car y)))))))
|
|
262
|
|
263 ;; Maa-sakod is put at the tail.
|
|
;; Alist of Roman transcription vs. Lao string for the syllable-final
;; consonant (maa-sakod).  Both "v" and "w" map to the same Lao character.
(defconst lao-transcription-maa-sakod-alist
  '(("k" . "(1!(B")
    ("g" . "(1'(B")
    ("y" . "(1-(B")
    ("d" . "(14(B")
    ("n" . "(19(B")
    ("b" . "(1:(B")
    ("m" . "(1A(B")
    ("v" . "(1G(B")
    ("w" . "(1G(B")
    ))
|
|
275
|
|
;; Alist of ASCII tone markers vs. Lao tone strings.  In table order the
;; values are the characters described as TONE MAI EK, TONE MAI THO,
;; TONE MAI TI, TONE MAI JADTAWAR and CANCELLATION MARK.
(defconst lao-transcription-tone-alist
  '(("'" . "(1h(B")
    ("\"" . "(1i(B")
    ("^" . "(1j(B")
    ("+" . "(1k(B")
    ("~" . "(1l(B")))
|
|
282
|
|
;; Alist of backslash-introduced Roman sequences vs. Lao strings that are
;; not part of a syllable: a backslash followed by a digit gives the Lao
;; digit, two backslashes give the repetition mark (KO LA), and backslash
;; followed by `$' gives the ellipsis character.
(defconst lao-transcription-punctuation-alist
  '(("\\0" . "(1p(B")
    ("\\1" . "(1q(B")
    ("\\2" . "(1r(B")
    ("\\3" . "(1s(B")
    ("\\4" . "(1t(B")
    ("\\5" . "(1u(B")
    ("\\6" . "(1v(B")
    ("\\7" . "(1w(B")
    ("\\8" . "(1x(B")
    ("\\9" . "(1y(B")
    ("\\\\" . "(1f(B")
    ("\\$" . "(1O(B")))
|
|
296
|
|
;; The pattern is, in order: consonant, optional semivowel, then an optional
;; group of vowel + optional maa-sakod + optional tone; or, as an
;; alternative, one of the punctuation sequences.
;;
;; Only the `regexp-opt' based definition is kept.  An earlier
;; `mapconcat' based `defconst' of the same name stood directly before it
;; and was always overwritten by this one, so it never took effect.
;;
;; NOTE(review): `lao-transcribe-single-roman-syllable-to-lao' reads
;; sub-matches 1, 3, 5, 8 and 9.  Those numbers only line up if
;; `regexp-opt' itself contributes numbered groups -- confirm against the
;; `regexp-opt' implementation this file is loaded with.
(defconst lao-transcription-pattern
  (concat
   "\\("
   (regexp-opt (mapcar 'car lao-transcription-consonant-alist))
   "\\)\\("
   (regexp-opt (mapcar 'car lao-transcription-semi-vowel-alist))
   "\\)?\\(\\("
   (regexp-opt (mapcar 'car lao-transcription-vowel-alist))
   "\\)\\("
   (regexp-opt (mapcar 'car lao-transcription-maa-sakod-alist))
   "\\)?\\("
   (regexp-opt (mapcar 'car lao-transcription-tone-alist))
   "\\)?\\)?\\|"
   (regexp-opt (mapcar 'car lao-transcription-punctuation-alist))
   )
  "Regexp of Roman transcription pattern for one Lao syllable.")
|
|
332
|
|
(defconst lao-vowel-reordering-rule
  '(("(1P(B" (0 ?(1P(B) (0 ?(1Q(B))
    ("(1R(B" (0 ?(1R(B))
    ;; Short SARA I keeps its own sign.  This rule used to emit the long
    ;; SARA II sign, which made "i" and "ii" transcribe identically.
    ("(1T(B" (0 ?(1T(B))
    ("(1U(B" (0 ?(1U(B))
    ("(1V(B" (0 ?(1V(B))
    ("(1W(B" (0 ?(1W(B))
    ("(1X(B" (0 ?(1X(B))
    ("(1Y(B" (0 ?(1Y(B))
    ("(1`P(B" (?(1`(B 0 ?(1P(B) (?(1`(B 0 ?(1Q(B))
    ("(1`(B" (?(1`(B 0))
    ("(1aP(B" (?(1a(B 0 ?(1P(B) (?(1a(B 0 ?(1Q(B))
    ("(1a(B" (?(1a(B 0))
    ("(1bP(B" (?(1b(B 0 ?(1P(B) (0 ?(1[(B) (?(1-(B ?(1b(B 0 ?(1Q(B) (?(1G(B ?(1b(B 0 ?(1Q(B))
    ("(1b(B" (?(1b(B 0))
    ("(1`RP(B" (?(1`(B 0 ?(1R(B ?(1P(B) (0 ?(1Q(B ?(1M(B))
    ("(1m(B" (0 ?(1m(B) (0 ?(1M(B))
    ("(1`T(B" (?(1`(B 0 ?(1T(B))
    ("(1`U(B" (?(1`(B 0 ?(1U(B))
    ("(1[GP(B" (0 ?(1[(B ?(1G(B ?(1P(B) (0 ?(1Q(B ?(1G(B))
    ("(1[G(B" (0 ?(1[(B ?(1G(B) (0 ?(1G(B))
    ("(1`Q]P(B" (?(1`(B 0 ?(1Q(B ?(1](B ?(1P(B) (0 ?(1Q(B ?(1](B))
    ("(1`Q](B" (?(1`(B 0 ?(1Q(B ?(1](B) (0 ?(1](B))
    ("(1`VM(B" (?(1`(B 0 ?(1V(B ?(1M(B))
    ("(1`WM(B" (?(1`(B 0 ?(1W(B ?(1M(B))
    ("(1d(B" (?(1d(B 0))
    ("(1c(B" (?(1c(B 0))
    ("(1`[R(B" (?(1`(B 0 ?(1[(B ?(1R(B))
    ("(1S(B" (0 ?(1S(B)))
  "Alist of Lao vowel string vs the corresponding re-ordering rule.
Each element has this form:
(VOWEL NO-MAA-SAKOD-RULE WITH-MAA-SAKOD-RULE (MAA-SAKOD-0 RULE-0) ...)

VOWEL is a vowel string (e.g. \"(1`Q]P(B\").

NO-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL following a
consonant. It is a list of vowel characters or 0. The element 0
indicates the place to embed a consonant.

Optional WITH-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL
following a consonant and preceding a maa-sakod character. If it is
nil, NO-MAA-SAKOD-RULE is used. The maa-sakod character is always
appended at the tail.

For instance, rule `(\"(1`WM(B\" (?(1`(B 0 ?(1W(B ?(1M(B))' tells that this vowel
string following a consonant `(1!(B' should be re-ordered as \"(1`!WM(B\".

Optional (MAA-SAKOD-n RULE-n) are rules specially applied to maa-sakod
character MAA-SAKOD-n. The elements that follow MAA-SAKOD-n in such a
list form RULE-n.")
|
|
382
|
|
383 ;;;###autoload
|
|
(defun lao-transcribe-single-roman-syllable-to-lao (from to &optional str)
  "Transcribe a Romanized Lao syllable in the region FROM and TO to Lao string.
Only the first syllable is transcribed.
The value has the form: (START END LAO-STRING), where
START and END are the beginning and end positions of the Roman Lao syllable,
LAO-STRING is the Lao character transcription of it.
The value is nil if no syllable is found.

Optional 3rd arg STR, if non-nil, is a string to search for Roman Lao
syllable. In that case, FROM and TO are indexes to STR."
  ;; Step 1: locate the first syllable.  Afterwards FROM is the start of the
  ;; match (or nil when nothing was found) and the match data describes it.
  (if str
      (if (setq from (string-match lao-transcription-pattern str from))
          (progn
            ;; A match that starts at or after TO is treated as no match.
            (if (>= from to)
                (setq from nil)
              (setq to (match-end 0)))))
    (save-excursion
      (goto-char from)
      (if (setq to (re-search-forward lao-transcription-pattern to t))
          (setq from (match-beginning 0))
        (setq from nil))))
  (if from
      ;; Step 2: pick the Roman components out of the match.
      ;; NOTE(review): these sub-match numbers assume that `regexp-opt'
      ;; adds numbered groups of its own inside
      ;; `lao-transcription-pattern' -- confirm against the `regexp-opt'
      ;; in use.
      (let* ((consonant (match-string 1 str))
             (semivowel (match-string 3 str))
             (vowel (match-string 5 str))
             (maa-sakod (match-string 8 str))
             (tone (match-string 9 str))
             lao-consonant lao-semivowel lao-vowel lao-maa-sakod lao-tone
             clen cidx)
        (setq to (match-end 0))
        ;; From here on STR is reused to build the Lao result.
        (if (not consonant)
            ;; No consonant matched, so the match is a punctuation sequence.
            (setq str (cdr (assoc (match-string 0 str)
                                  lao-transcription-punctuation-alist)))
          (setq lao-consonant
                (cdr (assoc consonant lao-transcription-consonant-alist)))
          ;; Multi-character consonants are stored wrapped in a vector.
          (if (vectorp lao-consonant)
              (setq lao-consonant (aref lao-consonant 0)))
          (setq clen (length lao-consonant))
          (if semivowel
              ;; Include semivowel in STR.  For a multi-character consonant
              ;; it goes right after the first character.
              (setq lao-semivowel
                    (cdr (assoc semivowel lao-transcription-semi-vowel-alist))
                    str (if (= clen 1)
                            (concat lao-consonant lao-semivowel)
                          (concat (substring lao-consonant 0 1) lao-semivowel
                                  (substring lao-consonant 1))))
            (setq str lao-consonant))
          (if vowel
              (let (rule)
                (setq lao-vowel
                      (cdr (assoc vowel lao-transcription-vowel-alist)))
                (if (vectorp lao-vowel)
                    (setq lao-vowel (aref lao-vowel 0)))
                (setq rule (assoc lao-vowel lao-vowel-reordering-rule))
                ;; Choose the rule: without maa-sakod the first one; with
                ;; maa-sakod prefer a rule specific to that character, then
                ;; the general with-maa-sakod rule, then the first one.
                (if (null maa-sakod)
                    (setq rule (nth 1 rule))
                  (setq lao-maa-sakod
                        (cdr (assoc maa-sakod lao-transcription-maa-sakod-alist))
                        rule
                        (or (cdr (assq (aref lao-maa-sakod 0) (nthcdr 2 rule)))
                            (nth 2 rule)
                            (nth 1 rule))))
                (or rule
                    (error "Lao vowel %S has no re-ordering rule" lao-vowel))
                ;; Rebuild STR by walking the rule: 0 stands for the
                ;; consonant (with semivowel, if any), anything else is a
                ;; vowel character to emit.  CIDX ends up just after the
                ;; consonant.
                (setq lao-consonant str str "")
                (while rule
                  (if (= (car rule) 0)
                      (setq str (concat str lao-consonant)
                            cidx (length str))
                    (setq str (concat str (list (car rule)))))
                  (setq rule (cdr rule)))
                (or cidx
                    (error "Lao vowel %S has malformed re-ordering rule" vowel))
                ;; Set CIDX to after upper or lower vowel if any.
                (let ((len (length str)))
                  (while (and (< cidx len)
                              (memq (get-char-code-property (aref str cidx)
                                                            'phonetic-type)
                                    '(vowel-lower vowel-upper)))
                    (setq cidx (1+ cidx))))
                (if lao-maa-sakod
                    (setq str (concat str lao-maa-sakod)))
                ;; The tone mark is inserted at CIDX, not appended.
                (if tone
                    (setq lao-tone
                          (cdr (assoc tone lao-transcription-tone-alist))
                          str (concat (substring str 0 cidx) lao-tone
                                      (substring str cidx)))))))
        (list from to (lao-compose-string str)))))
|
|
471
|
|
472 ;;;###autoload
|
|
(defun lao-transcribe-roman-to-lao-string (str)
  "Transcribe Romanized Lao string STR to Lao character string."
  (let ((pos 0)
        (limit (length str))
        (pieces nil)
        syllable)
    ;; Collect the output pieces in reverse order; SYLLABLE is
    ;; (START END LAO-STRING) for the next syllable at or after POS.
    (while (setq syllable
                 (lao-transcribe-single-roman-syllable-to-lao pos limit str))
      (if (> (car syllable) pos)
          ;; Text skipped before the syllable is copied unchanged.
          (setq pieces (cons (substring str pos (car syllable)) pieces)))
      (setq pieces (cons (nth 2 syllable) pieces)
            pos (nth 1 syllable)))
    ;; Whatever follows the last syllable is copied unchanged too.
    (if (< pos limit)
        (setq pieces (cons (substring str pos limit) pieces)))
    (apply 'concat (nreverse pieces))))
|
|
490
|
|
491 ;;;###autoload
|
|
(defun lao-composition-function (from to pattern &optional string)
  "Compose Lao text in the region FROM and TO.
The text matches the regular expression PATTERN.
Optional 4th argument STRING, if non-nil, is a string containing text
to compose.

The return value is number of composed characters, or nil if the
region spans fewer than two characters and nothing is composed.
PATTERN is accepted but not used by this function."
  (if (< (1+ from) to)
      ;; Compute the count first; `prog1' returns it after composing.
      (prog1 (- to from)
        (if string
            (compose-string string from to)
          (compose-region from to)))))
|
|
505
|
|
506 ;;;###autoload
|
|
(defun lao-compose-region (from to)
  "Compose every Lao composite sequence between FROM and TO.
Interactively, operate on the region.  Sequences are found with
`lao-composition-pattern' while `lao-category-table' is in effect."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (with-category-table lao-category-table
      ;; After a successful search point is at the end of the match, so
      ;; the match bounds are exactly the span to compose.
      (while (re-search-forward lao-composition-pattern (point-max) t)
        (compose-region (match-beginning 0) (match-end 0))))))
|
|
515
|
|
516 ;;
|
|
;; Announce the `lao-util' feature so that `require' can find it.
(provide 'lao-util)
|
|
518
|
|
519 ;;; lao-util.el ends here
|