Mercurial > hg > xemacs-beta
comparison lisp/language/thai-util.el @ 159:3bb7ccffb0c0 r20-3b6
Import from CVS: tag r20-3b6
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:41:43 +0200 |
parents | |
children | e45d5e7c476e |
comparison
equal
deleted
inserted
replaced
158:558dfa75ffb3 | 159:3bb7ccffb0c0 |
---|---|
1 ;;; thai-util.el --- utilities for Thai | |
2 | |
3 ;; Copyright (C) 1995 Free Software Foundation, Inc. | |
4 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | |
5 ;; Copyright (C) 1997 MORIOKA Tomohiko | |
6 | |
7 ;; Keywords: mule, multilingual, thai | |
8 | |
9 ;; This file is part of XEmacs. | |
10 | |
11 ;; XEmacs is free software; you can redistribute it and/or modify it | |
12 ;; under the terms of the GNU General Public License as published by | |
13 ;; the Free Software Foundation; either version 2, or (at your option) | |
14 ;; any later version. | |
15 | |
16 ;; XEmacs is distributed in the hope that it will be useful, but | |
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 ;; General Public License for more details. | |
20 | |
21 ;; You should have received a copy of the GNU General Public License | |
22 ;; along with XEmacs; see the file COPYING. If not, write to the Free | |
23 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | |
24 ;; 02111-1307, USA. | |
25 | |
26 ;;; Code: | |
27 | |
28 ;; Setting information of Thai characters. | |
29 | |
30 ;; (let ((l '((?,T!(B consonant "LETTER KO KAI") ; 0xA1 | |
31 ;; (?,T"(B consonant "LETTER KHO KHAI") ; 0xA2 | |
32 ;; (?,T#(B consonant "LETTER KHO KHUAT") ; 0xA3 | |
33 ;; (?,T$(B consonant "LETTER KHO KHWAI") ; 0xA4 | |
34 ;; (?,T%(B consonant "LETTER KHO KHON") ; 0xA5 | |
35 ;; (?,T&(B consonant "LETTER KHO RAKHANG") ; 0xA6 | |
36 ;; (?,T'(B consonant "LETTER NGO NGU") ; 0xA7 | |
37 ;; (?,T((B consonant "LETTER CHO CHAN") ; 0xA8 | |
38 ;; (?,T)(B consonant "LETTER CHO CHING") ; 0xA9 | |
39 ;; (?,T*(B consonant "LETTER CHO CHANG") ; 0xAA | |
40 ;; (?,T+(B consonant "LETTER SO SO") ; 0xAB | |
41 ;; (?,T,(B consonant "LETTER CHO CHOE") ; 0xAC | |
42 ;; (?,T-(B consonant "LETTER YO YING") ; 0xAD | |
43 ;; (?,T.(B consonant "LETTER DO CHADA") ; 0xAE | |
44 ;; (?,T/(B consonant "LETTER TO PATAK") ; 0xAF | |
45 ;; (?,T0(B consonant "LETTER THO THAN") ; 0xB0 | |
46 ;; (?,T1(B consonant "LETTER THO NANGMONTHO") ; 0xB1 | |
47 ;; (?,T2(B consonant "LETTER THO PHUTHAO") ; 0xB2 | |
48 ;; (?,T3(B consonant "LETTER NO NEN") ; 0xB3 | |
49 ;; (?,T4(B consonant "LETTER DO DEK") ; 0xB4 | |
50 ;; (?,T5(B consonant "LETTER TO TAO") ; 0xB5 | |
51 ;; (?,T6(B consonant "LETTER THO THUNG") ; 0xB6 | |
52 ;; (?,T7(B consonant "LETTER THO THAHAN") ; 0xB7 | |
53 ;; (?,T8(B consonant "LETTER THO THONG") ; 0xB8 | |
54 ;; (?,T9(B consonant "LETTER NO NU") ; 0xB9 | |
55 ;; (?,T:(B consonant "LETTER BO BAIMAI") ; 0xBA | |
56 ;; (?,T;(B consonant "LETTER PO PLA") ; 0xBB | |
57 ;; (?,T<(B consonant "LETTER PHO PHUNG") ; 0xBC | |
58 ;; (?,T=(B consonant "LETTER FO FA") ; 0xBD | |
59 ;; (?,T>(B consonant "LETTER PHO PHAN") ; 0xBE | |
60 ;; (?,T?(B consonant "LETTER FO FAN") ; 0xBF | |
61 ;; (?,T@(B consonant "LETTER PHO SAMPHAO") ; 0xC0 | |
62 ;; (?,TA(B consonant "LETTER MO MA") ; 0xC1 | |
63 ;; (?,TB(B consonant "LETTER YO YAK") ; 0xC2 | |
64 ;; (?,TC(B consonant "LETTER RO RUA") ; 0xC3 | |
65 ;; (?,TD(B vowel-base "LETTER RU (Pali vowel letter)") ; 0xC4 | |
66 ;; (?,TE(B consonant "LETTER LO LING") ; 0xC5 | |
67 ;; (?,TF(B vowel-base "LETTER LU (Pali vowel letter)") ; 0xC6 | |
68 ;; (?,TG(B consonant "LETTER WO WAEN") ; 0xC7 | |
69 ;; (?,TH(B consonant "LETTER SO SALA") ; 0xC8 | |
70 ;; (?,TI(B consonant "LETTER SO RUSI") ; 0xC9 | |
71 ;; (?,TJ(B consonant "LETTER SO SUA") ; 0xCA | |
72 ;; (?,TK(B consonant "LETTER HO HIP") ; 0xCB | |
73 ;; (?,TL(B consonant "LETTER LO CHULA") ; 0xCC | |
74 ;; (?,TM(B consonant "LETTER O ANG") ; 0xCD | |
75 ;; (?,TN(B consonant "LETTER HO NOK HUK") ; 0xCE | |
76 ;; (?,TO(B special "PAI YAN NOI (abbreviation)") ; 0xCF | |
77 ;; (?,TP(B vowel-base "VOWEL SIGN SARA A") ; 0xD0 | |
78 ;; (?,TQ(B vowel-upper "VOWEL SIGN MAI HAN-AKAT N/S-T") ; 0xD1 | |
79 ;; (?,TR(B vowel-base "VOWEL SIGN SARA AA") ; 0xD2 | |
80 ;; (?,TS(B vowel-base "VOWEL SIGN SARA AM") ; 0xD3 | |
81 ;; (?,TT(B vowel-upper "VOWEL SIGN SARA I N/S-T") ; 0xD4 | |
82 ;; (?,TU(B vowel-upper "VOWEL SIGN SARA II N/S-T") ; 0xD5 | |
83 ;; (?,TV(B vowel-upper "VOWEL SIGN SARA UE N/S-T") ; 0xD6 | |
84 ;; (?,TW(B vowel-upper "VOWEL SIGN SARA UEE N/S-T") ; 0xD7 | |
85 ;; (?,TX(B vowel-lower "VOWEL SIGN SARA U N/S-B") ; 0xD8 | |
86 ;; (?,TY(B vowel-lower "VOWEL SIGN SARA UU N/S-B") ; 0xD9 | |
87 ;; (?,TZ(B vowel-lower "VOWEL SIGN PHINTHU N/S-B (Pali virama)") ; 0xDA | |
88 ;; (?,T[(B invalid nil) ; 0xDB | |
89 ;; (?,T\(B invalid nil) ; 0xDC | |
90 ;; (?,T](B invalid nil) ; 0xDD | |
91 ;; (?,T^(B invalid nil) ; 0xDE | |
92 ;; (?,T_(B special "BAHT SIGN (currency symbol)") ; 0xDF | |
93 ;; (?,T`(B vowel-base "VOWEL SIGN SARA E") ; 0xE0 | |
94 ;; (?,Ta(B vowel-base "VOWEL SIGN SARA AE") ; 0xE1 | |
95 ;; (?,Tb(B vowel-base "VOWEL SIGN SARA O") ; 0xE2 | |
96 ;; (?,Tc(B vowel-base "VOWEL SIGN SARA MAI MUAN") ; 0xE3 | |
97 ;; (?,Td(B vowel-base "VOWEL SIGN SARA MAI MALAI") ; 0xE4 | |
98 ;; (?,Te(B vowel-base "LAK KHANG YAO") ; 0xE5 | |
99 ;; (?,Tf(B special "MAI YAMOK (repetition)") ; 0xE6 | |
100 ;; (?,Tg(B vowel-upper "VOWEL SIGN MAI TAI KHU N/S-T") ; 0xE7 | |
101 ;; (?,Th(B tone "TONE MAI EK N/S-T") ; 0xE8 | |
102 ;; (?,Ti(B tone "TONE MAI THO N/S-T") ; 0xE9 | |
103 ;; (?,Tj(B tone "TONE MAI TRI N/S-T") ; 0xEA | |
104 ;; (?,Tk(B tone "TONE MAI CHATTAWA N/S-T") ; 0xEB | |
105 ;; (?,Tl(B tone "THANTHAKHAT N/S-T (cancellation mark)") ; 0xEC | |
106 ;; (?,Tm(B tone "NIKKHAHIT N/S-T (final nasal)") ; 0xED | |
107 ;; (?,Tn(B vowel-upper "YAMAKKAN N/S-T") ; 0xEE | |
108 ;; (?,To(B special "FONGMAN") ; 0xEF | |
109 ;; (?,Tp(B special "DIGIT ZERO") ; 0xF0 | |
110 ;; (?,Tq(B special "DIGIT ONE") ; 0xF1 | |
111 ;; (?,Tr(B special "DIGIT TWO") ; 0xF2 | |
112 ;; (?,Ts(B special "DIGIT THREE") ; 0xF3 | |
113 ;; (?,Tt(B special "DIGIT FOUR") ; 0xF4 | |
114 ;; (?,Tu(B special "DIGIT FIVE") ; 0xF5 | |
115 ;; (?,Tv(B special "DIGIT SIX") ; 0xF6 | |
116 ;; (?,Tw(B special "DIGIT SEVEN") ; 0xF7 | |
117 ;; (?,Tx(B special "DIGIT EIGHT") ; 0xF8 | |
118 ;; (?,Ty(B special "DIGIT NINE") ; 0xF9 | |
119 ;; (?,Tz(B special "ANGKHANKHU (ellipsis)") ; 0xFA | |
120 ;; (?,T{(B special "KHOMUT (beginning of religious texts)") ; 0xFB | |
121 ;; (?,T|(B invalid nil) ; 0xFC | |
122 ;; (?,T}(B invalid nil) ; 0xFD | |
123 ;; (?,T~(B invalid nil) ; 0xFE | |
124 ;; )) | |
125 ;; elm) | |
126 ;; (while l | |
127 ;; (setq elm (car l)) | |
128 ;; (put-char-code-property (car elm) 'phonetic-type (car (cdr elm))) | |
129 ;; (put-char-code-property (car elm) 'name (nth 2 elm)) | |
130 ;; (setq l (cdr l)))) | |
131 | |
132 (defconst thai-character-alist | |
133 '((?,T!(B . consonant) ; 0xA1: LETTER KO KAI | |
134 (?,T"(B . consonant) ; 0xA2: LETTER KHO KHAI | |
135 (?,T#(B . consonant) ; 0xA3: LETTER KHO KHUAT (obsolete) | |
136 (?,T$(B . consonant) ; 0xA4: LETTER KHO KHWAI | |
137 (?,T%(B . consonant) ; 0xA5: LETTER KHO KHON (obsolete) | |
138 (?,T&(B . consonant) ; 0xA6: LETTER KHO RAKHANG | |
139 (?,T'(B . consonant) ; 0xA7: LETTER NGO NGU | |
140 (?,T((B . consonant) ; 0xA8: LETTER CHO CHAN | |
141 (?,T)(B . consonant) ; 0xA9: LETTER CHO CHING | |
142 (?,T*(B . consonant) ; 0xAA: LETTER CHO CHANG | |
143 (?,T+(B . consonant) ; 0xAB: LETTER SO SO | |
144 (?,T,(B . consonant) ; 0xAC: LETTER CHO CHOE | |
145 (?,T-(B . consonant) ; 0xAD: LETTER YO YING | |
146 (?,T.(B . consonant) ; 0xAE: LETTER DO CHADA | |
147 (?,T/(B . consonant) ; 0xAF: LETTER TO PATAK | |
148 (?,T0(B . consonant) ; 0xB0: LETTER THO THAN | |
149 (?,T1(B . consonant) ; 0xB1: LETTER THO NANGMONTHO | |
150 (?,T2(B . consonant) ; 0xB2: LETTER THO PHUTHAO | |
151 (?,T3(B . consonant) ; 0xB3: LETTER NO NEN | |
152 (?,T4(B . consonant) ; 0xB4: LETTER DO DEK | |
153 (?,T5(B . consonant) ; 0xB5: LETTER TO TAO | |
154 (?,T6(B . consonant) ; 0xB6: LETTER THO THUNG | |
155 (?,T7(B . consonant) ; 0xB7: LETTER THO THAHAN | |
156 (?,T8(B . consonant) ; 0xB8: LETTER THO THONG | |
157 (?,T9(B . consonant) ; 0xB9: LETTER NO NU | |
158 (?,T:(B . consonant) ; 0xBA: LETTER BO BAIMAI | |
159 (?,T;(B . consonant) ; 0xBB: LETTER PO PLA | |
160 (?,T<(B . consonant) ; 0xBC: LETTER PHO PHUNG | |
161 (?,T=(B . consonant) ; 0xBD: LETTER FO FA | |
162 (?,T>(B . consonant) ; 0xBE: LETTER PHO PHAN | |
163 (?,T?(B . consonant) ; 0xBF: LETTER FO FAN | |
164 (?,T@(B . consonant) ; 0xC0: LETTER PHO SAMPHAO | |
165 (?,TA(B . consonant) ; 0xC1: LETTER MO MA | |
166 (?,TB(B . consonant) ; 0xC2: LETTER YO YAK | |
167 (?,TC(B . consonant) ; 0xC3: LETTER RO RUA | |
168 (?,TD(B . vowel-base) ; 0xC4: LETTER RU (vowel letter used to write Pali) | |
169 (?,TE(B . consonant) ; 0xC5: LETTER LO LING | |
170 (?,TF(B . vowel-base) ; 0xC6: LETTER LU (vowel letter used to write Pali) | |
171 (?,TG(B . consonant) ; 0xC7: LETTER WO WAEN | |
172 (?,TH(B . consonant) ; 0xC8: LETTER SO SALA | |
173 (?,TI(B . consonant) ; 0xC9: LETTER SO RUSI | |
174 (?,TJ(B . consonant) ; 0xCA: LETTER SO SUA | |
175 (?,TK(B . consonant) ; 0xCB: LETTER HO HIP | |
176 (?,TL(B . consonant) ; 0xCC: LETTER LO CHULA | |
177 (?,TM(B . consonant) ; 0xCD: LETTER O ANG | |
178 (?,TN(B . consonant) ; 0xCE: LETTER HO NOK HUK | |
179 (?,TO(B . special) ; 0xCF: PAI YAN NOI (abbreviation) | |
180 (?,TP(B . vowel-base) ; 0xD0: VOWEL SIGN SARA A | |
181 (?,TQ(B . vowel-upper) ; 0xD1: VOWEL SIGN MAI HAN-AKAT N/S-T | |
182 (?,TR(B . vowel-base) ; 0xD2: VOWEL SIGN SARA AA | |
183 (?,TS(B . vowel-base) ; 0xD3: VOWEL SIGN SARA AM | |
184 (?,TT(B . vowel-upper) ; 0xD4: VOWEL SIGN SARA I N/S-T | |
185 (?,TU(B . vowel-upper) ; 0xD5: VOWEL SIGN SARA II N/S-T | |
186 (?,TV(B . vowel-upper) ; 0xD6: VOWEL SIGN SARA UE N/S-T | |
187 (?,TW(B . vowel-upper) ; 0xD7: VOWEL SIGN SARA UEE N/S-T | |
188 (?,TX(B . vowel-lower) ; 0xD8: VOWEL SIGN SARA U N/S-B | |
189 (?,TY(B . vowel-lower) ; 0xD9: VOWEL SIGN SARA UU N/S-B | |
190 (?,TZ(B . vowel-lower) ; 0xDA: VOWEL SIGN PHINTHU N/S-B (Pali virama) | |
191 (?,T[(B . not-used) ; 0xDB: | |
192 (?,T\(B . not-used) ; 0xDC: | |
193 (?,T](B . not-used) ; 0xDD: | |
194 (?,T^(B . not-used) ; 0xDE: | |
195 (?,T_(B . special) ; 0xDF: BAHT SIGN (currency symbol) | |
196 (?,T`(B . vowel-base) ; 0xE0: VOWEL SIGN SARA E | |
197 (?,Ta(B . vowel-base) ; 0xE1: VOWEL SIGN SARA AE | |
198 (?,Tb(B . vowel-base) ; 0xE2: VOWEL SIGN SARA O | |
199 (?,Tc(B . vowel-base) ; 0xE3: VOWEL SIGN SARA MAI MUAN | |
200 (?,Td(B . vowel-base) ; 0xE4: VOWEL SIGN SARA MAI MALAI | |
201 (?,Te(B . vowel-base) ; 0xE5: LAK KHANG YAO | |
202 (?,Tf(B . special) ; 0xE6: MAI YAMOK (repetition) | |
203 (?,Tg(B . vowel-upper) ; 0xE7: VOWEL SIGN MAI TAI KHU N/S-T | |
204 (?,Th(B . tone) ; 0xE8: TONE MAI EK N/S-T | |
205 (?,Ti(B . tone) ; 0xE9: TONE MAI THO N/S-T | |
206 (?,Tj(B . tone) ; 0xEA: TONE MAI TRI N/S-T | |
207 (?,Tk(B . tone) ; 0xEB: TONE MAI CHATTAWA N/S-T | |
208 (?,Tl(B . tone) ; 0xEC: THANTHAKHAT N/S-T (cancellation mark) | |
209 (?,Tm(B . tone) ; 0xED: NIKKHAHIT N/S-T (final nasal) | |
210 (?,Tn(B . vowel-upper) ; 0xEE: YAMAKKAN N/S-T | |
211 (?,To(B . special) ; 0xEF: FONGMAN | |
212 (?,Tp(B . special) ; 0xF0: DIGIT ZERO | |
213 (?,Tq(B . special) ; 0xF1: DIGIT ONE | |
214 (?,Tr(B . special) ; 0xF2: DIGIT TWO | |
215 (?,Ts(B . special) ; 0xF3: DIGIT THREE | |
216 (?,Tt(B . special) ; 0xF4: DIGIT FOUR | |
217 (?,Tu(B . special) ; 0xF5: DIGIT FIVE | |
218 (?,Tv(B . special) ; 0xF6: DIGIT SIX | |
219 (?,Tw(B . special) ; 0xF7: DIGIT SEVEN | |
220 (?,Tx(B . special) ; 0xF8: DIGIT EIGHT | |
221 (?,Ty(B . special) ; 0xF9: DIGIT NINE | |
222 (?,Tz(B . special) ; 0xFA: ANGKHANKHU (ellipsis) | |
223 (?,T{(B . special) ; 0xFB: KHOMUT (beginning of religious texts) | |
224 (?,T|(B . not-used) ; 0xFC: | |
225 (?,T}(B . not-used) ; 0xFD: | |
226 (?,T~(B . not-used) ; 0xFE: | |
227 ) | |
228 "Association list of thai-character and property.") | |
;; Prepend one extra entry, tagged `vowel-upper-tone', keyed by the
;; first character of the string below.
;; NOTE(review): the string looks like a composed MAI HAN-AKAT (0xD1)
;; + MAI THO (0xE9) sequence -- confirm against the Mule encoding.
229 (setq thai-character-alist | |
230 (cons (cons (string-to-char "0,TQi1(B") 'vowel-upper-tone) | |
231 thai-character-alist)) | |
232 | |
;; Category table used while composing Thai text.  It starts as a copy
;; of the standard category table; three mnemonics are then registered
;; on it:
;;   ?0 -- Thai consonants
;;   ?1 -- Thai upper/lower vowels and tone marks
;;   ?2 -- Thai base vowels and special characters
233 (defconst thai-category-table | |
234 (copy-category-table (standard-category-table)) | |
235 "Category table for Thai.") | |
236 (define-category-mnemonic ?0 "Thai consonants" | |
237 thai-category-table) | |
238 (define-category-mnemonic ?1 "Thai upper/lower vowel or tone mark." | |
239 thai-category-table) | |
240 (define-category-mnemonic ?2 "Thai base vowel or special characters." | |
241 thai-category-table) | |
242 | |
;; Walk `thai-character-alist' and file each character under a category
;; of `thai-category-table', chosen from its property symbol:
;;   consonant                       -> ?0
;;   vowel-upper, vowel-lower, tone  -> ?1
;;   vowel-upper-tone                -> left without a category
;;   any other property              -> ?2
(let ((rest thai-character-alist)
      entry kind category)
  (while rest
    (setq entry (car rest)
          kind (cdr entry)
          category (cond ((eq kind 'consonant) ?0)
                         ((memq kind '(vowel-upper vowel-lower tone)) ?1)
                         ((not (eq kind 'vowel-upper-tone)) ?2)))
    ;; `category' is nil only for the `vowel-upper-tone' entry.
    (if category
        (modify-category-entry (car entry) category thai-category-table))
    (setq rest (cdr rest))))
257 | |
258 ;; ;;;###autoload | |
259 ;; (defun thai-compose-region (beg end) | |
260 ;; "Compose Thai characters in the region. | |
261 ;; When called from a program, expects two arguments, | |
262 ;; positions (integers or markers) specifying the region." | |
263 ;; (interactive "r") | |
264 ;; (save-restriction | |
265 ;; (narrow-to-region beg end) | |
266 ;; (decompose-region (point-min) (point-max)) | |
267 ;; (goto-char (point-min)) | |
268 ;; (while (re-search-forward "\\c0\\(\\c2\\|\\c3\\|\\c4\\)+" nil t) | |
269 ;; (if (aref (char-category-set (char-after (match-beginning 0))) ?t) | |
270 ;; (compose-region (match-beginning 0) (match-end 0)))))) | |
271 | |
;;;###autoload
(defun thai-compose-region (beg end)
  "Compose Thai characters in the region.
When called from a program, expects two arguments,
positions (integers or markers) specifying the region."
  (interactive "r")
  (save-restriction
    (narrow-to-region beg end)
    ;; Decompose the narrowed region before searching it.
    (decompose-region (point-min) (point-max))
    (goto-char (point-min))
    ;; The \\cN constructs in the regexp are resolved through the current
    ;; category table, so install `thai-category-table' for the search
    ;; and put the previous table back even on a non-local exit.
    (let ((ctbl (category-table)))
      (unwind-protect
          (progn
            (set-category-table thai-category-table)
            ;; Category 0 is a consonant; category 1 is an upper/lower
            ;; vowel or a tone mark.  Compose each consonant together
            ;; with the run of category-1 characters following it.
            (while (re-search-forward "\\c0\\c1+" nil t)
              (compose-region (match-beginning 0) (match-end 0))))
        (set-category-table ctbl)))))
288 | |
;;;###autoload
(defun thai-compose-buffer ()
  "Compose Thai characters in the current buffer.
This runs `thai-compose-region' from `point-min' to `point-max'."
  (interactive)
  (let ((start (point-min))
        (finish (point-max)))
    (thai-compose-region start finish)))
294 | |
295 ;; ;;;###autoload | |
296 ;; (defun thai-post-read-conversion (len) | |
297 ;; (save-excursion | |
298 ;; (save-restriction | |
299 ;; (let ((buffer-modified-p (buffer-modified-p))) | |
300 ;; (narrow-to-region (point) (+ (point) len)) | |
301 ;; (thai-compose-region (point-min) (point-max)) | |
302 ;; (set-buffer-modified-p buffer-modified-p) | |
303 ;; (- (point-max) (point-min)))))) | |
304 | |
305 ;; ;;;###autoload | |
306 ;; (defun thai-pre-write-conversion (from to) | |
307 ;; (let ((old-buf (current-buffer)) | |
308 ;; (work-buf (get-buffer-create " *thai-work*"))) | |
309 ;; (set-buffer work-buf) | |
310 ;; (erase-buffer) | |
311 ;; (if (stringp from) | |
312 ;; (insert from) | |
313 ;; (insert-buffer-substring old-buf from to)) | |
314 ;; (decompose-region (point-min) (point-max)) | |
315 ;; ;; Should return nil as annotations. | |
316 ;; nil)) | |
317 | |
318 ;; Declare the `language/thai-util' feature as provided by this file. | |
319 (provide 'language/thai-util) | |
320 | |
321 ;;; Local Variables: | |
322 ;;; generated-autoload-file: "../loaddefs.el" | |
323 ;;; End: | |
324 ;;; thai-util.el ends here |