comparison lisp/language/thai-util.el @ 159:3bb7ccffb0c0 r20-3b6

Import from CVS: tag r20-3b6
author cvs
date Mon, 13 Aug 2007 09:41:43 +0200
parents
children e45d5e7c476e
comparison
equal deleted inserted replaced
158:558dfa75ffb3 159:3bb7ccffb0c0
1 ;;; thai-util.el --- utilities for Thai
2
3 ;; Copyright (C) 1995 Free Software Foundation, Inc.
4 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
5 ;; Copyright (C) 1997 MORIOKA Tomohiko
6
7 ;; Keywords: mule, multilingual, thai
8
9 ;; This file is part of XEmacs.
10
11 ;; XEmacs is free software; you can redistribute it and/or modify it
12 ;; under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; XEmacs is distributed in the hope that it will be useful, but
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs; see the file COPYING. If not, write to the Free
23 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24 ;; 02111-1307, USA.
25
26 ;;; Code:
27
28 ;; Setting information of Thai characters.
29
30 ;; (let ((l '((?,T!(B consonant "LETTER KO KAI") ; 0xA1
31 ;; (?,T"(B consonant "LETTER KHO KHAI") ; 0xA2
32 ;; (?,T#(B consonant "LETTER KHO KHUAT") ; 0xA3
33 ;; (?,T$(B consonant "LETTER KHO KHWAI") ; 0xA4
34 ;; (?,T%(B consonant "LETTER KHO KHON") ; 0xA5
35 ;; (?,T&(B consonant "LETTER KHO RAKHANG") ; 0xA6
36 ;; (?,T'(B consonant "LETTER NGO NGU") ; 0xA7
37 ;; (?,T((B consonant "LETTER CHO CHAN") ; 0xA8
38 ;; (?,T)(B consonant "LETTER CHO CHING") ; 0xA9
39 ;; (?,T*(B consonant "LETTER CHO CHANG") ; 0xAA
40 ;; (?,T+(B consonant "LETTER SO SO") ; 0xAB
41 ;; (?,T,(B consonant "LETTER CHO CHOE") ; 0xAC
42 ;; (?,T-(B consonant "LETTER YO YING") ; 0xAD
43 ;; (?,T.(B consonant "LETTER DO CHADA") ; 0xAE
44 ;; (?,T/(B consonant "LETTER TO PATAK") ; 0xAF
45 ;; (?,T0(B consonant "LETTER THO THAN") ; 0xB0
46 ;; (?,T1(B consonant "LETTER THO NANGMONTHO") ; 0xB1
47 ;; (?,T2(B consonant "LETTER THO PHUTHAO") ; 0xB2
48 ;; (?,T3(B consonant "LETTER NO NEN") ; 0xB3
49 ;; (?,T4(B consonant "LETTER DO DEK") ; 0xB4
50 ;; (?,T5(B consonant "LETTER TO TAO") ; 0xB5
51 ;; (?,T6(B consonant "LETTER THO THUNG") ; 0xB6
52 ;; (?,T7(B consonant "LETTER THO THAHAN") ; 0xB7
53 ;; (?,T8(B consonant "LETTER THO THONG") ; 0xB8
54 ;; (?,T9(B consonant "LETTER NO NU") ; 0xB9
55 ;; (?,T:(B consonant "LETTER BO BAIMAI") ; 0xBA
56 ;; (?,T;(B consonant "LETTER PO PLA") ; 0xBB
57 ;; (?,T<(B consonant "LETTER PHO PHUNG") ; 0xBC
58 ;; (?,T=(B consonant "LETTER FO FA") ; 0xBD
59 ;; (?,T>(B consonant "LETTER PHO PHAN") ; 0xBE
60 ;; (?,T?(B consonant "LETTER FO FAN") ; 0xBF
61 ;; (?,T@(B consonant "LETTER PHO SAMPHAO") ; 0xC0
62 ;; (?,TA(B consonant "LETTER MO MA") ; 0xC1
63 ;; (?,TB(B consonant "LETTER YO YAK") ; 0xC2
64 ;; (?,TC(B consonant "LETTER RO RUA") ; 0xC3
65 ;; (?,TD(B vowel-base "LETTER RU (Pali vowel letter)") ; 0xC4
66 ;; (?,TE(B consonant "LETTER LO LING") ; 0xC5
67 ;; (?,TF(B vowel-base "LETTER LU (Pali vowel letter)") ; 0xC6
68 ;; (?,TG(B consonant "LETTER WO WAEN") ; 0xC7
69 ;; (?,TH(B consonant "LETTER SO SALA") ; 0xC8
70 ;; (?,TI(B consonant "LETTER SO RUSI") ; 0xC9
71 ;; (?,TJ(B consonant "LETTER SO SUA") ; 0xCA
72 ;; (?,TK(B consonant "LETTER HO HIP") ; 0xCB
73 ;; (?,TL(B consonant "LETTER LO CHULA") ; 0xCC
74 ;; (?,TM(B consonant "LETTER O ANG") ; 0xCD
75 ;; (?,TN(B consonant "LETTER HO NOK HUK") ; 0xCE
76 ;; (?,TO(B special "PAI YAN NOI (abbreviation)") ; 0xCF
77 ;; (?,TP(B vowel-base "VOWEL SIGN SARA A") ; 0xD0
78 ;; (?,TQ(B vowel-upper "VOWEL SIGN MAI HAN-AKAT N/S-T") ; 0xD1
79 ;; (?,TR(B vowel-base "VOWEL SIGN SARA AA") ; 0xD2
80 ;; (?,TS(B vowel-base "VOWEL SIGN SARA AM") ; 0xD3
81 ;; (?,TT(B vowel-upper "VOWEL SIGN SARA I N/S-T") ; 0xD4
82 ;; (?,TU(B vowel-upper "VOWEL SIGN SARA II N/S-T") ; 0xD5
83 ;; (?,TV(B vowel-upper "VOWEL SIGN SARA UE N/S-T") ; 0xD6
84 ;; (?,TW(B vowel-upper "VOWEL SIGN SARA UEE N/S-T") ; 0xD7
85 ;; (?,TX(B vowel-lower "VOWEL SIGN SARA U N/S-B") ; 0xD8
86 ;; (?,TY(B vowel-lower "VOWEL SIGN SARA UU N/S-B") ; 0xD9
87 ;; (?,TZ(B vowel-lower "VOWEL SIGN PHINTHU N/S-B (Pali virama)") ; 0xDA
88 ;; (?,T[(B invalid nil) ; 0xDB
89 ;; (?,T\(B invalid nil) ; 0xDC
90 ;; (?,T](B invalid nil) ; 0xDD
91 ;; (?,T^(B invalid nil) ; 0xDE
92 ;; (?,T_(B special "BAHT SIGN (currency symbol)") ; 0xDF
93 ;; (?,T`(B vowel-base "VOWEL SIGN SARA E") ; 0xE0
94 ;; (?,Ta(B vowel-base "VOWEL SIGN SARA AE") ; 0xE1
95 ;; (?,Tb(B vowel-base "VOWEL SIGN SARA O") ; 0xE2
96 ;; (?,Tc(B vowel-base "VOWEL SIGN SARA MAI MUAN") ; 0xE3
97 ;; (?,Td(B vowel-base "VOWEL SIGN SARA MAI MALAI") ; 0xE4
98 ;; (?,Te(B vowel-base "LAK KHANG YAO") ; 0xE5
99 ;; (?,Tf(B special "MAI YAMOK (repetition)") ; 0xE6
100 ;; (?,Tg(B vowel-upper "VOWEL SIGN MAI TAI KHU N/S-T") ; 0xE7
101 ;; (?,Th(B tone "TONE MAI EK N/S-T") ; 0xE8
102 ;; (?,Ti(B tone "TONE MAI THO N/S-T") ; 0xE9
103 ;; (?,Tj(B tone "TONE MAI TRI N/S-T") ; 0xEA
104 ;; (?,Tk(B tone "TONE MAI CHATTAWA N/S-T") ; 0xEB
105 ;; (?,Tl(B tone "THANTHAKHAT N/S-T (cancellation mark)") ; 0xEC
106 ;; (?,Tm(B tone "NIKKHAHIT N/S-T (final nasal)") ; 0xED
107 ;; (?,Tn(B vowel-upper "YAMAKKAN N/S-T") ; 0xEE
108 ;; (?,To(B special "FONGMAN") ; 0xEF
109 ;; (?,Tp(B special "DIGIT ZERO") ; 0xF0
110 ;; (?,Tq(B special "DIGIT ONE") ; 0xF1
111 ;; (?,Tr(B special "DIGIT TWO") ; 0xF2
112 ;; (?,Ts(B special "DIGIT THREE") ; 0xF3
113 ;; (?,Tt(B special "DIGIT FOUR") ; 0xF4
114 ;; (?,Tu(B special "DIGIT FIVE") ; 0xF5
115 ;; (?,Tv(B special "DIGIT SIX") ; 0xF6
116 ;; (?,Tw(B special "DIGIT SEVEN") ; 0xF7
117 ;; (?,Tx(B special "DIGIT EIGHT") ; 0xF8
118 ;; (?,Ty(B special "DIGIT NINE") ; 0xF9
119 ;; (?,Tz(B special "ANGKHANKHU (ellipsis)") ; 0xFA
120 ;; (?,T{(B special "KHOMUT (beginning of religious texts)") ; 0xFB
121 ;; (?,T|(B invalid nil) ; 0xFC
122 ;; (?,T}(B invalid nil) ; 0xFD
123 ;; (?,T~(B invalid nil) ; 0xFE
124 ;; ))
125 ;; elm)
126 ;; (while l
127 ;; (setq elm (car l))
128 ;; (put-char-code-property (car elm) 'phonetic-type (car (cdr elm)))
129 ;; (put-char-code-property (car elm) 'name (nth 2 elm))
130 ;; (setq l (cdr l))))
131
132 (defconst thai-character-alist ; one (CHAR . PROPERTY) entry per code 0xA1..0xFE, in ascending order
133 '((?,T!(B . consonant) ; 0xA1: LETTER KO KAI
134 (?,T"(B . consonant) ; 0xA2: LETTER KHO KHAI
135 (?,T#(B . consonant) ; 0xA3: LETTER KHO KHUAT (obsolete)
136 (?,T$(B . consonant) ; 0xA4: LETTER KHO KHWAI
137 (?,T%(B . consonant) ; 0xA5: LETTER KHO KHON (obsolete)
138 (?,T&(B . consonant) ; 0xA6: LETTER KHO RAKHANG
139 (?,T'(B . consonant) ; 0xA7: LETTER NGO NGU
140 (?,T((B . consonant) ; 0xA8: LETTER CHO CHAN
141 (?,T)(B . consonant) ; 0xA9: LETTER CHO CHING
142 (?,T*(B . consonant) ; 0xAA: LETTER CHO CHANG
143 (?,T+(B . consonant) ; 0xAB: LETTER SO SO
144 (?,T,(B . consonant) ; 0xAC: LETTER CHO CHOE
145 (?,T-(B . consonant) ; 0xAD: LETTER YO YING
146 (?,T.(B . consonant) ; 0xAE: LETTER DO CHADA
147 (?,T/(B . consonant) ; 0xAF: LETTER TO PATAK
148 (?,T0(B . consonant) ; 0xB0: LETTER THO THAN
149 (?,T1(B . consonant) ; 0xB1: LETTER THO NANGMONTHO
150 (?,T2(B . consonant) ; 0xB2: LETTER THO PHUTHAO
151 (?,T3(B . consonant) ; 0xB3: LETTER NO NEN
152 (?,T4(B . consonant) ; 0xB4: LETTER DO DEK
153 (?,T5(B . consonant) ; 0xB5: LETTER TO TAO
154 (?,T6(B . consonant) ; 0xB6: LETTER THO THUNG
155 (?,T7(B . consonant) ; 0xB7: LETTER THO THAHAN
156 (?,T8(B . consonant) ; 0xB8: LETTER THO THONG
157 (?,T9(B . consonant) ; 0xB9: LETTER NO NU
158 (?,T:(B . consonant) ; 0xBA: LETTER BO BAIMAI
159 (?,T;(B . consonant) ; 0xBB: LETTER PO PLA
160 (?,T<(B . consonant) ; 0xBC: LETTER PHO PHUNG
161 (?,T=(B . consonant) ; 0xBD: LETTER FO FA
162 (?,T>(B . consonant) ; 0xBE: LETTER PHO PHAN
163 (?,T?(B . consonant) ; 0xBF: LETTER FO FAN
164 (?,T@(B . consonant) ; 0xC0: LETTER PHO SAMPHAO
165 (?,TA(B . consonant) ; 0xC1: LETTER MO MA
166 (?,TB(B . consonant) ; 0xC2: LETTER YO YAK
167 (?,TC(B . consonant) ; 0xC3: LETTER RO RUA
168 (?,TD(B . vowel-base) ; 0xC4: LETTER RU (vowel letter used to write Pali)
169 (?,TE(B . consonant) ; 0xC5: LETTER LO LING
170 (?,TF(B . vowel-base) ; 0xC6: LETTER LU (vowel letter used to write Pali)
171 (?,TG(B . consonant) ; 0xC7: LETTER WO WAEN
172 (?,TH(B . consonant) ; 0xC8: LETTER SO SALA
173 (?,TI(B . consonant) ; 0xC9: LETTER SO RUSI
174 (?,TJ(B . consonant) ; 0xCA: LETTER SO SUA
175 (?,TK(B . consonant) ; 0xCB: LETTER HO HIP
176 (?,TL(B . consonant) ; 0xCC: LETTER LO CHULA
177 (?,TM(B . consonant) ; 0xCD: LETTER O ANG
178 (?,TN(B . consonant) ; 0xCE: LETTER HO NOK HUK
179 (?,TO(B . special) ; 0xCF: PAI YAN NOI (abbreviation)
180 (?,TP(B . vowel-base) ; 0xD0: VOWEL SIGN SARA A
181 (?,TQ(B . vowel-upper) ; 0xD1: VOWEL SIGN MAI HAN-AKAT N/S-T
182 (?,TR(B . vowel-base) ; 0xD2: VOWEL SIGN SARA AA
183 (?,TS(B . vowel-base) ; 0xD3: VOWEL SIGN SARA AM
184 (?,TT(B . vowel-upper) ; 0xD4: VOWEL SIGN SARA I N/S-T
185 (?,TU(B . vowel-upper) ; 0xD5: VOWEL SIGN SARA II N/S-T
186 (?,TV(B . vowel-upper) ; 0xD6: VOWEL SIGN SARA UE N/S-T
187 (?,TW(B . vowel-upper) ; 0xD7: VOWEL SIGN SARA UEE N/S-T
188 (?,TX(B . vowel-lower) ; 0xD8: VOWEL SIGN SARA U N/S-B
189 (?,TY(B . vowel-lower) ; 0xD9: VOWEL SIGN SARA UU N/S-B
190 (?,TZ(B . vowel-lower) ; 0xDA: VOWEL SIGN PHINTHU N/S-B (Pali virama)
191 (?,T[(B . not-used) ; 0xDB:
192 (?,T\(B . not-used) ; 0xDC:
193 (?,T](B . not-used) ; 0xDD:
194 (?,T^(B . not-used) ; 0xDE:
195 (?,T_(B . special) ; 0xDF: BAHT SIGN (currency symbol)
196 (?,T`(B . vowel-base) ; 0xE0: VOWEL SIGN SARA E
197 (?,Ta(B . vowel-base) ; 0xE1: VOWEL SIGN SARA AE
198 (?,Tb(B . vowel-base) ; 0xE2: VOWEL SIGN SARA O
199 (?,Tc(B . vowel-base) ; 0xE3: VOWEL SIGN SARA MAI MUAN
200 (?,Td(B . vowel-base) ; 0xE4: VOWEL SIGN SARA MAI MALAI
201 (?,Te(B . vowel-base) ; 0xE5: LAK KHANG YAO
202 (?,Tf(B . special) ; 0xE6: MAI YAMOK (repetition)
203 (?,Tg(B . vowel-upper) ; 0xE7: VOWEL SIGN MAI TAI KHU N/S-T
204 (?,Th(B . tone) ; 0xE8: TONE MAI EK N/S-T
205 (?,Ti(B . tone) ; 0xE9: TONE MAI THO N/S-T
206 (?,Tj(B . tone) ; 0xEA: TONE MAI TRI N/S-T
207 (?,Tk(B . tone) ; 0xEB: TONE MAI CHATTAWA N/S-T
208 (?,Tl(B . tone) ; 0xEC: THANTHAKHAT N/S-T (cancellation mark)
209 (?,Tm(B . tone) ; 0xED: NIKKHAHIT N/S-T (final nasal)
210 (?,Tn(B . vowel-upper) ; 0xEE: YAMAKKAN N/S-T
211 (?,To(B . special) ; 0xEF: FONGMAN
212 (?,Tp(B . special) ; 0xF0: DIGIT ZERO
213 (?,Tq(B . special) ; 0xF1: DIGIT ONE
214 (?,Tr(B . special) ; 0xF2: DIGIT TWO
215 (?,Ts(B . special) ; 0xF3: DIGIT THREE
216 (?,Tt(B . special) ; 0xF4: DIGIT FOUR
217 (?,Tu(B . special) ; 0xF5: DIGIT FIVE
218 (?,Tv(B . special) ; 0xF6: DIGIT SIX
219 (?,Tw(B . special) ; 0xF7: DIGIT SEVEN
220 (?,Tx(B . special) ; 0xF8: DIGIT EIGHT
221 (?,Ty(B . special) ; 0xF9: DIGIT NINE
222 (?,Tz(B . special) ; 0xFA: ANGKHANKHU (ellipsis)
223 (?,T{(B . special) ; 0xFB: KHOMUT (beginning of religious texts)
224 (?,T|(B . not-used) ; 0xFC:
225 (?,T}(B . not-used) ; 0xFD:
226 (?,T~(B . not-used) ; 0xFE:
227 )
228 "Association list of thai-character and property.")
229 (setq thai-character-alist ; prepend one extra entry in front of the per-code entries of the alist
230 (cons (cons (string-to-char "0,TQi1(B") 'vowel-upper-tone) ; key is the first char of this string; presumably a pre-composed MAI HAN-AKAT (0xD1) + MAI THO (0xE9) sequence -- TODO confirm
231 thai-character-alist))
232
233 (defconst thai-category-table ; a copy, so the Thai categories are added to it rather than to the standard table
234 (copy-category-table (standard-category-table))
235 "Category table for Thai.")
236 (define-category-mnemonic ?0 "Thai consonants" ; ?0 is given to entries whose property is `consonant'
237 thai-category-table)
238 (define-category-mnemonic ?1 "Thai upper/lower vowel or tone mark." ; ?1 is given to `vowel-upper', `vowel-lower' and `tone' entries
239 thai-category-table)
240 (define-category-mnemonic ?2 "Thai base vowel or special characters." ; ?2 is given to every remaining entry except `vowel-upper-tone'
241 thai-category-table)
242
;; Walk `thai-character-alist' and register every character in
;; `thai-category-table':
;;   ?0  consonants
;;   ?1  upper vowels, lower vowels and tone marks
;;   ?2  everything else, except entries tagged `vowel-upper-tone',
;;       which receive no category at all.
(let ((rest thai-character-alist)
      entry char type)
  (while rest
    (setq entry (car rest)
          rest (cdr rest)
          char (car entry)
          type (cdr entry))
    (cond ((eq type 'consonant)
           (modify-category-entry char ?0 thai-category-table))
          ((memq type '(vowel-upper vowel-lower tone))
           (modify-category-entry char ?1 thai-category-table))
          ((not (eq type 'vowel-upper-tone))
           (modify-category-entry char ?2 thai-category-table)))))
257
258 ;; ;;;###autoload
259 ;; (defun thai-compose-region (beg end)
260 ;; "Compose Thai characters in the region.
261 ;; When called from a program, expects two arguments,
262 ;; positions (integers or markers) specifying the region."
263 ;; (interactive "r")
264 ;; (save-restriction
265 ;; (narrow-to-region beg end)
266 ;; (decompose-region (point-min) (point-max))
267 ;; (goto-char (point-min))
268 ;; (while (re-search-forward "\\c0\\(\\c2\\|\\c3\\|\\c4\\)+" nil t)
269 ;; (if (aref (char-category-set (char-after (match-beginning 0))) ?t)
270 ;; (compose-region (match-beginning 0) (match-end 0))))))
271
272 ;;;###autoload
(defun thai-compose-region (beg end)
  "Compose Thai characters in the region.
When called from a program, expects two arguments,
positions (integers or markers) specifying the region.

The region is first decomposed.  Then every match of one character
of category ?0 followed by one or more characters of category ?1
\(as defined in `thai-category-table') is passed to `compose-region'."
  (interactive "r")
  (save-restriction
    (narrow-to-region beg end)
    (decompose-region (point-min) (point-max))
    (goto-char (point-min))
    ;; The \\c0 / \\c1 regexp categories are looked up in the current
    ;; category table, so install `thai-category-table' for the
    ;; duration of the search and always put the previous table back,
    ;; even if the loop exits non-locally.
    (let ((ctbl (category-table)))
      (unwind-protect
          (progn
            (set-category-table thai-category-table)
            (while (re-search-forward "\\c0\\c1+" nil t)
              (compose-region (match-beginning 0) (match-end 0))))
        (set-category-table ctbl)))))
288
289 ;;;###autoload
(defun thai-compose-buffer ()
  "Compose Thai characters in the accessible portion of the current buffer.
This simply calls `thai-compose-region' on the span from `point-min'
to `point-max'."
  (interactive)
  (let ((start (point-min))
        (finish (point-max)))
    (thai-compose-region start finish)))
294
295 ;; ;;;###autoload
296 ;; (defun thai-post-read-conversion (len)
297 ;; (save-excursion
298 ;; (save-restriction
299 ;; (let ((buffer-modified-p (buffer-modified-p)))
300 ;; (narrow-to-region (point) (+ (point) len))
301 ;; (thai-compose-region (point-min) (point-max))
302 ;; (set-buffer-modified-p buffer-modified-p)
303 ;; (- (point-max) (point-min))))))
304
305 ;; ;;;###autoload
306 ;; (defun thai-pre-write-conversion (from to)
307 ;; (let ((old-buf (current-buffer))
308 ;; (work-buf (get-buffer-create " *thai-work*")))
309 ;; (set-buffer work-buf)
310 ;; (erase-buffer)
311 ;; (if (stringp from)
312 ;; (insert from)
313 ;; (insert-buffer-substring old-buf from to))
314 ;; (decompose-region (point-min) (point-max))
315 ;; ;; Should return nil as annotations.
316 ;; nil))
317
318 ;;
319 (provide 'language/thai-util) ; announce the feature `language/thai-util' as loaded
320
321 ;;; Local Variables:
322 ;;; generated-autoload-file: "../loaddefs.el"
323 ;;; End:
324 ;;; thai-util.el ends here