771
|
1 ;;; tibet-util.el --- utilities for Tibetan -*- coding: iso-2022-7bit; -*-
|
|
2
|
|
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
|
|
4 ;; Licensed to the Free Software Foundation.
|
|
5
|
|
6 ;; Keywords: multilingual, Tibetan
|
|
7
|
|
8 ;; This file is part of XEmacs.
|
|
9
|
|
10 ;; XEmacs is free software; you can redistribute it and/or modify it
|
|
11 ;; under the terms of the GNU General Public License as published by
|
|
12 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
13 ;; any later version.
|
|
14
|
|
15 ;; XEmacs is distributed in the hope that it will be useful, but
|
|
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
18 ;; General Public License for more details.
|
|
19
|
|
20 ;; You should have received a copy of the GNU General Public License
|
|
21 ;; along with XEmacs; see the file COPYING. If not, write to the Free
|
|
22 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
|
23 ;; 02111-1307, USA.
|
|
24
|
|
25 ;;; Synched up with: Emacs 21.0.103 (language/tibet-util.el).
|
|
26
|
|
27 ;; Author: Toru TOMABECHI, <Toru.Tomabechi@orient.unil.ch>
|
|
28
|
|
29 ;; Created: Feb. 17. 1997
|
|
30
|
|
31 ;; History:
|
|
32 ;; 1997.03.13 Modification in treatment of text properties;
|
|
33 ;; Support for some special signs and punctuations.
|
|
34 ;; 1999.10.25 Modification for a new composition way by K.Handa.
|
|
35
|
|
36 ;;; Code:
|
|
37
|
|
38 ;;;###autoload
|
|
39 (defun tibetan-char-p (ch)
|
|
40 "Check if char CH is Tibetan character.
|
|
41 Returns non-nil if CH is Tibetan. Otherwise, returns nil."
|
|
42 (memq (char-charset ch) '(tibetan tibetan-1-column)))
|
|
43
|
|
44 ;;; Functions for Tibetan <-> Tibetan-transcription.
|
|
45
|
|
46 ;;;###autoload
|
|
47 (defun tibetan-tibetan-to-transcription (str)
|
|
48 "Transcribe Tibetan string STR and return the corresponding Roman string."
|
|
49 (let (;; Accumulate transcriptions here in reverse order.
|
|
50 (trans nil)
|
|
51 (len (length str))
|
|
52 (i 0)
|
|
53 ch this-trans)
|
|
54 (while (< i len)
|
|
55 (let ((idx (string-match tibetan-precomposition-rule-regexp str i)))
|
|
56 (if (eq idx i)
|
|
57 ;; Ith character and the followings matches precomposable
|
|
58 ;; Tibetan sequence.
|
|
59 (setq i (match-end 0)
|
|
60 this-trans
|
|
61 (car (rassoc
|
|
62 (cdr (assoc (match-string 0 str)
|
|
63 tibetan-precomposition-rule-alist))
|
|
64 tibetan-precomposed-transcription-alist)))
|
|
65 (setq ch (substring str i (1+ i))
|
|
66 i (1+ i)
|
|
67 this-trans
|
|
68 (car (or (rassoc ch tibetan-consonant-transcription-alist)
|
|
69 (rassoc ch tibetan-vowel-transcription-alist)
|
|
70 (rassoc ch tibetan-subjoined-transcription-alist)))))
|
|
71 (setq trans (cons this-trans trans))))
|
|
72 (apply 'concat (nreverse trans))))
|
|
73
|
|
74 ;;;###autoload
|
|
75 (defun tibetan-transcription-to-tibetan (str)
|
|
76 "Convert Tibetan Roman string STR to Tibetan character string.
|
|
77 The returned string has no composition information."
|
|
78 (let (;; Case is significant.
|
|
79 (case-fold-search nil)
|
|
80 (idx 0)
|
|
81 ;; Accumulate Tibetan strings here in reverse order.
|
|
82 (t-str-list nil)
|
|
83 i subtrans)
|
|
84 (while (setq i (string-match tibetan-regexp str idx))
|
|
85 (if (< idx i)
|
|
86 ;; STR contains a pattern that doesn't match Tibetan
|
|
87 ;; transcription. Include the pattern as is.
|
|
88 (setq t-str-list (cons (substring str idx i) t-str-list)))
|
|
89 (setq subtrans (match-string 0 str)
|
|
90 idx (match-end 0))
|
|
91 (let ((t-char (cdr (assoc subtrans
|
|
92 tibetan-precomposed-transcription-alist))))
|
|
93 (if t-char
|
|
94 ;; SUBTRANS corresponds to a transcription for
|
|
95 ;; precomposable Tibetan sequence.
|
|
96 (setq t-char (car (rassoc t-char
|
|
97 tibetan-precomposition-rule-alist)))
|
|
98 (setq t-char
|
|
99 (cdr
|
|
100 (or (assoc subtrans tibetan-consonant-transcription-alist)
|
|
101 (assoc subtrans tibetan-vowel-transcription-alist)
|
|
102 (assoc subtrans tibetan-modifier-transcription-alist)
|
|
103 (assoc subtrans tibetan-subjoined-transcription-alist)))))
|
|
104 (setq t-str-list (cons t-char t-str-list))))
|
|
105 (if (< idx (length str))
|
|
106 (setq t-str-list (cons (substring str idx) t-str-list)))
|
|
107 (apply 'concat (nreverse t-str-list))))
|
|
108
|
|
109 ;;;
|
|
110 ;;; Functions for composing/decomposing Tibetan sequence.
|
|
111 ;;;
|
|
112 ;;; A Tibetan syllable is typically structured as follows:
|
|
113 ;;;
|
|
114 ;;; [Prefix] C [C+] V [M] [Suffix [Post suffix]]
|
|
115 ;;;
|
|
116 ;;; where C's are all vertically stacked, V appears below or above
|
|
117 ;;; consonant cluster and M is always put above the C[C+]V combination.
|
|
118 ;;; (Sanskrit visarga, though it is a vowel modifier, is considered
|
|
119 ;;; to be a punctuation.)
|
|
120 ;;;
|
|
121 ;;; Here are examples of the words "bsgrubs" and "hfauM"
|
|
122 ;;;
|
|
123 ;;; 4$(7"70"714%qx!"U0"G###C"U14"70"714"G0"G1(B 4$(7"Hx!"Rx!"Ur'"_0"H"R"U"_1(B
|
|
124 ;;;
|
|
125 ;;; M
|
|
126 ;;; b s b s h
|
|
127 ;;; g fa
|
|
128 ;;; r u
|
|
129 ;;; u
|
|
130 ;;;
|
|
131 ;;; Consonants `'' ($(7"A(B), `w' ($(7">(B), `y' ($(7"B(B), `r' ($(7"C(B) take special
|
|
132 ;;; forms when they are used as subjoined consonant. Consonant `r'
|
|
133 ;;; takes another special form when used as superjoined in such a case
|
|
134 ;;; as "rka", while it does not change its form when conjoined with
|
|
135 ;;; subjoined `'', `w' or `y' as in "rwa", "rya".
|
|
136
|
|
137 ;; Append a proper composition rule and glyph to COMPONENTS to compose
|
|
138 ;; CHAR with a composition that has COMPONENTS.
|
|
139
|
|
140 (defun tibetan-add-components (components char)
|
|
141 (let ((last (last components))
|
|
142 (stack-upper '(tc . bc))
|
|
143 (stack-under '(bc . tc))
|
|
144 rule comp-vowel tmp)
|
|
145 ;; Special treatment for 'a chung.
|
|
146 ;; If 'a follows a consonant, turn it into the subjoined form.
|
|
147 ;; * Disabled by Tomabechi 2000/06/09 *
|
|
148 ;; Because in Unicode, $(7"A(B may follow directly a consonant without
|
|
149 ;; any intervening vowel, as in 4$(7"90"914""0"""Q14"A0"A1!;(B=4$(7"90"91(B 4$(7""0""1(B 4$(7"A0"A1(B not 4$(7"90"91(B 4$(7""0""1(B $(7"Q(B 4$(7"A0"A1(B
|
|
150 ;;(if (and (= char ?$(7"A(B)
|
|
151 ;; (aref (char-category-set (car last)) ?0))
|
|
152 ;; (setq char ?$(7"R(B)) ;; modified for new font by Tomabechi 1999/12/10
|
|
153
|
|
154 ;; Composite vowel signs are decomposed before being added
|
|
155 ;; Added by Tomabechi 2000/06/08
|
|
156 (if (memq char '(?$(7"T(B ?$(7"V(B ?$(7"W(B ?$(7"X(B ?$(7"Y(B ?$(7"Z(B ?$(7"b(B))
|
|
157 (setq comp-vowel
|
|
158 (copy-sequence
|
|
159 (cddr (assoc (char-to-string char)
|
|
160 tibetan-composite-vowel-alist)))
|
|
161 char
|
|
162 (cadr (assoc (char-to-string char)
|
|
163 tibetan-composite-vowel-alist))))
|
|
164 (cond
|
|
165 ;; Compose upper vowel sign vertically over.
|
|
166 ((aref (char-category-set char) ?2)
|
|
167 (setq rule stack-upper))
|
|
168
|
|
169 ;; Compose lower vowel sign vertically under.
|
|
170 ((aref (char-category-set char) ?3)
|
|
171 (if (eq char ?$(7"Q(B) ;; `$(7"Q(B' should not visible when composed.
|
|
172 (setq rule nil)
|
|
173 (setq rule stack-under)))
|
|
174 ;; Transform ra-mgo (superscribed r) if followed by a subjoined
|
|
175 ;; consonant other than w, ', y, r.
|
|
176 ((and (= (car last) ?$(7"C(B)
|
|
177 (not (memq char '(?$(7#>(B ?$(7"R(B ?$(7#B(B ?$(7#C(B))))
|
|
178 (setcar last ?$(7!"(B) ;; modified for newfont by Tomabechi 1999/12/10
|
|
179 (setq rule stack-under))
|
|
180 ;; Transform initial base consonant if followed by a subjoined
|
|
181 ;; consonant but 'a.
|
|
182 (t
|
|
183 (let ((laststr (char-to-string (car last))))
|
|
184 (if (and (/= char ?$(7"R(B) ;; modified for new font by Tomabechi
|
|
185 (string-match "[$(7"!(B-$(7"="?"@"D(B-$(7"J"K(B]" laststr))
|
|
186 (setcar last (string-to-char
|
|
187 (cdr (assoc (char-to-string (car last))
|
|
188 tibetan-base-to-subjoined-alist)))))
|
|
189 (setq rule stack-under))))
|
|
190
|
|
191 (if rule
|
|
192 (setcdr last (list rule char)))
|
|
193 ;; Added by Tomabechi 2000/06/08
|
|
194 (if comp-vowel
|
|
195 (nconc last comp-vowel))
|
|
196 ))
|
|
197
|
|
198 ;;;###autoload
|
|
199 (defun tibetan-compose-string (str)
|
|
200 "Compose Tibetan string STR."
|
|
201 (let ((idx 0))
|
|
202 ;; `$(7"A(B' is included in the pattern for subjoined consonants
|
|
203 ;; because we treat it specially in tibetan-add-components.
|
|
204 ;; (This feature is removed by Tomabechi 2000/06/08)
|
|
205 (while (setq idx (string-match tibetan-composable-pattern str idx))
|
|
206 (let ((from idx)
|
|
207 (to (match-end 0))
|
|
208 components)
|
|
209 (if (eq (string-match tibetan-precomposition-rule-regexp str idx) idx)
|
|
210 (setq idx (match-end 0)
|
|
211 components
|
|
212 (list (string-to-char
|
|
213 (cdr
|
|
214 (assoc (match-string 0 str)
|
|
215 tibetan-precomposition-rule-alist)))))
|
|
216 (setq components (list (aref str idx))
|
|
217 idx (1+ idx)))
|
|
218 (while (< idx to)
|
|
219 (tibetan-add-components components (aref str idx))
|
|
220 (setq idx (1+ idx)))
|
|
221 (compose-string str from to components))))
|
|
222 str)
|
|
223
|
|
224 ;;;###autoload
|
|
225 (defun tibetan-compose-region (beg end)
|
|
226 "Compose Tibetan text the region BEG and END."
|
|
227 (interactive "r")
|
|
228 (let (str result chars)
|
|
229 (save-excursion
|
|
230 (save-restriction
|
|
231 (narrow-to-region beg end)
|
|
232 (goto-char (point-min))
|
|
233 ;; `$(7"A(B' is included in the pattern for subjoined consonants
|
|
234 ;; because we treat it specially in tibetan-add-components.
|
|
235 ;; (This feature is removed by Tomabechi 2000/06/08)
|
|
236 (while (re-search-forward tibetan-composable-pattern nil t)
|
|
237 (let ((from (match-beginning 0))
|
|
238 (to (match-end 0))
|
|
239 components)
|
|
240 (goto-char from)
|
|
241 (if (looking-at tibetan-precomposition-rule-regexp)
|
|
242 (progn
|
|
243 (setq components
|
|
244 (list (string-to-char
|
|
245 (cdr
|
|
246 (assoc (match-string 0)
|
|
247 tibetan-precomposition-rule-alist)))))
|
|
248 (goto-char (match-end 0)))
|
|
249 (setq components (list (char-after from)))
|
|
250 (forward-char 1))
|
|
251 (while (< (point) to)
|
|
252 (tibetan-add-components components (following-char))
|
|
253 (forward-char 1))
|
|
254 (compose-region from to components)))))))
|
|
255
|
|
256 (defvar tibetan-decompose-precomposition-alist
|
|
257 (mapcar (function (lambda (x) (cons (string-to-char (cdr x)) (car x))))
|
|
258 tibetan-precomposition-rule-alist))
|
|
259
|
|
260 ;;;###autoload
|
|
261 (defun tibetan-decompose-region (from to)
|
|
262 "Decompose Tibetan text in the region FROM and TO.
|
|
263 This is different from decompose-region because precomposed Tibetan characters
|
|
264 are decomposed into normal Tiebtan character sequences."
|
|
265 (interactive "r")
|
|
266 (save-restriction
|
|
267 (narrow-to-region from to)
|
|
268 (decompose-region from to)
|
|
269 (goto-char from)
|
|
270 (while (not (eobp))
|
|
271 (let* ((char (following-char))
|
|
272 (slot (assq char tibetan-decompose-precomposition-alist)))
|
|
273 (if slot
|
|
274 (progn
|
|
275 (delete-char 1)
|
|
276 (insert (cdr slot)))
|
|
277 (forward-char 1))))))
|
|
278
|
|
279
|
|
280 ;;;###autoload
|
|
281 (defun tibetan-decompose-string (str)
|
|
282 "Decompose Tibetan string STR.
|
|
283 This is different from decompose-string because precomposed Tibetan characters
|
|
284 are decomposed into normal Tiebtan character sequences."
|
|
285 (let ((new "")
|
|
286 (len (length str))
|
|
287 (idx 0)
|
|
288 char slot)
|
|
289 (while (< idx len)
|
|
290 (setq char (aref str idx)
|
|
291 slot (assq (aref str idx) tibetan-decompose-precomposition-alist)
|
|
292 new (concat new (if slot (cdr slot) (char-to-string char)))
|
|
293 idx (1+ idx)))
|
|
294 new))
|
|
295
|
|
296 ;;;###autoload
|
|
297 (defun tibetan-composition-function (from to pattern &optional string)
|
|
298 (if string
|
|
299 (tibetan-compose-string string)
|
|
300 (tibetan-compose-region from to))
|
|
301 (- to from))
|
|
302
|
|
303 ;;;
|
|
304 ;;; This variable is used to avoid repeated decomposition.
|
|
305 ;;;
|
|
306 (setq-default tibetan-decomposed nil)
|
|
307
|
|
308 ;;;###autoload
|
|
309 (defun tibetan-decompose-buffer ()
|
|
310 "Decomposes Tibetan characters in the buffer into their components.
|
|
311 See also the documentation of the function `tibetan-decompose-region'."
|
|
312 (interactive)
|
|
313 (make-local-variable 'tibetan-decomposed)
|
|
314 (cond ((not tibetan-decomposed)
|
|
315 (tibetan-decompose-region (point-min) (point-max))
|
|
316 (setq tibetan-decomposed t))))
|
|
317
|
|
318 ;;;###autoload
|
|
319 (defun tibetan-compose-buffer ()
|
|
320 "Composes Tibetan character components in the buffer.
|
|
321 See also docstring of the function tibetan-compose-region."
|
|
322 (interactive)
|
|
323 (make-local-variable 'tibetan-decomposed)
|
|
324 (tibetan-compose-region (point-min) (point-max))
|
|
325 (setq tibetan-decomposed nil))
|
|
326
|
|
327 ;;;###autoload
|
|
328 (defun tibetan-post-read-conversion (len)
|
|
329 (save-excursion
|
|
330 (save-restriction
|
|
331 (let ((buffer-modified-p (buffer-modified-p)))
|
|
332 (narrow-to-region (point) (+ (point) len))
|
|
333 (tibetan-compose-region (point-min) (point-max))
|
|
334 (set-buffer-modified-p buffer-modified-p)
|
|
335 (make-local-variable 'tibetan-decomposed)
|
|
336 (setq tibetan-decomposed nil)
|
|
337 (- (point-max) (point-min))))))
|
|
338
|
|
339
|
|
340 ;;;###autoload
|
|
341 (defun tibetan-pre-write-conversion (from to)
|
|
342 (setq tibetan-decomposed-temp tibetan-decomposed)
|
|
343 (let ((old-buf (current-buffer)))
|
|
344 (set-buffer (generate-new-buffer " *temp*"))
|
|
345 (if (stringp from)
|
|
346 (insert from)
|
|
347 (insert-buffer-substring old-buf from to))
|
|
348 (if (not tibetan-decomposed-temp)
|
|
349 (tibetan-decompose-region (point-min) (point-max)))
|
|
350 ;; Should return nil as annotations.
|
|
351 nil))
|
|
352
|
|
353 (provide 'tibet-util)
|
|
354
|
|
355 ;;; language/tibet-util.el ends here.
|