771
|
1 ;;; tibet-util.el --- utilities for Tibetan -*- coding: iso-2022-7bit; -*-
|
|
2
|
|
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
|
|
4 ;; Licensed to the Free Software Foundation.
|
|
5
|
|
6 ;; Keywords: multilingual, Tibetan
|
|
7
|
|
8 ;; This file is part of XEmacs.
|
|
9
|
|
10 ;; XEmacs is free software; you can redistribute it and/or modify it
|
|
11 ;; under the terms of the GNU General Public License as published by
|
|
12 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
13 ;; any later version.
|
|
14
|
|
15 ;; XEmacs is distributed in the hope that it will be useful, but
|
|
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
18 ;; General Public License for more details.
|
|
19
|
|
20 ;; You should have received a copy of the GNU General Public License
|
|
21 ;; along with XEmacs; see the file COPYING. If not, write to the Free
|
|
22 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
|
23 ;; 02111-1307, USA.
|
|
24
|
778
|
25 ;;; Synched up with: Emacs 21.1 (language/tibet-util.el).
|
771
|
26
|
|
27 ;; Author: Toru TOMABECHI, <Toru.Tomabechi@orient.unil.ch>
|
|
28
|
|
29 ;; Created: Feb. 17. 1997
|
|
30
|
778
|
31 ;;; History:
|
771
|
32 ;; 1997.03.13 Modification in treatment of text properties;
|
|
33 ;; Support for some special signs and punctuations.
|
|
34 ;; 1999.10.25 Modification for a new composition way by K.Handa.
|
|
35
|
778
|
36 ;;; Commentary:
|
|
37
|
771
|
38 ;;; Code:
|
|
39
|
|
40 ;;;###autoload
|
|
41 (defun tibetan-char-p (ch)
|
|
42 "Check if char CH is Tibetan character.
|
|
43 Returns non-nil if CH is Tibetan. Otherwise, returns nil."
|
|
44 (memq (char-charset ch) '(tibetan tibetan-1-column)))
|
|
45
|
|
46 ;;; Functions for Tibetan <-> Tibetan-transcription.
|
|
47
|
|
48 ;;;###autoload
|
|
49 (defun tibetan-tibetan-to-transcription (str)
|
|
50 "Transcribe Tibetan string STR and return the corresponding Roman string."
|
|
51 (let (;; Accumulate transcriptions here in reverse order.
|
|
52 (trans nil)
|
|
53 (len (length str))
|
|
54 (i 0)
|
|
55 ch this-trans)
|
|
56 (while (< i len)
|
|
57 (let ((idx (string-match tibetan-precomposition-rule-regexp str i)))
|
|
58 (if (eq idx i)
|
|
59 ;; Ith character and the followings matches precomposable
|
|
60 ;; Tibetan sequence.
|
|
61 (setq i (match-end 0)
|
|
62 this-trans
|
|
63 (car (rassoc
|
|
64 (cdr (assoc (match-string 0 str)
|
|
65 tibetan-precomposition-rule-alist))
|
|
66 tibetan-precomposed-transcription-alist)))
|
|
67 (setq ch (substring str i (1+ i))
|
|
68 i (1+ i)
|
|
69 this-trans
|
|
70 (car (or (rassoc ch tibetan-consonant-transcription-alist)
|
|
71 (rassoc ch tibetan-vowel-transcription-alist)
|
|
72 (rassoc ch tibetan-subjoined-transcription-alist)))))
|
|
73 (setq trans (cons this-trans trans))))
|
|
74 (apply 'concat (nreverse trans))))
|
|
75
|
|
76 ;;;###autoload
|
|
77 (defun tibetan-transcription-to-tibetan (str)
|
|
78 "Convert Tibetan Roman string STR to Tibetan character string.
|
|
79 The returned string has no composition information."
|
|
80 (let (;; Case is significant.
|
|
81 (case-fold-search nil)
|
|
82 (idx 0)
|
|
83 ;; Accumulate Tibetan strings here in reverse order.
|
|
84 (t-str-list nil)
|
|
85 i subtrans)
|
|
86 (while (setq i (string-match tibetan-regexp str idx))
|
|
87 (if (< idx i)
|
|
88 ;; STR contains a pattern that doesn't match Tibetan
|
|
89 ;; transcription. Include the pattern as is.
|
|
90 (setq t-str-list (cons (substring str idx i) t-str-list)))
|
|
91 (setq subtrans (match-string 0 str)
|
|
92 idx (match-end 0))
|
|
93 (let ((t-char (cdr (assoc subtrans
|
|
94 tibetan-precomposed-transcription-alist))))
|
|
95 (if t-char
|
|
96 ;; SUBTRANS corresponds to a transcription for
|
|
97 ;; precomposable Tibetan sequence.
|
|
98 (setq t-char (car (rassoc t-char
|
|
99 tibetan-precomposition-rule-alist)))
|
|
100 (setq t-char
|
|
101 (cdr
|
|
102 (or (assoc subtrans tibetan-consonant-transcription-alist)
|
|
103 (assoc subtrans tibetan-vowel-transcription-alist)
|
|
104 (assoc subtrans tibetan-modifier-transcription-alist)
|
|
105 (assoc subtrans tibetan-subjoined-transcription-alist)))))
|
|
106 (setq t-str-list (cons t-char t-str-list))))
|
|
107 (if (< idx (length str))
|
|
108 (setq t-str-list (cons (substring str idx) t-str-list)))
|
|
109 (apply 'concat (nreverse t-str-list))))
|
|
110
|
|
111 ;;;
|
|
112 ;;; Functions for composing/decomposing Tibetan sequence.
|
|
113 ;;;
|
|
114 ;;; A Tibetan syllable is typically structured as follows:
|
|
115 ;;;
|
|
116 ;;; [Prefix] C [C+] V [M] [Suffix [Post suffix]]
|
|
117 ;;;
|
|
118 ;;; where C's are all vertically stacked, V appears below or above
|
|
119 ;;; consonant cluster and M is always put above the C[C+]V combination.
|
|
120 ;;; (Sanskrit visarga, though it is a vowel modifier, is considered
|
|
121 ;;; to be a punctuation.)
|
|
122 ;;;
|
|
123 ;;; Here are examples of the words "bsgrubs" and "hfauM"
|
|
124 ;;;
|
|
125 ;;; 4$(7"70"714%qx!"U0"G###C"U14"70"714"G0"G1(B 4$(7"Hx!"Rx!"Ur'"_0"H"R"U"_1(B
|
|
126 ;;;
|
|
127 ;;; M
|
|
128 ;;; b s b s h
|
|
129 ;;; g fa
|
|
130 ;;; r u
|
|
131 ;;; u
|
|
132 ;;;
|
|
133 ;;; Consonants `'' ($(7"A(B), `w' ($(7">(B), `y' ($(7"B(B), `r' ($(7"C(B) take special
|
|
134 ;;; forms when they are used as subjoined consonant. Consonant `r'
|
|
135 ;;; takes another special form when used as superjoined in such a case
|
|
136 ;;; as "rka", while it does not change its form when conjoined with
|
|
137 ;;; subjoined `'', `w' or `y' as in "rwa", "rya".
|
|
138
|
|
139 ;; Append a proper composition rule and glyph to COMPONENTS to compose
|
|
140 ;; CHAR with a composition that has COMPONENTS.
|
|
141
|
|
142 (defun tibetan-add-components (components char)
|
|
143 (let ((last (last components))
|
|
144 (stack-upper '(tc . bc))
|
|
145 (stack-under '(bc . tc))
|
788
|
146 rule comp-vowel ;tmp
|
|
147 )
|
771
|
148 ;; Special treatment for 'a chung.
|
|
149 ;; If 'a follows a consonant, turn it into the subjoined form.
|
|
150 ;; * Disabled by Tomabechi 2000/06/09 *
|
|
151 ;; Because in Unicode, $(7"A(B may follow directly a consonant without
|
|
152 ;; any intervening vowel, as in 4$(7"90"914""0"""Q14"A0"A1!;(B=4$(7"90"91(B 4$(7""0""1(B 4$(7"A0"A1(B not 4$(7"90"91(B 4$(7""0""1(B $(7"Q(B 4$(7"A0"A1(B
|
|
153 ;;(if (and (= char ?$(7"A(B)
|
788
|
154 ;; (char-in-category-p (car last) ?0))
|
771
|
155 ;; (setq char ?$(7"R(B)) ;; modified for new font by Tomabechi 1999/12/10
|
|
156
|
|
157 ;; Composite vowel signs are decomposed before being added
|
|
158 ;; Added by Tomabechi 2000/06/08
|
|
159 (if (memq char '(?$(7"T(B ?$(7"V(B ?$(7"W(B ?$(7"X(B ?$(7"Y(B ?$(7"Z(B ?$(7"b(B))
|
|
160 (setq comp-vowel
|
|
161 (copy-sequence
|
|
162 (cddr (assoc (char-to-string char)
|
|
163 tibetan-composite-vowel-alist)))
|
|
164 char
|
|
165 (cadr (assoc (char-to-string char)
|
|
166 tibetan-composite-vowel-alist))))
|
|
167 (cond
|
|
168 ;; Compose upper vowel sign vertically over.
|
788
|
169 ((char-in-category-p char ?2)
|
771
|
170 (setq rule stack-upper))
|
|
171
|
|
172 ;; Compose lower vowel sign vertically under.
|
788
|
173 ((char-in-category-p char ?3)
|
771
|
174 (if (eq char ?$(7"Q(B) ;; `$(7"Q(B' should not visible when composed.
|
|
175 (setq rule nil)
|
|
176 (setq rule stack-under)))
|
|
177 ;; Transform ra-mgo (superscribed r) if followed by a subjoined
|
|
178 ;; consonant other than w, ', y, r.
|
|
179 ((and (= (car last) ?$(7"C(B)
|
|
180 (not (memq char '(?$(7#>(B ?$(7"R(B ?$(7#B(B ?$(7#C(B))))
|
|
181 (setcar last ?$(7!"(B) ;; modified for newfont by Tomabechi 1999/12/10
|
|
182 (setq rule stack-under))
|
|
183 ;; Transform initial base consonant if followed by a subjoined
|
|
184 ;; consonant but 'a.
|
|
185 (t
|
|
186 (let ((laststr (char-to-string (car last))))
|
|
187 (if (and (/= char ?$(7"R(B) ;; modified for new font by Tomabechi
|
|
188 (string-match "[$(7"!(B-$(7"="?"@"D(B-$(7"J"K(B]" laststr))
|
|
189 (setcar last (string-to-char
|
|
190 (cdr (assoc (char-to-string (car last))
|
|
191 tibetan-base-to-subjoined-alist)))))
|
|
192 (setq rule stack-under))))
|
|
193
|
|
194 (if rule
|
|
195 (setcdr last (list rule char)))
|
|
196 ;; Added by Tomabechi 2000/06/08
|
|
197 (if comp-vowel
|
|
198 (nconc last comp-vowel))
|
|
199 ))
|
|
200
|
|
201 ;;;###autoload
|
|
202 (defun tibetan-compose-string (str)
|
|
203 "Compose Tibetan string STR."
|
|
204 (let ((idx 0))
|
|
205 ;; `$(7"A(B' is included in the pattern for subjoined consonants
|
|
206 ;; because we treat it specially in tibetan-add-components.
|
|
207 ;; (This feature is removed by Tomabechi 2000/06/08)
|
|
208 (while (setq idx (string-match tibetan-composable-pattern str idx))
|
|
209 (let ((from idx)
|
|
210 (to (match-end 0))
|
|
211 components)
|
|
212 (if (eq (string-match tibetan-precomposition-rule-regexp str idx) idx)
|
|
213 (setq idx (match-end 0)
|
|
214 components
|
|
215 (list (string-to-char
|
|
216 (cdr
|
|
217 (assoc (match-string 0 str)
|
|
218 tibetan-precomposition-rule-alist)))))
|
|
219 (setq components (list (aref str idx))
|
|
220 idx (1+ idx)))
|
|
221 (while (< idx to)
|
|
222 (tibetan-add-components components (aref str idx))
|
|
223 (setq idx (1+ idx)))
|
|
224 (compose-string str from to components))))
|
|
225 str)
|
|
226
|
|
227 ;;;###autoload
|
|
228 (defun tibetan-compose-region (beg end)
|
|
229 "Compose Tibetan text the region BEG and END."
|
|
230 (interactive "r")
|
788
|
231 ;(let (str result chars)
|
771
|
232 (save-excursion
|
|
233 (save-restriction
|
|
234 (narrow-to-region beg end)
|
|
235 (goto-char (point-min))
|
|
236 ;; `$(7"A(B' is included in the pattern for subjoined consonants
|
|
237 ;; because we treat it specially in tibetan-add-components.
|
|
238 ;; (This feature is removed by Tomabechi 2000/06/08)
|
|
239 (while (re-search-forward tibetan-composable-pattern nil t)
|
|
240 (let ((from (match-beginning 0))
|
|
241 (to (match-end 0))
|
|
242 components)
|
|
243 (goto-char from)
|
|
244 (if (looking-at tibetan-precomposition-rule-regexp)
|
|
245 (progn
|
|
246 (setq components
|
|
247 (list (string-to-char
|
|
248 (cdr
|
|
249 (assoc (match-string 0)
|
|
250 tibetan-precomposition-rule-alist)))))
|
|
251 (goto-char (match-end 0)))
|
|
252 (setq components (list (char-after from)))
|
|
253 (forward-char 1))
|
|
254 (while (< (point) to)
|
|
255 (tibetan-add-components components (following-char))
|
|
256 (forward-char 1))
|
788
|
257 (compose-region from to components)))))
|
|
258 ;)
|
|
259 )
|
771
|
260
|
|
261 (defvar tibetan-decompose-precomposition-alist
|
|
262 (mapcar (function (lambda (x) (cons (string-to-char (cdr x)) (car x))))
|
|
263 tibetan-precomposition-rule-alist))
|
|
264
|
|
265 ;;;###autoload
|
|
266 (defun tibetan-decompose-region (from to)
|
|
267 "Decompose Tibetan text in the region FROM and TO.
|
|
268 This is different from decompose-region because precomposed Tibetan characters
|
|
269 are decomposed into normal Tiebtan character sequences."
|
|
270 (interactive "r")
|
|
271 (save-restriction
|
|
272 (narrow-to-region from to)
|
|
273 (decompose-region from to)
|
|
274 (goto-char from)
|
|
275 (while (not (eobp))
|
|
276 (let* ((char (following-char))
|
|
277 (slot (assq char tibetan-decompose-precomposition-alist)))
|
|
278 (if slot
|
|
279 (progn
|
|
280 (delete-char 1)
|
|
281 (insert (cdr slot)))
|
|
282 (forward-char 1))))))
|
|
283
|
|
284
|
|
285 ;;;###autoload
|
|
286 (defun tibetan-decompose-string (str)
|
|
287 "Decompose Tibetan string STR.
|
|
288 This is different from decompose-string because precomposed Tibetan characters
|
|
289 are decomposed into normal Tiebtan character sequences."
|
|
290 (let ((new "")
|
|
291 (len (length str))
|
|
292 (idx 0)
|
|
293 char slot)
|
|
294 (while (< idx len)
|
|
295 (setq char (aref str idx)
|
|
296 slot (assq (aref str idx) tibetan-decompose-precomposition-alist)
|
|
297 new (concat new (if slot (cdr slot) (char-to-string char)))
|
|
298 idx (1+ idx)))
|
|
299 new))
|
|
300
|
|
301 ;;;###autoload
|
|
302 (defun tibetan-composition-function (from to pattern &optional string)
|
|
303 (if string
|
|
304 (tibetan-compose-string string)
|
|
305 (tibetan-compose-region from to))
|
|
306 (- to from))
|
|
307
|
|
308 ;;;
|
|
309 ;;; This variable is used to avoid repeated decomposition.
|
|
310 ;;;
|
|
311 (setq-default tibetan-decomposed nil)
|
|
312
|
|
313 ;;;###autoload
|
|
314 (defun tibetan-decompose-buffer ()
|
|
315 "Decomposes Tibetan characters in the buffer into their components.
|
|
316 See also the documentation of the function `tibetan-decompose-region'."
|
|
317 (interactive)
|
|
318 (make-local-variable 'tibetan-decomposed)
|
|
319 (cond ((not tibetan-decomposed)
|
|
320 (tibetan-decompose-region (point-min) (point-max))
|
|
321 (setq tibetan-decomposed t))))
|
|
322
|
|
323 ;;;###autoload
|
|
324 (defun tibetan-compose-buffer ()
|
|
325 "Composes Tibetan character components in the buffer.
|
|
326 See also docstring of the function tibetan-compose-region."
|
|
327 (interactive)
|
|
328 (make-local-variable 'tibetan-decomposed)
|
|
329 (tibetan-compose-region (point-min) (point-max))
|
|
330 (setq tibetan-decomposed nil))
|
|
331
|
|
332 ;;;###autoload
|
|
333 (defun tibetan-post-read-conversion (len)
|
|
334 (save-excursion
|
|
335 (save-restriction
|
|
336 (let ((buffer-modified-p (buffer-modified-p)))
|
|
337 (narrow-to-region (point) (+ (point) len))
|
|
338 (tibetan-compose-region (point-min) (point-max))
|
|
339 (set-buffer-modified-p buffer-modified-p)
|
|
340 (make-local-variable 'tibetan-decomposed)
|
|
341 (setq tibetan-decomposed nil)
|
|
342 (- (point-max) (point-min))))))
|
|
343
|
|
344
|
|
345 ;;;###autoload
|
|
346 (defun tibetan-pre-write-conversion (from to)
|
|
347 (setq tibetan-decomposed-temp tibetan-decomposed)
|
|
348 (let ((old-buf (current-buffer)))
|
|
349 (set-buffer (generate-new-buffer " *temp*"))
|
|
350 (if (stringp from)
|
|
351 (insert from)
|
|
352 (insert-buffer-substring old-buf from to))
|
|
353 (if (not tibetan-decomposed-temp)
|
|
354 (tibetan-decompose-region (point-min) (point-max)))
|
|
355 ;; Should return nil as annotations.
|
|
356 nil))
|
|
357
|
|
358 (provide 'tibet-util)
|
|
359
|
778
|
360 ;;; tibet-util.el ends here
|