comparison lisp/mule/tibet-util.el @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents
children 2923009caf47
comparison
equal deleted inserted replaced
770:336a418893b5 771:943eaba38521
1 ;;; tibet-util.el --- utilities for Tibetan -*- coding: iso-2022-7bit; -*-
2
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5
6 ;; Keywords: multilingual, Tibetan
7
8 ;; This file is part of XEmacs.
9
10 ;; XEmacs is free software; you can redistribute it and/or modify it
11 ;; under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; XEmacs is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs; see the file COPYING. If not, write to the Free
22 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 ;; 02111-1307, USA.
24
25 ;;; Synched up with: Emacs 21.0.103 (language/tibet-util.el).
26
27 ;; Author: Toru TOMABECHI, <Toru.Tomabechi@orient.unil.ch>
28
29 ;; Created: Feb. 17. 1997
30
31 ;; History:
32 ;; 1997.03.13 Modification in treatment of text properties;
33 ;; Support for some special signs and punctuations.
34 ;; 1999.10.25 Modification for a new composition way by K.Handa.
35
36 ;;; Code:
37
38 ;;;###autoload
39 (defun tibetan-char-p (ch)
40 "Check if char CH is Tibetan character.
41 Returns non-nil if CH is Tibetan. Otherwise, returns nil."
42 (memq (char-charset ch) '(tibetan tibetan-1-column)))
43
44 ;;; Functions for Tibetan <-> Tibetan-transcription.
45
46 ;;;###autoload
47 (defun tibetan-tibetan-to-transcription (str)
48 "Transcribe Tibetan string STR and return the corresponding Roman string."
49 (let (;; Accumulate transcriptions here in reverse order.
50 (trans nil)
51 (len (length str))
52 (i 0)
53 ch this-trans)
54 (while (< i len)
55 (let ((idx (string-match tibetan-precomposition-rule-regexp str i)))
56 (if (eq idx i)
57 ;; Ith character and the followings matches precomposable
58 ;; Tibetan sequence.
59 (setq i (match-end 0)
60 this-trans
61 (car (rassoc
62 (cdr (assoc (match-string 0 str)
63 tibetan-precomposition-rule-alist))
64 tibetan-precomposed-transcription-alist)))
65 (setq ch (substring str i (1+ i))
66 i (1+ i)
67 this-trans
68 (car (or (rassoc ch tibetan-consonant-transcription-alist)
69 (rassoc ch tibetan-vowel-transcription-alist)
70 (rassoc ch tibetan-subjoined-transcription-alist)))))
71 (setq trans (cons this-trans trans))))
72 (apply 'concat (nreverse trans))))
73
74 ;;;###autoload
75 (defun tibetan-transcription-to-tibetan (str)
76 "Convert Tibetan Roman string STR to Tibetan character string.
77 The returned string has no composition information."
78 (let (;; Case is significant.
79 (case-fold-search nil)
80 (idx 0)
81 ;; Accumulate Tibetan strings here in reverse order.
82 (t-str-list nil)
83 i subtrans)
84 (while (setq i (string-match tibetan-regexp str idx))
85 (if (< idx i)
86 ;; STR contains a pattern that doesn't match Tibetan
87 ;; transcription. Include the pattern as is.
88 (setq t-str-list (cons (substring str idx i) t-str-list)))
89 (setq subtrans (match-string 0 str)
90 idx (match-end 0))
91 (let ((t-char (cdr (assoc subtrans
92 tibetan-precomposed-transcription-alist))))
93 (if t-char
94 ;; SUBTRANS corresponds to a transcription for
95 ;; precomposable Tibetan sequence.
96 (setq t-char (car (rassoc t-char
97 tibetan-precomposition-rule-alist)))
98 (setq t-char
99 (cdr
100 (or (assoc subtrans tibetan-consonant-transcription-alist)
101 (assoc subtrans tibetan-vowel-transcription-alist)
102 (assoc subtrans tibetan-modifier-transcription-alist)
103 (assoc subtrans tibetan-subjoined-transcription-alist)))))
104 (setq t-str-list (cons t-char t-str-list))))
105 (if (< idx (length str))
106 (setq t-str-list (cons (substring str idx) t-str-list)))
107 (apply 'concat (nreverse t-str-list))))
108
109 ;;;
110 ;;; Functions for composing/decomposing Tibetan sequence.
111 ;;;
112 ;;; A Tibetan syllable is typically structured as follows:
113 ;;;
114 ;;; [Prefix] C [C+] V [M] [Suffix [Post suffix]]
115 ;;;
116 ;;; where C's are all vertically stacked, V appears below or above
117 ;;; consonant cluster and M is always put above the C[C+]V combination.
118 ;;; (Sanskrit visarga, though it is a vowel modifier, is considered
119 ;;; to be a punctuation.)
120 ;;;
121 ;;; Here are examples of the words "bsgrubs" and "hfauM"
122 ;;;
123 ;;; 4$(7"70"714%qx!"U0"G###C"U14"70"714"G0"G1(B 4$(7"Hx!"Rx!"Ur'"_0"H"R"U"_1(B
124 ;;;
125 ;;; M
126 ;;; b s b s h
127 ;;; g fa
128 ;;; r u
129 ;;; u
130 ;;;
131 ;;; Consonants `'' ($(7"A(B), `w' ($(7">(B), `y' ($(7"B(B), `r' ($(7"C(B) take special
132 ;;; forms when they are used as subjoined consonant. Consonant `r'
133 ;;; takes another special form when used as superjoined in such a case
134 ;;; as "rka", while it does not change its form when conjoined with
135 ;;; subjoined `'', `w' or `y' as in "rwa", "rya".
136
137 ;; Append a proper composition rule and glyph to COMPONENTS to compose
138 ;; CHAR with a composition that has COMPONENTS.
139
140 (defun tibetan-add-components (components char)
141 (let ((last (last components))
142 (stack-upper '(tc . bc))
143 (stack-under '(bc . tc))
144 rule comp-vowel tmp)
145 ;; Special treatment for 'a chung.
146 ;; If 'a follows a consonant, turn it into the subjoined form.
147 ;; * Disabled by Tomabechi 2000/06/09 *
148 ;; Because in Unicode, $(7"A(B may follow directly a consonant without
149 ;; any intervening vowel, as in 4$(7"90"914""0"""Q14"A0"A1!;(B=4$(7"90"91(B 4$(7""0""1(B 4$(7"A0"A1(B not 4$(7"90"91(B 4$(7""0""1(B $(7"Q(B 4$(7"A0"A1(B
150 ;;(if (and (= char ?$(7"A(B)
151 ;; (aref (char-category-set (car last)) ?0))
152 ;; (setq char ?$(7"R(B)) ;; modified for new font by Tomabechi 1999/12/10
153
154 ;; Composite vowel signs are decomposed before being added
155 ;; Added by Tomabechi 2000/06/08
156 (if (memq char '(?$(7"T(B ?$(7"V(B ?$(7"W(B ?$(7"X(B ?$(7"Y(B ?$(7"Z(B ?$(7"b(B))
157 (setq comp-vowel
158 (copy-sequence
159 (cddr (assoc (char-to-string char)
160 tibetan-composite-vowel-alist)))
161 char
162 (cadr (assoc (char-to-string char)
163 tibetan-composite-vowel-alist))))
164 (cond
165 ;; Compose upper vowel sign vertically over.
166 ((aref (char-category-set char) ?2)
167 (setq rule stack-upper))
168
169 ;; Compose lower vowel sign vertically under.
170 ((aref (char-category-set char) ?3)
171 (if (eq char ?$(7"Q(B) ;; `$(7"Q(B' should not visible when composed.
172 (setq rule nil)
173 (setq rule stack-under)))
174 ;; Transform ra-mgo (superscribed r) if followed by a subjoined
175 ;; consonant other than w, ', y, r.
176 ((and (= (car last) ?$(7"C(B)
177 (not (memq char '(?$(7#>(B ?$(7"R(B ?$(7#B(B ?$(7#C(B))))
178 (setcar last ?$(7!"(B) ;; modified for newfont by Tomabechi 1999/12/10
179 (setq rule stack-under))
180 ;; Transform initial base consonant if followed by a subjoined
181 ;; consonant but 'a.
182 (t
183 (let ((laststr (char-to-string (car last))))
184 (if (and (/= char ?$(7"R(B) ;; modified for new font by Tomabechi
185 (string-match "[$(7"!(B-$(7"="?"@"D(B-$(7"J"K(B]" laststr))
186 (setcar last (string-to-char
187 (cdr (assoc (char-to-string (car last))
188 tibetan-base-to-subjoined-alist)))))
189 (setq rule stack-under))))
190
191 (if rule
192 (setcdr last (list rule char)))
193 ;; Added by Tomabechi 2000/06/08
194 (if comp-vowel
195 (nconc last comp-vowel))
196 ))
197
198 ;;;###autoload
199 (defun tibetan-compose-string (str)
200 "Compose Tibetan string STR."
201 (let ((idx 0))
202 ;; `$(7"A(B' is included in the pattern for subjoined consonants
203 ;; because we treat it specially in tibetan-add-components.
204 ;; (This feature is removed by Tomabechi 2000/06/08)
205 (while (setq idx (string-match tibetan-composable-pattern str idx))
206 (let ((from idx)
207 (to (match-end 0))
208 components)
209 (if (eq (string-match tibetan-precomposition-rule-regexp str idx) idx)
210 (setq idx (match-end 0)
211 components
212 (list (string-to-char
213 (cdr
214 (assoc (match-string 0 str)
215 tibetan-precomposition-rule-alist)))))
216 (setq components (list (aref str idx))
217 idx (1+ idx)))
218 (while (< idx to)
219 (tibetan-add-components components (aref str idx))
220 (setq idx (1+ idx)))
221 (compose-string str from to components))))
222 str)
223
224 ;;;###autoload
225 (defun tibetan-compose-region (beg end)
226 "Compose Tibetan text the region BEG and END."
227 (interactive "r")
228 (let (str result chars)
229 (save-excursion
230 (save-restriction
231 (narrow-to-region beg end)
232 (goto-char (point-min))
233 ;; `$(7"A(B' is included in the pattern for subjoined consonants
234 ;; because we treat it specially in tibetan-add-components.
235 ;; (This feature is removed by Tomabechi 2000/06/08)
236 (while (re-search-forward tibetan-composable-pattern nil t)
237 (let ((from (match-beginning 0))
238 (to (match-end 0))
239 components)
240 (goto-char from)
241 (if (looking-at tibetan-precomposition-rule-regexp)
242 (progn
243 (setq components
244 (list (string-to-char
245 (cdr
246 (assoc (match-string 0)
247 tibetan-precomposition-rule-alist)))))
248 (goto-char (match-end 0)))
249 (setq components (list (char-after from)))
250 (forward-char 1))
251 (while (< (point) to)
252 (tibetan-add-components components (following-char))
253 (forward-char 1))
254 (compose-region from to components)))))))
255
256 (defvar tibetan-decompose-precomposition-alist
257 (mapcar (function (lambda (x) (cons (string-to-char (cdr x)) (car x))))
258 tibetan-precomposition-rule-alist))
259
260 ;;;###autoload
261 (defun tibetan-decompose-region (from to)
262 "Decompose Tibetan text in the region FROM and TO.
263 This is different from decompose-region because precomposed Tibetan characters
264 are decomposed into normal Tiebtan character sequences."
265 (interactive "r")
266 (save-restriction
267 (narrow-to-region from to)
268 (decompose-region from to)
269 (goto-char from)
270 (while (not (eobp))
271 (let* ((char (following-char))
272 (slot (assq char tibetan-decompose-precomposition-alist)))
273 (if slot
274 (progn
275 (delete-char 1)
276 (insert (cdr slot)))
277 (forward-char 1))))))
278
279
280 ;;;###autoload
281 (defun tibetan-decompose-string (str)
282 "Decompose Tibetan string STR.
283 This is different from decompose-string because precomposed Tibetan characters
284 are decomposed into normal Tiebtan character sequences."
285 (let ((new "")
286 (len (length str))
287 (idx 0)
288 char slot)
289 (while (< idx len)
290 (setq char (aref str idx)
291 slot (assq (aref str idx) tibetan-decompose-precomposition-alist)
292 new (concat new (if slot (cdr slot) (char-to-string char)))
293 idx (1+ idx)))
294 new))
295
296 ;;;###autoload
297 (defun tibetan-composition-function (from to pattern &optional string)
298 (if string
299 (tibetan-compose-string string)
300 (tibetan-compose-region from to))
301 (- to from))
302
303 ;;;
304 ;;; This variable is used to avoid repeated decomposition.
305 ;;;
306 (setq-default tibetan-decomposed nil)
307
308 ;;;###autoload
309 (defun tibetan-decompose-buffer ()
310 "Decomposes Tibetan characters in the buffer into their components.
311 See also the documentation of the function `tibetan-decompose-region'."
312 (interactive)
313 (make-local-variable 'tibetan-decomposed)
314 (cond ((not tibetan-decomposed)
315 (tibetan-decompose-region (point-min) (point-max))
316 (setq tibetan-decomposed t))))
317
318 ;;;###autoload
319 (defun tibetan-compose-buffer ()
320 "Composes Tibetan character components in the buffer.
321 See also docstring of the function tibetan-compose-region."
322 (interactive)
323 (make-local-variable 'tibetan-decomposed)
324 (tibetan-compose-region (point-min) (point-max))
325 (setq tibetan-decomposed nil))
326
327 ;;;###autoload
328 (defun tibetan-post-read-conversion (len)
329 (save-excursion
330 (save-restriction
331 (let ((buffer-modified-p (buffer-modified-p)))
332 (narrow-to-region (point) (+ (point) len))
333 (tibetan-compose-region (point-min) (point-max))
334 (set-buffer-modified-p buffer-modified-p)
335 (make-local-variable 'tibetan-decomposed)
336 (setq tibetan-decomposed nil)
337 (- (point-max) (point-min))))))
338
339
340 ;;;###autoload
341 (defun tibetan-pre-write-conversion (from to)
342 (setq tibetan-decomposed-temp tibetan-decomposed)
343 (let ((old-buf (current-buffer)))
344 (set-buffer (generate-new-buffer " *temp*"))
345 (if (stringp from)
346 (insert from)
347 (insert-buffer-substring old-buf from to))
348 (if (not tibetan-decomposed-temp)
349 (tibetan-decompose-region (point-min) (point-max)))
350 ;; Should return nil as annotations.
351 nil))
352
353 (provide 'tibet-util)
354
355 ;;; language/tibet-util.el ends here.