771
+ − 1 ;;; tibet-util.el --- utilities for Tibetan -*- coding: iso-2022-7bit; -*-
+ − 2
+ − 3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
+ − 4 ;; Licensed to the Free Software Foundation.
+ − 5
+ − 6 ;; Keywords: multilingual, Tibetan
+ − 7
+ − 8 ;; This file is part of XEmacs.
+ − 9
+ − 10 ;; XEmacs is free software; you can redistribute it and/or modify it
+ − 11 ;; under the terms of the GNU General Public License as published by
+ − 12 ;; the Free Software Foundation; either version 2, or (at your option)
+ − 13 ;; any later version.
+ − 14
+ − 15 ;; XEmacs is distributed in the hope that it will be useful, but
+ − 16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
+ − 17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ − 18 ;; General Public License for more details.
+ − 19
+ − 20 ;; You should have received a copy of the GNU General Public License
+ − 21 ;; along with XEmacs; see the file COPYING. If not, write to the Free
+ − 22 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ − 23 ;; 02111-1307, USA.
+ − 24
778
+ − 25 ;;; Synched up with: Emacs 21.1 (language/tibet-util.el).
771
+ − 26
+ − 27 ;; Author: Toru TOMABECHI, <Toru.Tomabechi@orient.unil.ch>
+ − 28
+ − 29 ;; Created: Feb. 17. 1997
+ − 30
778
+ − 31 ;;; History:
771
+ − 32 ;; 1997.03.13 Modification in treatment of text properties;
+ − 33 ;; Support for some special signs and punctuations.
+ − 34 ;; 1999.10.25 Modification for a new composition way by K.Handa.
+ − 35
778
+ − 36 ;;; Commentary:
+ − 37
771
+ − 38 ;;; Code:
+ − 39
+ − 40 ;;;###autoload
+ − 41 (defun tibetan-char-p (ch)
+ − 42 "Check if char CH is Tibetan character.
+ − 43 Returns non-nil if CH is Tibetan. Otherwise, returns nil."
+ − 44 (memq (char-charset ch) '(tibetan tibetan-1-column)))
+ − 45
+ − 46 ;;; Functions for Tibetan <-> Tibetan-transcription.
+ − 47
+ − 48 ;;;###autoload
+ − 49 (defun tibetan-tibetan-to-transcription (str)
+ − 50 "Transcribe Tibetan string STR and return the corresponding Roman string."
+ − 51 (let (;; Accumulate transcriptions here in reverse order.
+ − 52 (trans nil)
+ − 53 (len (length str))
+ − 54 (i 0)
+ − 55 ch this-trans)
+ − 56 (while (< i len)
+ − 57 (let ((idx (string-match tibetan-precomposition-rule-regexp str i)))
+ − 58 (if (eq idx i)
+ − 59 ;; Ith character and the followings matches precomposable
+ − 60 ;; Tibetan sequence.
+ − 61 (setq i (match-end 0)
+ − 62 this-trans
+ − 63 (car (rassoc
+ − 64 (cdr (assoc (match-string 0 str)
+ − 65 tibetan-precomposition-rule-alist))
+ − 66 tibetan-precomposed-transcription-alist)))
+ − 67 (setq ch (substring str i (1+ i))
+ − 68 i (1+ i)
+ − 69 this-trans
+ − 70 (car (or (rassoc ch tibetan-consonant-transcription-alist)
+ − 71 (rassoc ch tibetan-vowel-transcription-alist)
+ − 72 (rassoc ch tibetan-subjoined-transcription-alist)))))
+ − 73 (setq trans (cons this-trans trans))))
+ − 74 (apply 'concat (nreverse trans))))
+ − 75
+ − 76 ;;;###autoload
+ − 77 (defun tibetan-transcription-to-tibetan (str)
+ − 78 "Convert Tibetan Roman string STR to Tibetan character string.
+ − 79 The returned string has no composition information."
+ − 80 (let (;; Case is significant.
+ − 81 (case-fold-search nil)
+ − 82 (idx 0)
+ − 83 ;; Accumulate Tibetan strings here in reverse order.
+ − 84 (t-str-list nil)
+ − 85 i subtrans)
+ − 86 (while (setq i (string-match tibetan-regexp str idx))
+ − 87 (if (< idx i)
+ − 88 ;; STR contains a pattern that doesn't match Tibetan
+ − 89 ;; transcription. Include the pattern as is.
+ − 90 (setq t-str-list (cons (substring str idx i) t-str-list)))
+ − 91 (setq subtrans (match-string 0 str)
+ − 92 idx (match-end 0))
+ − 93 (let ((t-char (cdr (assoc subtrans
+ − 94 tibetan-precomposed-transcription-alist))))
+ − 95 (if t-char
+ − 96 ;; SUBTRANS corresponds to a transcription for
+ − 97 ;; precomposable Tibetan sequence.
+ − 98 (setq t-char (car (rassoc t-char
+ − 99 tibetan-precomposition-rule-alist)))
+ − 100 (setq t-char
+ − 101 (cdr
+ − 102 (or (assoc subtrans tibetan-consonant-transcription-alist)
+ − 103 (assoc subtrans tibetan-vowel-transcription-alist)
+ − 104 (assoc subtrans tibetan-modifier-transcription-alist)
+ − 105 (assoc subtrans tibetan-subjoined-transcription-alist)))))
+ − 106 (setq t-str-list (cons t-char t-str-list))))
+ − 107 (if (< idx (length str))
+ − 108 (setq t-str-list (cons (substring str idx) t-str-list)))
+ − 109 (apply 'concat (nreverse t-str-list))))
+ − 110
+ − 111 ;;;
+ − 112 ;;; Functions for composing/decomposing Tibetan sequence.
+ − 113 ;;;
+ − 114 ;;; A Tibetan syllable is typically structured as follows:
+ − 115 ;;;
+ − 116 ;;; [Prefix] C [C+] V [M] [Suffix [Post suffix]]
+ − 117 ;;;
+ − 118 ;;; where C's are all vertically stacked, V appears below or above
+ − 119 ;;; consonant cluster and M is always put above the C[C+]V combination.
+ − 120 ;;; (Sanskrit visarga, though it is a vowel modifier, is considered
+ − 121 ;;; to be a punctuation.)
+ − 122 ;;;
+ − 123 ;;; Here are examples of the words "bsgrubs" and "hfauM"
+ − 124 ;;;
+ − 125 ;;; 4$(7"70"714%qx!"U0"G###C"U14"70"714"G0"G1(B 4$(7"Hx!"Rx!"Ur'"_0"H"R"U"_1(B
+ − 126 ;;;
+ − 127 ;;; M
+ − 128 ;;; b s b s h
+ − 129 ;;; g fa
+ − 130 ;;; r u
+ − 131 ;;; u
+ − 132 ;;;
+ − 133 ;;; Consonants `'' ($(7"A(B), `w' ($(7">(B), `y' ($(7"B(B), `r' ($(7"C(B) take special
+ − 134 ;;; forms when they are used as subjoined consonant. Consonant `r'
+ − 135 ;;; takes another special form when used as superjoined in such a case
+ − 136 ;;; as "rka", while it does not change its form when conjoined with
+ − 137 ;;; subjoined `'', `w' or `y' as in "rwa", "rya".
+ − 138
+ − 139 ;; Append a proper composition rule and glyph to COMPONENTS to compose
+ − 140 ;; CHAR with a composition that has COMPONENTS.
+ − 141
+ − 142 (defun tibetan-add-components (components char)
+ − 143 (let ((last (last components))
+ − 144 (stack-upper '(tc . bc))
+ − 145 (stack-under '(bc . tc))
788
+ − 146 rule comp-vowel ;tmp
+ − 147 )
771
+ − 148 ;; Special treatment for 'a chung.
+ − 149 ;; If 'a follows a consonant, turn it into the subjoined form.
+ − 150 ;; * Disabled by Tomabechi 2000/06/09 *
+ − 151 ;; Because in Unicode, $(7"A(B may follow directly a consonant without
+ − 152 ;; any intervening vowel, as in 4$(7"90"914""0"""Q14"A0"A1!;(B=4$(7"90"91(B 4$(7""0""1(B 4$(7"A0"A1(B not 4$(7"90"91(B 4$(7""0""1(B $(7"Q(B 4$(7"A0"A1(B
+ − 153 ;;(if (and (= char ?$(7"A(B)
788
+ − 154 ;; (char-in-category-p (car last) ?0))
771
+ − 155 ;; (setq char ?$(7"R(B)) ;; modified for new font by Tomabechi 1999/12/10
+ − 156
+ − 157 ;; Composite vowel signs are decomposed before being added
+ − 158 ;; Added by Tomabechi 2000/06/08
+ − 159 (if (memq char '(?$(7"T(B ?$(7"V(B ?$(7"W(B ?$(7"X(B ?$(7"Y(B ?$(7"Z(B ?$(7"b(B))
+ − 160 (setq comp-vowel
+ − 161 (copy-sequence
+ − 162 (cddr (assoc (char-to-string char)
+ − 163 tibetan-composite-vowel-alist)))
+ − 164 char
+ − 165 (cadr (assoc (char-to-string char)
+ − 166 tibetan-composite-vowel-alist))))
+ − 167 (cond
+ − 168 ;; Compose upper vowel sign vertically over.
788
+ − 169 ((char-in-category-p char ?2)
771
+ − 170 (setq rule stack-upper))
+ − 171
+ − 172 ;; Compose lower vowel sign vertically under.
788
+ − 173 ((char-in-category-p char ?3)
771
+ − 174 (if (eq char ?$(7"Q(B) ;; `$(7"Q(B' should not visible when composed.
+ − 175 (setq rule nil)
+ − 176 (setq rule stack-under)))
+ − 177 ;; Transform ra-mgo (superscribed r) if followed by a subjoined
+ − 178 ;; consonant other than w, ', y, r.
+ − 179 ((and (= (car last) ?$(7"C(B)
+ − 180 (not (memq char '(?$(7#>(B ?$(7"R(B ?$(7#B(B ?$(7#C(B))))
+ − 181 (setcar last ?$(7!"(B) ;; modified for newfont by Tomabechi 1999/12/10
+ − 182 (setq rule stack-under))
+ − 183 ;; Transform initial base consonant if followed by a subjoined
+ − 184 ;; consonant but 'a.
+ − 185 (t
+ − 186 (let ((laststr (char-to-string (car last))))
+ − 187 (if (and (/= char ?$(7"R(B) ;; modified for new font by Tomabechi
+ − 188 (string-match "[$(7"!(B-$(7"="?"@"D(B-$(7"J"K(B]" laststr))
+ − 189 (setcar last (string-to-char
+ − 190 (cdr (assoc (char-to-string (car last))
+ − 191 tibetan-base-to-subjoined-alist)))))
+ − 192 (setq rule stack-under))))
+ − 193
+ − 194 (if rule
+ − 195 (setcdr last (list rule char)))
+ − 196 ;; Added by Tomabechi 2000/06/08
+ − 197 (if comp-vowel
+ − 198 (nconc last comp-vowel))
+ − 199 ))
+ − 200
+ − 201 ;;;###autoload
+ − 202 (defun tibetan-compose-string (str)
+ − 203 "Compose Tibetan string STR."
+ − 204 (let ((idx 0))
+ − 205 ;; `$(7"A(B' is included in the pattern for subjoined consonants
+ − 206 ;; because we treat it specially in tibetan-add-components.
+ − 207 ;; (This feature is removed by Tomabechi 2000/06/08)
+ − 208 (while (setq idx (string-match tibetan-composable-pattern str idx))
+ − 209 (let ((from idx)
+ − 210 (to (match-end 0))
+ − 211 components)
+ − 212 (if (eq (string-match tibetan-precomposition-rule-regexp str idx) idx)
+ − 213 (setq idx (match-end 0)
+ − 214 components
+ − 215 (list (string-to-char
+ − 216 (cdr
+ − 217 (assoc (match-string 0 str)
+ − 218 tibetan-precomposition-rule-alist)))))
+ − 219 (setq components (list (aref str idx))
+ − 220 idx (1+ idx)))
+ − 221 (while (< idx to)
+ − 222 (tibetan-add-components components (aref str idx))
+ − 223 (setq idx (1+ idx)))
+ − 224 (compose-string str from to components))))
+ − 225 str)
+ − 226
+ − 227 ;;;###autoload
+ − 228 (defun tibetan-compose-region (beg end)
+ − 229 "Compose Tibetan text the region BEG and END."
+ − 230 (interactive "r")
788
+ − 231 ;(let (str result chars)
771
+ − 232 (save-excursion
+ − 233 (save-restriction
+ − 234 (narrow-to-region beg end)
+ − 235 (goto-char (point-min))
+ − 236 ;; `$(7"A(B' is included in the pattern for subjoined consonants
+ − 237 ;; because we treat it specially in tibetan-add-components.
+ − 238 ;; (This feature is removed by Tomabechi 2000/06/08)
+ − 239 (while (re-search-forward tibetan-composable-pattern nil t)
+ − 240 (let ((from (match-beginning 0))
+ − 241 (to (match-end 0))
+ − 242 components)
+ − 243 (goto-char from)
+ − 244 (if (looking-at tibetan-precomposition-rule-regexp)
+ − 245 (progn
+ − 246 (setq components
+ − 247 (list (string-to-char
+ − 248 (cdr
+ − 249 (assoc (match-string 0)
+ − 250 tibetan-precomposition-rule-alist)))))
+ − 251 (goto-char (match-end 0)))
+ − 252 (setq components (list (char-after from)))
+ − 253 (forward-char 1))
+ − 254 (while (< (point) to)
+ − 255 (tibetan-add-components components (following-char))
+ − 256 (forward-char 1))
788
+ − 257 (compose-region from to components)))))
+ − 258 ;)
+ − 259 )
771
+ − 260
+ − 261 (defvar tibetan-decompose-precomposition-alist
+ − 262 (mapcar (function (lambda (x) (cons (string-to-char (cdr x)) (car x))))
+ − 263 tibetan-precomposition-rule-alist))
+ − 264
+ − 265 ;;;###autoload
+ − 266 (defun tibetan-decompose-region (from to)
+ − 267 "Decompose Tibetan text in the region FROM and TO.
+ − 268 This is different from decompose-region because precomposed Tibetan characters
+ − 269 are decomposed into normal Tiebtan character sequences."
+ − 270 (interactive "r")
+ − 271 (save-restriction
+ − 272 (narrow-to-region from to)
+ − 273 (decompose-region from to)
+ − 274 (goto-char from)
+ − 275 (while (not (eobp))
+ − 276 (let* ((char (following-char))
+ − 277 (slot (assq char tibetan-decompose-precomposition-alist)))
+ − 278 (if slot
+ − 279 (progn
+ − 280 (delete-char 1)
+ − 281 (insert (cdr slot)))
+ − 282 (forward-char 1))))))
+ − 283
+ − 284
+ − 285 ;;;###autoload
+ − 286 (defun tibetan-decompose-string (str)
+ − 287 "Decompose Tibetan string STR.
+ − 288 This is different from decompose-string because precomposed Tibetan characters
+ − 289 are decomposed into normal Tiebtan character sequences."
+ − 290 (let ((new "")
+ − 291 (len (length str))
+ − 292 (idx 0)
+ − 293 char slot)
+ − 294 (while (< idx len)
+ − 295 (setq char (aref str idx)
+ − 296 slot (assq (aref str idx) tibetan-decompose-precomposition-alist)
+ − 297 new (concat new (if slot (cdr slot) (char-to-string char)))
+ − 298 idx (1+ idx)))
+ − 299 new))
+ − 300
+ − 301 ;;;###autoload
+ − 302 (defun tibetan-composition-function (from to pattern &optional string)
+ − 303 (if string
+ − 304 (tibetan-compose-string string)
+ − 305 (tibetan-compose-region from to))
+ − 306 (- to from))
+ − 307
+ − 308 ;;;
+ − 309 ;;; This variable is used to avoid repeated decomposition.
+ − 310 ;;;
+ − 311 (setq-default tibetan-decomposed nil)
+ − 312
+ − 313 ;;;###autoload
+ − 314 (defun tibetan-decompose-buffer ()
+ − 315 "Decomposes Tibetan characters in the buffer into their components.
+ − 316 See also the documentation of the function `tibetan-decompose-region'."
+ − 317 (interactive)
+ − 318 (make-local-variable 'tibetan-decomposed)
+ − 319 (cond ((not tibetan-decomposed)
+ − 320 (tibetan-decompose-region (point-min) (point-max))
+ − 321 (setq tibetan-decomposed t))))
+ − 322
+ − 323 ;;;###autoload
+ − 324 (defun tibetan-compose-buffer ()
+ − 325 "Composes Tibetan character components in the buffer.
+ − 326 See also docstring of the function tibetan-compose-region."
+ − 327 (interactive)
+ − 328 (make-local-variable 'tibetan-decomposed)
+ − 329 (tibetan-compose-region (point-min) (point-max))
+ − 330 (setq tibetan-decomposed nil))
+ − 331
+ − 332 ;;;###autoload
+ − 333 (defun tibetan-post-read-conversion (len)
+ − 334 (save-excursion
+ − 335 (save-restriction
+ − 336 (let ((buffer-modified-p (buffer-modified-p)))
+ − 337 (narrow-to-region (point) (+ (point) len))
+ − 338 (tibetan-compose-region (point-min) (point-max))
+ − 339 (set-buffer-modified-p buffer-modified-p)
+ − 340 (make-local-variable 'tibetan-decomposed)
+ − 341 (setq tibetan-decomposed nil)
+ − 342 (- (point-max) (point-min))))))
+ − 343
+ − 344
+ − 345 ;;;###autoload
+ − 346 (defun tibetan-pre-write-conversion (from to)
+ − 347 (setq tibetan-decomposed-temp tibetan-decomposed)
+ − 348 (let ((old-buf (current-buffer)))
+ − 349 (set-buffer (generate-new-buffer " *temp*"))
+ − 350 (if (stringp from)
+ − 351 (insert from)
+ − 352 (insert-buffer-substring old-buf from to))
+ − 353 (if (not tibetan-decomposed-temp)
+ − 354 (tibetan-decompose-region (point-min) (point-max)))
+ − 355 ;; Should return nil as annotations.
+ − 356 nil))
+ − 357
+ − 358 (provide 'tibet-util)
+ − 359
778
+ − 360 ;;; tibet-util.el ends here