comparison lisp/mule/indian.el @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents
children 2923009caf47
comparison
equal deleted inserted replaced
770:336a418893b5 771:943eaba38521
1 ;;; indian.el --- Support for Indian Languages -*- coding: iso-2022-7bit; -*-
2
3 ;; Copyright (C) 1995 Free Software Foundation, Inc.
4
5 ;; Author: KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
6
7 ;; Keywords: multilingual, Indian
8
9 ;; This file is part of XEmacs.
10
11 ;; XEmacs is free software; you can redistribute it and/or modify it
12 ;; under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; XEmacs is distributed in the hope that it will be useful, but
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs; see the file COPYING. If not, write to the Free
23 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24 ;; 02111-1307, USA.
25
26 ;;; Synched up with: Emacs 21.0.103 (language/indian.el).
27
28 ;;; Commentary:
29
30 ;; History:
31 ;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
32
33 ;; For Indian, the character set IS 13194 is supported.
34 ;;
35 ;; IS 13194 does not specifically assign a glyph to each character.
36 ;; The following code is not specific to any one Indian language.
37 ;;
38 ;; Eventually, this code will support generic information about
39 ;; the following scripts.
40 ;;
41 ;; Devanagari
42 ;; Bengali
43 ;; Gurmukhi
44 ;; Gujarati
45 ;; Oriya
46 ;; Tamil
47 ;; Telugu
48 ;; Kannada
49 ;; Malayalam
50 ;;
51 ;; In this file, charsets other than charset-ascii and charset-indian-is13194
52 ;; should not be used except in the comment.
53
54 ;;; Code:
55
56 ;; The following is what you see when you refer to the Emacs
57 ;; representations of IS 13194 characters. However, this is merely a
58 ;; tentative appearance, and you must convert them with an
59 ;; indian-to-xxxxxx (specific script) function to use them.
60 ;; Devanagari is no exception to this rule.
61
62 ;; 0xa0 //(5!"#$%&'()*+,-./(B
63 ;; 0xb0 (50123456789:;<=>?(B
64 ;; 0xc0 (5@ABCDEFGHIJKLMNO(B
65 ;; 0xd0 (5PQRSTUVWXYZ[\]^_(B
66 ;; 0xe0 (5`abcdefghijklmno(B
67 ;; 0xf0 (5pqrstuvwxyz{|}~(B//
68
69 ;; Note - In IS 13194, several symbols are obtained by special
70 ;; combination of several characters and Nukta sign.
71 ;;
72 ;; Sanskrit Vowel R -> (5*(B + (5i(B
73 ;; Sanskrit Vowel L -> (5&(B + (5i(B
74 ;; Sanskrit Vowel LL -> (5'(B + (5i(B
75 ;; Sanskrit Avagrah -> (5j(B + (5i(B
76 ;; OM -> (5!(B + (5i(B
77 ;;
78 ;; Note - IS 13194 defines ATR(0xEF) and EXT(0xF0), but they are
79 ;; not used in Emacs.
80 ;;
81 ;; Note - the above characters DO NOT represent any script. For
82 ;; example, if you want to obtain Devanagari character, you must do
83 ;; something like the following.
84 ;;
85 ;; (char-to-string (indian-to-devanagari ?(5$(B))
86 ;; "$(5!$(B"
87
88 ;;; ITRANS
89 ;;
90 ;; ITRANS is one of the most popular methods of exchanging Indian scripts
91 ;; electronically. Here is the table to convert between ITRANS code and
92 ;; IS 13194 code.
93
(defvar indian-itrans-consonant-alist
  '(("k"   . "(53(B")
    ("kh"  . "(54(B")
    ("g"   . "(55(B")
    ("gh"  . "(56(B")
    ("N^"  . "(57(B")
    ("ch"  . "(58(B")
    ("chh" . "(59(B")
    ("j"   . "(5:(B")
    ("jh"  . "(5;(B")
    ("JN"  . "(5<(B")
    ("T"   . "(5=(B")
    ("Th"  . "(5>(B")
    ("D"   . "(5?(B")
    ("Dh"  . "(5@(B")
    ("N"   . "(5A(B")
    ("t"   . "(5B(B")
    ("th"  . "(5C(B")
    ("d"   . "(5D(B")
    ("dh"  . "(5E(B")
    ("n"   . "(5F(B")
    ("nh"  . "(5G(B")               ; Only for non-Devanagari languages.
    ("p"   . "(5H(B")
    ("ph"  . "(5I(B")
    ("b"   . "(5J(B")
    ("bh"  . "(5K(B")
    ("m"   . "(5L(B")
    ("y"   . "(5M(B")
    ("yh"  . "(5N(B")               ; Only for non-Devanagari languages.
    ("r"   . "(5O(B")
    ("rh"  . "(5P(B")               ; Only for non-Devanagari languages.
    ("l"   . "(5Q(B")
    ("v"   . "(5T(B")
    ("sh"  . "(5U(B")
    ("shh" . "(5V(B")
    ("s"   . "(5W(B")
    ("h"   . "(5X(B")
    ;; "ld" and "L" are two spellings of the same target string.
    ("ld"  . "(5R(B")
    ("L"   . "(5R(B")
    ("ksh" . "$(5!3!h!V(B")
    ("GY"  . "***GY***")            ; Placeholder; must be checked later.
    ;; Special consonants: a base consonant followed by `i'.
    ("q"   . "(53i(B")
    ("K"   . "(54i(B")
    ("G"   . "(55i(B")
    ("z"   . "(5:i(B")
    ("f"   . "(5Ii(B")
    (".D"  . "(5?i(B")
    (".Dh" . "(5@i(B"))
  "Alist of consonants for ITRANS <-> IS 13194 conversion.
Each element has the form (ITRANS . IS13194), where ITRANS is the
ASCII mnemonic and IS13194 is the corresponding string in the
IS 13194 character set.")
145
(defvar indian-itrans-vowel-sign-alist
  '(;; Special treatment unique to IS 13194 transliteration: a consonant
    ;; with no vowel mnemonic gets `(5h(B' appended, while an explicit "a"
    ;; appends nothing.
    (""    . "(5h(B")
    ("a"   . "")
    ;; Matra (vowel signs).
    ("aa"  . "(5Z(B")
    ("A"   . "(5Z(B")
    ("i"   . "(5[(B")
    ("ii"  . "(5\(B")
    ("I"   . "(5\(B")
    ("u"   . "(5](B")
    ("uu"  . "(5^(B")
    ("U"   . "(5^(B")
    ("R^i" . "(5_(B")               ; These must be checked out later.
    ("R^I" . "(5_i(B")
    ("L^i" . "(5[i(B")
    ("L^I" . "(5\i(B")
    ("E"   . "(5`(B")               ; Only for non-Devanagari languages.
    ("e"   . "(5a(B")
    ("ai"  . "(5b(B")
    ;; ("e.c" . "(5c(B")            ; Tentatively suppressed.
    ("O"   . "(5d(B")               ; Only for non-Devanagari languages.
    ("o"   . "(5e(B")
    ("au"  . "(5f(B")
    ;; ("o.c" . "(5g(B")            ; Tentatively suppressed.
    )
  "Alist of vowel signs for ITRANS <-> IS 13194 conversion.
Each element has the form (ITRANS . IS13194).  ITRANS is the vowel
mnemonic written after a consonant; IS13194 is the string appended
to the consonant's IS 13194 string.")
173
174 ;;
175 ;; Independent vowels and other signs.
176 ;;
177
(defvar indian-itrans-other-letters-alist
  '(;; Independent vowels.
    ("a"   . "(5$(B")
    ("aa"  . "(5%(B")
    ("A"   . "(5%(B")
    ("i"   . "(5&(B")
    ("ii"  . "(5'(B")
    ("I"   . "(5'(B")
    ("u"   . "(5((B")
    ("uu"  . "(5)(B")
    ("U"   . "(5)(B")
    ("R^i" . "(5*(B")
    ("R^I" . "(5*i(B")
    ("L^i" . "(5&i(B")
    ("L^I" . "(5'i(B")
    ("E"   . "(5+(B")               ; Only for non-Devanagari languages.
    ("e"   . "(5,(B")
    ("ai"  . "(5-(B")
    ;; ("e.c" . "(5.(B")            ; Candra E
    ("O"   . "(5/(B")               ; Only for non-Devanagari languages.
    ("o"   . "(50(B")
    ("au"  . "(51(B")
    ;; ("o.c" . "(52(B")            ; Candra O
    ;; Other signs.
    ;; "M" used to carry the same string as the vowel "a", which made
    ;; "M" decode to that vowel and made the vowel encode back as "M".
    ;; It now shares the string of ".n", the other ITRANS spelling of
    ;; the Anusvara.
    ("M"   . "(5"(B")
    ("H"   . "(5#(B")
    ("AUM" . "(5!i(B")
    ("OM"  . "(5!i(B")
    (".r"  . "(5Oh(B")
    (".n"  . "(5"(B")
    (".N"  . "(5!(B")
    (".h"  . "(5h(B")               ; Halant
    (".."  . "(5j(B")
    (".a"  . "(5ji(B")              ; Avagrah
    ;; Digits.
    ("0"   . "(5q(B")
    ("1"   . "(5r(B")
    ("2"   . "(5s(B")
    ("3"   . "(5t(B")
    ("4"   . "(5u(B")
    ("5"   . "(5v(B")
    ("6"   . "(5w(B")
    ("7"   . "(5x(B")
    ("8"   . "(5y(B")
    ("9"   . "(5z(B"))
  "Alist of independent vowels, signs and digits for ITRANS conversion.
Each element has the form (ITRANS . IS13194).  Unlike the consonant
and vowel-sign tables, these entries are used as they are and are
not combined with one another.")
222
223 ;; Regular expression matching single Indian character represented
224 ;; by ITRANS.
225
(defvar indian-itrans-regexp
  (let ((consonant
         (concat
          ;; Multi-letter units come first so that a unit standing
          ;; without a vowel is not cut short by a one-letter
          ;; alternative that happens to be its prefix.
          "\\(ksh\\)\\|\\(GY\\)"
          "\\|\\([cs]hh?\\)"
          ;; Letters that may take a trailing `h'.  `t' and `d' were
          ;; missing although the consonant table defines t/th/d/dh.
          "\\|[kgjTDtdnpbyr]h?"
          "\\|\\(N\\^?\\)"
          ;; The consonant table spells this key "JN"; the former
          ;; `jN' could never match because plain `j' matched first.
          "\\|\\(JN\\)"
          ;; Single letters.  `h' and `L' were missing although the
          ;; consonant table defines both.
          "\\|[mvqKGzfshL]"
          "\\|\\(ld?\\)"
          "\\|\\(\\.Dh?\\)"))
        (vowel "\\(a[aiu]\\)\\|\\(ii\\)\\|\\(uu\\)\\|\\([RL]\\^[iI]\\)\\|[AIEOeoaiu]")
        (misc "[MH0-9]\\|\\(AUM\\)\\|\\(OM\\)\\|\\(\\.[rnNh\\.a]\\)")
        (lpre "\\(") (rpre "\\)") (orre "\\|"))
    ;; MISC  |  [CONSONANT] VOWEL  |  CONSONANT
    (concat lpre misc rpre orre
            lpre lpre consonant rpre "?" lpre vowel rpre rpre orre
            lpre consonant rpre))
  "Regular expression matching a single Indian character written in ITRANS.
It matches, in this order of preference, a sign or digit, an
optional consonant followed by a vowel, or a bare consonant.
Every key of `indian-itrans-consonant-alist' is matched as written,
so the result does not depend on `case-fold-search' being non-nil.")
234
235 ;;
236 ;; Regular expression matching single ITRANS unit for IS 13194 characters.
237 ;;
238
(defvar itrans-indian-regexp
  (let* ((digit-class     "[(5q(B-(5z(B]")
         (consonant-class "[(53(B-(5X(B]")
         (matra-class     "[(5Z(B-(5g(B]")
         (vowel-class     "[(5$(B-(52(B]")
         ;; A consonant optionally followed by one vowel sign.
         (syllable (concat "\\(" consonant-class matra-class "?" "\\)")))
    ;; DIGIT  |  CONSONANT [MATRA]  |  VOWEL
    (mapconcat 'identity
               (list digit-class syllable vowel-class)
               "\\|"))
  "Regular expression matching one ITRANS unit of IS 13194 text.
It matches a digit, a consonant with an optional vowel sign, or an
independent vowel.")
248
249 ;;
250 ;; IS13194 - ITRANS conversion table for string matching above regexp.
251 ;;
252
(defvar indian-itrans-alist
  (nreverse
   (append
    ;; Every consonant combined with every vowel sign, consonant-major.
    (apply 'append
           (mapcar
            (lambda (consonant)
              (mapcar
               (lambda (sign)
                 (cons (concat (car consonant) (car sign))
                       (concat (cdr consonant) (cdr sign))))
               indian-itrans-vowel-sign-alist))
            indian-itrans-consonant-alist))
    ;; The stand-alone letters, copied into fresh cons cells.
    (mapcar (lambda (letter) (cons (car letter) (cdr letter)))
            indian-itrans-other-letters-alist)))
  "Alist of (ITRANS . IS13194) pairs used for ITRANS conversion.
It holds every entry of `indian-itrans-consonant-alist' combined
with every entry of `indian-itrans-vowel-sign-alist', together with
the entries of `indian-itrans-other-letters-alist'.  The list is in
the reverse of that generation order, so the stand-alone letters
come first, last table entry leading.")
271
272 ;;
273 ;; Utility program to convert from ITRANS to IS 13194 in specified region.
274 ;;
275
(defun indian-decode-itrans-region (from to)
  "Convert `ITRANS' mnemonics in the region to Indian characters.
Interactively, operate on the current region.  From a program, FROM
and TO are positions (integers or markers) delimiting the text to
convert.  Text that matches `indian-itrans-regexp' but has no entry
in `indian-itrans-alist' is left unchanged."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    ;; Pass 1: replace each ITRANS unit by its IS 13194 string.
    ;; NOTE(review): `re-search-forward' obeys `case-fold-search' while
    ;; `assoc' compares case-sensitively, and ITRANS distinguishes case
    ;; (e.g. "T" vs. "t") -- confirm the setting callers are expected
    ;; to run this with.
    (goto-char (point-min))
    (while (re-search-forward indian-itrans-regexp nil t)
      (let* ((start (match-beginning 0))
             (end (match-end 0))
             (indian (cdr (assoc (buffer-substring start end)
                                 indian-itrans-alist))))
        (cond (indian
               (delete-region start end)
               (insert indian)))))
    ;; Pass 2: delete each `(5h(B' (the Halant of the `.h' table entry)
    ;; that is followed by a character matching `[^\\c0]'.
    ;; NOTE(review): a backslash is not special inside a bracket
    ;; expression, so this set excludes only `\\', `c' and `0'.  If a
    ;; category test was intended here, it needs a different regexp --
    ;; confirm.
    (goto-char (point-min))
    (while (re-search-forward "\\((5h(B\\)[^\\c0]" nil t)
      (delete-region (match-beginning 1) (match-end 1)))))
294
295 ;;
296 ;; Utility program to convert from IS 13194 to ITRANS in specified region.
297 ;;
298
(defun indian-encode-itrans-region (from to)
  "Convert Indian characters in the region to `ITRANS' mnemonics.
Interactively, operate on the current region.  From a program, FROM
and TO delimit the text to convert.  Text that matches
`itrans-indian-regexp' but has no entry in `indian-itrans-alist' is
left unchanged.  Point is left at the beginning of the converted
text."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward itrans-indian-regexp nil t)
      (let* ((start (match-beginning 0))
             (end (match-end 0))
             ;; `rassoc' returns the first pair whose cdr matches, so
             ;; when several mnemonics share one IS 13194 string the
             ;; one earliest in `indian-itrans-alist' is used.
             (itrans (car (rassoc (buffer-substring start end)
                                  indian-itrans-alist))))
        (cond (itrans
               (delete-region start end)
               (insert itrans)))))
    (goto-char (point-min))))
313
314 (provide 'indian)
315
316 ;;; indian.el ends here