comparison lisp/mule/viet-util.el @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents
children 2923009caf47
comparison
equal deleted inserted replaced
770:336a418893b5 771:943eaba38521
1 ;;; viet-util.el --- utilities for Vietnamese -*- coding: iso-2022-7bit; -*-
2
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5
6 ;; Keywords: mule, multilingual, Vietnamese
7
8 ;; This file is part of XEmacs.
9
10 ;; XEmacs is free software; you can redistribute it and/or modify it
11 ;; under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; XEmacs is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs; see the file COPYING. If not, write to the Free
22 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 ;; 02111-1307, USA.
24
25 ;;; Synched up with: Emacs 21.0.103 (language/viet-util.el).
26
27 ;;; Commentary:
28
29 ;; Vietnamese uses ASCII characters and an additional 134 unique
30 ;; characters (these are Latin letters with various diacritical and
31 ;; tone marks). As far as I know, Vietnamese now has 4 different ways
32 ;; of representing these characters: VISCII, VSCII, VIQR, and
33 ;; Unicode. VISCII and VSCII are simple 1-byte codes which assign the 134
34 ;; unique characters to the control-code area (0x00..0x1F) and the right half
35 ;; area (0x80..0xFF). VIQR is a mnemonic encoding specification
36 ;; representing diacritical marks by following ASCII characters.
37
38 ;;; Code:
39
40 ;;;###autoload
(defun viet-encode-viscii-char (char)
  "Return the VISCII character code of CHAR if appropriate.
The code is found by indexing extra slot 0 of
`viet-viscii-nonascii-translation-table' with CHAR."
  (let ((viscii-codes
         (char-table-extra-slot viet-viscii-nonascii-translation-table 0)))
    (aref viscii-codes char)))
45
46 ;; VIQR is a mnemonic encoding specification for Vietnamese.
47 ;; It represents diacritical marks by ASCII characters as follows:
48 ;; ------------+----------+---------
49 ;; mark | mnemonic | example
50 ;; ------------+----------+---------
51 ;; breve | ( | a( -> ,1e(B
52 ;; circumflex | ^ | a^ -> ,1b(B
53 ;; horn | + | o+ -> ,1=(B
54 ;; ------------+----------+---------
55 ;; acute | ' | a' -> ,1a(B
56 ;; grave | ` | a` -> ,1`(B
57 ;; hook above | ? | a? -> ,1d(B
58 ;; tilde | ~ | a~ -> ,1c(B
59 ;; dot below | . | a. -> ,1U(B
60 ;; ------------+----------+---------
61 ;; d bar | dd | dd -> ,1p(B
62 ;; ------------+----------+---------
63
;; Table relating single Vietnamese characters to their VIQR spellings.
;; Each entry is (CHARACTER . VIQR-STRING).  The table is searched in
;; both directions by the functions in this file:
;;   - `viet-encode-viqr-region' looks a character up with `assq', so
;;     the first entry for a character supplies its VIQR spelling;
;;   - `viet-decode-viqr-region' looks a VIQR string up with `rassoc'.
;; The number in the trailing comment of each entry is the code written
;; there by the original author (161..254).
;; NOTE(review): the character literals show up here as `,1x(B' (lower
;; case) and `,2x(B' (upper case); presumably these are ISO-2022-7bit
;; escape sequences (see the coding cookie on the first line of the
;; file) whose escape bytes are not visible in this view -- confirm
;; against the raw file before editing any entry.
64 (defvar viet-viqr-alist
65 '(;; lowercase
66 (?,1!(B . "a('") ; 161
67 (?,1"(B . "a(`") ; 162
68 (?,1#(B . "a(.") ; 163
69 (?,1$(B . "a^'") ; 164
70 (?,1%(B . "a^`") ; 165
71 (?,1&(B . "a^?") ; 166
72 (?,1'(B . "a^.") ; 167
73 (?,1((B . "e~") ; 168
74 (?,1)(B . "e.") ; 169
75 (?,1*(B . "e^'") ; 170
76 (?,1+(B . "e^`") ; 171
77 (?,1,(B . "e^?") ; 172
78 (?,1-(B . "e^~") ; 173
79 (?,1.(B . "e^.") ; 174
80 (?,1/(B . "o^'") ; 175
81 (?,10(B . "o^`") ; 176
82 (?,11(B . "o^?") ; 177
83 (?,12(B . "o^~") ; 178
84 (?,15(B . "o^.") ; 181
85 (?,16(B . "o+`") ; 182
86 (?,17(B . "o+?") ; 183
87 (?,18(B . "i.") ; 184
88 (?,1=(B . "o+") ; 189
89 (?,1>(B . "o+'") ; 190
90 (?,1F(B . "a(?") ; 198
91 (?,1G(B . "a(~") ; 199
92 (?,1O(B . "y`") ; 207
93 (?,1Q(B . "u+'") ; 209
94 (?,1U(B . "a.") ; 213
95 (?,1V(B . "y?") ; 214
96 (?,1W(B . "u+`") ; 215
97 (?,1X(B . "u+?") ; 216
98 (?,1[(B . "y~") ; 219
99 (?,1\(B . "y.") ; 220
100 (?,1^(B . "o+~") ; 222
101 (?,1_(B . "u+") ; 223
102 (?,1`(B . "a`") ; 224
103 (?,1a(B . "a'") ; 225
104 (?,1b(B . "a^") ; 226
105 (?,1c(B . "a~") ; 227
106 (?,1d(B . "a?") ; 228
107 (?,1e(B . "a(") ; 229
108 (?,1f(B . "u+~") ; 230
109 (?,1g(B . "a^~") ; 231
110 (?,1h(B . "e`") ; 232
111 (?,1i(B . "e'") ; 233
112 (?,1j(B . "e^") ; 234
113 (?,1k(B . "e?") ; 235
114 (?,1l(B . "i`") ; 236
115 (?,1m(B . "i'") ; 237
116 (?,1n(B . "i~") ; 238
117 (?,1o(B . "i?") ; 239
118 (?,1p(B . "dd") ; 240
119 (?,1q(B . "u+.") ; 241
120 (?,1r(B . "o`") ; 242
121 (?,1s(B . "o'") ; 243
122 (?,1t(B . "o^") ; 244
123 (?,1u(B . "o~") ; 245
124 (?,1v(B . "o?") ; 246
125 (?,1w(B . "o.") ; 247
126 (?,1x(B . "u.") ; 248
127 (?,1y(B . "u`") ; 249
128 (?,1z(B . "u'") ; 250
129 (?,1{(B . "u~") ; 251
130 (?,1|(B . "u?") ; 252
131 (?,1}(B . "y'") ; 253
132 (?,1~(B . "o+.") ; 254
133
134 ;; upper case
135 (?,2!(B . "A('") ; 161
136 (?,2"(B . "A(`") ; 162
137 (?,2#(B . "A(.") ; 163
138 (?,2$(B . "A^'") ; 164
139 (?,2%(B . "A^`") ; 165
140 (?,2&(B . "A^?") ; 166
141 (?,2'(B . "A^.") ; 167
142 (?,2((B . "E~") ; 168
143 (?,2)(B . "E.") ; 169
144 (?,2*(B . "E^'") ; 170
145 (?,2+(B . "E^`") ; 171
146 (?,2,(B . "E^?") ; 172
147 (?,2-(B . "E^~") ; 173
148 (?,2.(B . "E^.") ; 174
149 (?,2/(B . "O^'") ; 175
150 (?,20(B . "O^`") ; 176
151 (?,21(B . "O^?") ; 177
152 (?,22(B . "O^~") ; 178
153 (?,25(B . "O^.") ; 181
154 (?,26(B . "O+`") ; 182
155 (?,27(B . "O+?") ; 183
156 (?,28(B . "I.") ; 184
157 (?,2=(B . "O+") ; 189
158 (?,2>(B . "O+'") ; 190
159 (?,2F(B . "A(?") ; 198
160 (?,2G(B . "A(~") ; 199
161 (?,2O(B . "Y`") ; 207
162 (?,2Q(B . "U+'") ; 209
163 (?,2U(B . "A.") ; 213
164 (?,2V(B . "Y?") ; 214
165 (?,2W(B . "U+`") ; 215
166 (?,2X(B . "U+?") ; 216
167 (?,2[(B . "Y~") ; 219
168 (?,2\(B . "Y.") ; 220
169 (?,2^(B . "O+~") ; 222
170 (?,2_(B . "U+") ; 223
171 (?,2`(B . "A`") ; 224
172 (?,2a(B . "A'") ; 225
173 (?,2b(B . "A^") ; 226
174 (?,2c(B . "A~") ; 227
175 (?,2d(B . "A?") ; 228
176 (?,2e(B . "A(") ; 229
177 (?,2f(B . "U+~") ; 230
178 (?,2g(B . "A^~") ; 231
179 (?,2h(B . "E`") ; 232
180 (?,2i(B . "E'") ; 233
181 (?,2j(B . "E^") ; 234
182 (?,2k(B . "E?") ; 235
183 (?,2l(B . "I`") ; 236
184 (?,2m(B . "I'") ; 237
185 (?,2n(B . "I~") ; 238
186 (?,2o(B . "I?") ; 239
;; The same character (code 240) is listed under three spellings.
;; An `assq' lookup stops at the first one, so "DD" is what encoding
;; produces; the other two let a `rassoc' lookup accept the mixed-case
;; forms that the "[Dd][Dd]" part of `viqr-regexp' can match.
187 (?,2p(B . "DD") ; 240
188 (?,2p(B . "dD") ; 240
189 (?,2p(B . "Dd") ; 240
190 (?,2q(B . "U+.") ; 241
191 (?,2r(B . "O`") ; 242
192 (?,2s(B . "O'") ; 243
193 (?,2t(B . "O^") ; 244
194 (?,2u(B . "O~") ; 245
195 (?,2v(B . "O?") ; 246
196 (?,2w(B . "O.") ; 247
197 (?,2x(B . "U.") ; 248
198 (?,2y(B . "U`") ; 249
199 (?,2z(B . "U'") ; 250
200 (?,2{(B . "U~") ; 251
201 (?,2|(B . "U?") ; 252
202 (?,2}(B . "Y'") ; 253
203 (?,2~(B . "O+.") ; 254
204
205 ;; escape from composition
;; Each ASCII character that serves as a VIQR mnemonic (and backslash
;; itself) is paired with that character preceded by a backslash.
;; NOTE(review): `viqr-regexp' has no alternative starting with a
;; backslash, and `viet-encode-viqr-region' only visits characters
;; matching "\\cv"; it is not clear from this file that these entries
;; are ever reached -- confirm before relying on them.
206 (?\( . "\\(") ; breve (left parenthesis)
207 (?^ . "\\^") ; circumflex (caret)
208 (?+ . "\\+") ; horn (plus sign)
209 (?' . "\\'") ; acute (apostrophe)
210 (?` . "\\`") ; grave (backquote)
211 (?? . "\\?") ; hook above (question mark)
212 (?~ . "\\~") ; tilde (tilde)
213 (?. . "\\.") ; dot below (period)
214 (?d . "\\d") ; d-bar (d)
215 (?\\ . "\\\\") ; literal backslash
216 )
217 "Alist of Vietnamese characters vs corresponding `VIQR' string.")
218
(defconst viqr-regexp
  "[aeiouyAEIOUY]\\([(^+]?['`?~.]\\|[(^+]\\)\\|[Dd][Dd]"
  "Regular expression matching one Vietnamese character written in VIQR.
It matches either a vowel followed by an optional modifier mnemonic
\(breve, circumflex or horn) and a tone mnemonic, a vowel followed by a
modifier mnemonic alone, or the two-letter d-bar sequence in any
combination of letter case.")
223
224 ;;;###autoload
(defun viet-decode-viqr-region (from to)
  "Convert `VIQR' mnemonics of the current region to Vietnamese characters.
When called from a program, expects two arguments,
positions (integers or markers) specifying the stretch of the region.
Every match of `viqr-regexp' between FROM and TO is looked up in
`viet-viqr-alist'; a match with no corresponding entry is left as is."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward viqr-regexp nil t)
      (let* ((mnemonic (match-string 0))
             (decoded (car (rassoc mnemonic viet-viqr-alist))))
        ;; Replace the mnemonic only when the table knows it; the search
        ;; simply resumes after the match otherwise.
        (when decoded
          (delete-region (match-beginning 0) (match-end 0))
          (insert decoded))))))
240
241 ;;;###autoload
(defun viet-decode-viqr-buffer ()
  "Convert `VIQR' mnemonics of the current buffer to Vietnamese characters.
This runs `viet-decode-viqr-region' from `point-min' to `point-max'."
  (interactive)
  (let ((start (point-min))
        (end (point-max)))
    (viet-decode-viqr-region start end)))
246
247 ;;;###autoload
(defun viet-encode-viqr-region (from to)
  "Convert Vietnamese characters of the current region to `VIQR' mnemonics.
When called from a program, expects two arguments,
positions (integers or markers) specifying the stretch of the region.
Every character between FROM and TO that matches the regexp \"\\\\cv\" is
looked up in `viet-viqr-alist'; a character with no entry is left as is."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward "\\cv" nil t)
      ;; Point is just past the matched character, so `preceding-char'
      ;; is the character that was matched.
      (let ((mnemonic (cdr (assq (preceding-char) viet-viqr-alist))))
        (when mnemonic
          (delete-char -1)
          (insert mnemonic))))))
263
264 ;;;###autoload
(defun viet-encode-viqr-buffer ()
  "Convert Vietnamese characters of the current buffer to `VIQR' mnemonics.
This runs `viet-encode-viqr-region' from `point-min' to `point-max'."
  (interactive)
  (let ((start (point-min))
        (end (point-max)))
    (viet-encode-viqr-region start end)))
269
270 ;;;###autoload
(defun viqr-post-read-conversion (len)
  "Decode `VIQR' mnemonics in the LEN characters that follow point.
Point and the restriction in effect are preserved, and the buffer's
modified flag is put back to the value it had before decoding.
Return the length of the text after decoding."
  (save-excursion
    (save-restriction
      (narrow-to-region (point) (+ (point) len))
      (let ((was-modified (buffer-modified-p)))
        (viet-decode-viqr-region (point-min) (point-max))
        ;; Decoding edits the buffer; undo its effect on the modified flag.
        (set-buffer-modified-p was-modified)
        ;; The narrowed region now spans exactly the decoded text.
        (- (point-max) (point-min))))))
279
280 ;;;###autoload
(defun viqr-pre-write-conversion (from to)
  "Encode Vietnamese characters as `VIQR' mnemonics in a fresh buffer.
If FROM is a string, that string is the text to encode; otherwise FROM
and TO delimit the text in the current buffer.  The text is copied into
a newly generated buffer, which is made current and left current, and
is encoded there with `viet-encode-viqr-region'.  Return nil."
  (let ((source (current-buffer))
        (work (generate-new-buffer " *temp*")))
    (set-buffer work)
    (if (stringp from)
        (insert from)
      (insert-buffer-substring source from to))
    (viet-encode-viqr-region (point-min) (point-max)))
  ;; Should return nil as annotations.
  nil)
290
291 ;;; Announce the `viet-util' feature to `require'.
292 (provide 'viet-util)
293
294 ;;; viet-util.el ends here