Mercurial > hg > xemacs-beta
annotate lisp/mule/chinese.el @ 4884:29fb3baea939
Fix the bugs necessary to resolve the trivial test failures in mule-tests.el
lisp/ChangeLog addition:
2010-01-26 Aidan Kehoe <kehoea@parhasard.net>
* mule/vietnamese.el (viscii): Correct the mapping here, #xA6 is
actually *SMALL* LETTER A WITH CIRCUMFLEX AND HOOK ABOVE.
* mule/cyrillic.el (koi8-c): Correct the mapping here, #x8C is
actually ?\u04D9. Add a case mapping for it.
tests/ChangeLog addition:
2010-01-26 Aidan Kehoe <kehoea@parhasard.net>
* automated/mule-tests.el:
Only give the list of character sets in HELLO once; correct it to
reflect its current contents and the extant character sets.
etc/ChangeLog addition:
2010-01-26 Aidan Kehoe <kehoea@parhasard.net>
* HELLO: Encode the Arabic in arabic-iso8859-6 once more; delete
the old, experimentally-encoded Thai.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Tue, 26 Jan 2010 02:22:10 +0000 |
parents | 1d74a1d115ee |
children | 308d34e9f07d |
rev | line source |
---|---|
428 | 1 ;;; chinese.el --- Support for Chinese -*- coding: iso-2022-7bit; -*- |
2 | |
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | |
4 ;; Licensed to the Free Software Foundation. | |
5 ;; Copyright (C) 1997 MORIOKA Tomohiko | |
776 | 6 ;; Copyright (C) 2000, 2001, 2002 Ben Wing. |
428 | 7 |
8 ;; Keywords: multilingual, Chinese | |
9 | |
10 ;; This file is part of XEmacs. | |
11 | |
12 ;; XEmacs is free software; you can redistribute it and/or modify it | |
13 ;; under the terms of the GNU General Public License as published by | |
14 ;; the Free Software Foundation; either version 2, or (at your option) | |
15 ;; any later version. | |
16 | |
17 ;; XEmacs is distributed in the hope that it will be useful, but | |
18 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
20 ;; General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
23 ;; along with XEmacs; see the file COPYING. If not, write to the Free | |
24 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | |
25 ;; 02111-1307, USA. | |
26 | |
27 ;;; Commentary: | |
28 | |
29 ;; For Chinese, three character sets GB2312, BIG5, and CNS11643 are | |
30 ;; supported. | |
31 | |
32 ;;; Code: | |
33 | |
4080 | 34 (eval-when-compile (progn (require 'ccl) (require 'china-util))) |
778 | 35 |
428 | 36 ;; Syntax of Chinese characters. |
37 (loop for row in '(33 34 41) | |
38 do (modify-syntax-entry `[chinese-gb2312 ,row] ".")) | |
778 | 39 |
428 | 40 ;; CNS11643 Plane3 thru Plane7 |
41 ;; These represent more and more obscure Chinese characters. | |
42 ;; By the time you get to Plane 7, we're talking about characters | |
43 ;; that appear once in some ancient manuscript and whose meaning | |
44 ;; is unknown. | |
45 | |
(flet
    ((make-chinese-cns11643-charset
      (name plane final iso-ir)
      ;; Define the charset NAME (a symbol) for CNS 11643 plane PLANE.
      ;; PLANE is the plane number as a string (e.g. "3"); it is spliced
      ;; into the description, the font registry, and the short and long
      ;; names.  FINAL is the character stored as the charset's `final'
      ;; property.  ISO-IR is the integer ISO-IR registration number of
      ;; this particular plane, quoted in the long name.
      ;; After creation the charset gets word syntax and category `t'.
      (make-charset
       name (concat "CNS 11643 Plane " plane " (Chinese traditional)")
       `(registries
         ,(vector (concat "cns11643.1992-" plane))
         dimension 2
         chars 94
         final ,final
         graphic 0
         short-name ,(concat "CNS11643-" plane)
         ;; Each plane has its own registration; previously every plane
         ;; was labelled ISO-IR-183, which is correct for plane 3 only.
         long-name ,(format "CNS11643-%s (Chinese traditional): ISO-IR-%d"
                            plane iso-ir)))
      (modify-syntax-entry name "w")
      (modify-category-entry name ?t)))
  ;; Planes 3-7 are registered as ISO-IR-183 through ISO-IR-187.
  (make-chinese-cns11643-charset 'chinese-cns11643-3 "3" ?I 183)
  (make-chinese-cns11643-charset 'chinese-cns11643-4 "4" ?J 184)
  (make-chinese-cns11643-charset 'chinese-cns11643-5 "5" ?K 185)
  (make-chinese-cns11643-charset 'chinese-cns11643-6 "6" ?L 186)
  (make-chinese-cns11643-charset 'chinese-cns11643-7 "7" ?M 187))
69 | |
70 ;; ISO-IR-165 (CCITT Extended GB) | |
71 ;; It is based on CCITT Recommendation T.101, includes GB 2312-80 + | |
72 ;; GB 8565-88 table A4 + 293 characters. | |
778 | 73 (make-charset ;; not in FSF 21.1 |
428 | 74 'chinese-isoir165 |
75 "ISO-IR-165 (CCITT Extended GB; Chinese simplified)" | |
3659 | 76 `(registries ["isoir165-0"] |
428 | 77 dimension 2 |
78 chars 94 | |
79 final ?E | |
778 | 80 graphic 0 |
81 short-name "ISO-IR-165" | |
82 long-name "ISO-IR-165 (CCITT Extended GB; Chinese simplified)")) | |
428 | 83 |
84 ;; PinYin-ZhuYin | |
778 | 85 (make-charset 'chinese-sisheng |
86 "SiSheng characters for PinYin/ZhuYin" | |
87 '(dimension | |
88 1 | |
89 ;; XEmacs addition: second half of registry spec | |
3659 | 90 registries ["omron_udc_zh-0" "sisheng_cwnn-0"] |
428 | 91 chars 94 |
778 | 92 columns 1 |
93 direction l2r | |
428 | 94 final ?0 |
95 graphic 0 | |
778 | 96 short-name "SiSheng" |
97 long-name "SiSheng (PinYin/ZhuYin)" | |
428 | 98 )) |
99 | |
100 ;; If you prefer QUAIL to EGG, please modify below as you wish. | |
101 ;;(when (and (featurep 'egg) (featurep 'wnn)) | |
102 ;; (setq wnn-server-type 'cserver) | |
103 ;; (load "pinyin") | |
104 ;; (setq its:*standard-modes* | |
105 ;; (cons (its:get-mode-map "PinYin") its:*standard-modes*))) | |
106 | |
107 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
108 ;;; Chinese (general) | |
109 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
110 | |
111 ;; (make-coding-system | |
112 ;; 'iso-2022-cn 2 ?C | |
113 ;; "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)" | |
114 ;; '(ascii | |
115 ;; (nil chinese-gb2312 chinese-cns11643-1) | |
116 ;; (nil chinese-cns11643-2) | |
117 ;; nil | |
118 ;; nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil | |
119 ;; init-bol) | |
120 ;; '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2) | |
121 ;; (mime-charset . iso-2022-cn))) | |
122 | |
123 ;; (define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn) | |
124 | |
125 ;; (make-coding-system | |
126 ;; 'iso-2022-cn-ext 2 ?C | |
127 ;; "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)" | |
128 ;; '(ascii | |
129 ;; (nil chinese-gb2312 chinese-cns11643-1) | |
130 ;; (nil chinese-cns11643-2) | |
131 ;; (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 | |
132 ;; chinese-cns11643-6 chinese-cns11643-7) | |
133 ;; nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil | |
134 ;; init-bol) | |
135 ;; '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2 | |
136 ;; chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 | |
137 ;; chinese-cns11643-6 chinese-cns11643-7) | |
138 ;; (mime-charset . iso-2022-cn-ext))) | |
139 | |
140 | |
141 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
142 ;;; Chinese GB2312 (simplified) | |
143 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
144 | |
145 ;; (make-coding-system | |
146 ;; 'chinese-iso-8bit 2 ?c | |
147 ;; "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:CN-GB-2312)" | |
148 ;; '(ascii chinese-gb2312 nil nil | |
149 ;; nil ascii-eol ascii-cntl nil nil nil nil) | |
150 ;; '((safe-charsets ascii chinese-gb2312) | |
151 ;; (mime-charset . cn-gb-2312))) | |
152 | |
153 (make-coding-system | |
154 'cn-gb-2312 'iso2022 | |
771 | 155 "Chinese EUC" |
428 | 156 '(charset-g0 ascii |
157 charset-g1 chinese-gb2312 | |
778 | 158 charset-g2 chinese-sisheng |
428 | 159 charset-g3 t |
4568 | 160 safe-charsets (ascii chinese-gb2312 chinese-sisheng) |
428 | 161 mnemonic "Zh-GB/EUC" |
771 | 162 documentation |
163 "Chinese EUC (Extended Unix Code), the standard Chinese encoding on Unix. | |
164 This follows the same overall EUC principles as Japanese EUC (see the | |
165 description under Japanese EUC), but specifies different character sets: | |
166 | |
167 G0: ASCII | |
168 G1: Chinese-GB2312 | |
169 G2: Sisheng (PinYin - ZhuYin)" | |
428 | 170 )) |
171 | |
3172 | 172 ;; For consistency with euc-jp, euc-ko |
173 (define-coding-system-alias 'euc-cn 'cn-gb-2312) | |
428 | 174 |
175 (define-coding-system-alias 'gb2312 'cn-gb-2312) | |
176 (define-coding-system-alias 'chinese-euc 'cn-gb-2312) | |
177 | |
178 ;; (make-coding-system | |
179 ;; 'chinese-hz 0 ?z | |
180 ;; "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)" | |
181 ;; nil | |
182 ;; '((safe-charsets ascii chinese-gb2312) | |
183 ;; (mime-charset . hz-gb-2312) | |
184 ;; (post-read-conversion . post-read-decode-hz) | |
185 ;; (pre-write-conversion . pre-write-encode-hz))) | |
186 ;; (put 'chinese-hz 'post-read-conversion 'post-read-decode-hz) | |
187 ;; (put 'chinese-hz 'pre-write-conversion 'pre-write-encode-hz) | |
188 | |
189 (make-coding-system | |
190 'hz-gb-2312 'no-conversion | |
771 | 191 "Hz/ZW (Chinese)" |
428 | 192 '(mnemonic "Zh-GB/Hz" |
193 eol-type lf | |
4568 | 194 safe-charsets (ascii chinese-gb2312) |
428 | 195 post-read-conversion post-read-decode-hz |
771 | 196 pre-write-conversion pre-write-encode-hz |
197 documentation "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)" | |
198 )) | |
428 | 199 |
200 ;; (define-coding-system-alias 'hz-gb-2312 'chinese-hz) | |
201 ;; (define-coding-system-alias 'hz 'chinese-hz) | |
202 | |
203 (define-coding-system-alias 'hz 'hz-gb-2312) | |
204 | |
(defun post-read-decode-hz (len)
  "Decode the LEN characters of Hz/ZW text that start at point.
This is the `post-read-conversion' function of the `hz-gb-2312' coding
system.  The region from point to point + LEN is handed to
`decode-hz-region', whose value is returned.  The buffer's modified
flag is put back to the state it had before decoding."
  ;; A binding of `last-coding-system-used' used to live here; it is
  ;; intentionally left disabled.
  (let* ((start (point))
         (end (+ start len))
         (was-modified (buffer-modified-p)))
    (prog1
        (decode-hz-region start end)
      (set-buffer-modified-p was-modified))))
213 | |
(defun pre-write-encode-hz (from to)
  "Encode text as Hz/ZW in a fresh temporary buffer before writing.
This is the `pre-write-conversion' function of the `hz-gb-2312' coding
system.  FROM is either a string to encode, or, together with TO, the
bounds of the text to take from the current buffer.  The text is copied
into a newly generated \" *temp*\" buffer, which is left current, and
the whole of that buffer is passed to `encode-hz-region'.
Always returns nil."
  (let ((source (current-buffer)))
    (set-buffer (generate-new-buffer " *temp*"))
    (cond ((stringp from)
           (insert from))
          (t
           (insert-buffer-substring source from to)))
    ;; A binding of `last-coding-system-used' around this call is
    ;; intentionally left disabled.
    (encode-hz-region 1 (point-max))
    nil))
771 | 223 |
428 | 224 (set-language-info-alist |
225 "Chinese-GB" '((setup-function . setup-chinese-gb-environment-internal) | |
778 | 226 (charset chinese-gb2312 chinese-sisheng) |
428 | 227 (coding-system cn-gb-2312 iso-2022-7bit hz-gb-2312) |
228 (coding-priority cn-gb-2312 big5 iso-2022-7bit) | |
771 | 229 (cygwin-locale "zh") |
230 (mswindows-locale ("CHINESE" . "CHINESE_SIMPLIFIED")) | |
231 (native-coding-system cn-gb-2312) | |
428 | 232 (input-method . "chinese-py-punct") |
233 (features china-util) | |
234 (sample-text . "Chinese ($AVPND(B,$AFUM(;0(B,$A::So(B) $ADc:C(B") | |
771 | 235 (documentation . |
236 "Supports Simplified Chinese, used in mainland China. | |
237 Uses the GB2312 character set.")) | |
428 | 238 '("Chinese")) |
239 | |
4145 | 240 ;; Set the locale information separately so that the lambda gets compiled. |
(set-language-info "Chinese-GB"
                   'locale
                   (list "zh_CN.eucCN" "zh_CN.EUC" "zh_CN" "chinese-s" "zh"
                         ;; Predicate entry: accepts a locale name ARG that
                         ;; begins with "zh_" and contains ".GB", ignoring
                         ;; case (e.g. "zh_CN.GB2312").  nil is rejected.
                         ;; The dot before GB is escaped so that it matches
                         ;; only the literal codeset separator; unescaped it
                         ;; matched any character (e.g. "zh_CNxGB").
                         (lambda (arg)
                           (and arg (let ((case-fold-search t))
                                      (string-match "^zh_.*\\.GB.*" arg))))))
247 | |
428 | 248 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
249 ;; Chinese BIG5 (traditional) | |
250 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
251 | |
252 ;; (make-coding-system | |
253 ;; 'chinese-big5 3 ?B "BIG5 8-bit encoding for Chinese (MIME:CN-BIG5)" | |
254 ;; nil | |
255 ;; '((safe-charsets ascii chinese-big5-1 chinese-big5-2) | |
256 ;; (mime-charset . cn-big5) | |
257 ;; (charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char) | |
258 ;; (chinese-big5-2 "BIG5" encode-big5-char)))) | |
259 | |
260 (make-coding-system | |
261 'big5 'big5 | |
771 | 262 "Big5" |
263 '(mnemonic "Zh/Big5" | |
4568 | 264 safe-charsets (ascii chinese-big5-1 chinese-big5-2) |
771 | 265 documentation |
266 "A non-modal encoding formed by five large Taiwanese companies | |
267 \(hence \"Big5\") to produce a character set and encoding for | |
268 traditional Chinese writing. Big5 encodes some 13,000+ characters. | |
269 ASCII is encoded as normal, and Chinese characters as two bytes, but | |
270 Chinese characters do not exclusively use the high half. The first | |
271 byte is in the high half standard position A1-FE, but the second byte | |
272 is in either low 40-7E or high A1-FE. Thus Big5 suffers from the | |
273 classic \"it might look like a slash, but it's really the second byte | |
274 of a Chinese character\".")) | |
428 | 275 |
276 ;; (define-coding-system-alias 'big5 'chinese-big5) | |
277 ;; (define-coding-system-alias 'cn-big5 'chinese-big5) | |
278 | |
279 (define-coding-system-alias 'cn-big5 'big5) | |
280 | |
281 ;; Big5 font requires special encoding. | |
(define-ccl-program ccl-encode-big5-font
  `(0
    ;; Registers on entry:
    ;;   r0  charset id (chinese-big5-1 or chinese-big5-2)
    ;;   r1  position code 1
    ;;   r2  position code 2
    ;; Registers on exit:
    ;;   r1  font code point 1
    ;;   r2  font code point 2
    (;; Linear index inside the 94x94 charset: both position codes are
     ;; rebased from #x21, the first one weighted by 94.  r1 is only a
     ;; scratch value here; it is overwritten below.
     (r1 = ((r1 - #x21) * 94))
     (r2 = ((r2 - #x21) + r1))
     ;; Code points of chinese-big5-2 are numbered after a fixed offset
     ;; of 6280.
     (if (r0 == ,(charset-id 'chinese-big5-2)) (r2 += 6280))
     ;; Split the index into rows of 157 cells; rows start at #xA1.
     (r1 = ((r2 / 157) + #xA1))
     (r2 %= 157)
     ;; The first #x3F cells of a row map to #x40 and up, the remaining
     ;; cells to #xA1 and up (#x3F + #x62 = #xA1).
     (if (r2 < #x3F) (r2 += #x40) (r2 += #x62))))
  "CCL program to encode a Big5 code to code point of Big5 font.")
295 | |
444 | 296 (set-charset-ccl-program 'chinese-big5-1 'ccl-encode-big5-font) |
297 (set-charset-ccl-program 'chinese-big5-2 'ccl-encode-big5-font) | |
428 | 298 |
299 (set-language-info-alist | |
300 "Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2) | |
301 (coding-system big5 iso-2022-7bit) | |
302 (coding-priority big5 cn-gb-2312 iso-2022-7bit) | |
771 | 303 (cygwin-locale "zh_TW") |
304 (mswindows-locale ("CHINESE" . "CHINESE_TRADITIONAL")) | |
305 (native-coding-system big5) | |
428 | 306 (input-method . "chinese-py-punct-b5") |
307 (features china-util) | |
308 (sample-text . "Cantonese ($(0GnM$(B,$(0N]0*Hd(B) $(0*/=((B, $(0+$)p(B") | |
771 | 309 (documentation . |
310 "Supports Traditional Chinese, used in Taiwan, Hong Kong, and Singapore. | |
311 Uses the Chinese Big5 character set." | |
312 )) | |
428 | 313 '("Chinese")) |
314 | |
4145 | 315 ;; Set the locale information separately so that the lambda gets compiled. |
(set-language-info "Chinese-BIG5"
                   'locale
                   (list "zh_TW.Big5" "zh_TW.big5" "zh_CN.big5" "zh_TW"
                         "chinese-t"
                         ;; Predicate entry: accepts a locale name ARG that
                         ;; begins with "zh_" and contains ".BIG5", ignoring
                         ;; case (e.g. "zh_HK.big5hkscs").  nil is rejected.
                         ;; The dot before BIG5 is escaped so that it matches
                         ;; only the literal codeset separator; unescaped it
                         ;; matched any character (e.g. "zh_TWxBIG5").
                         (lambda (arg)
                           (and arg (let ((case-fold-search t))
                                      (string-match "^zh_.*\\.BIG5.*" arg))))))
323 | |
428 | 324 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
325 ;; Chinese CNS11643 (traditional) | |
326 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
327 | |
328 ;; (set-language-info-alist | |
329 ;; "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2 | |
330 ;; chinese-cns11643-3 chinese-cns11643-4 | |
331 ;; chinese-cns11643-5 chinese-cns11643-6 | |
332 ;; chinese-cns11643-7) | |
333 ;; (coding-system iso-2022-cn) | |
334 ;; (coding-priority iso-2022-cn chinese-big5 chinese-iso-8bit) | |
335 ;; (features china-util) | |
336 ;; (input-method . "chinese-cns-quick") | |
337 ;; (documentation . "Support for Chinese CNS character sets.")) | |
338 ;; '("Chinese")) | |
339 | |
340 ;;; chinese.el ends here |