428
|
1 ;;; chinese.el --- Support for Chinese -*- coding: iso-2022-7bit; -*-
|
|
2
|
|
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
|
|
4 ;; Licensed to the Free Software Foundation.
|
|
5 ;; Copyright (C) 1997 MORIOKA Tomohiko
|
776
|
6 ;; Copyright (C) 2000, 2001, 2002 Ben Wing.
|
428
|
7
|
|
8 ;; Keywords: multilingual, Chinese
|
|
9
|
|
10 ;; This file is part of XEmacs.
|
|
11
|
|
12 ;; XEmacs is free software; you can redistribute it and/or modify it
|
|
13 ;; under the terms of the GNU General Public License as published by
|
|
14 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
15 ;; any later version.
|
|
16
|
|
17 ;; XEmacs is distributed in the hope that it will be useful, but
|
|
18 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
20 ;; General Public License for more details.
|
|
21
|
|
22 ;; You should have received a copy of the GNU General Public License
|
|
23 ;; along with XEmacs; see the file COPYING. If not, write to the Free
|
|
24 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
|
25 ;; 02111-1307, USA.
|
|
26
|
|
27 ;;; Commentary:
|
|
28
|
|
29 ;; For Chinese, three character sets GB2312, BIG5, and CNS11643 are
|
|
30 ;; supported.
|
|
31
|
|
32 ;;; Code:
|
|
33
|
776
|
34 (eval-when-compile
|
|
35 (require 'china-util))
|
|
36
|
778
|
37 ; (make-charset 'chinese-gb2312
|
|
38 ; "GB2312 Chinese simplified: ISO-IR-58"
|
|
39 ; '(dimension
|
|
40 ; 2
|
|
41 ; registry "GB2312.1980"
|
|
42 ; chars 94
|
|
43 ; columns 2
|
|
44 ; direction l2r
|
|
45 ; final ?A
|
|
46 ; graphic 0
|
|
47 ; short-name "GB2312"
|
|
48 ; long-name "GB2312: ISO-IR-58"
|
|
49 ; ))
|
|
50
|
|
51 ; (make-charset 'chinese-cns11643-1
|
|
52 ; "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
|
|
53 ; '(dimension
|
|
54 ; 2
|
|
55 ; registry "CNS11643.1992-1"
|
|
56 ; chars 94
|
|
57 ; columns 2
|
|
58 ; direction l2r
|
|
59 ; final ?G
|
|
60 ; graphic 0
|
|
61 ; short-name "CNS11643-1"
|
|
62 ; long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
|
|
63 ; ))
|
|
64
|
|
65 ; (make-charset 'chinese-cns11643-2
|
|
66 ; "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
|
|
67 ; '(dimension
|
|
68 ; 2
|
|
69 ; registry "CNS11643.1992-2"
|
|
70 ; chars 94
|
|
71 ; columns 2
|
|
72 ; direction l2r
|
|
73 ; final ?H
|
|
74 ; graphic 0
|
|
75 ; short-name "CNS11643-2"
|
|
76 ; long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
|
|
77 ; ))
|
|
78
|
|
79 ; (make-charset 'chinese-big5-1
|
|
80 ; "Frequently used part (A141-C67F) of Big5 (Chinese traditional)"
|
|
81 ; '(dimension
|
|
82 ; 2
|
|
83 ; registry "Big5"
|
|
84 ; chars 94
|
|
85 ; columns 2
|
|
86 ; direction l2r
|
|
87 ; final ?0
|
|
88 ; graphic 0
|
|
89 ; short-name "Big5 (Level-1)"
|
|
90 ; long-name "Big5 (Level-1) A141-C67F"
|
|
91 ; ))
|
|
92
|
|
93 ; (make-charset 'chinese-big5-2
|
|
94 ; "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
|
|
95 ; '(dimension
|
|
96 ; 2
|
|
97 ; registry "Big5"
|
|
98 ; chars 94
|
|
99 ; columns 2
|
|
100 ; direction l2r
|
|
101 ; final ?1
|
|
102 ; graphic 0
|
|
103 ; short-name "Big5 (Level-2)"
|
|
104 ; long-name "Big5 (Level-2) C940-FEFE"
|
|
105 ; ))
|
|
106
|
428
|
;; Syntax of Chinese characters.
;; The whole GB2312 charset is first given word syntax ("w"); rows 33,
;; 34 and 41 are then overridden with punctuation syntax (".").
(modify-syntax-entry 'chinese-gb2312 "w")
(dolist (row '(33 34 41))
  (modify-syntax-entry (vector 'chinese-gb2312 row) "."))
;;(loop for row from 35 to 40
;;      do (modify-syntax-entry `[chinese-gb2312 ,row] "w"))
;;(loop for row from 42 to 126
;;      do (modify-syntax-entry `[chinese-gb2312 ,row] "w"))

;; CNS 11643 planes 1-2 and both Big5 charsets get word syntax as well.
(dolist (charset '(chinese-cns11643-1
                   chinese-cns11643-2
                   chinese-big5-1
                   chinese-big5-2))
  (modify-syntax-entry charset "w"))
|
|
120
|
778
|
; ;; Chinese CNS11643 Plane3 thru Plane7.  Although these are official
; ;; character sets, they are rarely used and do not have to be stored
; ;; space-efficiently in the buffer.
|
|
124 ; (make-charset 'chinese-cns11643-3
|
|
125 ; "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
|
|
126 ; '(dimension
|
|
127 ; 2
|
|
128 ; registry "CNS11643.1992-3"
|
|
129 ; chars 94
|
|
130 ; columns 2
|
|
131 ; direction l2r
|
|
132 ; final ?I
|
|
133 ; graphic 0
|
|
134 ; short-name "CNS11643-3"
|
|
135 ; long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
|
|
136 ; ))
|
|
137
|
428
|
138 ;; CNS11643 Plane3 thru Plane7
|
|
139 ;; These represent more and more obscure Chinese characters.
|
|
140 ;; By the time you get to Plane 7, we're talking about characters
|
|
141 ;; that appear once in some ancient manuscript and whose meaning
|
|
142 ;; is unknown.
|
|
143
|
|
;; Define the charsets for CNS 11643 planes 3 through 7.  The local
;; helper creates one charset and then gives it word syntax ("w") and
;; category ?t.
(flet
    ((make-chinese-cns11643-charset
      (name plane final)
      ;; NAME  -- symbol naming the charset to create.
      ;; PLANE -- plane number as a string ("3" .. "7"); it is spliced
      ;;          into the description, registry regexp and names.
      ;; FINAL -- value of the charset's `final' property.
      (make-charset
       name (concat "CNS 11643 Plane " plane " (Chinese traditional)")
       `(registry
         ,(concat "CNS11643[.-]\\(.*[.-]\\)?" plane "$")
         dimension 2
         chars 94
         final ,final
         graphic 0
         short-name ,(concat "CNS11643-" plane)
         ;; CNS 11643 planes 3-7 are registered as ISO-IR-183 through
         ;; ISO-IR-187, i.e. 180 + plane.  The number used to be
         ;; hard-coded as 183, which mislabelled planes 4-7.
         long-name ,(format "CNS11643-%s (Chinese traditional): ISO-IR-%d"
                            plane (+ 180 (string-to-number plane)))))
      (modify-syntax-entry name "w")
      (modify-category-entry name ?t)
      ))
  (make-chinese-cns11643-charset 'chinese-cns11643-3 "3" ?I)
  (make-chinese-cns11643-charset 'chinese-cns11643-4 "4" ?J)
  (make-chinese-cns11643-charset 'chinese-cns11643-5 "5" ?K)
  (make-chinese-cns11643-charset 'chinese-cns11643-6 "6" ?L)
  (make-chinese-cns11643-charset 'chinese-cns11643-7 "7" ?M)
  )
|
|
167
|
|
168 ;; ISO-IR-165 (CCITT Extended GB)
|
|
169 ;; It is based on CCITT Recommendation T.101, includes GB 2312-80 +
|
|
170 ;; GB 8565-88 table A4 + 293 characters.
|
778
|
;; Not in FSF 21.1.
;; The property list contains no unquoted parts, so a plain quote is
;; used for it.
(make-charset
 'chinese-isoir165
 "ISO-IR-165 (CCITT Extended GB; Chinese simplified)"
 '(registry "isoir165"
   dimension 2
   chars 94
   final ?E
   graphic 0
   short-name "ISO-IR-165"
   long-name "ISO-IR-165 (CCITT Extended GB; Chinese simplified)"))
|
428
|
181
|
|
182 ;; PinYin-ZhuYin
|
778
|
;; One-dimensional, 94-character charset holding the SiSheng characters
;; used for PinYin/ZhuYin.
(make-charset
 'chinese-sisheng
 "SiSheng characters for PinYin/ZhuYin"
 '(dimension 1
   ;; XEmacs addition: second half of registry spec
   registry "sisheng_cwnn\\|OMRON_UDC_ZH"
   chars 94
   columns 1
   direction l2r
   final ?0
   graphic 0
   short-name "SiSheng"
   long-name "SiSheng (PinYin/ZhuYin)"))
|
|
197
|
|
198 ;; If you prefer QUAIL to EGG, please modify below as you wish.
|
|
199 ;;(when (and (featurep 'egg) (featurep 'wnn))
|
|
200 ;; (setq wnn-server-type 'cserver)
|
|
201 ;; (load "pinyin")
|
|
202 ;; (setq its:*standard-modes*
|
|
203 ;; (cons (its:get-mode-map "PinYin") its:*standard-modes*)))
|
|
204
|
|
205 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
206 ;;; Chinese (general)
|
|
207 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
208
|
|
209 ;; (make-coding-system
|
|
210 ;; 'iso-2022-cn 2 ?C
|
|
211 ;; "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)"
|
|
212 ;; '(ascii
|
|
213 ;; (nil chinese-gb2312 chinese-cns11643-1)
|
|
214 ;; (nil chinese-cns11643-2)
|
|
215 ;; nil
|
|
216 ;; nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil
|
|
217 ;; init-bol)
|
|
218 ;; '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2)
|
|
219 ;; (mime-charset . iso-2022-cn)))
|
|
220
|
|
221 ;; (define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn)
|
|
222
|
|
223 ;; (make-coding-system
|
|
224 ;; 'iso-2022-cn-ext 2 ?C
|
|
225 ;; "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)"
|
|
226 ;; '(ascii
|
|
227 ;; (nil chinese-gb2312 chinese-cns11643-1)
|
|
228 ;; (nil chinese-cns11643-2)
|
|
229 ;; (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
|
|
230 ;; chinese-cns11643-6 chinese-cns11643-7)
|
|
231 ;; nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil
|
|
232 ;; init-bol)
|
|
233 ;; '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2
|
|
234 ;; chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5
|
|
235 ;; chinese-cns11643-6 chinese-cns11643-7)
|
|
236 ;; (mime-charset . iso-2022-cn-ext)))
|
|
237
|
|
238
|
|
239 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
240 ;;; Chinese GB2312 (simplified)
|
|
241 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
242
|
|
243 ;; (make-coding-system
|
|
244 ;; 'chinese-iso-8bit 2 ?c
|
|
245 ;; "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:CN-GB-2312)"
|
|
246 ;; '(ascii chinese-gb2312 nil nil
|
|
247 ;; nil ascii-eol ascii-cntl nil nil nil nil)
|
|
248 ;; '((safe-charsets ascii chinese-gb2312)
|
|
249 ;; (mime-charset . cn-gb-2312)))
|
|
250
|
|
;; ISO 2022 based coding system for Simplified Chinese: ASCII in G0,
;; GB2312 in G1, SiSheng in G2; G3 is set to t.
(make-coding-system
 'cn-gb-2312 'iso2022
 "Chinese EUC"
 '(charset-g0 ascii
   charset-g1 chinese-gb2312
   charset-g2 chinese-sisheng
   charset-g3 t
   mnemonic "Zh-GB/EUC"
   documentation
   "Chinese EUC (Extended Unix Code), the standard Chinese encoding on Unix.
This follows the same overall EUC principles as Japanese EUC (see the
description under Japanese EUC), but specifies different character sets:

G0: ASCII
G1: Chinese-GB2312
G2: Sisheng (PinYin - ZhuYin)"))
|
|
268
|
|
269 ;; (define-coding-system-alias 'cn-gb-2312 'chinese-iso-8bit)
|
|
270 ;; (define-coding-system-alias 'euc-china 'chinese-iso-8bit)
|
|
271 ;; (define-coding-system-alias 'euc-cn 'chinese-iso-8bit)
|
|
272
|
|
;; Alternative names for the `cn-gb-2312' coding system.
(dolist (alias '(gb2312 chinese-euc))
  (define-coding-system-alias alias 'cn-gb-2312))
|
|
275
|
|
276 ;; (make-coding-system
|
|
277 ;; 'chinese-hz 0 ?z
|
|
278 ;; "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)"
|
|
279 ;; nil
|
|
280 ;; '((safe-charsets ascii chinese-gb2312)
|
|
281 ;; (mime-charset . hz-gb-2312)
|
|
282 ;; (post-read-conversion . post-read-decode-hz)
|
|
283 ;; (pre-write-conversion . pre-write-encode-hz)))
|
|
284 ;; (put 'chinese-hz 'post-read-conversion 'post-read-decode-hz)
|
|
285 ;; (put 'chinese-hz 'pre-write-conversion 'pre-write-encode-hz)
|
|
286
|
|
;; Hz/ZW is declared as a `no-conversion' coding system; the actual work
;; is delegated to the post-read and pre-write conversion functions
;; named in the property list.
(make-coding-system
 'hz-gb-2312 'no-conversion
 "Hz/ZW (Chinese)"
 '(mnemonic "Zh-GB/Hz"
   eol-type lf
   post-read-conversion post-read-decode-hz
   pre-write-conversion pre-write-encode-hz
   documentation "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)"))

;; (define-coding-system-alias 'hz-gb-2312 'chinese-hz)
;; (define-coding-system-alias 'hz 'chinese-hz)

;; Short alias for the coding system defined above.
(define-coding-system-alias 'hz 'hz-gb-2312)
|
|
301
|
|
(defun post-read-decode-hz (len)
  "Run `decode-hz-region' on the span from point to point plus LEN.
Return whatever `decode-hz-region' returns.  The buffer's modified flag
is sampled before decoding and put back afterwards."
  (let* ((start (point))
         (was-modified (buffer-modified-p))
         ;last-coding-system-used
         (result (decode-hz-region start (+ start len))))
    ;; Decoding edits the buffer; restore the flag seen on entry.
    (set-buffer-modified-p was-modified)
    result))
|
|
310
|
|
(defun pre-write-encode-hz (from to)
  "Copy the text to be written into a new buffer and Hz-encode it there.
If FROM is a string, that string is the text; otherwise FROM and TO
delimit a region of the buffer that is current on entry.  The text is
inserted into a freshly generated \" *temp*\" buffer and the whole of
that buffer is passed to `encode-hz-region'.  The temporary buffer is
left current on return.  Always returns nil."
  (let ((source (current-buffer))
        (scratch (generate-new-buffer " *temp*")))
    ;; Deliberately no `save-excursion': the new buffer stays current.
    (set-buffer scratch)
    (cond ((stringp from)
           (insert from))
          (t
           (insert-buffer-substring source from to)))
    ;(let (last-coding-system-used)
    (encode-hz-region (point-min) (point-max))
    nil))
|
771
|
320
|
428
|
;; Language environment for Simplified Chinese (GB2312), registered
;; under the "Chinese" parent.
;; NOTE(review): the file's coding cookie says iso-2022-7bit, so the
;; `sample-text' string presumably contains ESC bytes that are not
;; visible in this view -- confirm against the original file before
;; committing.
(set-language-info-alist
 "Chinese-GB"
 '((setup-function . setup-chinese-gb-environment-internal)
   (charset chinese-gb2312 chinese-sisheng)
   (coding-system cn-gb-2312 iso-2022-7bit hz-gb-2312)
   (coding-priority cn-gb-2312 big5 iso-2022-7bit)
   (cygwin-locale "zh")
   ;; Literal locale names, followed by a predicate that matches ARG
   ;; case-insensitively against a regexp.
   (locale "zh_CN.eucCN" "zh_CN.EUC" "zh_CN"
           "chinese-s" "zh"
           (lambda (arg)
             (and arg (let ((case-fold-search t))
                        (string-match "^zh_.*.GB.*" arg)))))
   (mswindows-locale ("CHINESE" . "CHINESE_SIMPLIFIED"))
   (native-coding-system cn-gb-2312)
   (input-method . "chinese-py-punct")
   (features china-util)
   (sample-text . "Chinese ($AVPND(B,$AFUM(;0(B,$A::So(B) $ADc:C(B")
   (documentation .
    "Supports Simplified Chinese, used in mainland China.
Uses the GB2312 character set."))
 '("Chinese"))
|
|
341
|
|
342 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
343 ;; Chinese BIG5 (traditional)
|
|
344 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
345
|
|
346 ;; (make-coding-system
|
|
347 ;; 'chinese-big5 3 ?B "BIG5 8-bit encoding for Chinese (MIME:CN-BIG5)"
|
|
348 ;; nil
|
|
349 ;; '((safe-charsets ascii chinese-big5-1 chinese-big5-2)
|
|
350 ;; (mime-charset . cn-big5)
|
|
351 ;; (charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char)
|
|
352 ;; (chinese-big5-2 "BIG5" encode-big5-char))))
|
|
353
|
|
;; Big5 coding system; its type is the dedicated `big5' type.
(make-coding-system
 'big5 'big5
 "Big5"
 '(mnemonic "Zh/Big5"
   documentation
   "A non-modal encoding formed by five large Taiwanese companies
\(hence \"Big5\") to produce a character set and encoding for
traditional Chinese writing. Big5 encodes some 13,000+ characters.
ASCII is encoded as normal, and Chinese characters as two bytes, but
Chinese characters do not exclusively use the high half. The first
byte is in the high half standard position A1-FE, but the second byte
is in either low 40-7E or high A1-FE. Thus Big5 suffers from the
classic \"it might look like a slash, but it's really the second byte
of a Chinese character\"."))

;; (define-coding-system-alias 'big5 'chinese-big5)
;; (define-coding-system-alias 'cn-big5 'chinese-big5)

;; Alternative name for the `big5' coding system.
(define-coding-system-alias 'cn-big5 'big5)
|
|
373
|
|
374 ;; Big5 font requires special encoding.
|
|
(define-ccl-program ccl-encode-big5-font
  `(0
    ;; In:  R0: chinese-big5-1 or chinese-big5-2
    ;;      R1: position code 1
    ;;      R2: position code 2
    ;; Out: R1: font code point 1
    ;;      R2: font code point 2
    (;; Collapse the two position codes (each offset by 0x21, 94 per
     ;; row) into one linear index, kept in R2.
     (r2 = ((((r1 - ?\x21) * 94) + r2) - ?\x21))
     ;; Characters of chinese-big5-2 are shifted up by 6280.
     (if (r0 == ,(charset-id 'chinese-big5-2)) (r2 += 6280))
     ;; Split the index into rows of 157: the quotient plus 0xA1 is
     ;; code point 1, the remainder selects code point 2.
     (r1 = ((r2 / 157) + ?\xA1))
     (r2 %= 157)
     ;; Remainders below 0x3F get 0x40 added; all others get 0x62.
     (if (r2 < ?\x3F) (r2 += ?\x40) (r2 += ?\x62))))
  "CCL program to encode a Big5 code to code point of Big5 font.")
|
|
388
|
|
389 ;; (setq font-ccl-encoder-alist
|
|
390 ;; (cons (cons "big5" ccl-encode-big5-font) font-ccl-encoder-alist))
|
|
391
|
444
|
;; Both Big5 charsets use the same CCL font encoder.
(dolist (charset '(chinese-big5-1 chinese-big5-2))
  (set-charset-ccl-program charset 'ccl-encode-big5-font))
|
428
|
394
|
|
;; Language environment for Traditional Chinese (Big5), registered
;; under the "Chinese" parent.
;; NOTE(review): the file's coding cookie says iso-2022-7bit, so the
;; `sample-text' string presumably contains ESC bytes that are not
;; visible in this view -- confirm against the original file before
;; committing.
(set-language-info-alist
 "Chinese-BIG5"
 '((charset chinese-big5-1 chinese-big5-2)
   (coding-system big5 iso-2022-7bit)
   (coding-priority big5 cn-gb-2312 iso-2022-7bit)
   (cygwin-locale "zh_TW")
   ;; Literal locale names, followed by a predicate that matches ARG
   ;; case-insensitively against a regexp.
   (locale "zh_TW.Big5" "zh_TW.big5" "zh_CN.big5" "zh_TW"
           "chinese-t"
           (lambda (arg)
             (and arg (let ((case-fold-search t))
                        (string-match "^zh_.*.BIG5.*" arg)))))
   (mswindows-locale ("CHINESE" . "CHINESE_TRADITIONAL"))
   (native-coding-system big5)
   (input-method . "chinese-py-punct-b5")
   (features china-util)
   (sample-text . "Cantonese ($(0GnM$(B,$(0N]0*Hd(B) $(0*/=((B, $(0+$)p(B")
   (documentation .
    "Supports Traditional Chinese, used in Taiwan, Hong Kong, and Singapore.
Uses the Chinese Big5 character set."
    ))
 '("Chinese"))
|
|
415
|
|
416 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
417 ;; Chinese CNS11643 (traditional)
|
|
418 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
419
|
|
420 ;; (set-language-info-alist
|
|
421 ;; "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2
|
|
422 ;; chinese-cns11643-3 chinese-cns11643-4
|
|
423 ;; chinese-cns11643-5 chinese-cns11643-6
|
|
424 ;; chinese-cns11643-7)
|
|
425 ;; (coding-system iso-2022-cn)
|
|
426 ;; (coding-priority iso-2022-cn chinese-big5 chinese-iso-8bit)
|
|
427 ;; (features china-util)
|
|
428 ;; (input-method . "chinese-cns-quick")
|
|
429 ;; (documentation . "Support for Chinese CNS character sets."))
|
|
430 ;; '("Chinese"))
|
|
431
|
|
432 ;;; chinese.el ends here
|