Mercurial > hg > xemacs-beta
annotate lisp/mule/chinese.el @ 5791:9fae6227ede5
Silence texinfo 5.2 warnings, primarily by adding next, prev, and up
pointers to all nodes. See xemacs-patches message with ID
<5315f7bf.sHpFD7lXYR05GH6E%james@xemacs.org>.
| author | Jerry James <james@xemacs.org> |
|---|---|
| date | Thu, 27 Mar 2014 08:59:03 -0600 |
| parents | 3bc58dc9d688 |
| children |
| rev | line source |
|---|---|
| 428 | 1 ;;; chinese.el --- Support for Chinese -*- coding: iso-2022-7bit; -*- |
| 2 | |
| 3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | |
| 4 ;; Licensed to the Free Software Foundation. | |
| 5 ;; Copyright (C) 1997 MORIOKA Tomohiko | |
| 776 | 6 ;; Copyright (C) 2000, 2001, 2002 Ben Wing. |
| 428 | 7 |
| 8 ;; Keywords: multilingual, Chinese | |
| 9 | |
| 10 ;; This file is part of XEmacs. | |
| 11 | |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
12 ;; XEmacs is free software: you can redistribute it and/or modify it |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
13 ;; under the terms of the GNU General Public License as published by the |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
14 ;; Free Software Foundation, either version 3 of the License, or (at your |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
15 ;; option) any later version. |
| 428 | 16 |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
17 ;; XEmacs is distributed in the hope that it will be useful, but WITHOUT |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
18 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
19 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
20 ;; for more details. |
| 428 | 21 |
| 22 ;; You should have received a copy of the GNU General Public License | |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
23 ;; along with XEmacs. If not, see <http://www.gnu.org/licenses/>. |
| 428 | 24 |
| 25 ;;; Commentary: | |
| 26 | |
| 27 ;; For Chinese, three character sets GB2312, BIG5, and CNS11643 are | |
| 28 ;; supported. | |
| 29 | |
| 30 ;;; Code: | |
| 31 | |
| 4080 | 32 (eval-when-compile (progn (require 'ccl) (require 'china-util))) |
| 778 | 33 |
;; Syntax of Chinese characters.
;; Rows 33, 34 and 41 of chinese-gb2312 are given the syntax
;; descriptor "." in the default syntax table.
(dolist (row '(33 34 41))
  (modify-syntax-entry (vector 'chinese-gb2312 row) "."))
| 778 | 37 |
| 428 | 38 ;; CNS11643 Plane3 thru Plane7 |
| 39 ;; These represent more and more obscure Chinese characters. | |
| 40 ;; By the time you get to Plane 7, we're talking about characters | |
| 41 ;; that appear once in some ancient manuscript and whose meaning | |
| 42 ;; is unknown. | |
| 43 | |
|
5567
3bc58dc9d688
Replace #'flet by #'labels where appropriate, core code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
(labels
    ((make-chinese-cns11643-charset
      (name plane final)
      ;; Define charset NAME for CNS 11643 plane PLANE, then give it
      ;; word syntax ("w") and put it in category ?t.
      ;;   NAME  -- symbol naming the new charset.
      ;;   PLANE -- plane number as a string of digits, e.g. "3".
      ;;   FINAL -- ISO 2022 final character for the charset.
      (make-charset
       name (concat "CNS 11643 Plane " plane " (Chinese traditional)")
       `(registries
         ,(vector (concat "cns11643.1992-" plane))
         dimension 2
         chars 94
         final ,final
         graphic 0
         short-name ,(concat "CNS11643-" plane)
         ;; Planes 3..7 are registered as ISO-IR-183..187 respectively,
         ;; so the registration number is derived from the plane rather
         ;; than hard-coded as 183 for every plane.
         long-name ,(format "CNS11643-%s (Chinese traditional): ISO-IR-%d"
                            plane (+ 180 (string-to-number plane)))))
      (modify-syntax-entry name "w")
      (modify-category-entry name ?t)))
  (make-chinese-cns11643-charset 'chinese-cns11643-3 "3" ?I)
  (make-chinese-cns11643-charset 'chinese-cns11643-4 "4" ?J)
  (make-chinese-cns11643-charset 'chinese-cns11643-5 "5" ?K)
  (make-chinese-cns11643-charset 'chinese-cns11643-6 "6" ?L)
  (make-chinese-cns11643-charset 'chinese-cns11643-7 "7" ?M)
  )
| 67 | |
;; ISO-IR-165 (CCITT Extended GB)
;; It is based on CCITT Recommendation T.101, and includes GB 2312-80 +
;; GB 8565-88 table A4 + 293 characters.
(make-charset                           ; not in FSF 21.1
 'chinese-isoir165
 "ISO-IR-165 (CCITT Extended GB; Chinese simplified)"
 ;; The property list is a constant, so it is simply quoted.
 '(registries ["isoir165-0"]
   dimension 2
   chars 94
   final ?E
   graphic 0
   short-name "ISO-IR-165"
   long-name "ISO-IR-165 (CCITT Extended GB; Chinese simplified)"))
| 428 | 81 |
;; PinYin-ZhuYin
;; Defines the `chinese-sisheng' charset: dimension 1, 94 characters,
;; one column wide, left-to-right, final character ?0, graphic 0.
(make-charset 'chinese-sisheng
              "SiSheng characters for PinYin/ZhuYin"
              '(dimension
                1
                ;; XEmacs addition: second half of registry spec
                registries ["omron_udc_zh-0" "sisheng_cwnn-0"]
                chars 94
                columns 1
                direction l2r
                final ?0
                graphic 0
                short-name "SiSheng"
                long-name "SiSheng (PinYin/ZhuYin)"
                ))
| 97 | |
| 98 ;; If you prefer QUAIL to EGG, please modify below as you wish. | |
| 99 ;;(when (and (featurep 'egg) (featurep 'wnn)) | |
| 100 ;; (setq wnn-server-type 'cserver) | |
| 101 ;; (load "pinyin") | |
| 102 ;; (setq its:*standard-modes* | |
| 103 ;; (cons (its:get-mode-map "PinYin") its:*standard-modes*))) | |
| 104 | |
| 105 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
| 106 ;;; Chinese (general) | |
| 107 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
| 108 | |
| 109 ;; (make-coding-system | |
| 110 ;; 'iso-2022-cn 2 ?C | |
| 111 ;; "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN)" | |
| 112 ;; '(ascii | |
| 113 ;; (nil chinese-gb2312 chinese-cns11643-1) | |
| 114 ;; (nil chinese-cns11643-2) | |
| 115 ;; nil | |
| 116 ;; nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil | |
| 117 ;; init-bol) | |
| 118 ;; '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2) | |
| 119 ;; (mime-charset . iso-2022-cn))) | |
| 120 | |
| 121 ;; (define-coding-system-alias 'chinese-iso-7bit 'iso-2022-cn) | |
| 122 | |
| 123 ;; (make-coding-system | |
| 124 ;; 'iso-2022-cn-ext 2 ?C | |
| 125 ;; "ISO 2022 based 7bit encoding for Chinese GB and CNS (MIME:ISO-2022-CN-EXT)" | |
| 126 ;; '(ascii | |
| 127 ;; (nil chinese-gb2312 chinese-cns11643-1) | |
| 128 ;; (nil chinese-cns11643-2) | |
| 129 ;; (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 | |
| 130 ;; chinese-cns11643-6 chinese-cns11643-7) | |
| 131 ;; nil ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil | |
| 132 ;; init-bol) | |
| 133 ;; '((safe-charsets ascii chinese-gb2312 chinese-cns11643-1 chinese-cns11643-2 | |
| 134 ;; chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 | |
| 135 ;; chinese-cns11643-6 chinese-cns11643-7) | |
| 136 ;; (mime-charset . iso-2022-cn-ext))) | |
| 137 | |
| 138 | |
| 139 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
| 140 ;;; Chinese GB2312 (simplified) | |
| 141 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
| 142 | |
| 143 ;; (make-coding-system | |
| 144 ;; 'chinese-iso-8bit 2 ?c | |
| 145 ;; "ISO 2022 based EUC encoding for Chinese GB2312 (MIME:CN-GB-2312)" | |
| 146 ;; '(ascii chinese-gb2312 nil nil | |
| 147 ;; nil ascii-eol ascii-cntl nil nil nil nil) | |
| 148 ;; '((safe-charsets ascii chinese-gb2312) | |
| 149 ;; (mime-charset . cn-gb-2312))) | |
| 150 | |
;; Define the `cn-gb-2312' coding system (type `iso2022'): ASCII in G0,
;; chinese-gb2312 in G1, chinese-sisheng in G2; G3 is set to t.
(make-coding-system
 'cn-gb-2312 'iso2022
 "Chinese EUC"
 '(charset-g0 ascii
   charset-g1 chinese-gb2312
   charset-g2 chinese-sisheng
   charset-g3 t
   safe-charsets (ascii chinese-gb2312 chinese-sisheng)
   mnemonic "Zh-GB/EUC"
   documentation
   "Chinese EUC (Extended Unix Code), the standard Chinese encoding on Unix.
This follows the same overall EUC principles as Japanese EUC (see the
description under Japanese EUC), but specifies different character sets:

G0: ASCII
G1: Chinese-GB2312
G2: Sisheng (PinYin - ZhuYin)"
   ))
| 169 | |
;; Alternative names for `cn-gb-2312'.  `euc-cn' exists for consistency
;; with euc-jp and euc-ko.
(dolist (alias '(euc-cn gb2312 chinese-euc))
  (define-coding-system-alias alias 'cn-gb-2312))
| 175 | |
| 176 ;; (make-coding-system | |
| 177 ;; 'chinese-hz 0 ?z | |
| 178 ;; "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)" | |
| 179 ;; nil | |
| 180 ;; '((safe-charsets ascii chinese-gb2312) | |
| 181 ;; (mime-charset . hz-gb-2312) | |
| 182 ;; (post-read-conversion . post-read-decode-hz) | |
| 183 ;; (pre-write-conversion . pre-write-encode-hz))) | |
| 184 ;; (put 'chinese-hz 'post-read-conversion 'post-read-decode-hz) | |
| 185 ;; (put 'chinese-hz 'pre-write-conversion 'pre-write-encode-hz) | |
| 186 | |
;; Define the `hz-gb-2312' coding system.  Its type is `no-conversion';
;; the Hz work is delegated to the functions named by the
;; post-read-conversion and pre-write-conversion properties below.
(make-coding-system
 'hz-gb-2312 'no-conversion
 "Hz/ZW (Chinese)"
 '(mnemonic "Zh-GB/Hz"
   eol-type lf
   safe-charsets (ascii chinese-gb2312)
   post-read-conversion post-read-decode-hz
   pre-write-conversion pre-write-encode-hz
   documentation "Hz/ZW 7-bit encoding for Chinese GB2312 (MIME:HZ-GB-2312)"
   ))

;; (define-coding-system-alias 'hz-gb-2312 'chinese-hz)
;; (define-coding-system-alias 'hz 'chinese-hz)

;; `hz' is a shorter name for `hz-gb-2312'.
(define-coding-system-alias 'hz 'hz-gb-2312)
| 202 | |
(defun post-read-decode-hz (len)
  "Decode the LEN characters following point with `decode-hz-region'.
The buffer's modified flag is put back to the value it had on entry.
Returns whatever `decode-hz-region' returns."
  (let* ((start (point))
         (end (+ start len))
         ;; Remember the flag now; decoding edits the buffer.
         (was-modified (buffer-modified-p)))
    (prog1 (decode-hz-region start end)
      (set-buffer-modified-p was-modified))))
| 211 | |
(defun pre-write-encode-hz (from to)
  "Copy the text to be written into a fresh buffer and Hz-encode it there.
If FROM is a string, that string is the text and TO is ignored;
otherwise FROM and TO delimit the text in the current buffer.
The new \" *temp*\" buffer is left current on return.  Returns nil."
  (let ((source (current-buffer))
        (scratch (generate-new-buffer " *temp*")))
    (set-buffer scratch)
    (cond ((stringp from)
           (insert from))
          (t
           (insert-buffer-substring source from to)))
    (encode-hz-region 1 (point-max))
    nil))
| 771 | 221 |
;; Register the "Chinese-GB" language environment under the "Chinese"
;; parent.
;; NOTE(review): the sample-text value is presumably ISO-2022-7bit encoded
;; text (per this file's coding cookie) -- keep its bytes untouched.
(set-language-info-alist
 "Chinese-GB" '((setup-function . setup-chinese-gb-environment-internal)
                (charset chinese-gb2312 chinese-sisheng)
                (coding-system cn-gb-2312 iso-2022-7bit hz-gb-2312)
                (coding-priority cn-gb-2312 big5 iso-2022-7bit)
                (cygwin-locale "zh")
                (mswindows-locale ("CHINESE" . "CHINESE_SIMPLIFIED"))
                (native-coding-system cn-gb-2312)
                (input-method . "chinese-py-punct")
                (features china-util)
                (sample-text . "Chinese ($AVPND(B,$AFUM(;0(B,$A::So(B) $ADc:C(B")
                (documentation .
                 "Supports Simplified Chinese, used in mainland China.
Uses the GB2312 character set."))
 '("Chinese"))
| 237 | |
;; Set the locale information separately so that the lambda gets compiled.
(set-language-info
 "Chinese-GB" 'locale
 (list "zh_CN.eucCN" "zh_CN.EUC" "zh_CN" "chinese-s" "zh"
       ;; Predicate form: non-nil when ARG is non-nil and matches the
       ;; regexp, ignoring case.
       ;; NOTE(review): the `.' just before GB is unescaped, so it matches
       ;; any character rather than only a literal dot -- confirm intent.
       #'(lambda (arg)
           (when arg
             (let ((case-fold-search t))
               (string-match "^zh_.*.GB.*" arg))))))
| 245 | |
| 428 | 246 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 247 ;; Chinese BIG5 (traditional) | |
| 248 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
| 249 | |
| 250 ;; (make-coding-system | |
| 251 ;; 'chinese-big5 3 ?B "BIG5 8-bit encoding for Chinese (MIME:CN-BIG5)" | |
| 252 ;; nil | |
| 253 ;; '((safe-charsets ascii chinese-big5-1 chinese-big5-2) | |
| 254 ;; (mime-charset . cn-big5) | |
| 255 ;; (charset-origin-alist (chinese-big5-1 "BIG5" encode-big5-char) | |
| 256 ;; (chinese-big5-2 "BIG5" encode-big5-char)))) | |
| 257 | |
;; Define the `big5' coding system (type `big5'), safe for ASCII and the
;; two chinese-big5 charsets.
(make-coding-system
 'big5 'big5
 "Big5"
 '(mnemonic "Zh/Big5"
   safe-charsets (ascii chinese-big5-1 chinese-big5-2)
   documentation
   "A non-modal encoding formed by five large Taiwanese companies
\(hence \"Big5\") to produce a character set and encoding for
traditional Chinese writing. Big5 encodes some 13,000+ characters.
ASCII is encoded as normal, and Chinese characters as two bytes, but
Chinese characters do not exclusively use the high half. The first
byte is in the high half standard position A1-FE, but the second byte
is in either low 40-7E or high A1-FE. Thus Big5 suffers from the
classic \"it might look like a slash, but it's really the second byte
of a Chinese character\"."))

;; (define-coding-system-alias 'big5 'chinese-big5)
;; (define-coding-system-alias 'cn-big5 'chinese-big5)

;; `cn-big5' is an alternative name for `big5'.
(define-coding-system-alias 'cn-big5 'big5)
| 278 | |
;; Big5 font requires special encoding.
(define-ccl-program ccl-encode-big5-font
  `(0
    ;; In:  R0:chinese-big5-1 or chinese-big5-2
    ;;      R1:position code 1
    ;;      R2:position code 2
    ;; Out: R1:font code point 1
    ;;      R2:font code point 2
    ;;
    ;; Step 1: turn the two position codes (each starting at #x21) into a
    ;; linear index over a 94-wide grid.
    ((r2 = ((((r1 - #x21) * 94) + r2) - #x21))
     ;; Step 2: characters of chinese-big5-2 are shifted by 6280
     ;; (= 40 * 157) so that they follow those of chinese-big5-1.
     (if (r0 == ,(charset-id 'chinese-big5-2)) (r2 += 6280))
     ;; Step 3: split the index into 157 code points per first byte;
     ;; first bytes start at #xA1.
     (r1 = ((r2 / 157) + #xA1))
     (r2 %= 157)
     ;; Step 4: the first #x3F second-byte values start at #x40, the
     ;; remainder start at #xA1 (#x3F + #x62 = #xA1).
     (if (r2 < #x3F) (r2 += #x40) (r2 += #x62))))
  "CCL program to encode a Big5 code to code point of Big5 font.")
| 293 | |
;; Both Big5 charsets use the same font-encoding CCL program.
(dolist (charset '(chinese-big5-1 chinese-big5-2))
  (set-charset-ccl-program charset 'ccl-encode-big5-font))
| 428 | 296 |
;; Register the "Chinese-BIG5" language environment under the "Chinese"
;; parent.
;; NOTE(review): the sample-text value is presumably ISO-2022-7bit encoded
;; text (per this file's coding cookie) -- keep its bytes untouched.
(set-language-info-alist
 "Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2)
                  (coding-system big5 iso-2022-7bit)
                  (coding-priority big5 cn-gb-2312 iso-2022-7bit)
                  (cygwin-locale "zh_TW")
                  (mswindows-locale ("CHINESE" . "CHINESE_TRADITIONAL"))
                  (native-coding-system big5)
                  (input-method . "chinese-py-punct-b5")
                  (features china-util)
                  (sample-text . "Cantonese ($(0GnM$(B,$(0N]0*Hd(B) $(0*/=((B, $(0+$)p(B")
                  (documentation .
                   "Supports Traditional Chinese, used in Taiwan, Hong Kong, and Singapore.
Uses the Chinese Big5 character set."
                   ))
 '("Chinese"))
| 312 | |
;; Set the locale information separately so that the lambda gets compiled.
(set-language-info
 "Chinese-BIG5" 'locale
 (list "zh_TW.Big5" "zh_TW.big5" "zh_CN.big5" "zh_TW"
       "chinese-t"
       ;; Predicate form: non-nil when ARG is non-nil and matches the
       ;; regexp, ignoring case.
       ;; NOTE(review): the `.' just before BIG5 is unescaped, so it
       ;; matches any character rather than only a literal dot --
       ;; confirm intent.
       #'(lambda (arg)
           (when arg
             (let ((case-fold-search t))
               (string-match "^zh_.*.BIG5.*" arg))))))
| 321 | |
| 428 | 322 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 323 ;; Chinese CNS11643 (traditional) | |
| 324 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
| 325 | |
| 326 ;; (set-language-info-alist | |
| 327 ;; "Chinese-CNS" '((charset chinese-cns11643-1 chinese-cns11643-2 | |
| 328 ;; chinese-cns11643-3 chinese-cns11643-4 | |
| 329 ;; chinese-cns11643-5 chinese-cns11643-6 | |
| 330 ;; chinese-cns11643-7) | |
| 331 ;; (coding-system iso-2022-cn) | |
| 332 ;; (coding-priority iso-2022-cn chinese-big5 chinese-iso-8bit) | |
| 333 ;; (features china-util) | |
| 334 ;; (input-method . "chinese-cns-quick") | |
| 335 ;; (documentation . "Support for Chinese CNS character sets.")) | |
| 336 ;; '("Chinese")) | |
| 337 | |
| 338 ;;; chinese.el ends here |
