Mercurial > hg > xemacs-beta
annotate lisp/mule/japanese.el @ 5818:15b0715c204d
Avoid passing patterns with charset property to FcNameUnparse.
Prevents crash reported by Raymond Toy.
| author | Stephen J. Turnbull <stephen@xemacs.org> |
|---|---|
| date | Sat, 18 Oct 2014 21:20:42 +0900 |
| parents | ac37a5f7e5be |
| children | bbe4146603db |
| rev | line source |
|---|---|
| 428 | 1 ;;; japanese.el --- Japanese support -*- coding: iso-2022-7bit; -*- |
| 2 | |
| 3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | |
| 4 ;; Licensed to the Free Software Foundation. | |
| 5 ;; Copyright (C) 1997 MORIOKA Tomohiko | |
| 771 | 6 ;; Copyright (C) 2000, 2002 Ben Wing. |
| 428 | 7 |
| 8 ;; Keywords: multilingual, Japanese | |
| 9 | |
| 10 ;; This file is part of XEmacs. | |
| 11 | |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
12 ;; XEmacs is free software: you can redistribute it and/or modify it |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
13 ;; under the terms of the GNU General Public License as published by the |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
14 ;; Free Software Foundation, either version 3 of the License, or (at your |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
15 ;; option) any later version. |
| 428 | 16 |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
17 ;; XEmacs is distributed in the hope that it will be useful, but WITHOUT |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
18 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
19 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
20 ;; for more details. |
| 428 | 21 |
| 22 ;; You should have received a copy of the GNU General Public License | |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
23 ;; along with XEmacs. If not, see <http://www.gnu.org/licenses/>. |
| 428 | 24 |
| 771 | 25 ;;; Synched up with: Emacs 20.6 (international/japanese.el). |
| 26 | |
| 428 | 27 ;;; Commentary: |
| 28 | |
| 29 ;; For Japanese, character sets JISX0201, JISX0208, JISX0212 are | |
| 30 ;; supported. | |
| 31 | |
| 32 ;;; Code: | |
| 33 | |
| 778 | 34 (make-charset 'japanese-jisx0213-1 "JISX0213 Plane 1 (Japanese)" |
| 35 '(dimension | |
| 36 2 | |
| 3659 | 37 registries ["JISX0213.2000-1"] |
| 778 | 38 chars 94 |
| 39 columns 2 | |
| 40 direction l2r | |
| 41 final ?O | |
| 42 graphic 0 | |
| 43 short-name "JISX0213-1" | |
| 44 long-name "JISX0213-1" | |
| 45 )) | |
| 46 | |
| 47 ;; JISX0213 Plane 2 | |
| 48 (make-charset 'japanese-jisx0213-2 "JISX0213 Plane 2 (Japanese)" | |
| 49 '(dimension | |
| 50 2 | |
| 3659 | 51 registries ["JISX0213.2000-2"] |
| 778 | 52 chars 94 |
| 53 columns 2 | |
| 54 direction l2r | |
| 55 final ?P | |
| 56 graphic 0 | |
| 57 short-name "JISX0213-2" | |
| 58 long-name "JISX0213-2" | |
| 59 )) | |
| 60 | |
| 428 | 61 ;;; Syntax of Japanese characters. |
| 62 (loop for row in '(33 34 40) | |
| 63 do (modify-syntax-entry `[japanese-jisx0208 ,row] "_")) | |
| 64 (loop for char in '(?$B!<(B ?$B!+(B ?$B!,(B ?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B) | |
| 65 do (modify-syntax-entry char "w")) | |
| 66 (modify-syntax-entry ?\$B!J(B "($B!K(B") | |
| 67 (modify-syntax-entry ?\$B!N(B "($B!O(B") | |
| 68 (modify-syntax-entry ?\$B!P(B "($B!Q(B") | |
| 69 (modify-syntax-entry ?\$B!V(B "($B!W(B") | |
| 70 (modify-syntax-entry ?\$B!X(B "($B!Y(B") | |
| 71 (modify-syntax-entry ?\$B!K(B ")$B!J(B") | |
| 72 (modify-syntax-entry ?\$B!O(B ")$B!N(B") | |
| 73 (modify-syntax-entry ?\$B!Q(B ")$B!P(B") | |
| 74 (modify-syntax-entry ?\$B!W(B ")$B!V(B") | |
| 75 (modify-syntax-entry ?\$B!Y(B ")$B!X(B") | |
| 76 | |
| 77 ;;; Character categories S, A, H, K, G, Y, and C | |
| 78 (define-category ?S "Japanese 2-byte symbol character.") | |
| 79 (modify-category-entry [japanese-jisx0208 33] ?S) | |
| 80 (modify-category-entry [japanese-jisx0208 34] ?S) | |
| 81 (modify-category-entry [japanese-jisx0208 40] ?S) | |
| 82 (define-category ?A "Japanese 2-byte Alphanumeric character.") | |
| 83 (modify-category-entry [japanese-jisx0208 35] ?A) | |
| 84 (define-category ?H "Japanese 2-byte Hiragana character.") | |
| 85 (modify-category-entry [japanese-jisx0208 36] ?H) | |
| 86 (define-category ?K "Japanese 2-byte Katakana character.") | |
| 87 (modify-category-entry [japanese-jisx0208 37] ?K) | |
| 88 (define-category ?G "Japanese 2-byte Greek character.") | |
| 89 (modify-category-entry [japanese-jisx0208 38] ?G) | |
| 90 (define-category ?Y "Japanese 2-byte Cyrillic character.") | |
| 91 (modify-category-entry [japanese-jisx0208 39] ?Y) | |
| 92 (define-category ?C "Japanese 2-byte Kanji characters.") | |
| 93 (loop for row from 48 to 126 | |
| 94 do (modify-category-entry `[japanese-jisx0208 ,row] ?C)) | |
| 95 (loop for char in '(?$B!<(B ?$B!+(B ?$B!,(B) | |
| 96 do (modify-category-entry char ?K) | |
| 97 (modify-category-entry char ?H)) | |
| 98 (loop for char in '(?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B) | |
| 99 do (modify-category-entry char ?C)) | |
| 100 (modify-category-entry 'japanese-jisx0212 ?C) | |
| 101 | |
| 102 (defvar japanese-word-regexp | |
| 103 "\\cA+\\cH*\\|\\cK+\\cH*\\|\\cC+\\cH*\\|\\cH+\\|\\ck+\\|\\sw+" | |
| 104 "Regular expression used to match a Japanese word.") | |
| 105 | |
| 106 (set-word-regexp japanese-word-regexp) | |
| 107 (setq forward-word-regexp "\\w\\>") | |
| 108 (setq backward-word-regexp "\\<\\w") | |
| 109 | |
| 110 ;;; Paragraph setting | |
| 111 (setq sentence-end | |
| 112 (concat | |
| 113 "\\(" | |
| 114 "\\(" | |
| 115 "[.?!][]\"')}]*" | |
| 116 "\\|" | |
| 117 "[$B!%!)!*(B][$B!O!I!G!K!Q!M!S!U!W!Y(B]*" | |
| 118 "\\)" | |
| 119 "\\($\\|\t\\| \\)" | |
| 120 "\\|" | |
| 121 "$B!#(B" | |
| 122 "\\)" | |
| 123 "[ \t\n]*")) | |
| 1891 | 124 |
| 125 ;; allow paragraphs to start with a zenkaku space | |
| 126 (setq paragraph-start "[ $B!!(B\t\n\f]") | |
| 127 (setq paragraph-separate "[ $B!!(B\t\f]*$") | |
| 428 | 128 |
| 129 ;; EGG specific setup | |
| 130 (define-egg-environment 'japanese | |
| 131 "Japanese settings for egg." | |
| 132 (lambda () | |
| 771 | 133 (with-boundp '(its:*standard-modes* its:*current-map* wnn-server-type) |
| 134 (with-fboundp 'its:get-mode-map | |
| 135 (when (not (featurep 'egg-jpn)) | |
| 136 (load "its-hira") | |
| 137 (load "its-kata") | |
| 138 (load "its-hankaku") | |
| 139 (load "its-zenkaku") | |
| 140 (setq its:*standard-modes* | |
| 141 (append | |
| 142 (list (its:get-mode-map "roma-kana") | |
| 143 (its:get-mode-map "roma-kata") | |
| 144 (its:get-mode-map "downcase") | |
| 145 (its:get-mode-map "upcase") | |
| 146 (its:get-mode-map "zenkaku-downcase") | |
| 147 (its:get-mode-map "zenkaku-upcase")) | |
| 148 its:*standard-modes*)) | |
| 149 (provide 'egg-jpn)) | |
| 150 (setq wnn-server-type 'jserver) | |
| 151 ;; Can't do this here any more. Must do it when selecting egg-wnn | |
| 152 ;; or egg-sj3 | |
| 153 ;; (setq egg-default-startup-file "eggrc-wnn") | |
| 154 (setq-default its:*current-map* (its:get-mode-map "roma-kana")))))) | |
| 428 | 155 |
| 450 | 156 ;; stuff for providing grammatic processing of Japanese text |
| 428 | 157 ;; something like this should probably be created for all environments... |
| 450 | 158 ;; #### Arrgh. This stuff should be defvar'd in either fill.el or kinsoku.el. |
| 159 ;; Then the language environment should set these things, probably buffer- | |
| 160 ;; locally. | |
| 428 | 161 |
| 771 | 162 ;; #### will be moved to fill.el |
| 163 (defvar space-insertable | |
| 164 (let* ((aletter (concat "\\(" ascii-char "\\|" kanji-char "\\)")) | |
| 165 (kanji-space-insertable | |
| 166 (concat | |
| 428 | 167 "$B!"(B" aletter "\\|" |
| 168 "$B!#(B" aletter "\\|" | |
| 169 aletter "$B!J(B" "\\|" | |
| 170 "$B!K(B" aletter "\\|" | |
| 171 ascii-alphanumeric kanji-kanji-char "\\|" | |
| 771 | 172 kanji-kanji-char ascii-alphanumeric))) |
| 173 (concat " " aletter "\\|" kanji-space-insertable)) | |
| 174 "Regexp for finding points that can have spaces inserted into them for justification") | |
| 428 | 175 |
| 771 | 176 ;; Beginning of FSF synching with international/japanese.el. |
| 177 | |
| 428 | 178 ;; (make-coding-system |
| 179 ;; 'iso-2022-jp 2 ?J | |
| 180 ;; "ISO 2022 based 7bit encoding for Japanese (MIME:ISO-2022-JP)" | |
| 181 ;; '((ascii japanese-jisx0208-1978 japanese-jisx0208 | |
| 182 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201) nil nil nil | |
| 183 ;; short ascii-eol ascii-cntl seven) | |
| 184 ;; '((safe-charsets ascii japanese-jisx0208-1978 japanese-jisx0208 | |
| 185 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201) | |
| 186 ;; (mime-charset . iso-2022-jp))) | |
| 187 | |
| 188 (make-coding-system | |
| 189 'iso-2022-jp 'iso2022 | |
| 771 | 190 "ISO-2022-JP (Japanese mail)" |
| 428 | 191 '(charset-g0 ascii |
| 192 short t | |
| 193 seven t | |
| 194 input-charset-conversion ((latin-jisx0201 ascii) | |
| 195 (japanese-jisx0208-1978 japanese-jisx0208)) | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
196 safe-charsets (ascii japanese-jisx0208-1978 japanese-jisx0208 |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
197 latin-jisx0201 japanese-jisx0212 katakana-jisx0201) |
| 428 | 198 mnemonic "MULE/7bit" |
| 771 | 199 documentation |
| 200 "Coding system used for communication with mail and news in Japan." | |
| 201 )) | |
| 202 | |
| 203 (make-coding-system | |
| 204 'jis7 'iso2022 | |
| 205 "JIS7 (old Japanese 7-bit encoding)" | |
| 206 '(charset-g0 ascii | |
| 207 charset-g1 katakana-jisx0201 | |
| 208 short t | |
| 209 seven t | |
| 210 lock-shift t | |
| 211 input-charset-conversion ((latin-jisx0201 ascii) | |
| 212 (japanese-jisx0208-1978 japanese-jisx0208)) | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
213 safe-charsets (latin-jisx0201 ascii japanese-jisx0208-1978 japanese-jisx0208) |
| 771 | 214 mnemonic "JIS7" |
| 215 documentation | |
| 216 "Old JIS 7-bit encoding; mostly superseded by ISO-2022-JP. | |
| 217 Uses locking-shift (SI/SO) to select half-width katakana." | |
| 218 )) | |
| 219 | |
| 220 (make-coding-system | |
| 221 'jis8 'iso2022 | |
| 222 "JIS8 (old Japanese 8-bit encoding)" | |
| 223 '(charset-g0 ascii | |
| 224 charset-g1 katakana-jisx0201 | |
| 225 short t | |
| 226 input-charset-conversion ((latin-jisx0201 ascii) | |
| 227 (japanese-jisx0208-1978 japanese-jisx0208)) | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
228 safe-charsets (latin-jisx0201 ascii japanese-jisx0208-1978 |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
229 japanese-jisx0208) |
| 771 | 230 mnemonic "JIS8" |
| 231 documentation | |
| 232 "Old JIS 8-bit encoding; mostly superseded by ISO-2022-JP. | |
| 233 Uses high bytes for half-width katakana." | |
| 428 | 234 )) |
| 235 | |
| 236 (define-coding-system-alias 'junet 'iso-2022-jp) | |
| 237 | |
| 238 ;; (make-coding-system | |
| 239 ;; 'iso-2022-jp-2 2 ?J | |
| 240 ;; "ISO 2022 based 7bit encoding for CJK, Latin-1, and Greek (MIME:ISO-2022-JP-2)" | |
| 241 ;; '((ascii japanese-jisx0208-1978 japanese-jisx0208 | |
| 242 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201 | |
| 243 ;; chinese-gb2312 korean-ksc5601) nil | |
| 244 ;; (nil latin-iso8859-1 greek-iso8859-7) nil | |
| 245 ;; short ascii-eol ascii-cntl seven nil single-shift) | |
| 246 ;; '((safe-charsets ascii japanese-jisx0208-1978 japanese-jisx0208 | |
| 247 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201 | |
| 248 ;; chinese-gb2312 korean-ksc5601 | |
| 249 ;; latin-iso8859-1 greek-iso8859-7) | |
| 250 ;; (mime-charset . iso-2022-jp-2))) | |
| 251 | |
| 252 ;; (make-coding-system | |
| 253 ;; 'japanese-shift-jis 1 ?S | |
| 254 ;; "Shift-JIS 8-bit encoding for Japanese (MIME:SHIFT_JIS)" | |
| 255 ;; nil | |
| 256 ;; '((safe-charsets ascii japanese-jisx0208 japanese-jisx0208-1978 | |
| 257 ;; latin-jisx0201 katakana-jisx0201) | |
| 771 | 258 ;; (mime-charset . shift-jis) |
| 428 | 259 ;; (charset-origin-alist (japanese-jisx0208 "SJIS" encode-sjis-char) |
| 260 ;; (katakana-jisx0201 "SJIS" encode-sjis-char)))) | |
| 261 | |
| 262 (make-coding-system | |
| 771 | 263 'shift-jis 'shift-jis |
| 264 "Shift-JIS" | |
| 265 '(mnemonic "Ja/SJIS" | |
| 266 documentation "The standard Japanese encoding in MS Windows." | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
267 safe-charsets (ascii japanese-jisx0208 japanese-jisx0208-1978 |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
268 latin-jisx0201 katakana-jisx0201) |
| 771 | 269 )) |
| 428 | 270 |
| 771 | 271 ;; A former name? |
| 272 (define-coding-system-alias 'shift_jis 'shift-jis) | |
| 273 | |
| 274 ;; FSF: | |
| 275 ;; (define-coding-system-alias 'shift-jis 'japanese-shift-jis) | |
| 428 | 276 ;; (define-coding-system-alias 'sjis 'japanese-shift-jis) |
| 277 | |
| 278 ;; (make-coding-system | |
| 279 ;; 'japanese-iso-7bit-1978-irv 2 ?j | |
| 280 ;; "ISO 2022 based 7-bit encoding for Japanese JISX0208-1978 and JISX0201-Roman" | |
| 281 ;; '((ascii japanese-jisx0208-1978 japanese-jisx0208 | |
| 282 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201 t) nil nil nil | |
| 283 ;; short ascii-eol ascii-cntl seven nil nil use-roman use-oldjis) | |
| 284 ;; '(ascii japanese-jisx0208-1978 japanese-jisx0208 latin-jisx0201)) | |
| 285 | |
| 286 (make-coding-system | |
| 287 'iso-2022-jp-1978-irv 'iso2022 | |
| 771 | 288 "ISO-2022-JP-1978-IRV (Old JIS)" |
| 428 | 289 '(charset-g0 ascii |
| 290 short t | |
| 291 seven t | |
| 292 output-charset-conversion ((ascii latin-jisx0201) | |
| 293 (japanese-jisx0208 japanese-jisx0208-1978)) | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
294 safe-charsets (ascii latin-jisx0201 japanese-jisx0208 |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
295 japanese-jisx0208-1978) |
| 771 | 296 documentation |
| 297 "This is a coding system used for old JIS terminals. It's an ISO | |
| 298 2022 based 7-bit encoding for Japanese JISX0208-1978 and JISX0201-Roman." | |
| 428 | 299 mnemonic "Ja-78/7bit" |
| 300 )) | |
| 301 | |
| 771 | 302 ;; FSF: |
| 428 | 303 ;; (define-coding-system-alias 'iso-2022-jp-1978-irv 'japanese-iso-7bit-1978-irv) |
| 304 ;; (define-coding-system-alias 'old-jis 'japanese-iso-7bit-1978-irv) | |
| 305 | |
| 306 (define-coding-system-alias 'old-jis 'iso-2022-jp-1978-irv) | |
| 307 | |
| 308 ;; (make-coding-system | |
| 309 ;; 'japanese-iso-8bit 2 ?E | |
| 310 ;; "ISO 2022 based EUC encoding for Japanese (MIME:EUC-JP)" | |
| 311 ;; '(ascii japanese-jisx0208 katakana-jisx0201 japanese-jisx0212 | |
| 312 ;; short ascii-eol ascii-cntl nil nil single-shift) | |
| 313 ;; '((safe-charsets ascii latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978 | |
| 771 | 314 ;; katakana-jisx0201 japanese-jisx0212) |
| 428 | 315 ;; (mime-charset . euc-jp))) |
| 771 | 316 ;; |
| 428 | 317 (make-coding-system |
| 318 'euc-jp 'iso2022 | |
| 771 | 319 "Japanese EUC" |
| 428 | 320 '(charset-g0 ascii |
| 321 charset-g1 japanese-jisx0208 | |
| 322 charset-g2 katakana-jisx0201 | |
| 323 charset-g3 japanese-jisx0212 | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
324 safe-charsets (ascii japanese-jisx0208 katakana-jisx0201 japanese-jisx0212) |
| 428 | 325 short t |
| 326 mnemonic "Ja/EUC" | |
| 771 | 327 documentation |
| 328 "Japanese EUC (Extended Unix Code), the standard Japanese encoding in Unix. | |
| 329 Equivalent MIME encoding: EUC-JP. | |
| 330 | |
| 331 Japanese EUC was the forefather of all the different EUC's, which all follow | |
| 332 a similar structure: | |
| 333 | |
| 334 1. Up to four character sets can be encoded. | |
| 335 | |
| 336 2. This is a non-modal encoding, i.e. it is impossible to set a global state | |
| 337 that affects anything more than the directly following character. [Modal | |
| 338 encodings typically have escape sequences to change global settings, which | |
| 339 affect all the following characters until the setting is turned off. | |
| 340 Modal encodings are typically used when it's necessary to support text in | |
| 341 a wide variety of character sets and still keep basic ASCII compatibility, | |
| 342 or in cases (e.g. sending email) where the allowed characters that can | |
| 343 pass the gateway are small and (typically) no high-bit range is available. | |
| 344 | |
| 345 3. The first character set is always ASCII or some national variant of it, | |
| 346 and encoded in the standard ASCII position. All characters in all other | |
| 347 character sets are encoded entirely using high-half bytes. Therefore, | |
| 348 it is safe to scan for ASCII characters, such as '/' to separate path | |
| 349 components, in the obvious way. | |
| 350 | |
| 351 4. Each of the other three character sets can be of dimension 1, 2, or 3. | |
| 352 A dimension-1 character set contains 96 bytes; a dimension-2 character | |
| 353 set contains 96 x 96 bytes; and a dimension-3 character set contains | |
| 354 96 x 96 x 96 bytes. 94 instead of 96 as the number of characters per | |
| 355 dimension is also supported. Character sets of dimensions 1, 2, and 3 | |
| 356 use 1-3 bytes, respectively, to encode a character, and each byte is | |
| 357 in the range A0-FF (or A1-FE for those with 94 bytes per dimension). | |
| 358 | |
| 359 5. The four character sets encoded in EUC are called G0, G1, G2, and G3. | |
| 360 As mentioned earlier, G0 is ASCII or some variant, and encoded into | |
| 361 the ASCII positions 00 - 7F. G1 is encoded directly by laying out | |
| 362 its bytes. G2 is encoded using an 8E byte followed by the character's | |
| 363 bytes. G3 is encoded using an 8F byte followed by the character's bytes." | |
| 364 | |
| 428 | 365 )) |
| 366 | |
| 771 | 367 ;; FSF: |
| 428 | 368 ;; (define-coding-system-alias 'euc-japan-1990 'japanese-iso-8bit) |
| 369 ;; (define-coding-system-alias 'euc-japan 'japanese-iso-8bit) | |
| 370 ;; (define-coding-system-alias 'euc-jp 'japanese-iso-8bit) | |
| 371 | |
| 372 (define-coding-system-alias 'euc-japan 'euc-jp) ; only for w3 | |
| 373 (define-coding-system-alias 'japanese-euc 'euc-jp) | |
| 374 | |
| 375 (set-language-info-alist | |
| 376 "Japanese" '((setup-function . setup-japanese-environment-internal) | |
| 377 (exit-function . exit-japanese-environment) | |
| 378 (tutorial . "TUTORIAL.ja") | |
| 379 (charset japanese-jisx0208 japanese-jisx0208-1978 | |
| 380 japanese-jisx0212 latin-jisx0201 katakana-jisx0201) | |
| 381 (coding-system iso-2022-jp euc-jp | |
| 771 | 382 shift-jis iso-2022-jp-2) |
| 428 | 383 (coding-priority iso-2022-jp euc-jp |
| 771 | 384 shift-jis iso-2022-jp-2) |
| 385 ;; These locale names come from the X11R6 locale.alias file. | |
| 386 ;; What an incredible fucking mess!!!!!!!!!!!!!!!!!!!!!!!!!! | |
| 387 ;; What's worse is that typical Unix implementations of | |
| 388 ;; setlocale() return back exactly what you passed them, even | |
| 389 ;; though it's perfectly allowed (and in fact done under | |
| 390 ;; Windows) to expand the locale to its full form (including | |
| 391 ;; encoding), so you have some hint as to the encoding!!! | |
| 392 ;; | |
| 393 ;; We order them in such a way that we're maximally likely | |
| 394 ;; to get an encoding name. | |
| 395 ;; | |
| 396 (locale | |
| 397 ;; SunOS 5.7: ja ja_JP.PCK ja_JP.UTF-8 japanese | |
| 398 ;; RedHat Linux 6.2J: ja ja_JP ja_JP.eucJP ja_JP.ujis \ | |
| 399 ;; japanese japanese.euc | |
| 400 ;; HP-UX 10.20: ja_JP.SJIS ja_JP.eucJPput ja_JP.kana8 | |
| 401 ;; Cygwin b20.1: ja_JP.EUC | |
| 402 ;; FreeBSD 2.2.8: ja_JP.EUC ja_JP.SJIS | |
| 403 | |
| 404 ;; EUC locales | |
| 405 "ja_JP.EUC" | |
| 406 "ja_JP.eucJP" | |
| 407 "ja_JP.AJEC" | |
| 408 "ja_JP.ujis" | |
| 409 "Japanese-EUC" | |
| 410 "japanese.euc" | |
| 411 | |
| 412 ;; Shift-JIS locales | |
| 413 "ja_JP.SJIS" | |
| 414 "ja_JP.mscode" | |
| 415 "ja.SJIS" | |
| 416 | |
| 417 ;; 7-bit locales | |
| 418 "ja_JP.ISO-2022-JP" | |
| 419 "ja_JP.jis7" | |
| 420 "ja_JP.pjis" | |
| 421 "ja_JP.JIS" | |
| 422 "ja.JIS" | |
| 423 | |
| 424 ;; 8-bit locales | |
| 425 "ja_JP.jis8" | |
| 426 | |
| 427 ;; encoding-unspecified locales | |
| 428 "ja_JP" | |
| 429 "Ja_JP" | |
| 430 "Jp_JP" | |
| 431 "japanese" | |
| 432 "japan" | |
| 433 "ja" | |
| 434 ) | |
| 428 | 435 ;; (input-method . "japanese") |
| 436 (features japan-util) | |
| 450 | 437 (sample-text . "Japanese ($BF|K\8l(B) $B$3$s$K$A$O(B, (I:]FAJ(B") |
| 428 | 438 (documentation . t))) |
| 439 | |
| 4145 | 440 ;; Set the native-coding-system separately so the lambdas get compiled. (Not |
| 441 ;; a huge speed improvement, but this code is called at startup, and every | |
| 442 ;; little helps there.) | |
(set-language-info "Japanese"
                   'native-coding-system
                   ;; Function of one argument, LOCALE (a locale name string
                   ;; such as "ja_JP.eucJP"), returning the symbol naming the
                   ;; coding system to use for that locale.  Clauses are tried
                   ;; in order; the first match wins, and `euc-jp' is the
                   ;; fallback.
                   (lambda (locale)
                     ;; Encoding suffixes are matched case-insensitively.
                     (let ((case-fold-search t))
                       (cond
                        ;; first, see if an explicit encoding was given.
                        ;; many unix versions
                        ((string-match "\\.euc" locale) 'euc-jp)
                        ((string-match "\\.sjis" locale) 'shift-jis)

                        ;; X11R6 (CJKV p. 471)
                        ((string-match "\\.jis7" locale) 'jis7)
                        ((string-match "\\.jis8" locale) 'jis8)
                        ((string-match "\\.mscode" locale) 'shift-jis)
                        ((string-match "\\.pjis" locale) 'iso-2022-jp)
                        ((string-match "\\.ujis" locale) 'euc-jp)

                        ;; other names in X11R6 locale.alias
                        ((string-match "\\.ajec" locale) 'euc-jp)
                        ((string-match "-euc" locale) 'euc-jp)
                        ((string-match "\\.iso-2022-jp" locale) 'iso-2022-jp)
                        ;; Must come after the more specific ".jis7"/".jis8"
                        ;; clauses above, which it would otherwise shadow.
                        ((string-match "\\.jis" locale) 'jis7) ;; or just jis?

                        ;; aix (CJKV p. 465)
                        ;; These two clauses differ only in the case of the
                        ;; first letter, so they must be matched
                        ;; case-sensitively; under the enclosing
                        ;; `case-fold-search' binding the first clause would
                        ;; swallow both spellings.
                        ((and (eq system-type 'aix)
                              (let ((case-fold-search nil))
                                (string-match "^Ja_JP" locale)))
                         'shift-jis)
                        ((and (eq system-type 'aix)
                              (let ((case-fold-search nil))
                                (string-match "^ja_JP" locale)))
                         'euc-jp)

                        ;; other X11R6 locale.alias
                        ((string-match "^Jp_JP" locale) 'euc-jp)
                        ;; Compare string contents with `equal'; `eq' tests
                        ;; object identity and is not a reliable string test.
                        ((and (eq system-type 'hpux) (equal locale "japanese"))
                         'shift-jis)
                        ;; fallback
                        (t 'euc-jp)))))
| 4145 | 478 |
| 428 | 479 ;;; japanese.el ends here |
