Mercurial > hg > xemacs-beta
annotate lisp/mule/japanese.el @ 5544:c2301b2c88c8
Improve documentation of syntax table internals.
author | Stephen J. Turnbull <stephen@xemacs.org> |
---|---|
date | Mon, 08 Aug 2011 13:57:20 +0900 |
parents | ac37a5f7e5be |
children | bbe4146603db |
rev | line source |
---|---|
428 | 1 ;;; japanese.el --- Japanese support -*- coding: iso-2022-7bit; -*- |
2 | |
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | |
4 ;; Licensed to the Free Software Foundation. | |
5 ;; Copyright (C) 1997 MORIOKA Tomohiko | |
771 | 6 ;; Copyright (C) 2000, 2002 Ben Wing. |
428 | 7 |
8 ;; Keywords: multilingual, Japanese | |
9 | |
10 ;; This file is part of XEmacs. | |
11 | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
12 ;; XEmacs is free software: you can redistribute it and/or modify it |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
13 ;; under the terms of the GNU General Public License as published by the |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
14 ;; Free Software Foundation, either version 3 of the License, or (at your |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
15 ;; option) any later version. |
428 | 16 |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
17 ;; XEmacs is distributed in the hope that it will be useful, but WITHOUT |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
18 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
19 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
20 ;; for more details. |
428 | 21 |
22 ;; You should have received a copy of the GNU General Public License | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4568
diff
changeset
|
23 ;; along with XEmacs. If not, see <http://www.gnu.org/licenses/>. |
428 | 24 |
771 | 25 ;;; Synched up with: Emacs 20.6 (international/japanese.el). |
26 | |
428 | 27 ;;; Commentary: |
28 | |
29 ;; For Japanese, character sets JISX0201, JISX0208, JISX0212 are | |
30 ;; supported. | |
31 | |
32 ;;; Code: | |
33 | |
778 | 34 (make-charset 'japanese-jisx0213-1 "JISX0213 Plane 1 (Japanese)" |
35 '(dimension | |
36 2 | |
3659 | 37 registries ["JISX0213.2000-1"] |
778 | 38 chars 94 |
39 columns 2 | |
40 direction l2r | |
41 final ?O | |
42 graphic 0 | |
43 short-name "JISX0213-1" | |
44 long-name "JISX0213-1" | |
45 )) | |
46 | |
47 ;; JISX0213 Plane 2 | |
48 (make-charset 'japanese-jisx0213-2 "JISX0213 Plane 2 (Japanese)" | |
49 '(dimension | |
50 2 | |
3659 | 51 registries ["JISX0213.2000-2"] |
778 | 52 chars 94 |
53 columns 2 | |
54 direction l2r | |
55 final ?P | |
56 graphic 0 | |
57 short-name "JISX0213-2" | |
58 long-name "JISX0213-2" | |
59 )) | |
60 | |
428 | 61 ;;; Syntax of Japanese characters. |
62 (loop for row in '(33 34 40) | |
63 do (modify-syntax-entry `[japanese-jisx0208 ,row] "_")) | |
64 (loop for char in '(?$B!<(B ?$B!+(B ?$B!,(B ?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B) | |
65 do (modify-syntax-entry char "w")) | |
66 (modify-syntax-entry ?\$B!J(B "($B!K(B") | |
67 (modify-syntax-entry ?\$B!N(B "($B!O(B") | |
68 (modify-syntax-entry ?\$B!P(B "($B!Q(B") | |
69 (modify-syntax-entry ?\$B!V(B "($B!W(B") | |
70 (modify-syntax-entry ?\$B!X(B "($B!Y(B") | |
71 (modify-syntax-entry ?\$B!K(B ")$B!J(B") | |
72 (modify-syntax-entry ?\$B!O(B ")$B!N(B") | |
73 (modify-syntax-entry ?\$B!Q(B ")$B!P(B") | |
74 (modify-syntax-entry ?\$B!W(B ")$B!V(B") | |
75 (modify-syntax-entry ?\$B!Y(B ")$B!X(B") | |
76 | |
77 ;;; Character categories S, A, H, K, G, Y, and C | |
78 (define-category ?S "Japanese 2-byte symbol character.") | |
79 (modify-category-entry [japanese-jisx0208 33] ?S) | |
80 (modify-category-entry [japanese-jisx0208 34] ?S) | |
81 (modify-category-entry [japanese-jisx0208 40] ?S) | |
82 (define-category ?A "Japanese 2-byte Alphanumeric character.") | |
83 (modify-category-entry [japanese-jisx0208 35] ?A) | |
84 (define-category ?H "Japanese 2-byte Hiragana character.") | |
85 (modify-category-entry [japanese-jisx0208 36] ?H) | |
86 (define-category ?K "Japanese 2-byte Katakana character.") | |
87 (modify-category-entry [japanese-jisx0208 37] ?K) | |
88 (define-category ?G "Japanese 2-byte Greek character.") | |
89 (modify-category-entry [japanese-jisx0208 38] ?G) | |
90 (define-category ?Y "Japanese 2-byte Cyrillic character.") | |
91 (modify-category-entry [japanese-jisx0208 39] ?Y) | |
92 (define-category ?C "Japanese 2-byte Kanji characters.") | |
93 (loop for row from 48 to 126 | |
94 do (modify-category-entry `[japanese-jisx0208 ,row] ?C)) | |
95 (loop for char in '(?$B!<(B ?$B!+(B ?$B!,(B) | |
96 do (modify-category-entry char ?K) | |
97 (modify-category-entry char ?H)) | |
98 (loop for char in '(?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B) | |
99 do (modify-category-entry char ?C)) | |
100 (modify-category-entry 'japanese-jisx0212 ?C) | |
101 | |
102 (defvar japanese-word-regexp | |
103 "\\cA+\\cH*\\|\\cK+\\cH*\\|\\cC+\\cH*\\|\\cH+\\|\\ck+\\|\\sw+" | |
104 "Regular expression used to match a Japanese word.") | |
105 | |
106 (set-word-regexp japanese-word-regexp) | |
107 (setq forward-word-regexp "\\w\\>") | |
108 (setq backward-word-regexp "\\<\\w") | |
109 | |
110 ;;; Paragraph setting | |
111 (setq sentence-end | |
112 (concat | |
113 "\\(" | |
114 "\\(" | |
115 "[.?!][]\"')}]*" | |
116 "\\|" | |
117 "[$B!%!)!*(B][$B!O!I!G!K!Q!M!S!U!W!Y(B]*" | |
118 "\\)" | |
119 "\\($\\|\t\\| \\)" | |
120 "\\|" | |
121 "$B!#(B" | |
122 "\\)" | |
123 "[ \t\n]*")) | |
1891 | 124 |
125 ;; allow paragraphs to start with a zenkaku space | |
126 (setq paragraph-start "[ $B!!(B\t\n\f]") | |
127 (setq paragraph-separate "[ $B!!(B\t\f]*$") | |
428 | 128 |
129 ;; EGG specific setup | |
130 (define-egg-environment 'japanese | |
131 "Japanese settings for egg." | |
132 (lambda () | |
771 | 133 (with-boundp '(its:*standard-modes* its:*current-map* wnn-server-type) |
134 (with-fboundp 'its:get-mode-map | |
135 (when (not (featurep 'egg-jpn)) | |
136 (load "its-hira") | |
137 (load "its-kata") | |
138 (load "its-hankaku") | |
139 (load "its-zenkaku") | |
140 (setq its:*standard-modes* | |
141 (append | |
142 (list (its:get-mode-map "roma-kana") | |
143 (its:get-mode-map "roma-kata") | |
144 (its:get-mode-map "downcase") | |
145 (its:get-mode-map "upcase") | |
146 (its:get-mode-map "zenkaku-downcase") | |
147 (its:get-mode-map "zenkaku-upcase")) | |
148 its:*standard-modes*)) | |
149 (provide 'egg-jpn)) | |
150 (setq wnn-server-type 'jserver) | |
151 ;; Can't do this here any more. Must do it when selecting egg-wnn | |
152 ;; or egg-sj3 | |
153 ;; (setq egg-default-startup-file "eggrc-wnn") | |
154 (setq-default its:*current-map* (its:get-mode-map "roma-kana")))))) | |
428 | 155 |
450 | 156 ;; stuff for providing grammatic processing of Japanese text |
428 | 157 ;; something like this should probably be created for all environments... |
450 | 158 ;; #### Arrgh. This stuff should be defvar'd in either fill.el or kinsoku.el. |
159 ;; Then the language environment should set these things, probably buffer- | |
160 ;; locally. | |
428 | 161 |
771 | 162 ;; #### will be moved to fill.el |
163 (defvar space-insertable | |
164 (let* ((aletter (concat "\\(" ascii-char "\\|" kanji-char "\\)")) | |
165 (kanji-space-insertable | |
166 (concat | |
428 | 167 "$B!"(B" aletter "\\|" |
168 "$B!#(B" aletter "\\|" | |
169 aletter "$B!J(B" "\\|" | |
170 "$B!K(B" aletter "\\|" | |
171 ascii-alphanumeric kanji-kanji-char "\\|" | |
771 | 172 kanji-kanji-char ascii-alphanumeric))) |
173 (concat " " aletter "\\|" kanji-space-insertable)) | |
174 "Regexp for finding points that can have spaces inserted into them for justification") | |
428 | 175 |
771 | 176 ;; Beginning of FSF synching with international/japanese.el. |
177 | |
428 | 178 ;; (make-coding-system |
179 ;; 'iso-2022-jp 2 ?J | |
180 ;; "ISO 2022 based 7bit encoding for Japanese (MIME:ISO-2022-JP)" | |
181 ;; '((ascii japanese-jisx0208-1978 japanese-jisx0208 | |
182 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201) nil nil nil | |
183 ;; short ascii-eol ascii-cntl seven) | |
184 ;; '((safe-charsets ascii japanese-jisx0208-1978 japanese-jisx0208 | |
185 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201) | |
186 ;; (mime-charset . iso-2022-jp))) | |
187 | |
188 (make-coding-system | |
189 'iso-2022-jp 'iso2022 | |
771 | 190 "ISO-2022-JP (Japanese mail)" |
428 | 191 '(charset-g0 ascii |
192 short t | |
193 seven t | |
194 input-charset-conversion ((latin-jisx0201 ascii) | |
195 (japanese-jisx0208-1978 japanese-jisx0208)) | |
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
196 safe-charsets (ascii japanese-jisx0208-1978 japanese-jisx0208 |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
197 latin-jisx0201 japanese-jisx0212 katakana-jisx0201) |
428 | 198 mnemonic "MULE/7bit" |
771 | 199 documentation |
200 "Coding system used for communication with mail and news in Japan." | |
201 )) | |
202 | |
;; JIS7: ISO 2022 based 7-bit coding system with ASCII designated to G0 and
;; half-width katakana (katakana-jisx0201) designated to G1, selected with a
;; locking shift.
(make-coding-system
 'jis7 'iso2022
 "JIS7 (old Japanese 7-bit encoding)"
 '(charset-g0 ascii
   charset-g1 katakana-jisx0201
   short t
   seven t
   lock-shift t
   ;; On input, fold the JIS-Roman and 1978 Kanji sets into their modern
   ;; counterparts.
   input-charset-conversion ((latin-jisx0201 ascii)
                             (japanese-jisx0208-1978 japanese-jisx0208))
   ;; katakana-jisx0201 is designated to G1 above and is the whole point of
   ;; this coding system, so it has to be listed as safe as well.
   safe-charsets (latin-jisx0201 ascii japanese-jisx0208-1978
                  japanese-jisx0208 katakana-jisx0201)
   mnemonic "JIS7"
   documentation
   "Old JIS 7-bit encoding; mostly superseded by ISO-2022-JP.
Uses locking-shift (SI/SO) to select half-width katakana."
   ))
219 | |
;; JIS8: ISO 2022 based 8-bit coding system with ASCII designated to G0 and
;; half-width katakana (katakana-jisx0201) designated to G1, which is
;; reached through the high-bit byte range.
(make-coding-system
 'jis8 'iso2022
 "JIS8 (old Japanese 8-bit encoding)"
 '(charset-g0 ascii
   charset-g1 katakana-jisx0201
   short t
   ;; On input, fold the JIS-Roman and 1978 Kanji sets into their modern
   ;; counterparts.
   input-charset-conversion ((latin-jisx0201 ascii)
                             (japanese-jisx0208-1978 japanese-jisx0208))
   ;; katakana-jisx0201 is designated to G1 above and is the whole point of
   ;; this coding system, so it has to be listed as safe as well.
   safe-charsets (latin-jisx0201 ascii japanese-jisx0208-1978
                  japanese-jisx0208 katakana-jisx0201)
   mnemonic "JIS8"
   documentation
   "Old JIS 8-bit encoding; mostly superseded by ISO-2022-JP.
Uses high bytes for half-width katakana."
   ))
235 | |
236 (define-coding-system-alias 'junet 'iso-2022-jp) | |
237 | |
238 ;; (make-coding-system | |
239 ;; 'iso-2022-jp-2 2 ?J | |
240 ;; "ISO 2022 based 7bit encoding for CJK, Latin-1, and Greek (MIME:ISO-2022-JP-2)" | |
241 ;; '((ascii japanese-jisx0208-1978 japanese-jisx0208 | |
242 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201 | |
243 ;; chinese-gb2312 korean-ksc5601) nil | |
244 ;; (nil latin-iso8859-1 greek-iso8859-7) nil | |
245 ;; short ascii-eol ascii-cntl seven nil single-shift) | |
246 ;; '((safe-charsets ascii japanese-jisx0208-1978 japanese-jisx0208 | |
247 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201 | |
248 ;; chinese-gb2312 korean-ksc5601 | |
249 ;; latin-iso8859-1 greek-iso8859-7) | |
250 ;; (mime-charset . iso-2022-jp-2))) | |
251 | |
252 ;; (make-coding-system | |
253 ;; 'japanese-shift-jis 1 ?S | |
254 ;; "Shift-JIS 8-bit encoding for Japanese (MIME:SHIFT_JIS)" | |
255 ;; nil | |
256 ;; '((safe-charsets ascii japanese-jisx0208 japanese-jisx0208-1978 | |
257 ;; latin-jisx0201 katakana-jisx0201) | |
771 | 258 ;; (mime-charset . shift-jis) |
428 | 259 ;; (charset-origin-alist (japanese-jisx0208 "SJIS" encode-sjis-char) |
260 ;; (katakana-jisx0201 "SJIS" encode-sjis-char)))) | |
261 | |
262 (make-coding-system | |
771 | 263 'shift-jis 'shift-jis |
264 "Shift-JIS" | |
265 '(mnemonic "Ja/SJIS" | |
266 documentation "The standard Japanese encoding in MS Windows." | |
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
267 safe-charsets (ascii japanese-jisx0208 japanese-jisx0208-1978 |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
268 latin-jisx0201 katakana-jisx0201) |
771 | 269 )) |
428 | 270 |
771 | 271 ;; A former name? |
272 (define-coding-system-alias 'shift_jis 'shift-jis) | |
273 | |
274 ;; FSF: | |
275 ;; (define-coding-system-alias 'shift-jis 'japanese-shift-jis) | |
428 | 276 ;; (define-coding-system-alias 'sjis 'japanese-shift-jis) |
277 | |
278 ;; (make-coding-system | |
279 ;; 'japanese-iso-7bit-1978-irv 2 ?j | |
280 ;; "ISO 2022 based 7-bit encoding for Japanese JISX0208-1978 and JISX0201-Roman" | |
281 ;; '((ascii japanese-jisx0208-1978 japanese-jisx0208 | |
282 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201 t) nil nil nil | |
283 ;; short ascii-eol ascii-cntl seven nil nil use-roman use-oldjis) | |
284 ;; '(ascii japanese-jisx0208-1978 japanese-jisx0208 latin-jisx0201)) | |
285 | |
286 (make-coding-system | |
287 'iso-2022-jp-1978-irv 'iso2022 | |
771 | 288 "ISO-2022-JP-1978-IRV (Old JIS)" |
428 | 289 '(charset-g0 ascii |
290 short t | |
291 seven t | |
292 output-charset-conversion ((ascii latin-jisx0201) | |
293 (japanese-jisx0208 japanese-jisx0208-1978)) | |
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
294 safe-charsets (ascii latin-jisx0201 japanese-jisx0208 |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4145
diff
changeset
|
295 japanese-jisx0208-1978) |
771 | 296 documentation |
297 "This is a coding system used for old JIS terminals. It's an ISO | |
298 2022 based 7-bit encoding for Japanese JISX0208-1978 and JISX0201-Roman." | |
428 | 299 mnemonic "Ja-78/7bit" |
300 )) | |
301 | |
771 | 302 ;; FSF: |
428 | 303 ;; (define-coding-system-alias 'iso-2022-jp-1978-irv 'japanese-iso-7bit-1978-irv) |
304 ;; (define-coding-system-alias 'old-jis 'japanese-iso-7bit-1978-irv) | |
305 | |
306 (define-coding-system-alias 'old-jis 'iso-2022-jp-1978-irv) | |
307 | |
308 ;; (make-coding-system | |
309 ;; 'japanese-iso-8bit 2 ?E | |
310 ;; "ISO 2022 based EUC encoding for Japanese (MIME:EUC-JP)" | |
311 ;; '(ascii japanese-jisx0208 katakana-jisx0201 japanese-jisx0212 | |
312 ;; short ascii-eol ascii-cntl nil nil single-shift) | |
313 ;; '((safe-charsets ascii latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978 | |
771 | 314 ;; katakana-jisx0201 japanese-jisx0212) |
428 | 315 ;; (mime-charset . euc-jp))) |
771 | 316 ;; |
;; Japanese EUC: ASCII in G0, JIS X 0208 in G1, half-width katakana in G2
;; and JIS X 0212 in G3.
(make-coding-system
 'euc-jp 'iso2022
 "Japanese EUC"
 '(charset-g0 ascii
   charset-g1 japanese-jisx0208
   charset-g2 katakana-jisx0201
   charset-g3 japanese-jisx0212
   safe-charsets (ascii japanese-jisx0208 katakana-jisx0201 japanese-jisx0212)
   short t
   mnemonic "Ja/EUC"
   documentation
   "Japanese EUC (Extended Unix Code), the standard Japanese encoding in Unix.
Equivalent MIME encoding: EUC-JP.

Japanese EUC was the forefather of all the different EUC's, which all follow
a similar structure:

1. Up to four character sets can be encoded.

2. This is a non-modal encoding, i.e. it is impossible to set a global state
that affects anything more than the directly following character. [Modal
encodings typically have escape sequences to change global settings, which
affect all the following characters until the setting is turned off.
Modal encodings are typically used when it's necessary to support text in
a wide variety of character sets and still keep basic ASCII compatibility,
or in cases (e.g. sending email) where the allowed characters that can
pass the gateway are small and (typically) no high-bit range is available.]

3. The first character set is always ASCII or some national variant of it,
and encoded in the standard ASCII position. All characters in all other
character sets are encoded entirely using high-half bytes. Therefore,
it is safe to scan for ASCII characters, such as '/' to separate path
components, in the obvious way.

4. Each of the other three character sets can be of dimension 1, 2, or 3.
A dimension-1 character set contains 96 bytes; a dimension-2 character
set contains 96 x 96 bytes; and a dimension-3 character set contains
96 x 96 x 96 bytes. 94 instead of 96 as the number of characters per
dimension is also supported. Character sets of dimensions 1, 2, and 3
use 1-3 bytes, respectively, to encode a character, and each byte is
in the range A0-FF (or A1-FE for those with 94 bytes per dimension).

5. The four character sets encoded in EUC are called G0, G1, G2, and G3.
As mentioned earlier, G0 is ASCII or some variant, and encoded into
the ASCII positions 00 - 7F. G1 is encoded directly by laying out
its bytes. G2 is encoded using an 8E byte followed by the character's
bytes. G3 is encoded using an 8F byte followed by the character's bytes."
   ))
366 | |
771 | 367 ;; FSF: |
428 | 368 ;; (define-coding-system-alias 'euc-japan-1990 'japanese-iso-8bit) |
369 ;; (define-coding-system-alias 'euc-japan 'japanese-iso-8bit) | |
370 ;; (define-coding-system-alias 'euc-jp 'japanese-iso-8bit) | |
371 | |
372 (define-coding-system-alias 'euc-japan 'euc-jp) ; only for w3 | |
373 (define-coding-system-alias 'japanese-euc 'euc-jp) | |
374 | |
375 (set-language-info-alist | |
376 "Japanese" '((setup-function . setup-japanese-environment-internal) | |
377 (exit-function . exit-japanese-environment) | |
378 (tutorial . "TUTORIAL.ja") | |
379 (charset japanese-jisx0208 japanese-jisx0208-1978 | |
380 japanese-jisx0212 latin-jisx0201 katakana-jisx0201) | |
381 (coding-system iso-2022-jp euc-jp | |
771 | 382 shift-jis iso-2022-jp-2) |
428 | 383 (coding-priority iso-2022-jp euc-jp |
771 | 384 shift-jis iso-2022-jp-2) |
385 ;; These locale names come from the X11R6 locale.alias file. | |
386 ;; What an incredible fucking mess!!!!!!!!!!!!!!!!!!!!!!!!!! | |
387 ;; What's worse is that typical Unix implementations of | |
388 ;; setlocale() return back exactly what you passed them, even | |
389 ;; though it's perfectly allowed (and in fact done under | |
390 ;; Windows) to expand the locale to its full form (including | |
391 ;; encoding), so you have some hint as to the encoding!!! | |
392 ;; | |
393 ;; We order them in such a way that we're maximally likely | |
394 ;; to get an encoding name. | |
395 ;; | |
396 (locale | |
397 ;; SunOS 5.7: ja ja_JP.PCK ja_JP.UTF-8 japanese | |
398 ;; RedHat Linux 6.2J: ja ja_JP ja_JP.eucJP ja_JP.ujis \ | |
399 ;; japanese japanese.euc | |
400 ;; HP-UX 10.20: ja_JP.SJIS ja_JP.eucJPput ja_JP.kana8 | |
401 ;; Cygwin b20.1: ja_JP.EUC | |
402 ;; FreeBSD 2.2.8: ja_JP.EUC ja_JP.SJIS | |
403 | |
404 ;; EUC locales | |
405 "ja_JP.EUC" | |
406 "ja_JP.eucJP" | |
407 "ja_JP.AJEC" | |
408 "ja_JP.ujis" | |
409 "Japanese-EUC" | |
410 "japanese.euc" | |
411 | |
412 ;; Shift-JIS locales | |
413 "ja_JP.SJIS" | |
414 "ja_JP.mscode" | |
415 "ja.SJIS" | |
416 | |
417 ;; 7-bit locales | |
418 "ja_JP.ISO-2022-JP" | |
419 "ja_JP.jis7" | |
420 "ja_JP.pjis" | |
421 "ja_JP.JIS" | |
422 "ja.JIS" | |
423 | |
424 ;; 8-bit locales | |
425 "ja_JP.jis8" | |
426 | |
427 ;; encoding-unspecified locales | |
428 "ja_JP" | |
429 "Ja_JP" | |
430 "Jp_JP" | |
431 "japanese" | |
432 "japan" | |
433 "ja" | |
434 ) | |
428 | 435 ;; (input-method . "japanese") |
436 (features japan-util) | |
450 | 437 (sample-text . "Japanese ($BF|K\8l(B) $B$3$s$K$A$O(B, (I:]FAJ(B") |
428 | 438 (documentation . t))) |
439 | |
4145 | 440 ;; Set the native-coding-system separately so the lambdas get compiled. (Not |
441 ;; a huge speed improvement, but this code is called at startup, and every | |
442 ;; little helps there.) | |
(set-language-info
 "Japanese"
 'native-coding-system
 ;; Function of one argument, LOCALE (a string), returning the symbol of
 ;; the coding system that locale implies.  Explicit encoding suffixes are
 ;; checked first, then bare locale names that only mean something on a
 ;; particular platform; anything else falls back to `euc-jp'.
 (lambda (locale)
   ;; Encoding suffixes are matched without regard to case.
   (let ((case-fold-search t))
     (cond
      ;; many unix versions
      ((string-match "\\.euc" locale) 'euc-jp)
      ((string-match "\\.sjis" locale) 'shift-jis)

      ;; X11R6 (CJKV p. 471)
      ((string-match "\\.jis7" locale) 'jis7)
      ((string-match "\\.jis8" locale) 'jis8)
      ((string-match "\\.mscode" locale) 'shift-jis)
      ((string-match "\\.pjis" locale) 'iso-2022-jp)
      ((string-match "\\.ujis" locale) 'euc-jp)

      ;; other names in X11R6 locale.alias
      ((string-match "\\.ajec" locale) 'euc-jp)
      ((string-match "-euc" locale) 'euc-jp)
      ((string-match "\\.iso-2022-jp" locale) 'iso-2022-jp)
      ((string-match "\\.jis" locale) 'jis7) ;; or just jis?

      ;; aix (CJKV p. 465)
      ;; The next two patterns differ only in the case of the first
      ;; letter, so they have to be matched case-sensitively; under the
      ;; surrounding case-folding the first clause would shadow the
      ;; second and "ja_JP" could never select `euc-jp'.
      ((and (eq system-type 'aix)
            (let ((case-fold-search nil))
              (string-match "^Ja_JP" locale)))
       'shift-jis)
      ((and (eq system-type 'aix)
            (let ((case-fold-search nil))
              (string-match "^ja_JP" locale)))
       'euc-jp)

      ;; other X11R6 locale.alias
      ((string-match "^Jp_JP" locale) 'euc-jp)
      ;; Strings must be compared by content: `eq' on a string and a
      ;; literal tests object identity and is never true here.
      ((and (eq system-type 'hpux) (equal locale "japanese"))
       'shift-jis)
      ;; fallback
      (t 'euc-jp)))))
4145 | 478 |
428 | 479 ;;; japanese.el ends here |