comparison lisp/mule/japanese.el @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents 98528da0b7fc
children 2923009caf47
comparison
equal deleted inserted replaced
770:336a418893b5 771:943eaba38521
1 ;;; japanese.el --- Japanese support -*- coding: iso-2022-7bit; -*- 1 ;;; japanese.el --- Japanese support -*- coding: iso-2022-7bit; -*-
2 2
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. 3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation. 4 ;; Licensed to the Free Software Foundation.
5 ;; Copyright (C) 1997 MORIOKA Tomohiko 5 ;; Copyright (C) 1997 MORIOKA Tomohiko
6 ;; Copyright (C) 2000, 2002 Ben Wing.
6 7
7 ;; Keywords: multilingual, Japanese 8 ;; Keywords: multilingual, Japanese
8 9
9 ;; This file is part of XEmacs. 10 ;; This file is part of XEmacs.
10 11
20 21
21 ;; You should have received a copy of the GNU General Public License 22 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs; see the file COPYING. If not, write to the Free 23 ;; along with XEmacs; see the file COPYING. If not, write to the Free
23 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 24 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24 ;; 02111-1307, USA. 25 ;; 02111-1307, USA.
26
27 ;;; Synched up with: Emacs 20.6 (international/japanese.el).
25 28
26 ;;; Commentary: 29 ;;; Commentary:
27 30
28 ;; For Japanese, character sets JISX0201, JISX0208, JISX0212 are 31 ;; For Japanese, character sets JISX0201, JISX0208, JISX0212 are
29 ;; supported. 32 ;; supported.
102 105
103 ;; EGG specific setup 106 ;; EGG specific setup
104 (define-egg-environment 'japanese 107 (define-egg-environment 'japanese
105 "Japanese settings for egg." 108 "Japanese settings for egg."
106 (lambda () 109 (lambda ()
107 (when (not (featurep 'egg-jpn)) 110 (with-boundp '(its:*standard-modes* its:*current-map* wnn-server-type)
108 (load "its-hira") 111 (with-fboundp 'its:get-mode-map
109 (load "its-kata") 112 (when (not (featurep 'egg-jpn))
110 (load "its-hankaku") 113 (load "its-hira")
111 (load "its-zenkaku") 114 (load "its-kata")
112 (setq its:*standard-modes* 115 (load "its-hankaku")
113 (append 116 (load "its-zenkaku")
114 (list (its:get-mode-map "roma-kana") 117 (setq its:*standard-modes*
115 (its:get-mode-map "roma-kata") 118 (append
116 (its:get-mode-map "downcase") 119 (list (its:get-mode-map "roma-kana")
117 (its:get-mode-map "upcase") 120 (its:get-mode-map "roma-kata")
118 (its:get-mode-map "zenkaku-downcase") 121 (its:get-mode-map "downcase")
119 (its:get-mode-map "zenkaku-upcase")) 122 (its:get-mode-map "upcase")
120 its:*standard-modes*)) 123 (its:get-mode-map "zenkaku-downcase")
121 (provide 'egg-jpn)) 124 (its:get-mode-map "zenkaku-upcase"))
122 (setq wnn-server-type 'jserver) 125 its:*standard-modes*))
123 ;; Can't do this here any more. Must do it when selecting egg-wnn 126 (provide 'egg-jpn))
124 ;; or egg-sj3 127 (setq wnn-server-type 'jserver)
125 ;; (setq egg-default-startup-file "eggrc-wnn") 128 ;; Can't do this here any more. Must do it when selecting egg-wnn
126 (setq-default its:*current-map* (its:get-mode-map "roma-kana")))) 129 ;; or egg-sj3
130 ;; (setq egg-default-startup-file "eggrc-wnn")
131 (setq-default its:*current-map* (its:get-mode-map "roma-kana"))))))
127 132
128 ;; stuff for providing grammatic processing of Japanese text 133 ;; stuff for providing grammatic processing of Japanese text
129 ;; something like this should probably be created for all environments... 134 ;; something like this should probably be created for all environments...
130 ;; #### Arrgh. This stuff should be defvar'd in either fill.el or kinsoku.el. 135 ;; #### Arrgh. This stuff should be defvar'd in either fill.el or kinsoku.el.
131 ;; Then the language environment should set these things, probably buffer- 136 ;; Then the language environment should set these things, probably buffer-
132 ;; locally. 137 ;; locally.
133 138
134 (defvar aletter (concat "\\(" ascii-char "\\|" kanji-char "\\)")) 139 ;; #### will be moved to fill.el
135 (defvar kanji-space-insertable (concat 140 (defvar space-insertable
141 (let* ((aletter (concat "\\(" ascii-char "\\|" kanji-char "\\)"))
142 (kanji-space-insertable
143 (concat
136 "$B!"(B" aletter "\\|" 144 "$B!"(B" aletter "\\|"
137 "$B!#(B" aletter "\\|" 145 "$B!#(B" aletter "\\|"
138 aletter "$B!J(B" "\\|" 146 aletter "$B!J(B" "\\|"
139 "$B!K(B" aletter "\\|" 147 "$B!K(B" aletter "\\|"
140 ascii-alphanumeric kanji-kanji-char "\\|" 148 ascii-alphanumeric kanji-kanji-char "\\|"
141 kanji-kanji-char ascii-alphanumeric )) 149 kanji-kanji-char ascii-alphanumeric)))
142 150 (concat " " aletter "\\|" kanji-space-insertable))
143 ;; #### will be moved to fill.el
144 (defvar space-insertable (concat " " aletter "\\|" kanji-space-insertable)
145 "Regexp for finding points that can have spaces inserted into them for justification") 151 "Regexp for finding points that can have spaces inserted into them for justification")
146 152
153 ;; Beginning of FSF synching with international/japanese.el.
154
147 ;; (make-coding-system 155 ;; (make-coding-system
148 ;; 'iso-2022-jp 2 ?J 156 ;; 'iso-2022-jp 2 ?J
149 ;; "ISO 2022 based 7bit encoding for Japanese (MIME:ISO-2022-JP)" 157 ;; "ISO 2022 based 7bit encoding for Japanese (MIME:ISO-2022-JP)"
150 ;; '((ascii japanese-jisx0208-1978 japanese-jisx0208 158 ;; '((ascii japanese-jisx0208-1978 japanese-jisx0208
151 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201) nil nil nil 159 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201) nil nil nil
154 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201) 162 ;; latin-jisx0201 japanese-jisx0212 katakana-jisx0201)
155 ;; (mime-charset . iso-2022-jp))) 163 ;; (mime-charset . iso-2022-jp)))
156 164
157 (make-coding-system 165 (make-coding-system
158 'iso-2022-jp 'iso2022 166 'iso-2022-jp 'iso2022
159 "Coding-system used for communication with mail and news in Japan." 167 "ISO-2022-JP (Japanese mail)"
160 '(charset-g0 ascii 168 '(charset-g0 ascii
161 short t 169 short t
162 seven t 170 seven t
163 input-charset-conversion ((latin-jisx0201 ascii) 171 input-charset-conversion ((latin-jisx0201 ascii)
164 (japanese-jisx0208-1978 japanese-jisx0208)) 172 (japanese-jisx0208-1978 japanese-jisx0208))
165 mnemonic "MULE/7bit" 173 mnemonic "MULE/7bit"
174 documentation
175 "Coding system used for communication with mail and news in Japan."
176 ))
177
178 (make-coding-system
179 'jis7 'iso2022
180 "JIS7 (old Japanese 7-bit encoding)"
181 '(charset-g0 ascii
182 charset-g1 katakana-jisx0201
183 short t
184 seven t
185 lock-shift t
186 input-charset-conversion ((latin-jisx0201 ascii)
187 (japanese-jisx0208-1978 japanese-jisx0208))
188 mnemonic "JIS7"
189 documentation
190 "Old JIS 7-bit encoding; mostly superseded by ISO-2022-JP.
191 Uses locking-shift (SI/SO) to select half-width katakana."
192 ))
193
194 (make-coding-system
195 'jis8 'iso2022
196 "JIS8 (old Japanese 8-bit encoding)"
197 '(charset-g0 ascii
198 charset-g1 katakana-jisx0201
199 short t
200 input-charset-conversion ((latin-jisx0201 ascii)
201 (japanese-jisx0208-1978 japanese-jisx0208))
202 mnemonic "JIS8"
203 documentation
204 "Old JIS 8-bit encoding; mostly superseded by ISO-2022-JP.
205 Uses high bytes for half-width katakana."
166 )) 206 ))
167 207
168 (define-coding-system-alias 'junet 'iso-2022-jp) 208 (define-coding-system-alias 'junet 'iso-2022-jp)
169 209
170 ;; (make-coding-system 210 ;; (make-coding-system
185 ;; 'japanese-shift-jis 1 ?S 225 ;; 'japanese-shift-jis 1 ?S
186 ;; "Shift-JIS 8-bit encoding for Japanese (MIME:SHIFT_JIS)" 226 ;; "Shift-JIS 8-bit encoding for Japanese (MIME:SHIFT_JIS)"
187 ;; nil 227 ;; nil
188 ;; '((safe-charsets ascii japanese-jisx0208 japanese-jisx0208-1978 228 ;; '((safe-charsets ascii japanese-jisx0208 japanese-jisx0208-1978
189 ;; latin-jisx0201 katakana-jisx0201) 229 ;; latin-jisx0201 katakana-jisx0201)
190 ;; (mime-charset . shift_jis) 230 ;; (mime-charset . shift-jis)
191 ;; (charset-origin-alist (japanese-jisx0208 "SJIS" encode-sjis-char) 231 ;; (charset-origin-alist (japanese-jisx0208 "SJIS" encode-sjis-char)
192 ;; (katakana-jisx0201 "SJIS" encode-sjis-char)))) 232 ;; (katakana-jisx0201 "SJIS" encode-sjis-char))))
193 233
194 (make-coding-system 234 (make-coding-system
195 'shift_jis 'shift-jis 235 'shift-jis 'shift-jis
196 "Coding-system of Shift-JIS used in Japan." 236 "Shift-JIS"
197 '(mnemonic "Ja/SJIS")) 237 '(mnemonic "Ja/SJIS"
198 238 documentation "The standard Japanese encoding in MS Windows."
199 ;; (define-coding-system-alias 'shift_jis 'japanese-shift-jis) 239 ))
240
241 ;; A former name?
242 (define-coding-system-alias 'shift_jis 'shift-jis)
243
244 ;; FSF:
245 ;; (define-coding-system-alias 'shift-jis 'japanese-shift-jis)
200 ;; (define-coding-system-alias 'sjis 'japanese-shift-jis) 246 ;; (define-coding-system-alias 'sjis 'japanese-shift-jis)
201 247
202 ;; (make-coding-system 248 ;; (make-coding-system
203 ;; 'japanese-iso-7bit-1978-irv 2 ?j 249 ;; 'japanese-iso-7bit-1978-irv 2 ?j
204 ;; "ISO 2022 based 7-bit encoding for Japanese JISX0208-1978 and JISX0201-Roman" 250 ;; "ISO 2022 based 7-bit encoding for Japanese JISX0208-1978 and JISX0201-Roman"
207 ;; short ascii-eol ascii-cntl seven nil nil use-roman use-oldjis) 253 ;; short ascii-eol ascii-cntl seven nil nil use-roman use-oldjis)
208 ;; '(ascii japanese-jisx0208-1978 japanese-jisx0208 latin-jisx0201)) 254 ;; '(ascii japanese-jisx0208-1978 japanese-jisx0208 latin-jisx0201))
209 255
210 (make-coding-system 256 (make-coding-system
211 'iso-2022-jp-1978-irv 'iso2022 257 'iso-2022-jp-1978-irv 'iso2022
212 "Coding-system used for old JIS terminal." 258 "ISO-2022-JP-1978-IRV (Old JIS)"
213 '(charset-g0 ascii 259 '(charset-g0 ascii
214 short t 260 short t
215 seven t 261 seven t
216 output-charset-conversion ((ascii latin-jisx0201) 262 output-charset-conversion ((ascii latin-jisx0201)
217 (japanese-jisx0208 japanese-jisx0208-1978)) 263 (japanese-jisx0208 japanese-jisx0208-1978))
264 documentation
265 "This is a coding system used for old JIS terminals. It's an ISO
266 2022 based 7-bit encoding for Japanese JISX0208-1978 and JISX0201-Roman."
218 mnemonic "Ja-78/7bit" 267 mnemonic "Ja-78/7bit"
219 )) 268 ))
220 269
270 ;; FSF:
221 ;; (define-coding-system-alias 'iso-2022-jp-1978-irv 'japanese-iso-7bit-1978-irv) 271 ;; (define-coding-system-alias 'iso-2022-jp-1978-irv 'japanese-iso-7bit-1978-irv)
222 ;; (define-coding-system-alias 'old-jis 'japanese-iso-7bit-1978-irv) 272 ;; (define-coding-system-alias 'old-jis 'japanese-iso-7bit-1978-irv)
223 273
224 (define-coding-system-alias 'old-jis 'iso-2022-jp-1978-irv) 274 (define-coding-system-alias 'old-jis 'iso-2022-jp-1978-irv)
225 275
227 ;; 'japanese-iso-8bit 2 ?E 277 ;; 'japanese-iso-8bit 2 ?E
228 ;; "ISO 2022 based EUC encoding for Japanese (MIME:EUC-JP)" 278 ;; "ISO 2022 based EUC encoding for Japanese (MIME:EUC-JP)"
229 ;; '(ascii japanese-jisx0208 katakana-jisx0201 japanese-jisx0212 279 ;; '(ascii japanese-jisx0208 katakana-jisx0201 japanese-jisx0212
230 ;; short ascii-eol ascii-cntl nil nil single-shift) 280 ;; short ascii-eol ascii-cntl nil nil single-shift)
231 ;; '((safe-charsets ascii latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978 281 ;; '((safe-charsets ascii latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978
232 ;; katakana-jisx0201 japanese-jisx0212) 282 ;; katakana-jisx0201 japanese-jisx0212)
233 ;; (mime-charset . euc-jp))) 283 ;; (mime-charset . euc-jp)))
234 284 ;;
235 (make-coding-system 285 (make-coding-system
236 'euc-jp 'iso2022 286 'euc-jp 'iso2022
237 "Coding-system of Japanese EUC (Extended Unix Code)." 287 "Japanese EUC"
238 '(charset-g0 ascii 288 '(charset-g0 ascii
239 charset-g1 japanese-jisx0208 289 charset-g1 japanese-jisx0208
240 charset-g2 katakana-jisx0201 290 charset-g2 katakana-jisx0201
241 charset-g3 japanese-jisx0212 291 charset-g3 japanese-jisx0212
242 short t 292 short t
243 mnemonic "Ja/EUC" 293 mnemonic "Ja/EUC"
294 documentation
295 "Japanese EUC (Extended Unix Code), the standard Japanese encoding in Unix.
296 Equivalent MIME encoding: EUC-JP.
297
298 Japanese EUC was the forefather of all the different EUC's, which all follow
299 a similar structure:
300
301 1. Up to four character sets can be encoded.
302
303 2. This is a non-modal encoding, i.e. it is impossible to set a global state
304 that affects anything more than the directly following character. [Modal
305 encodings typically have escape sequences to change global settings, which
306 affect all the following characters until the setting is turned off.
307 Modal encodings are typically used when it's necessary to support text in
308 a wide variety of character sets and still keep basic ASCII compatibility,
309 or in cases (e.g. sending email) where the set of characters that can
310 pass the gateway is small and (typically) no high-bit range is available.]
311
312 3. The first character set is always ASCII or some national variant of it,
313 and encoded in the standard ASCII position. All characters in all other
314 character sets are encoded entirely using high-half bytes. Therefore,
315 it is safe to scan for ASCII characters, such as '/' to separate path
316 components, in the obvious way.
317
318 4. Each of the other three character sets can be of dimension 1, 2, or 3.
319 A dimension-1 character set contains 96 characters; a dimension-2 character
320 set contains 96 x 96 characters; and a dimension-3 character set contains
321 96 x 96 x 96 characters. 94 instead of 96 as the number of characters per
322 dimension is also supported. Character sets of dimensions 1, 2, and 3
323 use 1-3 bytes, respectively, to encode a character, and each byte is
324 in the range A0-FF (or A1-FE for those with 94 characters per dimension).
325
326 5. The four character sets encoded in EUC are called G0, G1, G2, and G3.
327 As mentioned earlier, G0 is ASCII or some variant, and encoded into
328 the ASCII positions 00 - 7F. G1 is encoded directly by laying out
329 its bytes. G2 is encoded using an 8E byte followed by the character's
330 bytes. G3 is encoded using an 8F byte followed by the character's bytes."
331
244 )) 332 ))
245 333
334 ;; FSF:
246 ;; (define-coding-system-alias 'euc-japan-1990 'japanese-iso-8bit) 335 ;; (define-coding-system-alias 'euc-japan-1990 'japanese-iso-8bit)
247 ;; (define-coding-system-alias 'euc-japan 'japanese-iso-8bit) 336 ;; (define-coding-system-alias 'euc-japan 'japanese-iso-8bit)
248 ;; (define-coding-system-alias 'euc-jp 'japanese-iso-8bit) 337 ;; (define-coding-system-alias 'euc-jp 'japanese-iso-8bit)
249 338
250 (define-coding-system-alias 'euc-japan 'euc-jp) ; only for w3 339 (define-coding-system-alias 'euc-japan 'euc-jp) ; only for w3
255 (exit-function . exit-japanese-environment) 344 (exit-function . exit-japanese-environment)
256 (tutorial . "TUTORIAL.ja") 345 (tutorial . "TUTORIAL.ja")
257 (charset japanese-jisx0208 japanese-jisx0208-1978 346 (charset japanese-jisx0208 japanese-jisx0208-1978
258 japanese-jisx0212 latin-jisx0201 katakana-jisx0201) 347 japanese-jisx0212 latin-jisx0201 katakana-jisx0201)
259 (coding-system iso-2022-jp euc-jp 348 (coding-system iso-2022-jp euc-jp
260 shift_jis iso-2022-jp-2) 349 shift-jis iso-2022-jp-2)
261 (coding-priority iso-2022-jp euc-jp 350 (coding-priority iso-2022-jp euc-jp
262 shift_jis iso-2022-jp-2) 351 shift-jis iso-2022-jp-2)
352 ;; These locale names come from the X11R6 locale.alias file.
353 ;; What an incredible fucking mess!!!!!!!!!!!!!!!!!!!!!!!!!!
354 ;; What's worse is that typical Unix implementations of
355 ;; setlocale() return exactly what you passed them, even
356 ;; though it's perfectly allowed (and in fact done under
357 ;; Windows) to expand the locale to its full form (including
358 ;; encoding), which would give you some hint as to the encoding!!!
359 ;;
360 ;; We order them in such a way that we're maximally likely
361 ;; to get an encoding name.
362 ;;
363 (locale
364 ;; SunOS 5.7: ja ja_JP.PCK ja_JP.UTF-8 japanese
365 ;; RedHat Linux 6.2J: ja ja_JP ja_JP.eucJP ja_JP.ujis \
366 ;; japanese japanese.euc
367 ;; HP-UX 10.20: ja_JP.SJIS ja_JP.eucJPput ja_JP.kana8
368 ;; Cygwin b20.1: ja_JP.EUC
369 ;; FreeBSD 2.2.8: ja_JP.EUC ja_JP.SJIS
370
371 ;; EUC locales
372 "ja_JP.EUC"
373 "ja_JP.eucJP"
374 "ja_JP.AJEC"
375 "ja_JP.ujis"
376 "Japanese-EUC"
377 "japanese.euc"
378
379 ;; Shift-JIS locales
380 "ja_JP.SJIS"
381 "ja_JP.mscode"
382 "ja.SJIS"
383
384 ;; 7-bit locales
385 "ja_JP.ISO-2022-JP"
386 "ja_JP.jis7"
387 "ja_JP.pjis"
388 "ja_JP.JIS"
389 "ja.JIS"
390
391 ;; 8-bit locales
392 "ja_JP.jis8"
393
394 ;; encoding-unspecified locales
395 "ja_JP"
396 "Ja_JP"
397 "Jp_JP"
398 "japanese"
399 "japan"
400 "ja"
401 )
402
403 (native-coding-system
404 ;; first, see if an explicit encoding was given.
405 #'(lambda (locale)
406 (let ((case-fold-search t))
407 (cond
408 ;; many unix versions
409 ((string-match "\\.euc" locale) 'euc-jp)
410 ((string-match "\\.sjis" locale) 'shift-jis)
411
412 ;; X11R6 (CJKV p. 471)
413 ((string-match "\\.jis7" locale) 'jis7)
414 ((string-match "\\.jis8" locale) 'jis8)
415 ((string-match "\\.mscode" locale) 'shift-jis)
416 ((string-match "\\.pjis" locale) 'iso-2022-jp)
417 ((string-match "\\.ujis" locale) 'euc-jp)
418
419 ;; other names in X11R6 locale.alias
420 ((string-match "\\.ajec" locale) 'euc-jp)
421 ((string-match "-euc" locale) 'euc-jp)
422 ((string-match "\\.iso-2022-jp" locale) 'iso-2022-jp)
423 ((string-match "\\.jis" locale) 'jis7) ;; or just jis?
424 )))
425
426 ;; aix (CJKV p. 465)
427 #'(lambda (locale)
428 (when (eq system-type 'aix)
429 (cond
430 ((string-match "^Ja_JP" locale) 'shift-jis)
431 ((string-match "^ja_JP" locale) 'euc-jp))))
432
433 ;; other X11R6 locale.alias
434 #'(lambda (locale)
435 (cond
436 ((string-match "^Jp_JP" locale) 'euc-jp)
437 ((and (eq system-type 'hpux) (eq locale "japanese"))
438 'shift-jis)))
439
440 ;; fallback
441 'euc-jp)
442
263 ;; (input-method . "japanese") 443 ;; (input-method . "japanese")
264 (features japan-util) 444 (features japan-util)
265 (sample-text . "Japanese ($BF|K\8l(B) $B$3$s$K$A$O(B, (I:]FAJ(B") 445 (sample-text . "Japanese ($BF|K\8l(B) $B$3$s$K$A$O(B, (I:]FAJ(B")
266 (documentation . t))) 446 (documentation . t)))
267 447