502
|
1 ;;; mule-charset.el --- Charset functions for Mule. -*- coding: iso-2022-7bit; -*-
|
428
|
2
|
|
3 ;; Copyright (C) 1992 Free Software Foundation, Inc.
|
|
4 ;; Copyright (C) 1995 Amdahl Corporation.
|
|
5 ;; Copyright (C) 1996 Sun Microsystems.
|
777
|
6 ;; Copyright (C) 2002 Ben Wing.
|
428
|
7
|
|
8 ;; Author: Unknown
|
|
9 ;; Keywords: i18n, mule, internal
|
|
10
|
|
11 ;; This file is part of XEmacs.
|
|
12
|
|
13 ;; XEmacs is free software; you can redistribute it and/or modify it
|
|
14 ;; under the terms of the GNU General Public License as published by
|
|
15 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
16 ;; any later version.
|
|
17
|
|
18 ;; XEmacs is distributed in the hope that it will be useful, but
|
|
19 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
21 ;; General Public License for more details.
|
|
22
|
|
23 ;; You should have received a copy of the GNU General Public License
|
|
24 ;; along with XEmacs; see the file COPYING. If not, write to the
|
|
25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
26 ;; Boston, MA 02111-1307, USA.
|
|
27
|
|
28 ;;; Synched up with: Not synched. API at source level synched with FSF 20.3.9.
|
|
29
|
|
30 ;;; Commentary:
|
|
31
|
|
32 ;; These functions are not compatible at the bytecode level with Emacs/Mule,
|
|
33 ;; and they never will be. -sb [1999-05-26]
|
|
34
|
|
35 ;;; Code:
|
|
36
|
|
37 ;;;; Classifying text according to charsets
|
|
38
|
|
(defun charsets-in-region (start end &optional buffer)
  "Return a list of the charsets in the region between START and END.
BUFFER defaults to the current buffer if omitted.
Each charset found appears exactly once in the returned list."
  (let (list
        ;; Charset of the previously examined character.  It must live
        ;; outside the loop: if it were rebound on every iteration it
        ;; would always be nil and the `memq' search below would run for
        ;; every single character instead of only at charset changes.
        prev-charset)
    (save-excursion
      (if buffer
          (set-buffer buffer))
      (save-restriction
        ;; Narrow so that `eobp' becomes true exactly at END.
        (narrow-to-region start end)
        (goto-char (point-min))
        (while (not (eobp))
          (let ((charset (char-charset (char-after (point)))))
            (if (not (eq prev-charset charset))
                (progn
                  (setq prev-charset charset)
                  ;; Only record a charset the first time it is seen.
                  (or (memq charset list)
                      (setq list (cons charset list))))))
          (forward-char))))
    list))
|
|
60
|
|
(defun charsets-in-string (string)
  "Return a list of the charsets of the characters in STRING.
Each charset found appears exactly once in the returned list."
  (let ((idx 0)
        (limit (length string))
        last-seen found cs)
    (while (< idx limit)
      (setq cs (char-charset (aref string idx))
            idx (1+ idx))
      ;; Skip the list search while we stay inside a run of characters
      ;; that all belong to the same charset.
      (unless (eq cs last-seen)
        (setq last-seen cs)
        (unless (memq cs found)
          (setq found (cons cs found)))))
    found))
|
|
75
|
771
|
;; Alternative names for the two classification functions above.
(defalias 'find-charset-string #'charsets-in-string)
(defalias 'find-charset-region #'charsets-in-region)
|
428
|
78
|
|
79 ;;;; Charset accessors
|
|
80
|
|
(defun charset-iso-graphic-plane (charset)
  "Return the value of the `graphic' property of CHARSET.
See `make-charset' for a description of this property."
  (charset-property charset 'graphic))
|
|
85
|
|
(defun charset-iso-final-char (charset)
  "Return the `final' property of CHARSET.
This is the final byte of the ISO 2022 escape sequence designating CHARSET."
  (charset-property charset 'final))
|
|
89
|
|
(defun charset-chars (charset)
  "Return the `chars' property of CHARSET.
This is the number of characters per dimension of CHARSET."
  (charset-property charset 'chars))
|
|
93
|
|
(defun charset-width (charset)
  "Return the `columns' property of CHARSET.
This is the number of display columns occupied by each character of
CHARSET.  It only applies to TTY mode (under X, the actual display
width can be automatically determined)."
  (charset-property charset 'columns))
|
|
99
|
|
100 ;; #### FSFmacs returns 0
|
|
(defun charset-direction (charset)
  "Return the display direction of CHARSET as an integer.
The value is 0 when the `direction' property of CHARSET is `l2r', and
1 otherwise (i.e. for `r2l').
Only left-to-right is currently implemented."
  (let ((dir (charset-property charset 'direction)))
    (cond ((eq dir 'l2r) 0)
          (t 1))))
|
|
107
|
|
108 ;; Not in Emacs/Mule
|
|
(defun charset-registry (charset)
  "Return the `registry' property of CHARSET.
The registry is a regular expression matching the registry field of
fonts that can display the characters in CHARSET."
  (charset-property charset 'registry))
|
|
114
|
|
(defun charset-ccl-program (charset)
  "Return the value of the `ccl-program' property of CHARSET.
See `make-charset' for a description of this property."
  (charset-property charset 'ccl-program))
|
|
119
|
|
(defun charset-bytes (charset)
  "Return 1; the CHARSET argument is ignored.
This function is useless in XEmacs."
  1)
|
|
123
|
|
;; Obsolete names for the accessors; all four are dated 19990409.
(define-obsolete-function-alias 'charset-columns    #'charset-width)
(define-obsolete-function-alias 'charset-final      #'charset-iso-final-char)
(define-obsolete-function-alias 'charset-graphic    #'charset-iso-graphic-plane)
(define-obsolete-function-alias 'charset-doc-string #'charset-description)
|
|
128
|
|
129 ;;;; Define setf methods for all settable Charset properties
|
|
130
|
|
;; Let `setf' on each accessor expand into a call to the matching setter.
(defsetf charset-registry    set-charset-registry)
(defsetf charset-ccl-program set-charset-ccl-program)
|
|
133
|
|
134 ;;; FSF compatibility functions
|
|
(defun charset-after (&optional pos)
  "Return charset of a character in current buffer at position POS.
If POS is nil, it defaults to the current point.
If POS is out of range, the value is nil."
  (when (null pos)
    (setq pos (point)))
  (check-argument-type 'integerp pos)
  ;; `char-after' yields nil for every position that has no character
  ;; after it.  That includes (point-max) itself, not just positions
  ;; strictly outside the accessible region, so test the character
  ;; rather than comparing POS against the bounds: nil must never be
  ;; passed on to `char-charset'.
  (let ((ch (char-after pos)))
    (and ch (char-charset ch))))
|
|
145
|
|
146 ;; Yuck!
|
771
|
147 ;; We're not going to support these.
|
|
148 ;(defun charset-info (charset) [incredibly broken function with random vectors]
|
|
149 ;(defun define-charset (...) [incredibly broken function with random vectors]
|
428
|
150
|
|
151 ;;; Charset property
|
|
152
|
|
;; The charset property-list functions are plain aliases for the
;; generic property-list functions.
(defalias 'get-charset-property #'get)
(defalias 'put-charset-property #'put)
(defalias 'charset-plist        #'object-plist)
(defalias 'set-charset-plist    #'setplist)
|
|
157
|
771
|
158
|
442
|
159 ;; Setup auto-fill-chars for charsets that should invoke auto-filling.
|
777
|
160 ;; SPACE and NEWLINE are already set.
|
442
|
;; Store t in `auto-fill-chars' for each of the charsets listed below.
(dolist (charset '(katakana-jisx0201
                   japanese-jisx0208 japanese-jisx0212
                   chinese-gb2312 chinese-big5-1 chinese-big5-2))
  (put-char-table charset t auto-fill-chars))
|
|
167
|
778
|
168
|
|
169 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
170 ; charsets ;
|
|
171 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
172
|
|
173 ;; Synched up with: FSF 21.1.
|
|
174
|
|
175 ;; All FSF charset definitions are in mule-conf.el. I copied the relevant
|
|
176 ;; part of that file below, then converted all charset definitions using
|
|
177 ;; the macro below, then globally replaced 'direction 0' with 'direction
|
|
178 ;; l2r' and 'direction 1' with 'direction r2l', then commented everything
|
|
179 ;; out. Copy the definitions as necessary to individual files.
|
|
180
|
|
181 ;; Kbd macro to convert from FSF-style define-charset to our make-charset.
|
|
182
|
|
183 ; (setq last-kbd-macro (read-kbd-macro
|
|
184 ; "<right> M-d make <M-right> M-d <home> <down> TAB '[dimension DEL SPC <M-right> RET TAB chars SPC <M-right> RET TAB columns SPC <M-right> RET TAB direction SPC <M-right> RET TAB final SPC <M-right> RET TAB graphic SPC <M-right> RET TAB short- name SPC <M-right> RET TAB long- name SPC <M-right> RET TAB <S-M-right> <f2> DEL TAB <end> ] <M-left> <end> SPC <f4> 3*<M-left> <left> <M-right> RET <down>"))
|
|
185
|
|
186 ;; Kbd macro to take one registry entry from the list of registry entries,
|
|
187 ;; find the appropriate make-charset call, and add the appropriate registry
|
|
188 ;; property.
|
|
189
|
|
190 ; (setq last-kbd-macro (read-kbd-macro
|
|
191 ; "3*<right> <S-M-right> C-x x 1 <right> <S-M-right> C-x x 2 <home> C-x r m foo RET <M-down> M-x sear TAB for TAB RET C-x g 1 RET C-s dimen RET <end> RET TAB 3*<backspace> registry SPC C-x g 2 C-x r b RET <down>"))
|
|
192
|
|
193 ;; List from FSF international/fontset.el of registries for charsets.
|
|
194
|
|
195 ;; latin-iso8859-1 "ISO8859-1"
|
|
196 ;; latin-iso8859-2 "ISO8859-2"
|
|
197 ;; latin-iso8859-3 "ISO8859-3"
|
|
198 ;; latin-iso8859-4 "ISO8859-4"
|
|
199 ;; thai-tis620 "TIS620"
|
|
200 ;; greek-iso8859-7 "ISO8859-7"
|
|
201 ;; arabic-iso8859-6 "ISO8859-6"
|
|
202 ;; hebrew-iso8859-8 "ISO8859-8"
|
|
203 ;; katakana-jisx0201 "JISX0201"
|
|
204 ;; latin-jisx0201 "JISX0201"
|
|
205 ;; cyrillic-iso8859-5 "ISO8859-5"
|
|
206 ;; latin-iso8859-9 "ISO8859-9"
|
|
207 ;; japanese-jisx0208-1978 "JISX0208.1978"
|
|
208 ;; chinese-gb2312 "GB2312.1980"
|
|
209 ;; japanese-jisx0208 "JISX0208.1990"
|
|
210 ;; korean-ksc5601 "KSC5601.1989"
|
|
211 ;; japanese-jisx0212 "JISX0212"
|
|
212 ;; chinese-cns11643-1 "CNS11643.1992-1"
|
|
213 ;; chinese-cns11643-2 "CNS11643.1992-2"
|
|
214 ;; chinese-cns11643-3 "CNS11643.1992-3"
|
|
215 ;; chinese-cns11643-4 "CNS11643.1992-4"
|
|
216 ;; chinese-cns11643-5 "CNS11643.1992-5"
|
|
217 ;; chinese-cns11643-6 "CNS11643.1992-6"
|
|
218 ;; chinese-cns11643-7 "CNS11643.1992-7"
|
|
219 ;; chinese-big5-1 "Big5"
|
|
220 ;; chinese-big5-2 "Big5"
|
|
221 ;; chinese-sisheng "sisheng_cwnn"
|
|
222 ;; vietnamese-viscii-lower "VISCII1.1"
|
|
223 ;; vietnamese-viscii-upper "VISCII1.1"
|
|
224 ;; arabic-digit "MuleArabic-0"
|
|
225 ;; arabic-1-column "MuleArabic-1"
|
|
226 ;; arabic-2-column "MuleArabic-2"
|
|
227 ;; ipa "MuleIPA"
|
|
228 ;; ethiopic "Ethiopic-Unicode"
|
|
229 ;; ascii-right-to-left "ISO8859-1"
|
|
230 ;; indian-is13194 "IS13194-Devanagari"
|
|
231 ;; indian-2-column "MuleIndian-2"
|
|
232 ;; indian-1-column "MuleIndian-1"
|
|
233 ;; lao "MuleLao-1"
|
|
234 ;; tibetan "MuleTibetan-2"
|
|
235 ;; tibetan-1-column "MuleTibetan-1"
|
|
236 ;; latin-iso8859-14 "ISO8859-14"
|
|
237 ;; latin-iso8859-15 "ISO8859-15"
|
|
238 ;; mule-unicode-0100-24ff "ISO10646-1"
|
|
239 ;; mule-unicode-2500-33ff "ISO10646-1"
|
|
240 ;; mule-unicode-e000-ffff "ISO10646-1"
|
|
241 ;; japanese-jisx0213-1 "JISX0213.2000-1"
|
|
242 ;; japanese-jisx0213-2 "JISX0213.2000-2"
|
|
243
|
|
244 ;;; Begin stuff from international/mule-conf.el.
|
|
245
|
|
246 ; ;;; Definitions of character sets.
|
|
247
|
|
248 ; ;; Basic (official) character sets. These character sets are treated
|
|
249 ; ;; efficiently with respect to buffer memory.
|
|
250
|
|
251 ; ;; Syntax:
|
|
252 ; ;; (define-charset CHARSET-ID CHARSET
|
|
253 ; ;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
|
|
254 ; ;; SHORT-NAME LONG-NAME DESCRIPTION ])
|
|
255 ; ;; ASCII charset is defined in src/charset.c as below.
|
|
256 ; ;; (define-charset 0 ascii
|
|
257 ; ;; [1 94 1 0 ?B 0 "ASCII" "ASCII" "ASCII (ISO646 IRV)"])
|
|
258
|
|
259 ; ;; 1-byte charsets. Valid range of CHARSET-ID is 128..143.
|
|
260
|
|
261 ; ;; CHARSET-ID 128 is not used.
|
|
262
|
|
263 ; ; An extra level of commenting means an official (done in C) charset.
|
|
264 ; ; (make-charset 'latin-iso8859-1
|
|
265 ; ; "Right-Hand Part of Latin Alphabet 1 (ISO/IEC 8859-1): ISO-IR-100"
|
|
266 ; ; '(dimension
|
|
267 ; ; 1
|
|
268 ; ; registry "ISO8859-1"
|
|
269 ; ; chars 96
|
|
270 ; ; columns 1
|
|
271 ; ; direction l2r
|
|
272 ; ; final ?A
|
|
273 ; ; graphic 1
|
|
274 ; ; short-name "RHP of Latin-1"
|
|
275 ; ; long-name "RHP of Latin-1 (ISO 8859-1): ISO-IR-100"
|
|
276 ; ; ))
|
|
277
|
|
278 ; ; (make-charset 'latin-iso8859-2
|
|
279 ; ; "Right-Hand Part of Latin Alphabet 2 (ISO/IEC 8859-2): ISO-IR-101"
|
|
280 ; ; '(dimension
|
|
281 ; ; 1
|
|
282 ; ; registry "ISO8859-2"
|
|
283 ; ; chars 96
|
|
284 ; ; columns 1
|
|
285 ; ; direction l2r
|
|
286 ; ; final ?B
|
|
287 ; ; graphic 1
|
|
288 ; ; short-name "RHP of Latin-2"
|
|
289 ; ; long-name "RHP of Latin-2 (ISO 8859-2): ISO-IR-101"
|
|
290 ; ; ))
|
|
291
|
|
292 ; ; (make-charset 'latin-iso8859-3
|
|
293 ; ; "Right-Hand Part of Latin Alphabet 3 (ISO/IEC 8859-3): ISO-IR-109"
|
|
294 ; ; '(dimension
|
|
295 ; ; 1
|
|
296 ; ; registry "ISO8859-3"
|
|
297 ; ; chars 96
|
|
298 ; ; columns 1
|
|
299 ; ; direction l2r
|
|
300 ; ; final ?C
|
|
301 ; ; graphic 1
|
|
302 ; ; short-name "RHP of Latin-3"
|
|
303 ; ; long-name "RHP of Latin-3 (ISO 8859-3): ISO-IR-109"
|
|
304 ; ; ))
|
|
305
|
|
306 ; ; (make-charset 'latin-iso8859-4
|
|
307 ; ; "Right-Hand Part of Latin Alphabet 4 (ISO/IEC 8859-4): ISO-IR-110"
|
|
308 ; ; '(dimension
|
|
309 ; ; 1
|
|
310 ; ; registry "ISO8859-4"
|
|
311 ; ; chars 96
|
|
312 ; ; columns 1
|
|
313 ; ; direction l2r
|
|
314 ; ; final ?D
|
|
315 ; ; graphic 1
|
|
316 ; ; short-name "RHP of Latin-4"
|
|
317 ; ; long-name "RHP of Latin-4 (ISO 8859-4): ISO-IR-110"
|
|
318 ; ; ))
|
|
319
|
|
320 ; ; (make-charset 'thai-tis620
|
|
321 ; ; "Right-Hand Part of TIS620.2533 (Thai): ISO-IR-166"
|
|
322 ; ; '(dimension
|
|
323 ; ; 1
|
|
324 ; ; registry "TIS620"
|
|
325 ; ; chars 96
|
|
326 ; ; columns 1
|
|
327 ; ; direction l2r
|
|
328 ; ; final ?T
|
|
329 ; ; graphic 1
|
|
330 ; ; short-name "RHP of TIS620"
|
|
331 ; ; long-name "RHP of Thai (TIS620): ISO-IR-166"
|
|
332 ; ; ))
|
|
333
|
|
334 ; ; (make-charset 'greek-iso8859-7
|
|
335 ; ; "Right-Hand Part of Latin/Greek Alphabet (ISO/IEC 8859-7): ISO-IR-126"
|
|
336 ; ; '(dimension
|
|
337 ; ; 1
|
|
338 ; ; registry "ISO8859-7"
|
|
339 ; ; chars 96
|
|
340 ; ; columns 1
|
|
341 ; ; direction l2r
|
|
342 ; ; final ?F
|
|
343 ; ; graphic 1
|
|
344 ; ; short-name "RHP of ISO8859/7"
|
|
345 ; ; long-name "RHP of Greek (ISO 8859-7): ISO-IR-126"
|
|
346 ; ; ))
|
|
347
|
|
348 ; ; (make-charset 'arabic-iso8859-6
|
|
349 ; ; "Right-Hand Part of Latin/Arabic Alphabet (ISO/IEC 8859-6): ISO-IR-127"
|
|
350 ; ; '(dimension
|
|
351 ; ; 1
|
|
352 ; ; registry "ISO8859-6"
|
|
353 ; ; chars 96
|
|
354 ; ; columns 1
|
|
355 ; ; direction r2l
|
|
356 ; ; final ?G
|
|
357 ; ; graphic 1
|
|
358 ; ; short-name "RHP of ISO8859/6"
|
|
359 ; ; long-name "RHP of Arabic (ISO 8859-6): ISO-IR-127"
|
|
360 ; ; ))
|
|
361
|
|
362 ; ; (make-charset 'hebrew-iso8859-8
|
|
363 ; ; "Right-Hand Part of Latin/Hebrew Alphabet (ISO/IEC 8859-8): ISO-IR-138"
|
|
364 ; ; '(dimension
|
|
365 ; ; 1
|
|
366 ; ; registry "ISO8859-8"
|
|
367 ; ; chars 96
|
|
368 ; ; columns 1
|
|
369 ; ; direction r2l
|
|
370 ; ; final ?H
|
|
371 ; ; graphic 1
|
|
372 ; ; short-name "RHP of ISO8859/8"
|
|
373 ; ; long-name "RHP of Hebrew (ISO 8859-8): ISO-IR-138"
|
|
374 ; ; ))
|
|
375
|
|
376 ; ; (make-charset 'katakana-jisx0201
|
|
377 ; ; "Katakana Part of JISX0201.1976"
|
|
378 ; ; '(dimension
|
|
379 ; ; 1
|
|
380 ; ; registry "JISX0201"
|
|
381 ; ; chars 94
|
|
382 ; ; columns 1
|
|
383 ; ; direction l2r
|
|
384 ; ; final ?I
|
|
385 ; ; graphic 1
|
|
386 ; ; short-name "JISX0201 Katakana"
|
|
387 ; ; long-name "Japanese Katakana (JISX0201.1976)"
|
|
388 ; ; ))
|
|
389
|
|
390 ; ; (make-charset 'latin-jisx0201
|
|
391 ; ; "Roman Part of JISX0201.1976"
|
|
392 ; ; '(dimension
|
|
393 ; ; 1
|
|
394 ; ; registry "JISX0201"
|
|
395 ; ; chars 94
|
|
396 ; ; columns 1
|
|
397 ; ; direction l2r
|
|
398 ; ; final ?J
|
|
399 ; ; graphic 0
|
|
400 ; ; short-name "JISX0201 Roman"
|
|
401 ; ; long-name "Japanese Roman (JISX0201.1976)"
|
|
402 ; ; ))
|
|
403
|
|
404
|
|
405 ; ;; CHARSET-ID 139 is not used.
|
|
406
|
|
407 ; ; (make-charset 'cyrillic-iso8859-5
|
|
408 ; ; "Right-Hand Part of Latin/Cyrillic Alphabet (ISO/IEC 8859-5): ISO-IR-144"
|
|
409 ; ; '(dimension
|
|
410 ; ; 1
|
|
411 ; ; registry "ISO8859-5"
|
|
412 ; ; chars 96
|
|
413 ; ; columns 1
|
|
414 ; ; direction l2r
|
|
415 ; ; final ?L
|
|
416 ; ; graphic 1
|
|
417 ; ; short-name "RHP of ISO8859/5"
|
|
418 ; ; long-name "RHP of Cyrillic (ISO 8859-5): ISO-IR-144"
|
|
419 ; ; ))
|
|
420
|
|
421 ; ; (make-charset 'latin-iso8859-9
|
|
422 ; ; "Right-Hand Part of Latin Alphabet 5 (ISO/IEC 8859-9): ISO-IR-148"
|
|
423 ; ; '(dimension
|
|
424 ; ; 1
|
|
425 ; ; registry "ISO8859-9"
|
|
426 ; ; chars 96
|
|
427 ; ; columns 1
|
|
428 ; ; direction l2r
|
|
429 ; ; final ?M
|
|
430 ; ; graphic 1
|
|
431 ; ; short-name "RHP of Latin-5"
|
|
432 ; ; long-name "RHP of Latin-5 (ISO 8859-9): ISO-IR-148"
|
|
433 ; ; ))
|
|
434
|
|
435 ; ; (make-charset 'latin-iso8859-15
|
|
436 ; ; "Right-Hand Part of Latin Alphabet 9 (ISO/IEC 8859-15): ISO-IR-203"
|
|
437 ; ; '(dimension
|
|
438 ; ; 1
|
|
439 ; ; registry "ISO8859-15"
|
|
440 ; ; chars 96
|
|
441 ; ; columns 1
|
|
442 ; ; direction l2r
|
|
443 ; ; final ?b
|
|
444 ; ; graphic 1
|
|
445 ; ; short-name "RHP of Latin-9"
|
|
446 ; ; long-name "RHP of Latin-9 (ISO 8859-15): ISO-IR-203"
|
|
447 ; ; ))
|
|
448
|
|
449 ; (make-charset 'latin-iso8859-14
|
|
450 ; "Right-Hand Part of Latin Alphabet 8 (ISO/IEC 8859-14)"
|
|
451 ; '(dimension
|
|
452 ; 1
|
|
453 ; registry "ISO8859-14"
|
|
454 ; chars 96
|
|
455 ; columns 1
|
|
456 ; direction l2r
|
|
457 ; final ?_
|
|
458 ; graphic 1
|
|
459 ; short-name "RHP of Latin-8"
|
|
460 ; long-name "RHP of Latin-8 (ISO 8859-14)"
|
|
461 ; ))
|
|
462
|
|
463
|
|
464 ; ;; 2-byte charsets. Valid range of CHARSET-ID is 144..153.
|
|
465
|
|
466 ; ; (make-charset 'japanese-jisx0208-1978
|
|
467 ; ; "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42"
|
|
468 ; ; '(dimension
|
|
469 ; ; 2
|
|
470 ; ; registry "JISX0208.1990"
|
|
471 ; ; registry "JISX0208.1978"
|
|
472 ; ; chars 94
|
|
473 ; ; columns 2
|
|
474 ; ; direction l2r
|
|
475 ; ; final ?@
|
|
476 ; ; graphic 0
|
|
477 ; ; short-name "JISX0208.1978"
|
|
478 ; ; long-name "JISX0208.1978 (Japanese): ISO-IR-42"
|
|
479 ; ; ))
|
|
480
|
|
481 ; ; (make-charset 'chinese-gb2312
|
|
482 ; ; "GB2312 Chinese simplified: ISO-IR-58"
|
|
483 ; ; '(dimension
|
|
484 ; ; 2
|
|
485 ; ; registry "GB2312.1980"
|
|
486 ; ; chars 94
|
|
487 ; ; columns 2
|
|
488 ; ; direction l2r
|
|
489 ; ; final ?A
|
|
490 ; ; graphic 0
|
|
491 ; ; short-name "GB2312"
|
|
492 ; ; long-name "GB2312: ISO-IR-58"
|
|
493 ; ; ))
|
|
494
|
|
495 ; ; (make-charset 'japanese-jisx0208
|
|
496 ; ; "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87"
|
|
497 ; ; '(dimension
|
|
498 ; ; 2
|
|
499 ; ; chars 94
|
|
500 ; ; columns 2
|
|
501 ; ; direction l2r
|
|
502 ; ; final ?B
|
|
503 ; ; graphic 0
|
|
504 ; ; short-name "JISX0208"
|
|
505 ; ; long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87"
|
|
506 ; ; ))
|
|
507
|
|
508 ; ; (make-charset 'korean-ksc5601
|
|
509 ; ; "KSC5601 Korean Hangul and Hanja: ISO-IR-149"
|
|
510 ; ; '(dimension
|
|
511 ; ; 2
|
|
512 ; ; registry "KSC5601.1989"
|
|
513 ; ; chars 94
|
|
514 ; ; columns 2
|
|
515 ; ; direction l2r
|
|
516 ; ; final ?C
|
|
517 ; ; graphic 0
|
|
518 ; ; short-name "KSC5601"
|
|
519 ; ; long-name "KSC5601 (Korean): ISO-IR-149"
|
|
520 ; ; ))
|
|
521
|
|
522 ; ; (make-charset 'japanese-jisx0212
|
|
523 ; ; "JISX0212 Japanese supplement: ISO-IR-159"
|
|
524 ; ; '(dimension
|
|
525 ; ; 2
|
|
526 ; ; registry "JISX0212"
|
|
527 ; ; chars 94
|
|
528 ; ; columns 2
|
|
529 ; ; direction l2r
|
|
530 ; ; final ?D
|
|
531 ; ; graphic 0
|
|
532 ; ; short-name "JISX0212"
|
|
533 ; ; long-name "JISX0212 (Japanese): ISO-IR-159"
|
|
534 ; ; ))
|
|
535
|
|
536 ; ; (make-charset 'chinese-cns11643-1
|
|
537 ; ; "CNS11643 Plane 1 Chinese traditional: ISO-IR-171"
|
|
538 ; ; '(dimension
|
|
539 ; ; 2
|
|
540 ; ; registry "CNS11643.1992-1"
|
|
541 ; ; chars 94
|
|
542 ; ; columns 2
|
|
543 ; ; direction l2r
|
|
544 ; ; final ?G
|
|
545 ; ; graphic 0
|
|
546 ; ; short-name "CNS11643-1"
|
|
547 ; ; long-name "CNS11643-1 (Chinese traditional): ISO-IR-171"
|
|
548 ; ; ))
|
|
549
|
|
550 ; ; (make-charset 'chinese-cns11643-2
|
|
551 ; ; "CNS11643 Plane 2 Chinese traditional: ISO-IR-172"
|
|
552 ; ; '(dimension
|
|
553 ; ; 2
|
|
554 ; ; registry "CNS11643.1992-2"
|
|
555 ; ; chars 94
|
|
556 ; ; columns 2
|
|
557 ; ; direction l2r
|
|
558 ; ; final ?H
|
|
559 ; ; graphic 0
|
|
560 ; ; short-name "CNS11643-2"
|
|
561 ; ; long-name "CNS11643-2 (Chinese traditional): ISO-IR-172"
|
|
562 ; ; ))
|
|
563
|
|
564 ; (make-charset 'japanese-jisx0213-1 "JISX0213 Plane 1 (Japanese)"
|
|
565 ; '(dimension
|
|
566 ; 2
|
|
567 ; registry "JISX0213.2000-1"
|
|
568 ; chars 94
|
|
569 ; columns 2
|
|
570 ; direction l2r
|
|
571 ; final ?O
|
|
572 ; graphic 0
|
|
573 ; short-name "JISX0213-1"
|
|
574 ; long-name "JISX0213-1"
|
|
575 ; ))
|
|
576
|
|
577 ; ; (make-charset 'chinese-big5-1
|
|
578 ; ; "Frequently used part (A141-C67F) of Big5 (Chinese traditional)"
|
|
579 ; ; '(dimension
|
|
580 ; ; 2
|
|
581 ; ; registry "Big5"
|
|
582 ; ; chars 94
|
|
583 ; ; columns 2
|
|
584 ; ; direction l2r
|
|
585 ; ; final ?0
|
|
586 ; ; graphic 0
|
|
587 ; ; short-name "Big5 (Level-1)"
|
|
588 ; ; long-name "Big5 (Level-1) A141-C67F"
|
|
589 ; ; ))
|
|
590
|
|
591 ; ; (make-charset 'chinese-big5-2
|
|
592 ; ; "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)"
|
|
593 ; ; '(dimension
|
|
594 ; ; 2
|
|
595 ; ; registry "Big5"
|
|
596 ; ; chars 94
|
|
597 ; ; columns 2
|
|
598 ; ; direction l2r
|
|
599 ; ; final ?1
|
|
600 ; ; graphic 0
|
|
601 ; ; short-name "Big5 (Level-2)"
|
|
602 ; ; long-name "Big5 (Level-2) C940-FEFE"
|
|
603 ; ; ))
|
|
604
|
|
605
|
|
606 ; ;; Additional (private) character sets. These character sets are
|
|
607 ; ;; treated less space-efficiently in the buffer.
|
|
608
|
|
609 ; ;; Syntax:
|
|
610 ; ;; (define-charset CHARSET-ID CHARSET
|
|
611 ; ;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
|
|
612 ; ;; SHORT-NAME LONG-NAME DESCRIPTION ])
|
|
613
|
|
614 ; ;; ISO-2022 allows a use of character sets not registered in ISO with
|
|
615 ; ;; final characters `0' (0x30) through `?' (0x3F). Among them, Emacs
|
|
616 ; ;; reserves `0' through `9' to support several private character sets.
|
|
617 ; ;; The remaining final characters `:' through `?' are for users.
|
|
618
|
|
619 ; ;; 1-byte 1-column charsets. Valid range of CHARSET-ID is 160..223.
|
|
620
|
|
621 ; (make-charset 'chinese-sisheng
|
|
622 ; "SiSheng characters for PinYin/ZhuYin"
|
|
623 ; '(dimension
|
|
624 ; 1
|
|
625 ; registry "sisheng_cwnn"
|
|
626 ; chars 94
|
|
627 ; columns 1
|
|
628 ; direction l2r
|
|
629 ; final ?0
|
|
630 ; graphic 0
|
|
631 ; short-name "SiSheng"
|
|
632 ; long-name "SiSheng (PinYin/ZhuYin)"
|
|
633 ; ))
|
|
634
|
|
635
|
|
636 ; ;; IPA characters for phonetic symbols.
|
|
637 ; (make-charset 'ipa "IPA (International Phonetic Association)"
|
|
638 ; '(dimension
|
|
639 ; 1
|
|
640 ; registry "MuleIPA"
|
|
641 ; chars 96
|
|
642 ; columns 1
|
|
643 ; direction l2r
|
|
644 ; final ?0
|
|
645 ; graphic 1
|
|
646 ; short-name "IPA"
|
|
647 ; long-name "IPA"
|
|
648 ; ))
|
|
649
|
|
650
|
|
651 ; ;; Vietnamese VISCII. VISCII is 1-byte character set which contains
|
|
652 ; ;; more than 96 characters. Since Emacs can't handle it as one
|
|
653 ; ;; character set, it is divided into two: lower case letters and upper
|
|
654 ; ;; case letters.
|
|
655 ; (make-charset 'vietnamese-viscii-lower "VISCII1.1 lower-case"
|
|
656 ; '(dimension
|
|
657 ; 1
|
|
658 ; registry "VISCII1.1"
|
|
659 ; chars 96
|
|
660 ; columns 1
|
|
661 ; direction l2r
|
|
662 ; final ?1
|
|
663 ; graphic 1
|
|
664 ; short-name "VISCII lower"
|
|
665 ; long-name "VISCII lower-case"
|
|
666 ; ))
|
|
667
|
|
668 ; (make-charset 'vietnamese-viscii-upper "VISCII1.1 upper-case"
|
|
669 ; '(dimension
|
|
670 ; 1
|
|
671 ; registry "VISCII1.1"
|
|
672 ; chars 96
|
|
673 ; columns 1
|
|
674 ; direction l2r
|
|
675 ; final ?2
|
|
676 ; graphic 1
|
|
677 ; short-name "VISCII upper"
|
|
678 ; long-name "VISCII upper-case"
|
|
679 ; ))
|
|
680
|
|
681
|
|
682 ; ;; For Arabic, we need three different types of character sets.
|
|
683 ; ;; Digits are of direction left-to-right and of width 1-column.
|
|
684 ; ;; Others are of direction right-to-left and of width 1-column or
|
|
685 ; ;; 2-column.
|
|
686 ; (make-charset 'arabic-digit "Arabic digit"
|
|
687 ; '(dimension
|
|
688 ; 1
|
|
689 ; registry "MuleArabic-0"
|
|
690 ; chars 94
|
|
691 ; columns 1
|
|
692 ; direction l2r
|
|
693 ; final ?2
|
|
694 ; graphic 0
|
|
695 ; short-name "Arabic digit"
|
|
696 ; long-name "Arabic digit"
|
|
697 ; ))
|
|
698
|
|
699 ; (make-charset 'arabic-1-column "Arabic 1-column"
|
|
700 ; '(dimension
|
|
701 ; 1
|
|
702 ; registry "MuleArabic-1"
|
|
703 ; chars 94
|
|
704 ; columns 1
|
|
705 ; direction r2l
|
|
706 ; final ?3
|
|
707 ; graphic 0
|
|
708 ; short-name "Arabic 1-col"
|
|
709 ; long-name "Arabic 1-column"
|
|
710 ; ))
|
|
711
|
|
712
|
|
713 ; ;; ASCII with right-to-left direction.
|
|
714 ; (make-charset 'ascii-right-to-left
|
|
715 ; "ASCII (left half of ISO 8859-1) with right-to-left direction"
|
|
716 ; '(dimension
|
|
717 ; 1
|
|
718 ; registry "ISO8859-1"
|
|
719 ; chars 94
|
|
720 ; columns 1
|
|
721 ; direction r2l
|
|
722 ; final ?B
|
|
723 ; graphic 0
|
|
724 ; short-name "rev ASCII"
|
|
725 ; long-name "ASCII with right-to-left direction"
|
|
726 ; ))
|
|
727
|
|
728
|
|
729 ; ;; Lao script.
|
|
730 ; ;; ISO10646's 0x0E80..0x0EDF are mapped to 0x20..0x7F.
|
|
731 ; (make-charset 'lao "Lao characters (ISO10646 0E80..0EDF)"
|
|
732 ; '(dimension
|
|
733 ; 1
|
|
734 ; registry "MuleLao-1"
|
|
735 ; chars 94
|
|
736 ; columns 1
|
|
737 ; direction l2r
|
|
738 ; final ?1
|
|
739 ; graphic 0
|
|
740 ; short-name "Lao"
|
|
741 ; long-name "Lao"
|
|
742 ; ))
|
|
743
|
|
744
|
|
745 ; ;; CHARSET-IDs 168..223 are not used.
|
|
746
|
|
747 ; ;; 1-byte 2-column charsets. Valid range of CHARSET-ID is 224..239.
|
|
748
|
|
749 ; (make-charset 'arabic-2-column "Arabic 2-column"
|
|
750 ; '(dimension
|
|
751 ; 1
|
|
752 ; registry "MuleArabic-2"
|
|
753 ; chars 94
|
|
754 ; columns 2
|
|
755 ; direction r2l
|
|
756 ; final ?4
|
|
757 ; graphic 0
|
|
758 ; short-name "Arabic 2-col"
|
|
759 ; long-name "Arabic 2-column"
|
|
760 ; ))
|
|
761
|
|
762
|
|
763 ; ;; Indian scripts. Symbolic charset for data exchange. Glyphs are
|
|
764 ; ;; not assigned. They are automatically converted to each Indian
|
|
765 ; ;; script which IS-13194 supports.
|
|
766
|
|
767 ; (make-charset 'indian-is13194
|
|
768 ; "Generic Indian charset for data exchange with IS 13194"
|
|
769 ; '(dimension
|
|
770 ; 1
|
|
771 ; registry "IS13194-Devanagari"
|
|
772 ; chars 94
|
|
773 ; columns 2
|
|
774 ; direction l2r
|
|
775 ; final ?5
|
|
776 ; graphic 1
|
|
777 ; short-name "IS 13194"
|
|
778 ; long-name "Indian IS 13194"
|
|
779 ; ))
|
|
780
|
|
781
|
|
782 ; ;; CHARSET-IDs 226..239 are not used.
|
|
783
|
|
784 ; ;; 2-byte 1-column charsets. Valid range of CHARSET-ID is 240..244.
|
|
785
|
|
786 ; ;; Actual Glyph for 1-column width.
|
|
787 ; (make-charset 'indian-1-column
|
|
788 ; "Indian charset for 1-column width glyphs"
|
|
789 ; '(dimension
|
|
790 ; 2
|
|
791 ; registry "MuleIndian-1"
|
|
792 ; chars 94
|
|
793 ; columns 1
|
|
794 ; direction l2r
|
|
795 ; final ?6
|
|
796 ; graphic 0
|
|
797 ; short-name "Indian 1-col"
|
|
798 ; long-name "Indian 1 Column"
|
|
799 ; ))
|
|
800
|
|
801
|
|
802 ; (make-charset 'tibetan-1-column "Tibetan 1 column glyph"
|
|
803 ; '(dimension
|
|
804 ; 2
|
|
805 ; registry "MuleTibetan-1"
|
|
806 ; chars 94
|
|
807 ; columns 1
|
|
808 ; direction l2r
|
|
809 ; final ?8
|
|
810 ; graphic 0
|
|
811 ; short-name "Tibetan 1-col"
|
|
812 ; long-name "Tibetan 1 column"
|
|
813 ; ))
|
|
814
|
|
815
|
|
816 ; ;; Subsets of Unicode.
|
|
817
|
|
818 ; (make-charset 'mule-unicode-2500-33ff
|
|
819 ; "Unicode characters of the range U+2500..U+33FF."
|
|
820 ; '(dimension
|
|
821 ; 2
|
|
822 ; registry "ISO10646-1"
|
|
823 ; chars 96
|
|
824 ; columns 1
|
|
825 ; direction l2r
|
|
826 ; final ?2
|
|
827 ; graphic 0
|
|
828 ; short-name "Unicode subset 2"
|
|
829 ; long-name "Unicode subset (U+2500..U+33FF)"
|
|
830 ; ))
|
|
831
|
|
832
|
|
833 ; (make-charset 'mule-unicode-e000-ffff
|
|
834 ; "Unicode characters of the range U+E000..U+FFFF."
|
|
835 ; '(dimension
|
|
836 ; 2
|
|
837 ; registry "ISO10646-1"
|
|
838 ; chars 96
|
|
839 ; columns 1
|
|
840 ; direction l2r
|
|
841 ; final ?3
|
|
842 ; graphic 0
|
|
843 ; short-name "Unicode subset 3"
|
|
844 ; long-name "Unicode subset (U+E000..U+FFFF)"
|
|
845 ; ))
|
|
846
|
|
847
|
|
848 ; (make-charset 'mule-unicode-0100-24ff
|
|
849 ; "Unicode characters of the range U+0100..U+24FF."
|
|
850 ; '(dimension
|
|
851 ; 2
|
|
852 ; registry "ISO10646-1"
|
|
853 ; chars 96
|
|
854 ; columns 1
|
|
855 ; direction l2r
|
|
856 ; final ?1
|
|
857 ; graphic 0
|
|
858 ; short-name "Unicode subset"
|
|
859 ; long-name "Unicode subset (U+0100..U+24FF)"
|
|
860 ; ))
|
|
861
|
|
862
|
|
863 ; ;; 2-byte 2-column charsets. Valid range of CHARSET-ID is 245..254.
|
|
864
|
|
865 ; ;; Ethiopic characters (Amharic and Tigrigna).
|
|
866 ; (make-charset 'ethiopic "Ethiopic characters"
|
|
867 ; '(dimension
|
|
868 ; 2
|
|
869 ; registry "Ethiopic-Unicode"
|
|
870 ; chars 94
|
|
871 ; columns 2
|
|
872 ; direction l2r
|
|
873 ; final ?3
|
|
874 ; graphic 0
|
|
875 ; short-name "Ethiopic"
|
|
876 ; long-name "Ethiopic characters"
|
|
877 ; ))
|
|
878
|
|
879
|
|
880 ; ;; Chinese CNS11643 Plane3 thru Plane7. Although these are official
|
|
881 ; ;; character sets, their use is rare, so they don't have to be treated
|
|
882 ; ;; space-efficiently in the buffer.
|
|
883 ; (make-charset 'chinese-cns11643-3
|
|
884 ; "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183"
|
|
885 ; '(dimension
|
|
886 ; 2
|
|
887 ; registry "CNS11643.1992-3"
|
|
888 ; chars 94
|
|
889 ; columns 2
|
|
890 ; direction l2r
|
|
891 ; final ?I
|
|
892 ; graphic 0
|
|
893 ; short-name "CNS11643-3"
|
|
894 ; long-name "CNS11643-3 (Chinese traditional): ISO-IR-183"
|
|
895 ; ))
|
|
896
|
|
897 ; (make-charset 'chinese-cns11643-4
|
|
898 ; "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184"
|
|
899 ; '(dimension
|
|
900 ; 2
|
|
901 ; registry "CNS11643.1992-4"
|
|
902 ; chars 94
|
|
903 ; columns 2
|
|
904 ; direction l2r
|
|
905 ; final ?J
|
|
906 ; graphic 0
|
|
907 ; short-name "CNS11643-4"
|
|
908 ; long-name "CNS11643-4 (Chinese traditional): ISO-IR-184"
|
|
909 ; ))
|
|
910
|
|
911 ; (make-charset 'chinese-cns11643-5
|
|
912 ; "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185"
|
|
913 ; '(dimension
|
|
914 ; 2
|
|
915 ; registry "CNS11643.1992-5"
|
|
916 ; chars 94
|
|
917 ; columns 2
|
|
918 ; direction l2r
|
|
919 ; final ?K
|
|
920 ; graphic 0
|
|
921 ; short-name "CNS11643-5"
|
|
922 ; long-name "CNS11643-5 (Chinese traditional): ISO-IR-185"
|
|
923 ; ))
|
|
924
|
|
925 ; (make-charset 'chinese-cns11643-6
|
|
926 ; "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186"
|
|
927 ; '(dimension
|
|
928 ; 2
|
|
929 ; registry "CNS11643.1992-6"
|
|
930 ; chars 94
|
|
931 ; columns 2
|
|
932 ; direction l2r
|
|
933 ; final ?L
|
|
934 ; graphic 0
|
|
935 ; short-name "CNS11643-6"
|
|
936 ; long-name "CNS11643-6 (Chinese traditional): ISO-IR-186"
|
|
937 ; ))
|
|
938
|
|
939 ; (make-charset 'chinese-cns11643-7
|
|
940 ; "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187"
|
|
941 ; '(dimension
|
|
942 ; 2
|
|
943 ; registry "CNS11643.1992-7"
|
|
944 ; chars 94
|
|
945 ; columns 2
|
|
946 ; direction l2r
|
|
947 ; final ?M
|
|
948 ; graphic 0
|
|
949 ; short-name "CNS11643-7"
|
|
950 ; long-name "CNS11643-7 (Chinese traditional): ISO-IR-187"
|
|
951 ; ))
|
|
952
|
|
953
|
|
954 ; ;; Actual Glyph for 2-column width.
|
|
955 ; (make-charset 'indian-2-column
|
|
956 ; "Indian charset for 2-column width glyphs"
|
|
957 ; '(dimension
|
|
958 ; 2
|
|
959 ; registry "MuleIndian-2"
|
|
960 ; chars 94
|
|
961 ; columns 2
|
|
962 ; direction l2r
|
|
963 ; final ?5
|
|
964 ; graphic 0
|
|
965 ; short-name "Indian 2-col"
|
|
966 ; long-name "Indian 2 Column"
|
|
967 ; ))
|
|
968
|
|
969
|
|
970 ; ;; Tibetan script.
|
|
971 ; (make-charset 'tibetan "Tibetan characters"
|
|
972 ; '(dimension
|
|
973 ; 2
|
|
974 ; registry "MuleTibetan-2"
|
|
975 ; chars 94
|
|
976 ; columns 2
|
|
977 ; direction l2r
|
|
978 ; final ?7
|
|
979 ; graphic 0
|
|
980 ; short-name "Tibetan 2-col"
|
|
981 ; long-name "Tibetan 2 column"
|
|
982 ; ))
|
|
983
|
|
984
|
|
985 ; ;; CHARSET-ID 253 is not used.
|
|
986
|
|
987 ; ;; JISX0213 Plane 2
|
|
988 ; (make-charset 'japanese-jisx0213-2 "JISX0213 Plane 2 (Japanese)"
|
|
989 ; '(dimension
|
|
990 ; 2
|
|
991 ; registry "JISX0213.2000-2"
|
|
992 ; chars 94
|
|
993 ; columns 2
|
|
994 ; direction l2r
|
|
995 ; final ?P
|
|
996 ; graphic 0
|
|
997 ; short-name "JISX0213-2"
|
|
998 ; long-name "JISX0213-2"
|
|
999 ; ))
|
|
1000
|
428
|
1001 ;;; mule-charset.el ends here
|
778
|
1002
|