Mercurial > hg > xemacs-beta
annotate lisp/mule/mule-charset.el @ 4975:c5cb3cb79110
Automatic merge
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Fri, 05 Feb 2010 04:27:45 -0600 |
parents | 68d1ca56cffa |
children | 308d34e9f07d |
rev | line source |
---|---|
502 | 1 ;;; mule-charset.el --- Charset functions for Mule. -*- coding: iso-2022-7bit; -*- |
428 | 2 |
788 | 3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. |
4 ;; Copyright (C) 1992, 2001 Free Software Foundation, Inc. | |
5 ;; Licensed to the Free Software Foundation. | |
428 | 6 ;; Copyright (C) 1995 Amdahl Corporation. |
7 ;; Copyright (C) 1996 Sun Microsystems. | |
777 | 8 ;; Copyright (C) 2002 Ben Wing. |
428 | 9 |
10 ;; Author: Unknown | |
11 ;; Keywords: i18n, mule, internal | |
12 | |
13 ;; This file is part of XEmacs. | |
14 | |
15 ;; XEmacs is free software; you can redistribute it and/or modify it | |
16 ;; under the terms of the GNU General Public License as published by | |
17 ;; the Free Software Foundation; either version 2, or (at your option) | |
18 ;; any later version. | |
19 | |
20 ;; XEmacs is distributed in the hope that it will be useful, but | |
21 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
23 ;; General Public License for more details. | |
24 | |
25 ;; You should have received a copy of the GNU General Public License | |
26 ;; along with XEmacs; see the file COPYING. If not, write to the | |
27 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
28 ;; Boston, MA 02111-1307, USA. | |
29 | |
30 ;;; Synched up with: Not synched. API at source level synched with FSF 20.3.9. | |
31 | |
32 ;;; Commentary: | |
33 | |
34 ;; These functions are not compatible at the bytecode level with Emacs/Mule, | |
35 ;; and they never will be. -sb [1999-05-26] | |
36 | |
37 ;;; Code: | |
38 | |
39 ;;;; Classifying text according to charsets | |
40 | |
41 (defun charsets-in-string (string) | |
42 "Return a list of the charsets in STRING." | |
3681 | 43 (let (res) |
44 (with-string-as-buffer-contents string | |
45 ;; charsets-in-region now in C. | |
46 (setq res (charsets-in-region (point-min) (point-max)))) | |
47 res)) | |
428 | 48 |
771 | 49 (defalias 'find-charset-string 'charsets-in-string) |
3681 | 50 |
771 | 51 (defalias 'find-charset-region 'charsets-in-region) |
818 | 52 |
428 | 53 |
54 ;;;; Charset accessors | |
55 | |
56 (defun charset-iso-graphic-plane (charset) | |
57 "Return the `graphic' property of CHARSET. | |
58 See `make-charset'." | |
59 (charset-property charset 'graphic)) | |
60 | |
61 (defun charset-iso-final-char (charset) | |
62 "Return the final byte of the ISO 2022 escape sequence designating CHARSET." | |
63 (charset-property charset 'final)) | |
64 | |
65 (defun charset-chars (charset) | |
66 "Return the number of characters per dimension of CHARSET." | |
67 (charset-property charset 'chars)) | |
68 | |
69 (defun charset-width (charset) | |
70 "Return the number of display columns per character of CHARSET. | |
71 This only applies to TTY mode (under X, the actual display width can | |
72 be automatically determined)." | |
73 (charset-property charset 'columns)) | |
74 | |
75 ;; #### FSFmacs returns 0 | |
76 (defun charset-direction (charset) | |
77 "Return the display direction (0 for `l2r' or 1 for `r2l') of CHARSET. | |
78 Only left-to-right is currently implemented." | |
79 (if (eq (charset-property charset 'direction) 'l2r) | |
80 0 | |
81 1)) | |
82 | |
3659 | 83 ;; Not in GNU Emacs/Mule |
428 | 84 (defun charset-registry (charset) |
3712 | 85 "Obsolete; use charset-registries instead. " |
3659 | 86 (lwarn 'xintl 'warning |
87 "charset-registry is obsolete--use charset-registries instead. ") | |
88 (when (charset-property charset 'registries) | |
89 (elt (charset-property charset 'registries) 0))) | |
90 | |
3712 | 91 (make-obsolete 'charset-registry 'charset-registries) |
92 | |
3659 | 93 (defun charset-registries (charset) |
94 "Return the registries of CHARSET." | |
95 (charset-property charset 'registries)) | |
96 | |
97 (defun set-charset-registry (charset registry) | |
98 "Obsolete; use set-charset-registries instead. " | |
99 (check-argument-type 'stringp registry) | |
100 (check-argument-type 'charsetp (find-charset charset)) | |
101 (unless (equal registry (regexp-quote registry)) | |
102 (lwarn 'xintl 'warning | |
103 "Regexps no longer allowed for charset-registry. Treating %s%s" | |
104 registry " as a string.")) | |
105 (set-charset-registries | |
106 charset | |
107 (apply 'vector registry (append (charset-registries charset) nil)))) | |
428 | 108 |
3712 | 109 (make-obsolete 'set-charset-registry 'set-charset-registries) |
110 | |
428 | 111 (defun charset-ccl-program (charset) |
112 "Return the CCL program of CHARSET. | |
113 See `make-charset'." | |
114 (charset-property charset 'ccl-program)) | |
115 | |
116 (defun charset-bytes (charset) | |
117 "Useless in XEmacs, returns 1." | |
118 1) | |
119 | |
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
120 (defun charset-skip-chars-string (charset) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
121 "Given CHARSET, return a string suitable for for `skip-chars-forward'. |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
122 Passing the string to `skip-chars-forward' will cause it to skip all |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
123 characters in CHARSET." |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
124 (setq charset (get-charset charset)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
125 (cond |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
126 ;; Aargh, the general algorithm doesn't work for these charsets, because |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
127 ;; make-char strips the high bit. Hard code them. |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
128 ((eq (find-charset 'ascii) charset) "\x00-\x7f") |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
129 ((eq (find-charset 'control-1) charset) "\x80-\x9f") |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
130 (t |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
131 (let (charset-lower charset-upper row-upper row-lower) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
132 (if (= 1 (charset-dimension charset)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
133 (condition-case args-out-of-range |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
134 (make-char charset #x100) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
135 (args-out-of-range |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
136 (setq charset-lower (third args-out-of-range) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
137 charset-upper (fourth args-out-of-range)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
138 (format "%c-%c" |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
139 (make-char charset charset-lower) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
140 (make-char charset charset-upper)))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
141 (condition-case args-out-of-range |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
142 (make-char charset #x100 #x22) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
143 (args-out-of-range |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
144 (setq row-lower (third args-out-of-range) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
145 row-upper (fourth args-out-of-range)))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
146 (condition-case args-out-of-range |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
147 (make-char charset #x22 #x100) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
148 (args-out-of-range |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
149 (setq charset-lower (third args-out-of-range) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
150 charset-upper (fourth args-out-of-range)))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
151 (format "%c-%c" |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
152 (make-char charset row-lower charset-lower) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
153 (make-char charset row-upper charset-upper))))))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
154 ;; From GNU. |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
155 (defun map-charset-chars (func charset) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
156 "Use FUNC to map over all characters in CHARSET for side effects. |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
157 FUNC is a function of two args, the start and end (inclusive) of a |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
158 character code range. Thus FUNC should iterate over [START, END]." |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
159 (check-argument-type #'functionp func) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
160 (check-argument-type #'charsetp (setq charset (find-charset charset))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
161 (let* ((dim (charset-dimension charset)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
162 (chars (charset-chars charset)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
163 (start (if (= chars 94) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
164 33 |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
165 32))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
166 (if (= dim 1) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
167 (cond |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
168 ((eq (find-charset 'ascii) charset) (funcall func ?\x00 ?\x7f)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
169 ((eq (find-charset 'control-1) charset) (funcall func ?\x80 ?\x9f)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
170 (t |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
171 (funcall func |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
172 (make-char charset start) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
173 (make-char charset (+ start chars -1))))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
174 (dotimes (i chars) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
175 (funcall func |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
176 (make-char charset (+ i start) start) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
177 (make-char charset (+ i start) (+ start chars -1))))))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4072
diff
changeset
|
178 |
428 | 179 ;;;; Define setf methods for all settable Charset properties |
180 | |
181 (defsetf charset-registry set-charset-registry) | |
182 (defsetf charset-ccl-program set-charset-ccl-program) | |
3712 | 183 (defsetf charset-registries set-charset-registries) |
428 | 184 |
185 ;;; FSF compatibility functions | |
186 (defun charset-after (&optional pos) | |
187 "Return charset of a character in current buffer at position POS. | |
188 If POS is nil, it defauls to the current point. | |
189 If POS is out of range, the value is nil." | |
190 (when (null pos) | |
191 (setq pos (point))) | |
192 (check-argument-type 'integerp pos) | |
193 (unless (or (< pos (point-min)) | |
194 (> pos (point-max))) | |
195 (char-charset (char-after pos)))) | |
196 | |
197 ;; Yuck! | |
771 | 198 ;; We're not going to support these. |
199 ;(defun charset-info (charset) [incredibly broken function with random vectors] | |
200 ;(defun define-charset (...) [incredibly broken function with random vectors] | |
428 | 201 |
202 ;;; Charset property | |
203 | |
204 (defalias 'get-charset-property 'get) | |
205 (defalias 'put-charset-property 'put) | |
206 (defalias 'charset-plist 'object-plist) | |
207 (defalias 'set-charset-plist 'setplist) | |
208 | |
771 | 209 |
788 | 210 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
211 ; translation tables ; | |
212 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
213 | |
214 (defstruct (translation-table (:constructor internal-make-translation-table)) | |
215 forward | |
216 reverse) | |
217 | |
218 (defun make-translation-table (&rest args) | |
219 "Make a translation table from arguments. | |
2116 | 220 A translation table is a char table intended for character translation |
221 in CCL programs. | |
788 | 222 |
223 Each argument is a list of elemnts of the form (FROM . TO), where FROM | |
224 is a character to be translated to TO. | |
225 | |
226 FROM can be a generic character (see `make-char'). In this case, TO is | |
227 a generic character containing the same number of characters, or a | |
228 ordinary character. If FROM and TO are both generic characters, all | |
229 characters belonging to FROM are translated to characters belonging to TO | |
230 without changing their position code(s). | |
231 | |
232 The arguments and forms in each argument are processed in the given | |
233 order, and if a previous form already translates TO to some other | |
234 character, say TO-ALT, FROM is also translated to TO-ALT." | |
235 (let ((table (internal-make-translation-table | |
236 :forward (make-char-table 'generic))) | |
237 revlist) | |
238 (while args | |
239 (let ((elts (car args))) | |
240 (while elts | |
241 (let* ((from (car (car elts))) | |
242 (from-i 0) ; degree of freedom of FROM | |
243 (from-rev (nreverse (split-char from))) | |
244 (to (cdr (car elts))) | |
245 (to-i 0) ; degree of freedom of TO | |
246 (to-rev (nreverse (split-char to)))) | |
247 ;; Check numbers of heading 0s in FROM-REV and TO-REV. | |
248 (while (eq (car from-rev) 0) | |
249 (setq from-i (1+ from-i) from-rev (cdr from-rev))) | |
250 (while (eq (car to-rev) 0) | |
251 (setq to-i (1+ to-i) to-rev (cdr to-rev))) | |
252 (if (and (/= from-i to-i) (/= to-i 0)) | |
253 (error "Invalid character pair (%d . %d)" from to)) | |
254 ;; If we have already translated TO to TO-ALT, FROM should | |
255 ;; also be translated to TO-ALT. But, this is only if TO | |
256 ;; is a generic character or TO-ALT is not a generic | |
257 ;; character. | |
258 (let ((to-alt (get-char-table to table))) | |
259 (if (and to-alt | |
260 (or (> to-i 0) (not (find-charset to-alt)))) | |
261 (setq to to-alt))) | |
262 (if (> from-i 0) | |
263 (set-char-table-default table from to) | |
264 (put-char-table from to table)) | |
265 ;; If we have already translated some chars to FROM, they | |
266 ;; should also be translated to TO. | |
267 (let ((l (assq from revlist))) | |
268 (if l | |
269 (let ((ch (car l))) | |
270 (setcar l to) | |
271 (setq l (cdr l)) | |
272 (while l | |
273 (put-char-table ch to table) | |
274 (setq l (cdr l)) )))) | |
275 ;; Now update REVLIST. | |
276 (let ((l (assq to revlist))) | |
277 (if l | |
278 (setcdr l (cons from (cdr l))) | |
279 (setq revlist (cons (list to from) revlist))))) | |
280 (setq elts (cdr elts)))) | |
281 (setq args (cdr args))) | |
282 ;; Return TABLE just created. | |
283 table)) | |
284 | |
285 ;; Do we really need this? | |
286 ; (defun make-translation-table-from-vector (vec) | |
287 ; "Make translation table from decoding vector VEC. | |
288 ; VEC is an array of 256 elements to map unibyte codes to multibyte characters. | |
289 ; See also the variable `nonascii-translation-table'." | |
290 ; (let ((table (make-char-table 'translation-table)) | |
291 ; (rev-table (make-char-table 'translation-table)) | |
292 ; (i 0) | |
293 ; ch) | |
294 ; (while (< i 256) | |
295 ; (setq ch (aref vec i)) | |
296 ; (aset table i ch) | |
297 ; (if (>= ch 256) | |
298 ; (aset rev-table ch i)) | |
299 ; (setq i (1+ i))) | |
300 ; (set-char-table-extra-slot table 0 rev-table) | |
301 ; table)) | |
302 | |
303 (defvar named-translation-table-hash-table (make-hash-table)) | |
304 | |
305 (defun define-translation-table (symbol &rest args) | |
306 "Define SYMBOL as the name of translation table made by ARGS. | |
307 This sets up information so that the table can be used for | |
308 translations in a CCL program. | |
309 | |
310 If the first element of ARGS is a translation table, just define SYMBOL to | |
311 name it. (Note that this function does not bind SYMBOL.) | |
312 | |
313 Any other ARGS should be suitable as arguments of the function | |
314 `make-translation-table' (which see). | |
315 | |
316 Look up a named translation table using `find-translation-table' or | |
317 `get-translation-table'." | |
318 (let ((table (if (translation-table-p (car args)) | |
319 (car args) | |
320 (apply 'make-translation-table args)))) | |
321 (puthash symbol table named-translation-table-hash-table))) | |
322 | |
323 (defun find-translation-table (table-or-name) | |
324 "Retrieve the translation table of the given name. | |
325 If TABLE-OR-NAME is a translation table object, it is simply returned. | |
326 Otherwise, TABLE-OR-NAME should be a symbol. If there is no such | |
327 translation table, nil is returned. Otherwise the associated translation | |
328 table object is returned." | |
329 (if (translation-table-p table-or-name) | |
330 table-or-name | |
331 (check-argument-type 'symbolp table-or-name) | |
332 (gethash table-or-name named-translation-table-hash-table))) | |
333 | |
334 (defun get-translation-table (table-or-name) | |
335 "Retrieve the translation table of the given name. | |
336 Same as `find-translation-table' except an error is signalled if there is | |
337 no such translation table instead of returning nil." | |
338 (or (find-translation-table table-or-name) | |
339 (error 'invalid-argument "No such translation table" table-or-name))) | |
340 | |
341 | |
442 | 342 ;; Setup auto-fill-chars for charsets that should invoke auto-filling. |
777 | 343 ;; SPACE and NEWLINE are already set. |
442 | 344 (let ((l '(katakana-jisx0201 |
345 japanese-jisx0208 japanese-jisx0212 | |
346 chinese-gb2312 chinese-big5-1 chinese-big5-2))) | |
347 (while l | |
348 (put-char-table (car l) t auto-fill-chars) | |
349 (setq l (cdr l)))) | |
350 | |
778 | 351 |
352 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
353 ; charsets ; | |
354 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
355 | |
356 ;; Synched up with: FSF 21.1. | |
357 | |
358 ;; All FSF charset definitions are in mule-conf.el. I copied the relevant | |
359 ;; part of that file below, then converted all charset definitions using | |
360 ;; the macro below, then globally replaced 'direction 0' with 'direction | |
361 ;; l2r' and 'direction 1' with 'direction r2l', then commented everything | |
362 ;; out. Copy the definitions as necessary to individual files. | |
363 | |
364 ;; Kbd macro to convert from FSF-style define-charset to our make-charset. | |
365 | |
366 ; (setq last-kbd-macro (read-kbd-macro | |
367 ; "<right> M-d make <M-right> M-d <home> <down> TAB '[dimension DEL SPC <M-right> RET TAB chars SPC <M-right> RET TAB columns SPC <M-right> RET TAB direction SPC <M-right> RET TAB final SPC <M-right> RET TAB graphic SPC <M-right> RET TAB short- name SPC <M-right> RET TAB long- name SPC <M-right> RET TAB <S-M-right> <f2> DEL TAB <end> ] <M-left> <end> SPC <f4> 3*<M-left> <left> <M-right> RET <down>")) | |
368 | |
369 ;; Kbd macro to take one registry entry from the list of registry entries, | |
370 ;; find the appropriate make-charset call, and add the appropriate registry | |
371 ;; property. | |
372 | |
373 ; (setq last-kbd-macro (read-kbd-macro | |
374 ; "3*<right> <S-M-right> C-x x 1 <right> <S-M-right> C-x x 2 <home> C-x r m foo RET <M-down> M-x sear TAB for TAB RET C-x g 1 RET C-s dimen RET <end> RET TAB 3*<backspace> registry SPC C-x g 2 C-x r b RET <down>")) | |
375 | |
376 ;; List from FSF international/fontset.el of registries for charsets. | |
377 | |
378 ;; latin-iso8859-1 "ISO8859-1" | |
379 ;; latin-iso8859-2 "ISO8859-2" | |
380 ;; latin-iso8859-3 "ISO8859-3" | |
381 ;; latin-iso8859-4 "ISO8859-4" | |
382 ;; thai-tis620 "TIS620" | |
383 ;; greek-iso8859-7 "ISO8859-7" | |
384 ;; arabic-iso8859-6 "ISO8859-6" | |
385 ;; hebrew-iso8859-8 "ISO8859-8" | |
386 ;; katakana-jisx0201 "JISX0201" | |
387 ;; latin-jisx0201 "JISX0201" | |
388 ;; cyrillic-iso8859-5 "ISO8859-5" | |
389 ;; latin-iso8859-9 "ISO8859-9" | |
390 ;; japanese-jisx0208-1978 "JISX0208.1978" | |
391 ;; chinese-gb2312 "GB2312.1980" | |
392 ;; japanese-jisx0208 "JISX0208.1990" | |
393 ;; korean-ksc5601 "KSC5601.1989" | |
394 ;; japanese-jisx0212 "JISX0212" | |
395 ;; chinese-cns11643-1 "CNS11643.1992-1" | |
396 ;; chinese-cns11643-2 "CNS11643.1992-2" | |
397 ;; chinese-cns11643-3 "CNS11643.1992-3" | |
398 ;; chinese-cns11643-4 "CNS11643.1992-4" | |
399 ;; chinese-cns11643-5 "CNS11643.1992-5" | |
400 ;; chinese-cns11643-6 "CNS11643.1992-6" | |
401 ;; chinese-cns11643-7 "CNS11643.1992-7" | |
402 ;; chinese-big5-1 "Big5" | |
403 ;; chinese-big5-2 "Big5" | |
404 ;; chinese-sisheng "sisheng_cwnn" | |
405 ;; vietnamese-viscii-lower "VISCII1.1" | |
406 ;; vietnamese-viscii-upper "VISCII1.1" | |
407 ;; arabic-digit "MuleArabic-0" | |
408 ;; arabic-1-column "MuleArabic-1" | |
409 ;; arabic-2-column "MuleArabic-2" | |
410 ;; ipa "MuleIPA" | |
411 ;; ethiopic "Ethiopic-Unicode" | |
412 ;; indian-is13194 "IS13194-Devanagari" | |
413 ;; indian-2-column "MuleIndian-2" | |
414 ;; indian-1-column "MuleIndian-1" | |
415 ;; lao "MuleLao-1" | |
416 ;; tibetan "MuleTibetan-2" | |
417 ;; tibetan-1-column "MuleTibetan-1" | |
418 ;; latin-iso8859-14 "ISO8859-14" | |
419 ;; latin-iso8859-15 "ISO8859-15" | |
420 ;; mule-unicode-0100-24ff "ISO10646-1" | |
421 ;; mule-unicode-2500-33ff "ISO10646-1" | |
422 ;; mule-unicode-e000-ffff "ISO10646-1" | |
423 ;; japanese-jisx0213-1 "JISX0213.2000-1" | |
424 ;; japanese-jisx0213-2 "JISX0213.2000-2" | |
425 | |
426 ;;; Begin stuff from international/mule-conf.el. | |
427 | |
428 ; ;;; Definitions of character sets. | |
429 | |
430 ; ;; Basic (official) character sets. These character sets are treated | |
431 ; ;; efficiently with respect to buffer memory. | |
432 | |
433 ; ;; Syntax: | |
434 ; ;; (define-charset CHARSET-ID CHARSET | |
435 ; ;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE | |
436 ; ;; SHORT-NAME LONG-NAME DESCRIPTION ]) | |
437 ; ;; ASCII charset is defined in src/charset.c as below. | |
438 ; ;; (define-charset 0 ascii | |
439 ; ;; [1 94 1 0 ?B 0 "ASCII" "ASCII" "ASCII (ISO646 IRV)"]) | |
440 | |
441 ; ;; 1-byte charsets. Valid range of CHARSET-ID is 128..143. | |
442 | |
443 ; ;; CHARSET-ID 128 is not used. | |
444 | |
445 ; ; An extra level of commenting means an official (done in C) charset. | |
446 ; ; (make-charset 'latin-iso8859-1 | |
447 ; ; "Right-Hand Part of Latin Alphabet 1 (ISO/IEC 8859-1): ISO-IR-100" | |
448 ; ; '(dimension | |
449 ; ; 1 | |
450 ; ; registry "ISO8859-1" | |
451 ; ; chars 96 | |
452 ; ; columns 1 | |
453 ; ; direction l2r | |
454 ; ; final ?A | |
455 ; ; graphic 1 | |
456 ; ; short-name "RHP of Latin-1" | |
457 ; ; long-name "RHP of Latin-1 (ISO 8859-1): ISO-IR-100" | |
458 ; ; )) | |
459 | |
460 ; ; (make-charset 'latin-iso8859-2 | |
461 ; ; "Right-Hand Part of Latin Alphabet 2 (ISO/IEC 8859-2): ISO-IR-101" | |
462 ; ; '(dimension | |
463 ; ; 1 | |
464 ; ; registry "ISO8859-2" | |
465 ; ; chars 96 | |
466 ; ; columns 1 | |
467 ; ; direction l2r | |
468 ; ; final ?B | |
469 ; ; graphic 1 | |
470 ; ; short-name "RHP of Latin-2" | |
471 ; ; long-name "RHP of Latin-2 (ISO 8859-2): ISO-IR-101" | |
472 ; ; )) | |
473 | |
474 ; ; (make-charset 'latin-iso8859-3 | |
475 ; ; "Right-Hand Part of Latin Alphabet 3 (ISO/IEC 8859-3): ISO-IR-109" | |
476 ; ; '(dimension | |
477 ; ; 1 | |
478 ; ; registry "ISO8859-3" | |
479 ; ; chars 96 | |
480 ; ; columns 1 | |
481 ; ; direction l2r | |
482 ; ; final ?C | |
483 ; ; graphic 1 | |
484 ; ; short-name "RHP of Latin-3" | |
485 ; ; long-name "RHP of Latin-3 (ISO 8859-3): ISO-IR-109" | |
486 ; ; )) | |
487 | |
488 ; ; (make-charset 'latin-iso8859-4 | |
489 ; ; "Right-Hand Part of Latin Alphabet 4 (ISO/IEC 8859-4): ISO-IR-110" | |
490 ; ; '(dimension | |
491 ; ; 1 | |
492 ; ; registry "ISO8859-4" | |
493 ; ; chars 96 | |
494 ; ; columns 1 | |
495 ; ; direction l2r | |
496 ; ; final ?D | |
497 ; ; graphic 1 | |
498 ; ; short-name "RHP of Latin-4" | |
499 ; ; long-name "RHP of Latin-4 (ISO 8859-4): ISO-IR-110" | |
500 ; ; )) | |
501 | |
502 ; ; (make-charset 'thai-tis620 | |
503 ; ; "Right-Hand Part of TIS620.2533 (Thai): ISO-IR-166" | |
504 ; ; '(dimension | |
505 ; ; 1 | |
506 ; ; registry "TIS620" | |
507 ; ; chars 96 | |
508 ; ; columns 1 | |
509 ; ; direction l2r | |
510 ; ; final ?T | |
511 ; ; graphic 1 | |
512 ; ; short-name "RHP of TIS620" | |
513 ; ; long-name "RHP of Thai (TIS620): ISO-IR-166" | |
514 ; ; )) | |
515 | |
516 ; ; (make-charset 'greek-iso8859-7 | |
517 ; ; "Right-Hand Part of Latin/Greek Alphabet (ISO/IEC 8859-7): ISO-IR-126" | |
518 ; ; '(dimension | |
519 ; ; 1 | |
520 ; ; registry "ISO8859-7" | |
521 ; ; chars 96 | |
522 ; ; columns 1 | |
523 ; ; direction l2r | |
524 ; ; final ?F | |
525 ; ; graphic 1 | |
526 ; ; short-name "RHP of ISO8859/7" | |
527 ; ; long-name "RHP of Greek (ISO 8859-7): ISO-IR-126" | |
528 ; ; )) | |
529 | |
530 ; ; (make-charset 'arabic-iso8859-6 | |
531 ; ; "Right-Hand Part of Latin/Arabic Alphabet (ISO/IEC 8859-6): ISO-IR-127" | |
532 ; ; '(dimension | |
533 ; ; 1 | |
534 ; ; registry "ISO8859-6" | |
535 ; ; chars 96 | |
536 ; ; columns 1 | |
537 ; ; direction r2l | |
538 ; ; final ?G | |
539 ; ; graphic 1 | |
540 ; ; short-name "RHP of ISO8859/6" | |
541 ; ; long-name "RHP of Arabic (ISO 8859-6): ISO-IR-127" | |
542 ; ; )) | |
543 | |
544 ; ; (make-charset 'hebrew-iso8859-8 | |
545 ; ; "Right-Hand Part of Latin/Hebrew Alphabet (ISO/IEC 8859-8): ISO-IR-138" | |
546 ; ; '(dimension | |
547 ; ; 1 | |
548 ; ; registry "ISO8859-8" | |
549 ; ; chars 96 | |
550 ; ; columns 1 | |
551 ; ; direction r2l | |
552 ; ; final ?H | |
553 ; ; graphic 1 | |
554 ; ; short-name "RHP of ISO8859/8" | |
555 ; ; long-name "RHP of Hebrew (ISO 8859-8): ISO-IR-138" | |
556 ; ; )) | |
557 | |
558 ; ; (make-charset 'katakana-jisx0201 | |
559 ; ; "Katakana Part of JISX0201.1976" | |
560 ; ; '(dimension | |
561 ; ; 1 | |
562 ; ; registry "JISX0201" | |
563 ; ; chars 94 | |
564 ; ; columns 1 | |
565 ; ; direction l2r | |
566 ; ; final ?I | |
567 ; ; graphic 1 | |
568 ; ; short-name "JISX0201 Katakana" | |
569 ; ; long-name "Japanese Katakana (JISX0201.1976)" | |
570 ; ; )) | |
571 | |
572 ; ; (make-charset 'latin-jisx0201 | |
573 ; ; "Roman Part of JISX0201.1976" | |
574 ; ; '(dimension | |
575 ; ; 1 | |
576 ; ; registry "JISX0201" | |
577 ; ; chars 94 | |
578 ; ; columns 1 | |
579 ; ; direction l2r | |
580 ; ; final ?J | |
581 ; ; graphic 0 | |
582 ; ; short-name "JISX0201 Roman" | |
583 ; ; long-name "Japanese Roman (JISX0201.1976)" | |
584 ; ; )) | |
585 | |
586 | |
587 ; ;; CHARSET-ID is not used 139. | |
588 | |
589 ; ; (make-charset 'cyrillic-iso8859-5 | |
590 ; ; "Right-Hand Part of Latin/Cyrillic Alphabet (ISO/IEC 8859-5): ISO-IR-144" | |
591 ; ; '(dimension | |
592 ; ; 1 | |
593 ; ; registry "ISO8859-5" | |
594 ; ; chars 96 | |
595 ; ; columns 1 | |
596 ; ; direction l2r | |
597 ; ; final ?L | |
598 ; ; graphic 1 | |
599 ; ; short-name "RHP of ISO8859/5" | |
600 ; ; long-name "RHP of Cyrillic (ISO 8859-5): ISO-IR-144" | |
601 ; ; )) | |
602 | |
603 ; ; (make-charset 'latin-iso8859-9 | |
604 ; ; "Right-Hand Part of Latin Alphabet 5 (ISO/IEC 8859-9): ISO-IR-148" | |
605 ; ; '(dimension | |
606 ; ; 1 | |
607 ; ; registry "ISO8859-9" | |
608 ; ; chars 96 | |
609 ; ; columns 1 | |
610 ; ; direction l2r | |
611 ; ; final ?M | |
612 ; ; graphic 1 | |
613 ; ; short-name "RHP of Latin-5" | |
614 ; ; long-name "RHP of Latin-5 (ISO 8859-9): ISO-IR-148" | |
615 ; ; )) | |
616 | |
617 ; ; (make-charset 'latin-iso8859-15 | |
618 ; ; "Right-Hand Part of Latin Alphabet 9 (ISO/IEC 8859-15): ISO-IR-203" | |
619 ; ; '(dimension | |
620 ; ; 1 | |
621 ; ; registry "ISO8859-15" | |
622 ; ; chars 96 | |
623 ; ; columns 1 | |
624 ; ; direction l2r | |
625 ; ; final ?b | |
626 ; ; graphic 1 | |
627 ; ; short-name "RHP of Latin-9" | |
628 ; ; long-name "RHP of Latin-9 (ISO 8859-15): ISO-IR-203" | |
629 ; ; )) | |
630 | |
631 ; (make-charset 'latin-iso8859-14 | |
632 ; "Right-Hand Part of Latin Alphabet 8 (ISO/IEC 8859-14)" | |
633 ; '(dimension | |
634 ; 1 | |
635 ; registry "ISO8859-14" | |
636 ; chars 96 | |
637 ; columns 1 | |
638 ; direction l2r | |
639 ; final ?_ | |
640 ; graphic 1 | |
641 ; short-name "RHP of Latin-8" | |
642 ; long-name "RHP of Latin-8 (ISO 8859-14)" | |
643 ; )) | |
644 | |
645 | |
646 ; ;; 2-byte charsets. Valid range of CHARSET-ID is 144..153. | |
647 | |
648 ; ; (make-charset 'japanese-jisx0208-1978 | |
649 ; ; "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42" | |
650 ; ; '(dimension | |
651 ; ; 2 | |
652 ; ; registry "JISX0208.1990" | |
653 ; ; registry "JISX0208.1978" | |
654 ; ; chars 94 | |
655 ; ; columns 2 | |
656 ; ; direction l2r | |
657 ; ; final ?@ | |
658 ; ; graphic 0 | |
659 ; ; short-name "JISX0208.1978" | |
660 ; ; long-name "JISX0208.1978 (Japanese): ISO-IR-42" | |
661 ; ; )) | |
662 | |
663 ; ; (make-charset 'chinese-gb2312 | |
664 ; ; "GB2312 Chinese simplified: ISO-IR-58" | |
665 ; ; '(dimension | |
666 ; ; 2 | |
667 ; ; registry "GB2312.1980" | |
668 ; ; chars 94 | |
669 ; ; columns 2 | |
670 ; ; direction l2r | |
671 ; ; final ?A | |
672 ; ; graphic 0 | |
673 ; ; short-name "GB2312" | |
674 ; ; long-name "GB2312: ISO-IR-58" | |
675 ; ; )) | |
676 | |
677 ; ; (make-charset 'japanese-jisx0208 | |
678 ; ; "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87" | |
679 ; ; '(dimension | |
680 ; ; 2 | |
681 ; ; chars 94 | |
682 ; ; columns 2 | |
683 ; ; direction l2r | |
684 ; ; final ?B | |
685 ; ; graphic 0 | |
686 ; ; short-name "JISX0208" | |
687 ; ; long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87" | |
688 ; ; )) | |
689 | |
690 ; ; (make-charset 'korean-ksc5601 | |
691 ; ; "KSC5601 Korean Hangul and Hanja: ISO-IR-149" | |
692 ; ; '(dimension | |
693 ; ; 2 | |
694 ; ; registry "KSC5601.1989" | |
695 ; ; chars 94 | |
696 ; ; columns 2 | |
697 ; ; direction l2r | |
698 ; ; final ?C | |
699 ; ; graphic 0 | |
700 ; ; short-name "KSC5601" | |
701 ; ; long-name "KSC5601 (Korean): ISO-IR-149" | |
702 ; ; )) | |
703 | |
704 ; ; (make-charset 'japanese-jisx0212 | |
705 ; ; "JISX0212 Japanese supplement: ISO-IR-159" | |
706 ; ; '(dimension | |
707 ; ; 2 | |
708 ; ; registry "JISX0212" | |
709 ; ; chars 94 | |
710 ; ; columns 2 | |
711 ; ; direction l2r | |
712 ; ; final ?D | |
713 ; ; graphic 0 | |
714 ; ; short-name "JISX0212" | |
715 ; ; long-name "JISX0212 (Japanese): ISO-IR-159" | |
716 ; ; )) | |
717 | |
718 ; ; (make-charset 'chinese-cns11643-1 | |
719 ; ; "CNS11643 Plane 1 Chinese traditional: ISO-IR-171" | |
720 ; ; '(dimension | |
721 ; ; 2 | |
722 ; ; registry "CNS11643.1992-1" | |
723 ; ; chars 94 | |
724 ; ; columns 2 | |
725 ; ; direction l2r | |
726 ; ; final ?G | |
727 ; ; graphic 0 | |
728 ; ; short-name "CNS11643-1" | |
729 ; ; long-name "CNS11643-1 (Chinese traditional): ISO-IR-171" | |
730 ; ; )) | |
731 | |
732 ; ; (make-charset 'chinese-cns11643-2 | |
733 ; ; "CNS11643 Plane 2 Chinese traditional: ISO-IR-172" | |
734 ; ; '(dimension | |
735 ; ; 2 | |
736 ; ; registry "CNS11643.1992-2" | |
737 ; ; chars 94 | |
738 ; ; columns 2 | |
739 ; ; direction l2r | |
740 ; ; final ?H | |
741 ; ; graphic 0 | |
742 ; ; short-name "CNS11643-2" | |
743 ; ; long-name "CNS11643-2 (Chinese traditional): ISO-IR-172" | |
744 ; ; )) | |
745 | |
746 ; (make-charset 'japanese-jisx0213-1 "JISX0213 Plane 1 (Japanese)" | |
747 ; '(dimension | |
748 ; 2 | |
749 ; registry "JISX0213.2000-1" | |
750 ; chars 94 | |
751 ; columns 2 | |
752 ; direction l2r | |
753 ; final ?O | |
754 ; graphic 0 | |
755 ; short-name "JISX0213-1" | |
756 ; long-name "JISX0213-1" | |
757 ; )) | |
758 | |
759 ; ; (make-charset 'chinese-big5-1 | |
760 ; ; "Frequently used part (A141-C67F) of Big5 (Chinese traditional)" | |
761 ; ; '(dimension | |
762 ; ; 2 | |
763 ; ; registry "Big5" | |
764 ; ; chars 94 | |
765 ; ; columns 2 | |
766 ; ; direction l2r | |
767 ; ; final ?0 | |
768 ; ; graphic 0 | |
769 ; ; short-name "Big5 (Level-1)" | |
770 ; ; long-name "Big5 (Level-1) A141-C67F" | |
771 ; ; )) | |
772 | |
773 ; ; (make-charset 'chinese-big5-2 | |
774 ; ; "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)" | |
775 ; ; '(dimension | |
776 ; ; 2 | |
777 ; ; registry "Big5" | |
778 ; ; chars 94 | |
779 ; ; columns 2 | |
780 ; ; direction l2r | |
781 ; ; final ?1 | |
782 ; ; graphic 0 | |
783 ; ; short-name "Big5 (Level-2)" | |
784 ; ; long-name "Big5 (Level-2) C940-FEFE" | |
785 ; ; )) | |
786 | |
787 | |
788 ; ;; Additional (private) character sets. These character sets are | |
789 ; ;; treated less space-efficiently in the buffer. | |
790 | |
791 ; ;; Syntax: | |
792 ; ;; (define-charset CHARSET-ID CHARSET | |
793 ; ;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE | |
794 ; ;; SHORT-NAME LONG-NAME DESCRIPTION ]) | |
795 | |
796 ; ;; ISO-2022 allows a use of character sets not registered in ISO with | |
797 ; ;; final characters `0' (0x30) through `?' (0x3F). Among them, Emacs | |
798 ; ;; reserves `0' through `9' to support several private character sets. | |
799 ; ;; The remaining final characters `:' through `?' are for users. | |
800 | |
801 ; ;; 1-byte 1-column charsets. Valid range of CHARSET-ID is 160..223. | |
802 | |
803 ; (make-charset 'chinese-sisheng | |
804 ; "SiSheng characters for PinYin/ZhuYin" | |
805 ; '(dimension | |
806 ; 1 | |
807 ; registry "sisheng_cwnn" | |
808 ; chars 94 | |
809 ; columns 1 | |
810 ; direction l2r | |
811 ; final ?0 | |
812 ; graphic 0 | |
813 ; short-name "SiSheng" | |
814 ; long-name "SiSheng (PinYin/ZhuYin)" | |
815 ; )) | |
816 | |
817 | |
818 ; ;; IPA characters for phonetic symbols. | |
819 ; (make-charset 'ipa "IPA (International Phonetic Association)" | |
820 ; '(dimension | |
821 ; 1 | |
822 ; registry "MuleIPA" | |
823 ; chars 96 | |
824 ; columns 1 | |
825 ; direction l2r | |
826 ; final ?0 | |
827 ; graphic 1 | |
828 ; short-name "IPA" | |
829 ; long-name "IPA" | |
830 ; )) | |
831 | |
832 | |
833 ; ;; Vietnamese VISCII. VISCII is 1-byte character set which contains | |
834 ; ;; more than 96 characters. Since Emacs can't handle it as one | |
835 ; ;; character set, it is divided into two: lower case letters and upper | |
836 ; ;; case letters. | |
837 ; (make-charset 'vietnamese-viscii-lower "VISCII1.1 lower-case" | |
838 ; '(dimension | |
839 ; 1 | |
840 ; registry "VISCII1.1" | |
841 ; chars 96 | |
842 ; columns 1 | |
843 ; direction l2r | |
844 ; final ?1 | |
845 ; graphic 1 | |
846 ; short-name "VISCII lower" | |
847 ; long-name "VISCII lower-case" | |
848 ; )) | |
849 | |
850 ; (make-charset 'vietnamese-viscii-upper "VISCII1.1 upper-case" | |
851 ; '(dimension | |
852 ; 1 | |
853 ; registry "VISCII1.1" | |
854 ; chars 96 | |
855 ; columns 1 | |
856 ; direction l2r | |
857 ; final ?2 | |
858 ; graphic 1 | |
859 ; short-name "VISCII upper" | |
860 ; long-name "VISCII upper-case" | |
861 ; )) | |
862 | |
863 | |
864 ; ;; For Arabic, we need three different types of character sets. | |
865 ; ;; Digits are of direction left-to-right and of width 1-column. | |
866 ; ;; Others are of direction right-to-left and of width 1-column or | |
867 ; ;; 2-column. | |
868 ; (make-charset 'arabic-digit "Arabic digit" | |
869 ; '(dimension | |
870 ; 1 | |
871 ; registry "MuleArabic-0" | |
872 ; chars 94 | |
873 ; columns 1 | |
874 ; direction l2r | |
875 ; final ?2 | |
876 ; graphic 0 | |
877 ; short-name "Arabic digit" | |
878 ; long-name "Arabic digit" | |
879 ; )) | |
880 | |
881 ; (make-charset 'arabic-1-column "Arabic 1-column" | |
882 ; '(dimension | |
883 ; 1 | |
884 ; registry "MuleArabic-1" | |
885 ; chars 94 | |
886 ; columns 1 | |
887 ; direction r2l | |
888 ; final ?3 | |
889 ; graphic 0 | |
890 ; short-name "Arabic 1-col" | |
891 ; long-name "Arabic 1-column" | |
892 ; )) | |
893 | |
894 | |
895 ; ;; ASCII with right-to-left direction. | |
896 ; (make-charset 'ascii-right-to-left | |
897 ; "ASCII (left half of ISO 8859-1) with right-to-left direction" | |
898 ; '(dimension | |
899 ; 1 | |
900 ; registry "ISO8859-1" | |
901 ; chars 94 | |
902 ; columns 1 | |
903 ; direction r2l | |
904 ; final ?B | |
905 ; graphic 0 | |
906 ; short-name "rev ASCII" | |
907 ; long-name "ASCII with right-to-left direction" | |
908 ; )) | |
909 | |
910 | |
911 ; ;; Lao script. | |
912 ; ;; ISO10646's 0x0E80..0x0EDF are mapped to 0x20..0x7F. | |
913 ; (make-charset 'lao "Lao characters (ISO10646 0E80..0EDF)" | |
914 ; '(dimension | |
915 ; 1 | |
916 ; registry "MuleLao-1" | |
917 ; chars 94 | |
918 ; columns 1 | |
919 ; direction l2r | |
920 ; final ?1 | |
921 ; graphic 0 | |
922 ; short-name "Lao" | |
923 ; long-name "Lao" | |
924 ; )) | |
925 | |
926 | |
927 ; ;; CHARSET-IDs 168..223 are not used. | |
928 | |
929 ; ;; 1-byte 2-column charsets. Valid range of CHARSET-ID is 224..239. | |
930 | |
931 ; (make-charset 'arabic-2-column "Arabic 2-column" | |
932 ; '(dimension | |
933 ; 1 | |
934 ; registry "MuleArabic-2" | |
935 ; chars 94 | |
936 ; columns 2 | |
937 ; direction r2l | |
938 ; final ?4 | |
939 ; graphic 0 | |
940 ; short-name "Arabic 2-col" | |
941 ; long-name "Arabic 2-column" | |
942 ; )) | |
943 | |
944 | |
945 ; ;; Indian scripts. Symbolic charset for data exchange. Glyphs are | |
946 ; ;; not assigned. They are automatically converted to each Indian | |
947 ; ;; script which IS-13194 supports. | |
948 | |
949 ; (make-charset 'indian-is13194 | |
950 ; "Generic Indian charset for data exchange with IS 13194" | |
951 ; '(dimension | |
952 ; 1 | |
953 ; registry "IS13194-Devanagari" | |
954 ; chars 94 | |
955 ; columns 2 | |
956 ; direction l2r | |
957 ; final ?5 | |
958 ; graphic 1 | |
959 ; short-name "IS 13194" | |
960 ; long-name "Indian IS 13194" | |
961 ; )) | |
962 | |
963 | |
964 ; ;; CHARSET-IDs 226..239 are not used. | |
965 | |
966 ; ;; 2-byte 1-column charsets. Valid range of CHARSET-ID is 240..244. | |
967 | |
968 ; ;; Actual Glyph for 1-column width. | |
969 ; (make-charset 'indian-1-column | |
970 ; "Indian charset for 2-column width glyphs" | |
971 ; '(dimension | |
972 ; 2 | |
973 ; registry "MuleIndian-1" | |
974 ; chars 94 | |
975 ; columns 1 | |
976 ; direction l2r | |
977 ; final ?6 | |
978 ; graphic 0 | |
979 ; short-name "Indian 1-col" | |
980 ; long-name "Indian 1 Column" | |
981 ; )) | |
982 | |
983 | |
984 ; (make-charset 'tibetan-1-column "Tibetan 1 column glyph" | |
985 ; '(dimension | |
986 ; 2 | |
987 ; registry "MuleTibetan-1" | |
988 ; chars 94 | |
989 ; columns 1 | |
990 ; direction l2r | |
991 ; final ?8 | |
992 ; graphic 0 | |
993 ; short-name "Tibetan 1-col" | |
994 ; long-name "Tibetan 1 column" | |
995 ; )) | |
996 | |
997 | |
998 ; ;; Subsets of Unicode. | |
999 | |
1000 ; (make-charset 'mule-unicode-2500-33ff | |
1001 ; "Unicode characters of the range U+2500..U+33FF." | |
1002 ; '(dimension | |
1003 ; 2 | |
1004 ; registry "ISO10646-1" | |
1005 ; chars 96 | |
1006 ; columns 1 | |
1007 ; direction l2r | |
1008 ; final ?2 | |
1009 ; graphic 0 | |
1010 ; short-name "Unicode subset 2" | |
1011 ; long-name "Unicode subset (U+2500..U+33FF)" | |
1012 ; )) | |
1013 | |
1014 | |
1015 ; (make-charset 'mule-unicode-e000-ffff | |
1016 ; "Unicode characters of the range U+E000..U+FFFF." | |
1017 ; '(dimension | |
1018 ; 2 | |
1019 ; registry "ISO10646-1" | |
1020 ; chars 96 | |
1021 ; columns 1 | |
1022 ; direction l2r | |
1023 ; final ?3 | |
1024 ; graphic 0 | |
1025 ; short-name "Unicode subset 3" | |
1026 ; long-name "Unicode subset (U+E000+FFFF)" | |
1027 ; )) | |
1028 | |
1029 | |
1030 ; (make-charset 'mule-unicode-0100-24ff | |
1031 ; "Unicode characters of the range U+0100..U+24FF." | |
1032 ; '(dimension | |
1033 ; 2 | |
1034 ; registry "ISO10646-1" | |
1035 ; chars 96 | |
1036 ; columns 1 | |
1037 ; direction l2r | |
1038 ; final ?1 | |
1039 ; graphic 0 | |
1040 ; short-name "Unicode subset" | |
1041 ; long-name "Unicode subset (U+0100..U+24FF)" | |
1042 ; )) | |
1043 | |
1044 | |
1045 ; ;; 2-byte 2-column charsets. Valid range of CHARSET-ID is 245..254. | |
1046 | |
1047 ; ;; Ethiopic characters (Amahric and Tigrigna). | |
1048 ; (make-charset 'ethiopic "Ethiopic characters" | |
1049 ; '(dimension | |
1050 ; 2 | |
1051 ; registry "Ethiopic-Unicode" | |
1052 ; chars 94 | |
1053 ; columns 2 | |
1054 ; direction l2r | |
1055 ; final ?3 | |
1056 ; graphic 0 | |
1057 ; short-name "Ethiopic" | |
1058 ; long-name "Ethiopic characters" | |
1059 ; )) | |
1060 | |
1061 | |
1062 ; ;; Chinese CNS11643 Plane3 thru Plane7. Although these are official | |
1063 ; ;; character sets, the use is rare and don't have to be treated | |
1064 ; ;; space-efficiently in the buffer. | |
1065 ; (make-charset 'chinese-cns11643-3 | |
1066 ; "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183" | |
1067 ; '(dimension | |
1068 ; 2 | |
1069 ; registry "CNS11643.1992-3" | |
1070 ; chars 94 | |
1071 ; columns 2 | |
1072 ; direction l2r | |
1073 ; final ?I | |
1074 ; graphic 0 | |
1075 ; short-name "CNS11643-3" | |
1076 ; long-name "CNS11643-3 (Chinese traditional): ISO-IR-183" | |
1077 ; )) | |
1078 | |
1079 ; (make-charset 'chinese-cns11643-4 | |
1080 ; "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184" | |
1081 ; '(dimension | |
1082 ; 2 | |
1083 ; registry "CNS11643.1992-4" | |
1084 ; chars 94 | |
1085 ; columns 2 | |
1086 ; direction l2r | |
1087 ; final ?J | |
1088 ; graphic 0 | |
1089 ; short-name "CNS11643-4" | |
1090 ; long-name "CNS11643-4 (Chinese traditional): ISO-IR-184" | |
1091 ; )) | |
1092 | |
1093 ; (make-charset 'chinese-cns11643-5 | |
1094 ; "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185" | |
1095 ; '(dimension | |
1096 ; 2 | |
1097 ; registry "CNS11643.1992-5" | |
1098 ; chars 94 | |
1099 ; columns 2 | |
1100 ; direction l2r | |
1101 ; final ?K | |
1102 ; graphic 0 | |
1103 ; short-name "CNS11643-5" | |
1104 ; long-name "CNS11643-5 (Chinese traditional): ISO-IR-185" | |
1105 ; )) | |
1106 | |
1107 ; (make-charset 'chinese-cns11643-6 | |
1108 ; "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186" | |
1109 ; '(dimension | |
1110 ; 2 | |
1111 ; registry "CNS11643.1992-6" | |
1112 ; chars 94 | |
1113 ; columns 2 | |
1114 ; direction l2r | |
1115 ; final ?L | |
1116 ; graphic 0 | |
1117 ; short-name "CNS11643-6" | |
1118 ; long-name "CNS11643-6 (Chinese traditional): ISO-IR-186" | |
1119 ; )) | |
1120 | |
1121 ; (make-charset 'chinese-cns11643-7 | |
1122 ; "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187" | |
1123 ; '(dimension | |
1124 ; 2 | |
1125 ; registry "CNS11643.1992-7" | |
1126 ; chars 94 | |
1127 ; columns 2 | |
1128 ; direction l2r | |
1129 ; final ?M | |
1130 ; graphic 0 | |
1131 ; short-name "CNS11643-7" | |
1132 ; long-name "CNS11643-7 (Chinese traditional): ISO-IR-187" | |
1133 ; )) | |
1134 | |
1135 | |
1136 ; ;; Actual Glyph for 2-column width. | |
1137 ; (make-charset 'indian-2-column | |
1138 ; "Indian charset for 2-column width glyphs" | |
1139 ; '(dimension | |
1140 ; 2 | |
1141 ; registry "MuleIndian-2" | |
1142 ; chars 94 | |
1143 ; columns 2 | |
1144 ; direction l2r | |
1145 ; final ?5 | |
1146 ; graphic 0 | |
1147 ; short-name "Indian 2-col" | |
1148 ; long-name "Indian 2 Column" | |
1149 ; )) | |
1150 | |
1151 | |
1152 ; ;; Tibetan script. | |
1153 ; (make-charset 'tibetan "Tibetan characters" | |
1154 ; '(dimension | |
1155 ; 2 | |
1156 ; registry "MuleTibetan-2" | |
1157 ; chars 94 | |
1158 ; columns 2 | |
1159 ; direction l2r | |
1160 ; final ?7 | |
1161 ; graphic 0 | |
1162 ; short-name "Tibetan 2-col" | |
1163 ; long-name "Tibetan 2 column" | |
1164 ; )) | |
1165 | |
1166 | |
1167 ; ;; CHARSET-ID 253 is not used. | |
1168 | |
1169 ; ;; JISX0213 Plane 2 | |
1170 ; (make-charset 'japanese-jisx0213-2 "JISX0213 Plane 2 (Japanese)" | |
1171 ; '(dimension | |
1172 ; 2 | |
1173 ; registry "JISX0213.2000-2" | |
1174 ; chars 94 | |
1175 ; columns 2 | |
1176 ; direction l2r | |
1177 ; final ?P | |
1178 ; graphic 0 | |
1179 ; short-name "JISX0213-2" | |
1180 ; long-name "JISX0213-2" | |
1181 ; )) | |
1182 | |
428 | 1183 ;;; mule-charset.el ends here |
778 | 1184 |