428
|
1 /* Functions to handle multilingual characters.
|
|
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
|
|
3 Copyright (C) 1995 Sun Microsystems, Inc.
|
3025
|
4 Copyright (C) 2001, 2002, 2004, 2005 Ben Wing.
|
428
|
5
|
|
6 This file is part of XEmacs.
|
|
7
|
|
8 XEmacs is free software; you can redistribute it and/or modify it
|
|
9 under the terms of the GNU General Public License as published by the
|
|
10 Free Software Foundation; either version 2, or (at your option) any
|
|
11 later version.
|
|
12
|
|
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
|
|
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
16 for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with XEmacs; see the file COPYING. If not, write to
|
|
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
21 Boston, MA 02111-1307, USA. */
|
|
22
|
|
23 /* Synched up with: FSF 20.3. Not in FSF. */
|
|
24
|
|
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
|
|
26
|
|
27 #include <config.h>
|
|
28 #include "lisp.h"
|
|
29
|
|
30 #include "buffer.h"
|
|
31 #include "chartab.h"
|
|
32 #include "elhash.h"
|
|
33 #include "device.h"
|
|
34 #include "faces.h"
|
771
|
35 #include "lstream.h"
|
428
|
36 #include "mule-ccl.h"
|
872
|
37 #include "objects.h"
|
3659
|
38 #include "specifier.h"
|
428
|
39
|
|
40 /* The various pre-defined charsets. */
|
|
41
|
|
42 Lisp_Object Vcharset_ascii;
|
|
43 Lisp_Object Vcharset_control_1;
|
|
44 Lisp_Object Vcharset_latin_iso8859_1;
|
|
45 Lisp_Object Vcharset_latin_iso8859_2;
|
|
46 Lisp_Object Vcharset_latin_iso8859_3;
|
|
47 Lisp_Object Vcharset_latin_iso8859_4;
|
|
48 Lisp_Object Vcharset_thai_tis620;
|
|
49 Lisp_Object Vcharset_greek_iso8859_7;
|
|
50 Lisp_Object Vcharset_hebrew_iso8859_8;
|
|
51 Lisp_Object Vcharset_katakana_jisx0201;
|
|
52 Lisp_Object Vcharset_latin_jisx0201;
|
|
53 Lisp_Object Vcharset_cyrillic_iso8859_5;
|
|
54 Lisp_Object Vcharset_latin_iso8859_9;
|
728
|
55 Lisp_Object Vcharset_latin_iso8859_15;
|
428
|
56 Lisp_Object Vcharset_japanese_jisx0208_1978;
|
|
57 Lisp_Object Vcharset_chinese_gb2312;
|
|
58 Lisp_Object Vcharset_japanese_jisx0208;
|
|
59 Lisp_Object Vcharset_korean_ksc5601;
|
|
60 Lisp_Object Vcharset_japanese_jisx0212;
|
|
61 Lisp_Object Vcharset_chinese_cns11643_1;
|
|
62 Lisp_Object Vcharset_chinese_cns11643_2;
|
|
63 Lisp_Object Vcharset_chinese_big5_1;
|
|
64 Lisp_Object Vcharset_chinese_big5_2;
|
|
65 Lisp_Object Vcharset_composite;
|
|
66
|
|
67 struct charset_lookup *chlook;
|
|
68
|
1204
|
69 static const struct memory_description charset_lookup_description_1[] = {
|
771
|
70 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), NUM_LEADING_BYTES+4*128*2 },
|
428
|
71 { XD_END }
|
|
72 };
|
|
73
|
1204
|
74 static const struct sized_memory_description charset_lookup_description = {
|
440
|
75 sizeof (struct charset_lookup),
|
428
|
76 charset_lookup_description_1
|
|
77 };
|
|
78
|
|
79 Lisp_Object Qcharsetp;
|
|
80
|
3664
|
81 /* Qdoc_string, Qdimension, Qchars, Qfinal defined in general.c */
|
|
82 Lisp_Object Qregistries, Qgraphic, Qregistry;
|
428
|
83 Lisp_Object Qdirection;
|
|
84 Lisp_Object Qreverse_direction_charset;
|
|
85 Lisp_Object Qshort_name, Qlong_name;
|
|
86
|
771
|
87 Lisp_Object Qfrom_unicode, Qto_unicode;
|
|
88
|
|
89 Lisp_Object
|
428
|
90 Qlatin_iso8859_1,
|
|
91 Qlatin_iso8859_2,
|
|
92 Qlatin_iso8859_3,
|
|
93 Qlatin_iso8859_4,
|
|
94 Qthai_tis620,
|
|
95 Qgreek_iso8859_7,
|
|
96 Qhebrew_iso8859_8,
|
|
97 Qkatakana_jisx0201,
|
|
98 Qlatin_jisx0201,
|
|
99 Qcyrillic_iso8859_5,
|
|
100 Qlatin_iso8859_9,
|
728
|
101 Qlatin_iso8859_15,
|
428
|
102 Qjapanese_jisx0208_1978,
|
|
103 Qchinese_gb2312,
|
|
104 Qjapanese_jisx0208,
|
|
105 Qkorean_ksc5601,
|
|
106 Qjapanese_jisx0212,
|
|
107 Qchinese_cns11643_1,
|
|
108 Qchinese_cns11643_2,
|
|
109 Qchinese_big5_1,
|
|
110 Qchinese_big5_2,
|
|
111 Qcomposite;
|
|
112
|
|
113 Lisp_Object Ql2r, Qr2l;
|
|
114
|
|
115 Lisp_Object Vcharset_hash_table;
|
|
116
|
|
117
|
|
118 /************************************************************************/
|
|
119 /* charset object */
|
|
120 /************************************************************************/
|
|
121
|
|
122 static Lisp_Object
|
|
123 mark_charset (Lisp_Object obj)
|
|
124 {
|
440
|
125 Lisp_Charset *cs = XCHARSET (obj);
|
428
|
126
|
|
127 mark_object (cs->short_name);
|
|
128 mark_object (cs->long_name);
|
|
129 mark_object (cs->doc_string);
|
3659
|
130 mark_object (cs->registries);
|
428
|
131 mark_object (cs->ccl_program);
|
|
132 return cs->name;
|
|
133 }
|
|
134
|
|
135 static void
|
2286
|
136 print_charset (Lisp_Object obj, Lisp_Object printcharfun,
|
|
137 int UNUSED (escapeflag))
|
428
|
138 {
|
440
|
139 Lisp_Charset *cs = XCHARSET (obj);
|
428
|
140
|
|
141 if (print_readably)
|
563
|
142 printing_unreadable_object ("#<charset %s 0x%x>",
|
793
|
143 XSTRING_DATA (XSYMBOL (CHARSET_NAME (cs))->
|
563
|
144 name),
|
|
145 cs->header.uid);
|
428
|
146
|
771
|
147 write_fmt_string_lisp (printcharfun, "#<charset %s %S %S %S", 4,
|
|
148 CHARSET_NAME (cs), CHARSET_SHORT_NAME (cs),
|
|
149 CHARSET_LONG_NAME (cs), CHARSET_DOC_STRING (cs));
|
|
150 write_fmt_string (printcharfun, " %s %s cols=%d g%d final='%c' reg=",
|
|
151 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
|
|
152 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
|
|
153 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
|
|
154 "96x96",
|
|
155 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" :
|
|
156 "r2l",
|
|
157 CHARSET_COLUMNS (cs),
|
|
158 CHARSET_GRAPHIC (cs),
|
|
159 CHARSET_FINAL (cs));
|
3659
|
160 print_internal (CHARSET_REGISTRIES (cs), printcharfun, 0);
|
771
|
161 write_fmt_string (printcharfun, " 0x%x>", cs->header.uid);
|
|
162 }
|
|
163
|
1204
|
164 static const struct memory_description charset_description[] = {
|
|
165 { XD_INT, offsetof (Lisp_Charset, dimension) },
|
|
166 { XD_INT, offsetof (Lisp_Charset, from_unicode_levels) },
|
440
|
167 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
|
|
168 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
|
3659
|
169 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registries) },
|
440
|
170 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
|
|
171 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
|
|
172 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
|
|
173 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
|
771
|
174 { XD_UNION, offsetof (Lisp_Charset, to_unicode_table),
|
2775
|
175 XD_INDIRECT (0, 0), { &to_unicode_description }, XD_FLAG_NO_KKCC },
|
771
|
176 { XD_UNION, offsetof (Lisp_Charset, from_unicode_table),
|
2775
|
177 XD_INDIRECT (1, 0), { &from_unicode_description }, XD_FLAG_NO_KKCC },
|
428
|
178 { XD_END }
|
|
179 };
|
|
180
|
934
|
181 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
|
|
182 1, /* dumpable flag */
|
2367
|
183 mark_charset, print_charset, 0,
|
934
|
184 0, 0, charset_description, Lisp_Charset);
|
428
|
185 /* Make a new charset. */
|
446
|
186 /* #### SJT Should generic properties be allowed? */
|
428
|
187 static Lisp_Object
|
771
|
188 make_charset (int id, Lisp_Object name, int rep_bytes,
|
|
189 int type, int columns, int graphic,
|
867
|
190 Ibyte final, int direction, Lisp_Object short_name,
|
428
|
191 Lisp_Object long_name, Lisp_Object doc,
|
3439
|
192 Lisp_Object reg, int overwrite, int encode_as_utf_8)
|
428
|
193 {
|
|
194 Lisp_Object obj;
|
771
|
195 Lisp_Charset *cs;
|
|
196
|
|
197 if (!overwrite)
|
|
198 {
|
3017
|
199 cs = ALLOC_LCRECORD_TYPE (Lisp_Charset, &lrecord_charset);
|
793
|
200 obj = wrap_charset (cs);
|
771
|
201
|
|
202 if (final)
|
|
203 {
|
|
204 /* some charsets do not have final characters. This includes
|
|
205 ASCII, Control-1, Composite, and the two faux private
|
|
206 charsets. */
|
|
207 assert (NILP (chlook->
|
|
208 charset_by_attributes[type][final][direction]));
|
|
209 chlook->charset_by_attributes[type][final][direction] = obj;
|
|
210 }
|
440
|
211
|
771
|
212 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
|
|
213 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
|
|
214 }
|
|
215 else
|
|
216 {
|
|
217 Lisp_Object ret;
|
|
218 /* Actually overwrite the properties of the existing charset.
|
|
219 We do this because until now charsets could never be "deleted",
|
|
220 so parts of the code don't bother to GC charsets. */
|
|
221 obj = chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE];
|
|
222 cs = XCHARSET (obj);
|
|
223 assert (EQ (chlook->charset_by_attributes[type][final][direction],
|
|
224 obj));
|
|
225
|
|
226 ret = Fremhash (XCHARSET_NAME (obj), Vcharset_hash_table);
|
|
227 assert (!NILP (ret));
|
|
228 }
|
428
|
229
|
|
230 CHARSET_ID (cs) = id;
|
|
231 CHARSET_NAME (cs) = name;
|
|
232 CHARSET_SHORT_NAME (cs) = short_name;
|
|
233 CHARSET_LONG_NAME (cs) = long_name;
|
|
234 CHARSET_REP_BYTES (cs) = rep_bytes;
|
|
235 CHARSET_DIRECTION (cs) = direction;
|
|
236 CHARSET_TYPE (cs) = type;
|
|
237 CHARSET_COLUMNS (cs) = columns;
|
|
238 CHARSET_GRAPHIC (cs) = graphic;
|
|
239 CHARSET_FINAL (cs) = final;
|
|
240 CHARSET_DOC_STRING (cs) = doc;
|
3659
|
241 CHECK_VECTOR(reg);
|
|
242 CHARSET_REGISTRIES (cs) = reg;
|
3439
|
243 CHARSET_ENCODE_AS_UTF_8 (cs) = encode_as_utf_8 ? 1 : 0;
|
428
|
244 CHARSET_CCL_PROGRAM (cs) = Qnil;
|
|
245 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
|
|
246
|
771
|
247 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
|
|
248 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
|
|
249 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
|
|
250 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
|
428
|
251
|
771
|
252 if (id == LEADING_BYTE_ASCII || id == LEADING_BYTE_CONTROL_1
|
|
253 #ifdef ENABLE_COMPOSITE_CHARS
|
|
254 || id == LEADING_BYTE_COMPOSITE
|
|
255 #endif
|
|
256 )
|
|
257 assert (!overwrite);
|
|
258 else
|
428
|
259 {
|
771
|
260 if (overwrite)
|
|
261 free_charset_unicode_tables (obj);
|
|
262 init_charset_unicode_tables (obj);
|
428
|
263 }
|
|
264
|
|
265 /* Some charsets are "faux" and don't have names or really exist at
|
|
266 all except in the leading-byte table. */
|
|
267 if (!NILP (name))
|
771
|
268 {
|
|
269 assert (NILP (Fgethash (name, Vcharset_hash_table, Qnil)));
|
|
270 Fputhash (name, obj, Vcharset_hash_table);
|
|
271 }
|
|
272
|
|
273 recalculate_unicode_precedence ();
|
3659
|
274 setup_charset_initial_specifier_tags (obj);
|
|
275
|
428
|
276 return obj;
|
|
277 }
|
|
278
|
|
279 static int
|
|
280 get_unallocated_leading_byte (int dimension)
|
|
281 {
|
|
282 int lb;
|
|
283
|
|
284 if (dimension == 1)
|
|
285 {
|
771
|
286 if (chlook->next_allocated_1_byte_leading_byte >
|
|
287 MAX_LEADING_BYTE_PRIVATE_1)
|
428
|
288 lb = 0;
|
|
289 else
|
442
|
290 lb = chlook->next_allocated_1_byte_leading_byte++;
|
428
|
291 }
|
|
292 else
|
|
293 {
|
1747
|
294 /* awfully fragile, but correct */
|
|
295 #if MAX_LEADING_BYTE_PRIVATE_2 == 255
|
|
296 if (chlook->next_allocated_2_byte_leading_byte == 0)
|
1749
|
297 #else
|
771
|
298 if (chlook->next_allocated_2_byte_leading_byte >
|
|
299 MAX_LEADING_BYTE_PRIVATE_2)
|
1747
|
300 #endif
|
428
|
301 lb = 0;
|
|
302 else
|
442
|
303 lb = chlook->next_allocated_2_byte_leading_byte++;
|
428
|
304 }
|
|
305
|
|
306 if (!lb)
|
563
|
307 invalid_operation
|
771
|
308 ("No more character sets free for this dimension", make_int (dimension));
|
428
|
309
|
|
310 return lb;
|
|
311 }
|
|
312
|
|
313
|
|
314 /************************************************************************/
|
|
315 /* Basic charset Lisp functions */
|
|
316 /************************************************************************/
|
|
317
|
788
|
318 void
|
|
319 get_charset_limits (Lisp_Object charset, int *low, int *high)
|
|
320 {
|
|
321 Lisp_Charset *cs = XCHARSET (charset);
|
|
322
|
|
323 if (EQ (charset, Vcharset_ascii)) *low = 0, *high = 127;
|
|
324 else if (EQ (charset, Vcharset_control_1)) *low = 0, *high = 31;
|
|
325 else if (CHARSET_CHARS (cs) == 94) *low = 33, *high = 126;
|
|
326 else /* CHARSET_CHARS (cs) == 96) */ *low = 32, *high = 127;
|
|
327 }
|
|
328
|
428
|
329 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
|
|
330 Return non-nil if OBJECT is a charset.
|
|
331 */
|
|
332 (object))
|
|
333 {
|
|
334 return CHARSETP (object) ? Qt : Qnil;
|
|
335 }
|
|
336
|
|
337 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
|
|
338 Retrieve the charset of the given name.
|
|
339 If CHARSET-OR-NAME is a charset object, it is simply returned.
|
|
340 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
|
|
341 nil is returned. Otherwise the associated charset object is returned.
|
|
342 */
|
|
343 (charset_or_name))
|
|
344 {
|
|
345 if (CHARSETP (charset_or_name))
|
|
346 return charset_or_name;
|
|
347
|
|
348 CHECK_SYMBOL (charset_or_name);
|
|
349 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
|
|
350 }
|
|
351
|
|
352 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
|
|
353 Retrieve the charset of the given name.
|
|
354 Same as `find-charset' except an error is signalled if there is no such
|
|
355 charset instead of returning nil.
|
|
356 */
|
|
357 (name))
|
|
358 {
|
|
359 Lisp_Object charset = Ffind_charset (name);
|
|
360
|
|
361 if (NILP (charset))
|
563
|
362 invalid_argument ("No such charset", name);
|
428
|
363 return charset;
|
|
364 }
|
|
365
|
|
366 /* We store the charsets in hash tables with the names as the key and the
|
|
367 actual charset object as the value. Occasionally we need to use them
|
|
368 in a list format. These routines provide us with that. */
|
|
369 struct charset_list_closure
|
|
370 {
|
|
371 Lisp_Object *charset_list;
|
|
372 };
|
|
373
|
|
374 static int
|
2286
|
375 add_charset_to_list_mapper (Lisp_Object UNUSED (key), Lisp_Object value,
|
428
|
376 void *charset_list_closure)
|
|
377 {
|
|
378 /* This function can GC */
|
|
379 struct charset_list_closure *chcl =
|
|
380 (struct charset_list_closure*) charset_list_closure;
|
|
381 Lisp_Object *charset_list = chcl->charset_list;
|
|
382
|
|
383 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
|
|
384 return 0;
|
|
385 }
|
|
386
|
|
387 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
|
|
388 Return a list of the names of all defined charsets.
|
|
389 */
|
|
390 ())
|
|
391 {
|
|
392 Lisp_Object charset_list = Qnil;
|
|
393 struct gcpro gcpro1;
|
|
394 struct charset_list_closure charset_list_closure;
|
|
395
|
|
396 GCPRO1 (charset_list);
|
|
397 charset_list_closure.charset_list = &charset_list;
|
|
398 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
|
|
399 &charset_list_closure);
|
|
400 UNGCPRO;
|
|
401
|
|
402 return charset_list;
|
|
403 }
|
|
404
|
|
405 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
|
444
|
406 Return the name of charset CHARSET.
|
428
|
407 */
|
|
408 (charset))
|
|
409 {
|
|
410 return XCHARSET_NAME (Fget_charset (charset));
|
|
411 }
|
|
412
|
446
|
413 /* #### SJT Should generic properties be allowed? */
|
428
|
414 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
|
|
415 Define a new character set.
|
|
416 This function is for use with Mule support.
|
|
417 NAME is a symbol, the name by which the character set is normally referred.
|
|
418 DOC-STRING is a string describing the character set.
|
|
419 PROPS is a property list, describing the specific nature of the
|
|
420 character set. Recognized properties are:
|
|
421
|
3025
|
422 `short-name' Short version of the charset name (ex: Latin-1)
|
|
423 `long-name' Long version of the charset name (ex: ISO8859-1 (Latin-1))
|
3659
|
424 `registries' A vector of possible XLFD REGISTRY-ENCODING combinations for
|
|
425 this character set. Note that this is not a regular expression.
|
3025
|
426 `dimension' Number of octets used to index a character in this charset.
|
428
|
427 Either 1 or 2. Defaults to 1.
|
3025
|
428 `columns' Number of columns used to display a character in this charset.
|
428
|
429 Only used in TTY mode. (Under X, the actual width of a
|
|
430 character can be derived from the font used to display the
|
|
431 characters.) If unspecified, defaults to the dimension
|
|
432 (this is almost always the correct value).
|
3025
|
433 `chars' Number of characters in each dimension (94 or 96).
|
428
|
434 Defaults to 94. Note that if the dimension is 2, the
|
|
435 character set thus described is 94x94 or 96x96.
|
3025
|
436 `final' Final byte of ISO 2022 escape sequence. Must be
|
428
|
437 supplied. Each combination of (DIMENSION, CHARS) defines a
|
|
438 separate namespace for final bytes. Note that ISO
|
|
439 2022 restricts the final byte to the range
|
|
440 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
|
|
441 dimension == 2. Note also that final bytes in the range
|
|
442 0x30 - 0x3F are reserved for user-defined (not official)
|
|
443 character sets.
|
3025
|
444 `graphic' 0 (use left half of font on output) or 1 (use right half
|
428
|
445 of font on output). Defaults to 0. For example, for
|
|
446 a font whose registry is ISO8859-1, the left half
|
|
447 (octets 0x20 - 0x7F) is the `ascii' character set, while
|
|
448 the right half (octets 0xA0 - 0xFF) is the `latin-1'
|
3025
|
449 character set. With `graphic' set to 0, the octets
|
428
|
450 will have their high bit cleared; with it set to 1,
|
|
451 the octets will have their high bit set.
|
3025
|
452 `direction' `l2r' (left-to-right) or `r2l' (right-to-left).
|
|
453 Defaults to `l2r'.
|
|
454 `ccl-program' A compiled CCL program used to convert a character in
|
428
|
455 this charset into an index into the font. This is in
|
3025
|
456 addition to the `graphic' property. The CCL program
|
428
|
457 is passed the octets of the character, with the high
|
|
458 bit cleared and set depending upon whether the value
|
3025
|
459 of the `graphic' property is 0 or 1.
|
3439
|
460 `encode-as-utf-8'
|
|
461 If non-nil, the charset will be written out using the UTF-8
|
|
462 escape syntax in ISO 2022-oriented coding systems. Used for
|
|
463 supporting characters we know are part of Unicode but not of
|
|
464 any other known character set in escape-quoted and compound
|
|
465 text.
|
428
|
466 */
|
|
467 (name, doc_string, props))
|
|
468 {
|
771
|
469 int id, dimension = 1, chars = 94, graphic = 0, columns = -1;
|
867
|
470 Ibyte final = 0;
|
428
|
471 int direction = CHARSET_LEFT_TO_RIGHT;
|
|
472 int type;
|
3659
|
473 Lisp_Object registries = Qnil;
|
771
|
474 Lisp_Object charset = Qnil;
|
428
|
475 Lisp_Object ccl_program = Qnil;
|
|
476 Lisp_Object short_name = Qnil, long_name = Qnil;
|
3439
|
477 int encode_as_utf_8 = 0;
|
771
|
478 Lisp_Object existing_charset;
|
|
479 int temporary = UNBOUNDP (name);
|
428
|
480
|
771
|
481 /* NOTE: name == Qunbound is a directive from the iso2022 code to
|
|
482 create a temporary charset for an unknown final. We allow the final
|
|
483 to be overwritten with a real charset later on. */
|
|
484
|
428
|
485 if (!NILP (doc_string))
|
|
486 CHECK_STRING (doc_string);
|
771
|
487 if (!UNBOUNDP (name))
|
|
488 {
|
|
489 CHECK_SYMBOL (name);
|
428
|
490
|
771
|
491 charset = Ffind_charset (name);
|
|
492 if (!NILP (charset))
|
|
493 invalid_operation ("Cannot redefine existing charset", name);
|
|
494 }
|
428
|
495
|
442
|
496 {
|
|
497 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
|
|
498 {
|
|
499 if (EQ (keyword, Qshort_name))
|
|
500 {
|
|
501 CHECK_STRING (value);
|
|
502 short_name = value;
|
|
503 }
|
428
|
504
|
519
|
505 else if (EQ (keyword, Qlong_name))
|
442
|
506 {
|
|
507 CHECK_STRING (value);
|
|
508 long_name = value;
|
|
509 }
|
428
|
510
|
442
|
511 else if (EQ (keyword, Qdimension))
|
|
512 {
|
|
513 CHECK_INT (value);
|
|
514 dimension = XINT (value);
|
|
515 if (dimension < 1 || dimension > 2)
|
3025
|
516 invalid_constant ("Invalid value for `dimension'", value);
|
442
|
517 }
|
428
|
518
|
442
|
519 else if (EQ (keyword, Qchars))
|
|
520 {
|
|
521 CHECK_INT (value);
|
|
522 chars = XINT (value);
|
|
523 if (chars != 94 && chars != 96)
|
3025
|
524 invalid_constant ("Invalid value for `chars'", value);
|
442
|
525 }
|
428
|
526
|
442
|
527 else if (EQ (keyword, Qcolumns))
|
|
528 {
|
|
529 CHECK_INT (value);
|
|
530 columns = XINT (value);
|
|
531 if (columns != 1 && columns != 2)
|
3025
|
532 invalid_constant ("Invalid value for `columns'", value);
|
442
|
533 }
|
428
|
534
|
442
|
535 else if (EQ (keyword, Qgraphic))
|
|
536 {
|
|
537 CHECK_INT (value);
|
|
538 graphic = XINT (value);
|
|
539 if (graphic < 0 || graphic > 1)
|
3025
|
540 invalid_constant ("Invalid value for `graphic'", value);
|
442
|
541 }
|
428
|
542
|
3659
|
543 else if (EQ (keyword, Qregistries))
|
|
544 {
|
|
545 CHECK_VECTOR (value);
|
|
546 registries = value;
|
|
547 }
|
|
548
|
442
|
549 else if (EQ (keyword, Qregistry))
|
|
550 {
|
3659
|
551 Lisp_Object quoted_registry;
|
|
552
|
442
|
553 CHECK_STRING (value);
|
3659
|
554 quoted_registry = Fregexp_quote(value);
|
3662
|
555 if (qxestrcmp(XSTRING_DATA(quoted_registry),
|
3659
|
556 XSTRING_DATA(value)))
|
|
557 {
|
|
558 warn_when_safe
|
|
559 (Qregistry, Qwarning,
|
|
560 "Regexps no longer allowed for charset-registry. "
|
|
561 "Treating %s as string", XSTRING_DATA(value));
|
|
562 }
|
|
563 registries = vector1(value);
|
442
|
564 }
|
428
|
565
|
442
|
566 else if (EQ (keyword, Qdirection))
|
|
567 {
|
|
568 if (EQ (value, Ql2r))
|
|
569 direction = CHARSET_LEFT_TO_RIGHT;
|
|
570 else if (EQ (value, Qr2l))
|
|
571 direction = CHARSET_RIGHT_TO_LEFT;
|
|
572 else
|
3025
|
573 invalid_constant ("Invalid value for `direction'", value);
|
442
|
574 }
|
428
|
575
|
3439
|
576 else if (EQ (keyword, Qencode_as_utf_8))
|
|
577 {
|
|
578 encode_as_utf_8 = NILP (value) ? 0 : 1;
|
|
579 }
|
|
580
|
442
|
581 else if (EQ (keyword, Qfinal))
|
|
582 {
|
|
583 CHECK_CHAR_COERCE_INT (value);
|
|
584 final = XCHAR (value);
|
|
585 if (final < '0' || final > '~')
|
3025
|
586 invalid_constant ("Invalid value for `final'", value);
|
442
|
587 }
|
|
588 else if (EQ (keyword, Qccl_program))
|
|
589 {
|
444
|
590 struct ccl_program test_ccl;
|
|
591
|
|
592 if (setup_ccl_program (&test_ccl, value) < 0)
|
3025
|
593 invalid_argument ("Invalid value for `ccl-program'", value);
|
442
|
594 ccl_program = value;
|
|
595 }
|
|
596 else
|
563
|
597 invalid_constant ("Unrecognized property", keyword);
|
442
|
598 }
|
|
599 }
|
428
|
600
|
|
601 if (!final)
|
3025
|
602 invalid_argument ("`final' must be specified", Qunbound);
|
428
|
603 if (dimension == 2 && final > 0x5F)
|
563
|
604 invalid_constant
|
428
|
605 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
|
|
606 make_char (final));
|
|
607
|
|
608 if (dimension == 1)
|
|
609 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
|
|
610 else
|
|
611 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
|
|
612
|
826
|
613 existing_charset = charset_by_attributes (type, final, direction);
|
771
|
614
|
|
615 if (!NILP (existing_charset) && !XCHARSET (existing_charset)->temporary)
|
578
|
616 invalid_argument
|
793
|
617 ("Character set already defined for this DIMENSION/CHARS/FINAL/DIRECTION combo",
|
771
|
618 existing_charset);
|
|
619
|
|
620 if (!NILP (existing_charset))
|
|
621 /* Reuse same leading byte */
|
|
622 id = XCHARSET_ID (existing_charset);
|
|
623 else
|
|
624 id = get_unallocated_leading_byte (dimension);
|
428
|
625
|
771
|
626 if (temporary)
|
|
627 {
|
867
|
628 Ibyte tempname[80];
|
428
|
629
|
771
|
630 qxesprintf (tempname, "___temporary___%d__", id);
|
|
631 name = intern_int (tempname);
|
|
632 }
|
428
|
633 if (NILP (doc_string))
|
|
634 doc_string = build_string ("");
|
3659
|
635 if (NILP (registries))
|
|
636 registries = make_vector(0, Qnil);
|
428
|
637 if (NILP (short_name))
|
793
|
638 short_name = XSYMBOL (name)->name;
|
428
|
639 if (NILP (long_name))
|
|
640 long_name = doc_string;
|
|
641 if (columns == -1)
|
|
642 columns = dimension;
|
771
|
643
|
428
|
644 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
|
771
|
645 final, direction, short_name, long_name,
|
3659
|
646 doc_string, registries, !NILP (existing_charset),
|
3439
|
647 encode_as_utf_8);
|
771
|
648
|
|
649 XCHARSET (charset)->temporary = temporary;
|
428
|
650 if (!NILP (ccl_program))
|
|
651 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
|
771
|
652
|
793
|
653 {
|
|
654 Lisp_Object revdircs =
|
826
|
655 charset_by_attributes (type, final,
|
793
|
656 direction == CHARSET_LEFT_TO_RIGHT ?
|
|
657 CHARSET_RIGHT_TO_LEFT : CHARSET_LEFT_TO_RIGHT);
|
|
658 if (!NILP (revdircs))
|
|
659 {
|
|
660 XCHARSET_REVERSE_DIRECTION_CHARSET (revdircs) = charset;
|
|
661 XCHARSET_REVERSE_DIRECTION_CHARSET (charset) = revdircs;
|
|
662 }
|
|
663 }
|
|
664
|
428
|
665 return charset;
|
|
666 }
|
|
667
|
|
668 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
|
|
669 2, 2, 0, /*
|
|
670 Make a charset equivalent to CHARSET but which goes in the opposite direction.
|
|
671 NEW-NAME is the name of the new charset. Return the new charset.
|
|
672 */
|
|
673 (charset, new_name))
|
|
674 {
|
|
675 Lisp_Object new_charset = Qnil;
|
3439
|
676 int id, dimension, columns, graphic, encode_as_utf_8;
|
867
|
677 Ibyte final;
|
428
|
678 int direction, type;
|
3659
|
679 Lisp_Object registries, doc_string, short_name, long_name;
|
440
|
680 Lisp_Charset *cs;
|
428
|
681
|
|
682 charset = Fget_charset (charset);
|
|
683 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
|
563
|
684 invalid_operation ("Charset already has reverse-direction charset",
|
793
|
685 charset);
|
428
|
686
|
|
687 CHECK_SYMBOL (new_name);
|
|
688 if (!NILP (Ffind_charset (new_name)))
|
563
|
689 invalid_operation ("Cannot redefine existing charset", new_name);
|
428
|
690
|
|
691 cs = XCHARSET (charset);
|
|
692
|
|
693 type = CHARSET_TYPE (cs);
|
|
694 columns = CHARSET_COLUMNS (cs);
|
|
695 dimension = CHARSET_DIMENSION (cs);
|
|
696 id = get_unallocated_leading_byte (dimension);
|
|
697
|
|
698 graphic = CHARSET_GRAPHIC (cs);
|
|
699 final = CHARSET_FINAL (cs);
|
|
700 direction = CHARSET_RIGHT_TO_LEFT;
|
|
701 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
|
|
702 direction = CHARSET_LEFT_TO_RIGHT;
|
|
703 doc_string = CHARSET_DOC_STRING (cs);
|
|
704 short_name = CHARSET_SHORT_NAME (cs);
|
|
705 long_name = CHARSET_LONG_NAME (cs);
|
3659
|
706 registries = CHARSET_REGISTRIES (cs);
|
3439
|
707 encode_as_utf_8 = CHARSET_ENCODE_AS_UTF_8 (cs);
|
428
|
708
|
|
709 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
|
|
710 graphic, final, direction, short_name, long_name,
|
3659
|
711 doc_string, registries, 0, encode_as_utf_8);
|
428
|
712
|
|
713 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
|
|
714 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
|
|
715
|
|
716 return new_charset;
|
|
717 }
|
|
718
|
|
719 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
|
|
720 1, 1, 0, /*
|
|
721 Return the reverse-direction charset parallel to CHARSET, if any.
|
|
722 This is the charset with the same properties (in particular, the same
|
|
723 dimension, number of characters per dimension, and final byte) as
|
|
724 CHARSET but whose characters are displayed in the opposite direction.
|
|
725 */
|
|
726 (charset))
|
|
727 {
|
|
728 charset = Fget_charset (charset);
|
|
729 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
|
|
730 }
|
|
731
|
|
732 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
|
|
733 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
|
|
734 If DIRECTION is omitted, both directions will be checked (left-to-right
|
|
735 will be returned if character sets exist for both directions).
|
|
736 */
|
|
737 (dimension, chars, final, direction))
|
|
738 {
|
|
739 int dm, ch, fi, di = -1;
|
|
740 int type;
|
|
741 Lisp_Object obj = Qnil;
|
|
742
|
|
743 CHECK_INT (dimension);
|
|
744 dm = XINT (dimension);
|
|
745 if (dm < 1 || dm > 2)
|
563
|
746 invalid_constant ("Invalid value for DIMENSION", dimension);
|
428
|
747
|
|
748 CHECK_INT (chars);
|
|
749 ch = XINT (chars);
|
|
750 if (ch != 94 && ch != 96)
|
563
|
751 invalid_constant ("Invalid value for CHARS", chars);
|
428
|
752
|
|
753 CHECK_CHAR_COERCE_INT (final);
|
|
754 fi = XCHAR (final);
|
|
755 if (fi < '0' || fi > '~')
|
563
|
756 invalid_constant ("Invalid value for FINAL", final);
|
428
|
757
|
|
758 if (EQ (direction, Ql2r))
|
|
759 di = CHARSET_LEFT_TO_RIGHT;
|
|
760 else if (EQ (direction, Qr2l))
|
|
761 di = CHARSET_RIGHT_TO_LEFT;
|
|
762 else if (!NILP (direction))
|
563
|
763 invalid_constant ("Invalid value for DIRECTION", direction);
|
428
|
764
|
|
765 if (dm == 2 && fi > 0x5F)
|
563
|
766 invalid_constant
|
428
|
767 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
|
|
768
|
|
769 if (dm == 1)
|
|
770 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
|
|
771 else
|
|
772 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
|
|
773
|
|
774 if (di == -1)
|
|
775 {
|
826
|
776 obj = charset_by_attributes (type, fi, CHARSET_LEFT_TO_RIGHT);
|
428
|
777 if (NILP (obj))
|
826
|
778 obj = charset_by_attributes (type, fi, CHARSET_RIGHT_TO_LEFT);
|
428
|
779 }
|
|
780 else
|
826
|
781 obj = charset_by_attributes (type, fi, di);
|
428
|
782
|
|
783 if (CHARSETP (obj))
|
|
784 return XCHARSET_NAME (obj);
|
|
785 return obj;
|
|
786 }
|
|
787
|
|
788 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
|
|
789 Return short name of CHARSET.
|
|
790 */
|
|
791 (charset))
|
|
792 {
|
|
793 return XCHARSET_SHORT_NAME (Fget_charset (charset));
|
|
794 }
|
|
795
|
|
796 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
|
|
797 Return long name of CHARSET.
|
|
798 */
|
|
799 (charset))
|
|
800 {
|
|
801 return XCHARSET_LONG_NAME (Fget_charset (charset));
|
|
802 }
|
|
803
|
|
804 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
|
|
805 Return description of CHARSET.
|
|
806 */
|
|
807 (charset))
|
|
808 {
|
|
809 return XCHARSET_DOC_STRING (Fget_charset (charset));
|
|
810 }
|
|
811
|
|
812 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
|
|
813 Return dimension of CHARSET.
|
|
814 */
|
|
815 (charset))
|
|
816 {
|
|
817 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
|
|
818 }
|
|
819
|
|
820 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
|
446
|
821 Return property PROP of CHARSET, a charset object or symbol naming a charset.
|
428
|
822 Recognized properties are those listed in `make-charset', as well as
|
3025
|
823 `name' and `doc-string'.
|
428
|
824 */
|
|
825 (charset, prop))
|
|
826 {
|
440
|
827 Lisp_Charset *cs;
|
428
|
828
|
|
829 charset = Fget_charset (charset);
|
|
830 cs = XCHARSET (charset);
|
|
831
|
|
832 CHECK_SYMBOL (prop);
|
|
833 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
|
|
834 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
|
|
835 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
|
|
836 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
|
|
837 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
|
|
838 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
|
|
839 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
|
|
840 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
|
|
841 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
|
3659
|
842 if (EQ (prop, Qregistries)) return CHARSET_REGISTRIES (cs);
|
3439
|
843 if (EQ (prop, Qencode_as_utf_8))
|
|
844 return CHARSET_ENCODE_AS_UTF_8 (cs) ? Qt : Qnil;
|
428
|
845 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
|
|
846 if (EQ (prop, Qdirection))
|
|
847 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
|
|
848 if (EQ (prop, Qreverse_direction_charset))
|
|
849 {
|
|
850 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
|
446
|
851 /* #### Is this translation OK? If so, error checking sufficient? */
|
|
852 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
|
428
|
853 }
|
563
|
854 invalid_constant ("Unrecognized charset property name", prop);
|
1204
|
855 RETURN_NOT_REACHED (Qnil);
|
428
|
856 }
|
|
857
|
|
858 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
|
|
859 Return charset identification number of CHARSET.
|
|
860 */
|
|
861 (charset))
|
|
862 {
|
793
|
863 return make_int (XCHARSET_LEADING_BYTE (Fget_charset (charset)));
|
428
|
864 }
|
|
865
|
|
866 /* #### We need to figure out which properties we really want to
|
|
867 allow to be set. */
|
|
868
|
|
869 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
|
3025
|
870 Set the `ccl-program' property of CHARSET to CCL-PROGRAM.
|
428
|
871 */
|
|
872 (charset, ccl_program))
|
|
873 {
|
444
|
874 struct ccl_program test_ccl;
|
|
875
|
428
|
876 charset = Fget_charset (charset);
|
444
|
877 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
|
563
|
878 invalid_argument ("Invalid ccl-program", ccl_program);
|
428
|
879 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
|
510
|
880 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
|
428
|
881 return Qnil;
|
|
882 }
|
|
883
|
3676
|
884 void
|
|
885 set_charset_registries(Lisp_Object charset, Lisp_Object registries)
|
|
886 {
|
|
887 XCHARSET_REGISTRIES (charset) = registries;
|
|
888 invalidate_charset_font_caches (charset);
|
|
889 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
|
|
890 }
|
|
891
|
3711
|
892 DEFUN ("set-charset-registries", Fset_charset_registries, 2, 3, 0, /*
|
3659
|
893 Set the `registries' property of CHARSET to REGISTRIES.
|
|
894
|
|
895 REGISTRIES is an ordered vector of strings that describe the X11
|
|
896 CHARSET_REGISTRY and the CHARSET_ENCODINGs appropriate for this charset.
|
|
897 Separate each registry from the corresponding encoding with a dash. The
|
|
898 strings are not regular expressions, in contrast to the old behavior of
|
|
899 the `charset-registry' property.
|
|
900
|
|
901 One reason to call this function might be if you're in Japan and you'd
|
|
902 prefer the backslash to display as a Yen sign; the corresponding syntax
|
|
903 would be:
|
|
904
|
|
905 (set-charset-registries 'ascii ["jisx0201.1976-0"])
|
|
906
|
3711
|
907 If optional argument FORCE is non-nil, avoid sanity-checking the elements of
|
|
908 REGISTRIES. Normally the strings are checked to make sure they contain no
|
|
909 XLFD wild cards and that they contain at least one hyphen; the only context
|
|
910 in which one might want not to do this is in order to use a font which
|
|
911 doesn't have a full XLFD--and thus, an effective
|
|
912 CHARSET_REGISTRY-CHARSET_ENCODING of ""--to display ASCII.
|
|
913
|
|
914 We recommend strongly that you specify a full XLFD, since this makes
|
|
915 multilingual and variant font handling work much better. To get the full
|
|
916 XLFD of any font, start xfd with the short name as the pattern argument:
|
|
917
|
|
918 xfd -fn 8x16kana
|
|
919
|
|
920 and use the text that appears at the top of the window.
|
428
|
921 */
|
3711
|
922 (charset, registries, force))
|
428
|
923 {
|
3659
|
924 int i;
|
428
|
925 charset = Fget_charset (charset);
|
3659
|
926 CHECK_VECTOR (registries);
|
|
927
|
|
928 for (i = 0; i < XVECTOR_LENGTH(registries); ++i)
|
|
929 {
|
|
930 CHECK_STRING (XVECTOR_DATA(registries)[i]);
|
3711
|
931
|
|
932 if (!NILP(force))
|
|
933 {
|
|
934 continue;
|
|
935 }
|
|
936
|
3659
|
937 if (NULL == qxestrchr(XSTRING_DATA(XVECTOR_DATA(registries)[i]), '-'))
|
|
938 {
|
|
939 invalid_argument("Not an X11 REGISTRY-ENCODING combination",
|
|
940 XVECTOR_DATA(registries)[i]);
|
|
941 }
|
3676
|
942
|
|
943 if (qxestrchr(XSTRING_DATA(XVECTOR_DATA(registries)[i]), '*') ||
|
|
944 qxestrchr(XSTRING_DATA(XVECTOR_DATA(registries)[i]), '?'))
|
|
945 {
|
|
946 invalid_argument
|
|
947 ("XLFD wildcards not allowed in charset-registries",
|
|
948 XVECTOR_DATA(registries)[i]);
|
|
949
|
|
950 }
|
3659
|
951 }
|
|
952
|
3676
|
953 set_charset_registries(charset, registries);
|
|
954
|
428
|
955 return Qnil;
|
|
956 }
|
|
957
|
3681
|
958 DEFUN ("charsets-in-region", Fcharsets_in_region, 2, 3, 0, /*
|
|
959 Return a list of the charsets in the region between START and END.
|
|
960 BUFFER defaults to the current buffer if omitted.
|
|
961 */
|
|
962 (start, end, buffer))
|
|
963 {
|
|
964 /* This function can GC */
|
|
965 struct buffer *buf = decode_buffer (buffer, 1);
|
|
966 Charbpos pos, stop; /* Limits of the region. */
|
|
967 Lisp_Object res = Qnil;
|
|
968 int charsets[NUM_LEADING_BYTES];
|
|
969 Ibyte lb;
|
|
970 struct gcpro gcpro1;
|
|
971
|
|
972 memset(charsets, 0, sizeof(charsets));
|
|
973 get_buffer_range_char (buf, start, end, &pos, &stop, 0);
|
|
974
|
|
975 GCPRO1 (res);
|
|
976 while (pos < stop)
|
|
977 {
|
|
978 lb = ichar_leading_byte(BUF_FETCH_CHAR (buf, pos));
|
|
979 if (0 == charsets[lb - MIN_LEADING_BYTE])
|
|
980 {
|
|
981 charsets[lb - MIN_LEADING_BYTE] = 1;
|
|
982 res = Fcons (XCHARSET_NAME(charset_by_leading_byte(lb)), res);
|
|
983 }
|
|
984 ++pos;
|
|
985 }
|
|
986 UNGCPRO;
|
|
987
|
|
988 return res;
|
|
989 }
|
|
990
|
428
|
991
|
|
992 /************************************************************************/
|
771
|
993 /* memory usage */
|
428
|
994 /************************************************************************/
|
|
995
|
771
|
996 #ifdef MEMORY_USAGE_STATS
|
428
|
997
|
771
|
/* Per-charset memory totals, filled in by compute_charset_usage(). */
struct charset_stats
{
  int from_unicode;	/* size reported for the from-Unicode table */
  int to_unicode;	/* size reported for the to-Unicode table */
  int other;		/* storage of the charset object itself */
};
|
428
|
1004
|
771
|
1005 static void
|
|
1006 compute_charset_usage (Lisp_Object charset, struct charset_stats *stats,
|
|
1007 struct overhead_stats *ovstats)
|
428
|
1008 {
|
771
|
1009 struct Lisp_Charset *c = XCHARSET (charset);
|
|
1010 xzero (*stats);
|
3024
|
1011 stats->other += LISPOBJ_STORAGE_SIZE (c, sizeof (*c), ovstats);
|
771
|
1012 stats->from_unicode += compute_from_unicode_table_size (charset, ovstats);
|
|
1013 stats->to_unicode += compute_to_unicode_table_size (charset, ovstats);
|
438
|
1014 }
|
|
1015
|
771
|
1016 DEFUN ("charset-memory-usage", Fcharset_memory_usage, 1, 1, 0, /*
|
|
1017 Return stats about the memory usage of charset CHARSET.
|
|
1018 The values returned are in the form of an alist of usage types and
|
|
1019 byte counts. The byte counts attempt to encompass all the memory used
|
|
1020 by the charset (separate from the memory logically associated with a
|
|
1021 charset or frame), including internal structures and any malloc()
|
|
1022 overhead associated with them. In practice, the byte counts are
|
|
1023 underestimated for various reasons, e.g. because certain memory usage
|
|
1024 is very hard to determine \(e.g. the amount of memory used inside the
|
|
1025 Xt library or inside the X server).
|
428
|
1026
|
771
|
1027 Multiple slices of the total memory usage may be returned, separated
|
|
1028 by a nil. Each slice represents a particular view of the memory, a
|
|
1029 particular way of partitioning it into groups. Within a slice, there
|
|
1030 is no overlap between the groups of memory, and each slice collectively
|
|
1031 represents all the memory concerned.
|
|
1032 */
|
|
1033 (charset))
|
|
1034 {
|
|
1035 struct charset_stats stats;
|
|
1036 struct overhead_stats ovstats;
|
|
1037 Lisp_Object val = Qnil;
|
428
|
1038
|
771
|
1039 charset = Fget_charset (charset);
|
|
1040 xzero (ovstats);
|
|
1041 compute_charset_usage (charset, &stats, &ovstats);
|
428
|
1042
|
771
|
1043 val = acons (Qfrom_unicode, make_int (stats.from_unicode), val);
|
|
1044 val = acons (Qto_unicode, make_int (stats.to_unicode), val);
|
|
1045 val = Fcons (Qnil, val);
|
|
1046 val = acons (Qactually_requested, make_int (ovstats.was_requested), val);
|
|
1047 val = acons (Qmalloc_overhead, make_int (ovstats.malloc_overhead), val);
|
|
1048 val = acons (Qgap_overhead, make_int (ovstats.gap_overhead), val);
|
|
1049 val = acons (Qdynarr_overhead, make_int (ovstats.dynarr_overhead), val);
|
|
1050
|
|
1051 return Fnreverse (val);
|
428
|
1052 }
|
|
1053
|
771
|
1054 #endif /* MEMORY_USAGE_STATS */
|
428
|
1055
|
|
1056
|
|
1057 /************************************************************************/
|
|
1058 /* initialization */
|
|
1059 /************************************************************************/
|
|
1060
|
|
1061 void
|
|
1062 syms_of_mule_charset (void)
|
|
1063 {
|
442
|
1064 INIT_LRECORD_IMPLEMENTATION (charset);
|
|
1065
|
428
|
1066 DEFSUBR (Fcharsetp);
|
|
1067 DEFSUBR (Ffind_charset);
|
|
1068 DEFSUBR (Fget_charset);
|
|
1069 DEFSUBR (Fcharset_list);
|
|
1070 DEFSUBR (Fcharset_name);
|
|
1071 DEFSUBR (Fmake_charset);
|
|
1072 DEFSUBR (Fmake_reverse_direction_charset);
|
793
|
1073 DEFSUBR (Fcharset_reverse_direction_charset);
|
428
|
1074 DEFSUBR (Fcharset_from_attributes);
|
|
1075 DEFSUBR (Fcharset_short_name);
|
|
1076 DEFSUBR (Fcharset_long_name);
|
|
1077 DEFSUBR (Fcharset_description);
|
|
1078 DEFSUBR (Fcharset_dimension);
|
|
1079 DEFSUBR (Fcharset_property);
|
|
1080 DEFSUBR (Fcharset_id);
|
|
1081 DEFSUBR (Fset_charset_ccl_program);
|
3659
|
1082 DEFSUBR (Fset_charset_registries);
|
3681
|
1083 DEFSUBR (Fcharsets_in_region);
|
428
|
1084
|
771
|
1085 #ifdef MEMORY_USAGE_STATS
|
|
1086 DEFSUBR (Fcharset_memory_usage);
|
428
|
1087 #endif
|
|
1088
|
563
|
1089 DEFSYMBOL (Qcharsetp);
|
3659
|
1090 DEFSYMBOL (Qregistries);
|
563
|
1091 DEFSYMBOL (Qfinal);
|
|
1092 DEFSYMBOL (Qgraphic);
|
3659
|
1093 DEFSYMBOL (Qregistry);
|
563
|
1094 DEFSYMBOL (Qdirection);
|
|
1095 DEFSYMBOL (Qreverse_direction_charset);
|
|
1096 DEFSYMBOL (Qshort_name);
|
|
1097 DEFSYMBOL (Qlong_name);
|
428
|
1098
|
771
|
1099 DEFSYMBOL (Qfrom_unicode);
|
|
1100 DEFSYMBOL (Qto_unicode);
|
|
1101
|
563
|
1102 DEFSYMBOL (Ql2r);
|
|
1103 DEFSYMBOL (Qr2l);
|
428
|
1104
|
|
1105 /* Charsets, compatible with FSF 20.3
|
|
1106 Naming convention is Script-Charset[-Edition] */
|
563
|
1107 DEFSYMBOL (Qlatin_iso8859_1);
|
|
1108 DEFSYMBOL (Qlatin_iso8859_2);
|
|
1109 DEFSYMBOL (Qlatin_iso8859_3);
|
|
1110 DEFSYMBOL (Qlatin_iso8859_4);
|
|
1111 DEFSYMBOL (Qthai_tis620);
|
|
1112 DEFSYMBOL (Qgreek_iso8859_7);
|
|
1113 DEFSYMBOL (Qhebrew_iso8859_8);
|
|
1114 DEFSYMBOL (Qkatakana_jisx0201);
|
|
1115 DEFSYMBOL (Qlatin_jisx0201);
|
|
1116 DEFSYMBOL (Qcyrillic_iso8859_5);
|
|
1117 DEFSYMBOL (Qlatin_iso8859_9);
|
728
|
1118 DEFSYMBOL (Qlatin_iso8859_15);
|
563
|
1119 DEFSYMBOL (Qjapanese_jisx0208_1978);
|
|
1120 DEFSYMBOL (Qchinese_gb2312);
|
|
1121 DEFSYMBOL (Qjapanese_jisx0208);
|
|
1122 DEFSYMBOL (Qkorean_ksc5601);
|
|
1123 DEFSYMBOL (Qjapanese_jisx0212);
|
|
1124 DEFSYMBOL (Qchinese_cns11643_1);
|
|
1125 DEFSYMBOL (Qchinese_cns11643_2);
|
|
1126 DEFSYMBOL (Qchinese_big5_1);
|
|
1127 DEFSYMBOL (Qchinese_big5_2);
|
428
|
1128
|
563
|
1129 DEFSYMBOL (Qcomposite);
|
428
|
1130 }
|
|
1131
|
|
1132 void
|
|
1133 vars_of_mule_charset (void)
|
|
1134 {
|
|
1135 int i, j, k;
|
|
1136
|
452
|
1137 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
|
2367
|
1138 dump_add_root_block_ptr (&chlook, &charset_lookup_description);
|
428
|
1139
|
|
1140 /* Table of charsets indexed by leading byte. */
|
|
1141 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
|
|
1142 chlook->charset_by_leading_byte[i] = Qnil;
|
|
1143
|
|
1144 /* Table of charsets indexed by type/final-byte/direction. */
|
|
1145 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
|
|
1146 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
|
|
1147 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
|
|
1148 chlook->charset_by_attributes[i][j][k] = Qnil;
|
|
1149
|
442
|
1150 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
|
|
1151 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
|
771
|
1152
|
|
1153 staticpro (&Vcharset_hash_table);
|
|
1154 Vcharset_hash_table =
|
|
1155 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
|
428
|
1156 }
|
|
1157
|
|
1158 void
|
|
1159 complex_vars_of_mule_charset (void)
|
|
1160 {
|
|
1161 /* Predefined character sets. We store them into variables for
|
|
1162 ease of access. */
|
|
1163
|
|
1164 staticpro (&Vcharset_ascii);
|
|
1165 Vcharset_ascii =
|
|
1166 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
|
|
1167 CHARSET_TYPE_94, 1, 0, 'B',
|
|
1168 CHARSET_LEFT_TO_RIGHT,
|
|
1169 build_string ("ASCII"),
|
771
|
1170 build_msg_string ("ASCII"),
|
|
1171 build_msg_string ("ASCII (ISO646 IRV)"),
|
3659
|
1172 vector1(build_string("iso8859-1")), 0, 0);
|
428
|
1173 staticpro (&Vcharset_control_1);
|
|
1174 Vcharset_control_1 =
|
|
1175 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
|
|
1176 CHARSET_TYPE_94, 1, 1, 0,
|
|
1177 CHARSET_LEFT_TO_RIGHT,
|
|
1178 build_string ("C1"),
|
771
|
1179 build_msg_string ("Control characters"),
|
|
1180 build_msg_string ("Control characters 128-191"),
|
3659
|
1181 vector1(build_string("iso8859-1")), 0, 0);
|
428
|
1182 staticpro (&Vcharset_latin_iso8859_1);
|
|
1183 Vcharset_latin_iso8859_1 =
|
|
1184 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
|
|
1185 CHARSET_TYPE_96, 1, 1, 'A',
|
|
1186 CHARSET_LEFT_TO_RIGHT,
|
|
1187 build_string ("Latin-1"),
|
771
|
1188 build_msg_string ("ISO8859-1 (Latin-1)"),
|
|
1189 build_msg_string ("ISO8859-1 (Latin-1)"),
|
3659
|
1190 vector1(build_string("iso8859-1")), 0, 0);
|
428
|
1191 staticpro (&Vcharset_latin_iso8859_2);
|
|
1192 Vcharset_latin_iso8859_2 =
|
|
1193 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
|
|
1194 CHARSET_TYPE_96, 1, 1, 'B',
|
|
1195 CHARSET_LEFT_TO_RIGHT,
|
|
1196 build_string ("Latin-2"),
|
771
|
1197 build_msg_string ("ISO8859-2 (Latin-2)"),
|
|
1198 build_msg_string ("ISO8859-2 (Latin-2)"),
|
3659
|
1199 vector1(build_string("iso8859-2")), 0, 0);
|
428
|
1200 staticpro (&Vcharset_latin_iso8859_3);
|
|
1201 Vcharset_latin_iso8859_3 =
|
|
1202 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
|
|
1203 CHARSET_TYPE_96, 1, 1, 'C',
|
|
1204 CHARSET_LEFT_TO_RIGHT,
|
|
1205 build_string ("Latin-3"),
|
771
|
1206 build_msg_string ("ISO8859-3 (Latin-3)"),
|
|
1207 build_msg_string ("ISO8859-3 (Latin-3)"),
|
3659
|
1208 vector1(build_string("iso8859-3")), 0, 0);
|
428
|
1209 staticpro (&Vcharset_latin_iso8859_4);
|
|
1210 Vcharset_latin_iso8859_4 =
|
|
1211 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
|
|
1212 CHARSET_TYPE_96, 1, 1, 'D',
|
|
1213 CHARSET_LEFT_TO_RIGHT,
|
|
1214 build_string ("Latin-4"),
|
771
|
1215 build_msg_string ("ISO8859-4 (Latin-4)"),
|
|
1216 build_msg_string ("ISO8859-4 (Latin-4)"),
|
3816
|
1217 vector1(build_string("iso8859-4")), 0, 0);
|
428
|
1218 staticpro (&Vcharset_thai_tis620);
|
|
1219 Vcharset_thai_tis620 =
|
|
1220 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
|
|
1221 CHARSET_TYPE_96, 1, 1, 'T',
|
|
1222 CHARSET_LEFT_TO_RIGHT,
|
|
1223 build_string ("TIS620"),
|
771
|
1224 build_msg_string ("TIS620 (Thai)"),
|
|
1225 build_msg_string ("TIS620.2529 (Thai)"),
|
3659
|
1226 vector1(build_string("tis620.2529-1")), 0, 0);
|
428
|
1227 staticpro (&Vcharset_greek_iso8859_7);
|
|
1228 Vcharset_greek_iso8859_7 =
|
|
1229 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
|
|
1230 CHARSET_TYPE_96, 1, 1, 'F',
|
|
1231 CHARSET_LEFT_TO_RIGHT,
|
|
1232 build_string ("ISO8859-7"),
|
771
|
1233 build_msg_string ("ISO8859-7 (Greek)"),
|
|
1234 build_msg_string ("ISO8859-7 (Greek)"),
|
3659
|
1235 vector1(build_string("iso8859-7")), 0, 0);
|
428
|
1236 staticpro (&Vcharset_hebrew_iso8859_8);
|
|
1237 Vcharset_hebrew_iso8859_8 =
|
|
1238 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
|
|
1239 CHARSET_TYPE_96, 1, 1, 'H',
|
|
1240 CHARSET_RIGHT_TO_LEFT,
|
|
1241 build_string ("ISO8859-8"),
|
771
|
1242 build_msg_string ("ISO8859-8 (Hebrew)"),
|
|
1243 build_msg_string ("ISO8859-8 (Hebrew)"),
|
3659
|
1244 vector1(build_string ("iso8859-8")), 0, 0);
|
428
|
1245 staticpro (&Vcharset_katakana_jisx0201);
|
|
1246 Vcharset_katakana_jisx0201 =
|
|
1247 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
|
|
1248 CHARSET_TYPE_94, 1, 1, 'I',
|
|
1249 CHARSET_LEFT_TO_RIGHT,
|
|
1250 build_string ("JISX0201 Kana"),
|
771
|
1251 build_msg_string ("JISX0201.1976 (Japanese Kana)"),
|
|
1252 build_msg_string ("JISX0201.1976 Japanese Kana"),
|
3659
|
1253 vector1(build_string ("jisx0201.1976-0")), 0, 0);
|
428
|
1254 staticpro (&Vcharset_latin_jisx0201);
|
|
1255 Vcharset_latin_jisx0201 =
|
|
1256 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
|
|
1257 CHARSET_TYPE_94, 1, 0, 'J',
|
|
1258 CHARSET_LEFT_TO_RIGHT,
|
|
1259 build_string ("JISX0201 Roman"),
|
771
|
1260 build_msg_string ("JISX0201.1976 (Japanese Roman)"),
|
|
1261 build_msg_string ("JISX0201.1976 Japanese Roman"),
|
3659
|
1262 vector1(build_string ("jisx0201.1976-0")), 0, 0);
|
428
|
1263 staticpro (&Vcharset_cyrillic_iso8859_5);
|
|
1264 Vcharset_cyrillic_iso8859_5 =
|
|
1265 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
|
|
1266 CHARSET_TYPE_96, 1, 1, 'L',
|
|
1267 CHARSET_LEFT_TO_RIGHT,
|
|
1268 build_string ("ISO8859-5"),
|
771
|
1269 build_msg_string ("ISO8859-5 (Cyrillic)"),
|
|
1270 build_msg_string ("ISO8859-5 (Cyrillic)"),
|
3659
|
1271 vector1(build_string ("iso8859-5")), 0, 0);
|
428
|
1272 staticpro (&Vcharset_latin_iso8859_9);
|
|
1273 Vcharset_latin_iso8859_9 =
|
|
1274 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
|
|
1275 CHARSET_TYPE_96, 1, 1, 'M',
|
|
1276 CHARSET_LEFT_TO_RIGHT,
|
|
1277 build_string ("Latin-5"),
|
771
|
1278 build_msg_string ("ISO8859-9 (Latin-5)"),
|
|
1279 build_msg_string ("ISO8859-9 (Latin-5)"),
|
3659
|
1280 vector1(build_string ("iso8859-9")), 0, 0);
|
728
|
1281 staticpro (&Vcharset_latin_iso8859_15);
|
|
1282 Vcharset_latin_iso8859_15 =
|
|
1283 make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2,
|
|
1284 CHARSET_TYPE_96, 1, 1, 'b',
|
|
1285 CHARSET_LEFT_TO_RIGHT,
|
|
1286 build_string ("Latin-9"),
|
771
|
1287 build_msg_string ("ISO8859-15 (Latin-9)"),
|
|
1288 build_msg_string ("ISO8859-15 (Latin-9)"),
|
3659
|
1289 vector1(build_string ("iso8859-15")), 0, 0);
|
428
|
1290 staticpro (&Vcharset_japanese_jisx0208_1978);
|
|
1291 Vcharset_japanese_jisx0208_1978 =
|
|
1292 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
|
|
1293 CHARSET_TYPE_94X94, 2, 0, '@',
|
|
1294 CHARSET_LEFT_TO_RIGHT,
|
|
1295 build_string ("JISX0208.1978"),
|
771
|
1296 build_msg_string ("JISX0208.1978 (Japanese)"),
|
|
1297 build_msg_string
|
428
|
1298 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
|
3659
|
1299 vector2(build_string("jisx0208.1978-0"),
|
|
1300 build_string("jisc6226.1978-0")), 0, 0);
|
428
|
1301 staticpro (&Vcharset_chinese_gb2312);
|
|
1302 Vcharset_chinese_gb2312 =
|
|
1303 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
|
|
1304 CHARSET_TYPE_94X94, 2, 0, 'A',
|
|
1305 CHARSET_LEFT_TO_RIGHT,
|
|
1306 build_string ("GB2312"),
|
771
|
1307 build_msg_string ("GB2312)"),
|
|
1308 build_msg_string ("GB2312 Chinese simplified"),
|
3659
|
1309 vector2(build_string("gb2312.1980-0"),
|
|
1310 build_string("gb2312.80&gb8565.88-0")), 0, 0);
|
428
|
1311 staticpro (&Vcharset_japanese_jisx0208);
|
|
1312 Vcharset_japanese_jisx0208 =
|
|
1313 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
|
|
1314 CHARSET_TYPE_94X94, 2, 0, 'B',
|
|
1315 CHARSET_LEFT_TO_RIGHT,
|
|
1316 build_string ("JISX0208"),
|
771
|
1317 build_msg_string ("JISX0208.1983/1990 (Japanese)"),
|
|
1318 build_msg_string ("JISX0208.1983/1990 Japanese Kanji"),
|
3659
|
1319 vector2(build_string("jisx0208.1983-0"),
|
|
1320 build_string("jisx0208.1990-0")), 0, 0);
|
428
|
1321 staticpro (&Vcharset_korean_ksc5601);
|
|
1322 Vcharset_korean_ksc5601 =
|
|
1323 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
|
|
1324 CHARSET_TYPE_94X94, 2, 0, 'C',
|
|
1325 CHARSET_LEFT_TO_RIGHT,
|
|
1326 build_string ("KSC5601"),
|
771
|
1327 build_msg_string ("KSC5601 (Korean"),
|
|
1328 build_msg_string ("KSC5601 Korean Hangul and Hanja"),
|
3659
|
1329 vector1(build_string("ksc5601.1987-0")), 0, 0);
|
428
|
1330 staticpro (&Vcharset_japanese_jisx0212);
|
|
1331 Vcharset_japanese_jisx0212 =
|
|
1332 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
|
|
1333 CHARSET_TYPE_94X94, 2, 0, 'D',
|
|
1334 CHARSET_LEFT_TO_RIGHT,
|
|
1335 build_string ("JISX0212"),
|
771
|
1336 build_msg_string ("JISX0212 (Japanese)"),
|
|
1337 build_msg_string ("JISX0212 Japanese Supplement"),
|
3659
|
1338 vector1(build_string("jisx0212.1990-0")), 0, 0);
|
428
|
1339
|
3659
|
1340 #define CHINESE_CNS_PLANE(n) "cns11643.1992-" n
|
428
|
1341 staticpro (&Vcharset_chinese_cns11643_1);
|
|
1342 Vcharset_chinese_cns11643_1 =
|
|
1343 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
|
|
1344 CHARSET_TYPE_94X94, 2, 0, 'G',
|
|
1345 CHARSET_LEFT_TO_RIGHT,
|
|
1346 build_string ("CNS11643-1"),
|
771
|
1347 build_msg_string ("CNS11643-1 (Chinese traditional)"),
|
|
1348 build_msg_string
|
428
|
1349 ("CNS 11643 Plane 1 Chinese traditional"),
|
3659
|
1350 vector1(build_string (CHINESE_CNS_PLANE("1"))), 0, 0);
|
428
|
1351 staticpro (&Vcharset_chinese_cns11643_2);
|
|
1352 Vcharset_chinese_cns11643_2 =
|
|
1353 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
|
|
1354 CHARSET_TYPE_94X94, 2, 0, 'H',
|
|
1355 CHARSET_LEFT_TO_RIGHT,
|
|
1356 build_string ("CNS11643-2"),
|
771
|
1357 build_msg_string ("CNS11643-2 (Chinese traditional)"),
|
|
1358 build_msg_string
|
428
|
1359 ("CNS 11643 Plane 2 Chinese traditional"),
|
3659
|
1360 vector1(build_string (CHINESE_CNS_PLANE("2"))), 0, 0);
|
428
|
1361 staticpro (&Vcharset_chinese_big5_1);
|
|
1362 Vcharset_chinese_big5_1 =
|
|
1363 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
|
|
1364 CHARSET_TYPE_94X94, 2, 0, '0',
|
|
1365 CHARSET_LEFT_TO_RIGHT,
|
|
1366 build_string ("Big5"),
|
771
|
1367 build_msg_string ("Big5 (Level-1)"),
|
|
1368 build_msg_string
|
428
|
1369 ("Big5 Level-1 Chinese traditional"),
|
3659
|
1370 vector1(build_string ("big5.eten-0")), 0, 0);
|
428
|
1371 staticpro (&Vcharset_chinese_big5_2);
|
|
1372 Vcharset_chinese_big5_2 =
|
|
1373 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
|
|
1374 CHARSET_TYPE_94X94, 2, 0, '1',
|
|
1375 CHARSET_LEFT_TO_RIGHT,
|
|
1376 build_string ("Big5"),
|
771
|
1377 build_msg_string ("Big5 (Level-2)"),
|
|
1378 build_msg_string
|
428
|
1379 ("Big5 Level-2 Chinese traditional"),
|
3659
|
1380 vector1(build_string ("big5.eten-0")), 0, 0);
|
428
|
1381
|
|
1382
|
|
1383 #ifdef ENABLE_COMPOSITE_CHARS
|
|
1384 /* #### For simplicity, we put composite chars into a 96x96 charset.
|
|
1385 This is going to lead to problems because you can run out of
|
|
1386 room, esp. as we don't yet recycle numbers. */
|
|
1387 staticpro (&Vcharset_composite);
|
|
1388 Vcharset_composite =
|
|
1389 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
|
|
1390 CHARSET_TYPE_96X96, 2, 0, 0,
|
|
1391 CHARSET_LEFT_TO_RIGHT,
|
|
1392 build_string ("Composite"),
|
771
|
1393 build_msg_string ("Composite characters"),
|
|
1394 build_msg_string ("Composite characters"),
|
3659
|
1395 vector1(build_string ("")), 0, 0);
|
771
|
1396 #else
|
|
1397 /* We create a hack so that we have a way of storing ESC 0 and ESC 1
|
|
1398 sequences as "characters", so that they will be output correctly. */
|
|
1399 staticpro (&Vcharset_composite);
|
|
1400 Vcharset_composite =
|
|
1401 make_charset (LEADING_BYTE_COMPOSITE_REPLACEMENT, Qcomposite, 2,
|
|
1402 CHARSET_TYPE_96, 1, 1, '|',
|
|
1403 CHARSET_LEFT_TO_RIGHT,
|
|
1404 build_string ("Composite hack"),
|
|
1405 build_msg_string ("Composite characters hack"),
|
|
1406 build_msg_string ("Composite characters hack"),
|
3659
|
1407 vector1(build_string ("")), 0, 0);
|
428
|
1408 #endif /* ENABLE_COMPOSITE_CHARS */
|
|
1409 }
|