comparison src/mule-charset.c @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents 4d00488244c1
children 026c5bf9c134
comparison
equal deleted inserted replaced
770:336a418893b5 771:943eaba38521
1 /* Functions to handle multilingual characters. 1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc. 2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc. 3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 2001, 2002 Ben Wing.
4 5
5 This file is part of XEmacs. 6 This file is part of XEmacs.
6 7
7 XEmacs is free software; you can redistribute it and/or modify it 8 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the 9 under the terms of the GNU General Public License as published by the
27 #include "lisp.h" 28 #include "lisp.h"
28 29
29 #include "buffer.h" 30 #include "buffer.h"
30 #include "chartab.h" 31 #include "chartab.h"
31 #include "elhash.h" 32 #include "elhash.h"
32 #include "lstream.h"
33 #include "device.h" 33 #include "device.h"
34 #include "faces.h" 34 #include "faces.h"
35 #include "lstream.h"
35 #include "mule-ccl.h" 36 #include "mule-ccl.h"
36 37
37 /* The various pre-defined charsets. */ 38 /* The various pre-defined charsets. */
38 39
39 Lisp_Object Vcharset_ascii; 40 Lisp_Object Vcharset_ascii;
58 Lisp_Object Vcharset_japanese_jisx0212; 59 Lisp_Object Vcharset_japanese_jisx0212;
59 Lisp_Object Vcharset_chinese_cns11643_1; 60 Lisp_Object Vcharset_chinese_cns11643_1;
60 Lisp_Object Vcharset_chinese_cns11643_2; 61 Lisp_Object Vcharset_chinese_cns11643_2;
61 Lisp_Object Vcharset_chinese_big5_1; 62 Lisp_Object Vcharset_chinese_big5_1;
62 Lisp_Object Vcharset_chinese_big5_2; 63 Lisp_Object Vcharset_chinese_big5_2;
63
64 #ifdef ENABLE_COMPOSITE_CHARS
65 Lisp_Object Vcharset_composite; 64 Lisp_Object Vcharset_composite;
66 65
67 /* Hash tables for composite chars. One maps string representing
68 composed chars to their equivalent chars; one goes the
69 other way. */
70 Lisp_Object Vcomposite_char_char2string_hash_table;
71 Lisp_Object Vcomposite_char_string2char_hash_table;
72
73 static int composite_char_row_next;
74 static int composite_char_col_next;
75
76 #endif /* ENABLE_COMPOSITE_CHARS */
77
78 struct charset_lookup *chlook; 66 struct charset_lookup *chlook;
79 67
80 static const struct lrecord_description charset_lookup_description_1[] = { 68 static const struct lrecord_description charset_lookup_description_1[] = {
81 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), 128+4*128*2 }, 69 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), NUM_LEADING_BYTES+4*128*2 },
82 { XD_END } 70 { XD_END }
83 }; 71 };
84 72
85 static const struct struct_description charset_lookup_description = { 73 static const struct struct_description charset_lookup_description = {
86 sizeof (struct charset_lookup), 74 sizeof (struct charset_lookup),
87 charset_lookup_description_1 75 charset_lookup_description_1
88 }; 76 };
89 77
90 /* Table of number of bytes in the string representation of a character
91 indexed by the first byte of that representation.
92
93 rep_bytes_by_first_byte(c) is more efficient than the equivalent
94 canonical computation:
95
96 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
97
98 const Bytecount rep_bytes_by_first_byte[0xA0] =
99 { /* 0x00 - 0x7f are for straight ASCII */
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
108 /* 0x80 - 0x8f are for Dimension-1 official charsets */
109 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
110 /* 0x90 - 0x9d are for Dimension-2 official charsets */
111 /* 0x9e is for Dimension-1 private charsets */
112 /* 0x9f is for Dimension-2 private charsets */
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
114 };
115
116 Lisp_Object Qcharsetp; 78 Lisp_Object Qcharsetp;
117 79
118 /* Qdoc_string, Qdimension, Qchars defined in general.c */ 80 /* Qdoc_string, Qdimension, Qchars defined in general.c */
119 Lisp_Object Qregistry, Qfinal, Qgraphic; 81 Lisp_Object Qregistry, Qfinal, Qgraphic;
120 Lisp_Object Qdirection; 82 Lisp_Object Qdirection;
121 Lisp_Object Qreverse_direction_charset; 83 Lisp_Object Qreverse_direction_charset;
122 Lisp_Object Qleading_byte;
123 Lisp_Object Qshort_name, Qlong_name; 84 Lisp_Object Qshort_name, Qlong_name;
124 85
125 Lisp_Object Qascii, 86 Lisp_Object Qfrom_unicode, Qto_unicode;
126 Qcontrol_1, 87
88 Lisp_Object
127 Qlatin_iso8859_1, 89 Qlatin_iso8859_1,
128 Qlatin_iso8859_2, 90 Qlatin_iso8859_2,
129 Qlatin_iso8859_3, 91 Qlatin_iso8859_3,
130 Qlatin_iso8859_4, 92 Qlatin_iso8859_4,
131 Qthai_tis620, 93 Qthai_tis620,
150 112
151 Lisp_Object Ql2r, Qr2l; 113 Lisp_Object Ql2r, Qr2l;
152 114
153 Lisp_Object Vcharset_hash_table; 115 Lisp_Object Vcharset_hash_table;
154 116
155 /* Composite characters are characters constructed by overstriking two
156 or more regular characters.
157
158 1) The old Mule implementation involves storing composite characters
159 in a buffer as a tag followed by all of the actual characters
160 used to make up the composite character. I think this is a bad
161 idea; it greatly complicates code that wants to handle strings
162 one character at a time because it has to deal with the possibility
163 of great big ungainly characters. It's much more reasonable to
164 simply store an index into a table of composite characters.
165
166 2) The current implementation only allows for 16,384 separate
167 composite characters over the lifetime of the XEmacs process.
168 This could become a potential problem if the user
169 edited lots of different files that use composite characters.
170 Due to FSF bogosity, increasing the number of allowable
171 composite characters under Mule would decrease the number
172 of possible faces that can exist. Mule already has shrunk
173 this to 2048, and further shrinkage would become uncomfortable.
174 No such problems exist in XEmacs.
175
176 Composite characters could be represented as 0x80 C1 C2 C3,
177 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
178 for slightly under 2^20 (one million) composite characters
179 over the XEmacs process lifetime, and you only need to
180 increase the size of a Mule character from 19 to 21 bits.
181 Or you could use 0x80 C1 C2 C3 C4, allowing for about
182 85 million (slightly over 2^26) composite characters. */
183
184
185 /************************************************************************/
186 /* Basic Emchar functions */
187 /************************************************************************/
188
189 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
190 string in STR. Returns the number of bytes stored.
191 Do not call this directly. Use the macro set_charptr_emchar() instead.
192 */
193
194 Bytecount
195 non_ascii_set_charptr_emchar (Intbyte *str, Emchar c)
196 {
197 Intbyte *p;
198 Intbyte lb;
199 int c1, c2;
200 Lisp_Object charset;
201
202 p = str;
203 BREAKUP_CHAR (c, charset, c1, c2);
204 lb = CHAR_LEADING_BYTE (c);
205 if (LEADING_BYTE_PRIVATE_P (lb))
206 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
207 *p++ = lb;
208 if (EQ (charset, Vcharset_control_1))
209 c1 += 0x20;
210 *p++ = c1 | 0x80;
211 if (c2)
212 *p++ = c2 | 0x80;
213
214 return (p - str);
215 }
216
217 /* Return the first character from a Mule-encoded string in STR,
218 assuming it's non-ASCII. Do not call this directly.
219 Use the macro charptr_emchar() instead. */
220
221 Emchar
222 non_ascii_charptr_emchar (const Intbyte *str)
223 {
224 Intbyte i0 = *str, i1, i2 = 0;
225 Lisp_Object charset;
226
227 if (i0 == LEADING_BYTE_CONTROL_1)
228 return (Emchar) (*++str - 0x20);
229
230 if (LEADING_BYTE_PREFIX_P (i0))
231 i0 = *++str;
232
233 i1 = *++str & 0x7F;
234
235 charset = CHARSET_BY_LEADING_BYTE (i0);
236 if (XCHARSET_DIMENSION (charset) == 2)
237 i2 = *++str & 0x7F;
238
239 return MAKE_CHAR (charset, i1, i2);
240 }
241
242 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
243 Do not call this directly. Use the macro valid_char_p() instead. */
244
245 int
246 non_ascii_valid_char_p (Emchar ch)
247 {
248 int f1, f2, f3;
249
250 /* Must have only lowest 19 bits set */
251 if (ch & ~0x7FFFF)
252 return 0;
253
254 f1 = CHAR_FIELD1 (ch);
255 f2 = CHAR_FIELD2 (ch);
256 f3 = CHAR_FIELD3 (ch);
257
258 if (f1 == 0)
259 {
260 Lisp_Object charset;
261
262 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
263 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
264 f2 > MAX_CHAR_FIELD2_PRIVATE)
265 return 0;
266 if (f3 < 0x20)
267 return 0;
268
269 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
270 f2 <= MAX_CHAR_FIELD2_PRIVATE))
271 return 1;
272
273 /*
274 NOTE: This takes advantage of the fact that
275 FIELD2_TO_OFFICIAL_LEADING_BYTE and
276 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
277 */
278 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
279 if (EQ (charset, Qnil))
280 return 0;
281 return (XCHARSET_CHARS (charset) == 96);
282 }
283 else
284 {
285 Lisp_Object charset;
286
287 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
288 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
289 f1 > MAX_CHAR_FIELD1_PRIVATE)
290 return 0;
291 if (f2 < 0x20 || f3 < 0x20)
292 return 0;
293
294 #ifdef ENABLE_COMPOSITE_CHARS
295 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
296 {
297 if (UNBOUNDP (Fgethash (make_int (ch),
298 Vcomposite_char_char2string_hash_table,
299 Qunbound)))
300 return 0;
301 return 1;
302 }
303 #endif /* ENABLE_COMPOSITE_CHARS */
304
305 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
306 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
307 return 1;
308
309 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
310 charset =
311 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
312 else
313 charset =
314 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
315
316 if (EQ (charset, Qnil))
317 return 0;
318 return (XCHARSET_CHARS (charset) == 96);
319 }
320 }
321
322
323 /************************************************************************/
324 /* Basic string functions */
325 /************************************************************************/
326
327 /* Copy the character pointed to by SRC into DST. Do not call this
328 directly. Use the macro charptr_copy_char() instead.
329 Return the number of bytes copied. */
330
331 Bytecount
332 non_ascii_charptr_copy_char (const Intbyte *src, Intbyte *dst)
333 {
334 Bytecount bytes = REP_BYTES_BY_FIRST_BYTE (*src);
335 Bytecount i;
336 for (i = bytes; i; i--, dst++, src++)
337 *dst = *src;
338 return bytes;
339 }
340
341
342 /************************************************************************/
343 /* streams of Emchars */
344 /************************************************************************/
345
346 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
347 The functions below are not meant to be called directly; use
348 the macros in insdel.h. */
349
350 Emchar
351 Lstream_get_emchar_1 (Lstream *stream, int ch)
352 {
353 Intbyte str[MAX_EMCHAR_LEN];
354 Intbyte *strptr = str;
355 Bytecount bytes;
356
357 str[0] = (Intbyte) ch;
358
359 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
360 {
361 int c = Lstream_getc (stream);
362 charbpos_checking_assert (c >= 0);
363 *++strptr = (Intbyte) c;
364 }
365 return charptr_emchar (str);
366 }
367
368 int
369 Lstream_fput_emchar (Lstream *stream, Emchar ch)
370 {
371 Intbyte str[MAX_EMCHAR_LEN];
372 Bytecount len = set_charptr_emchar (str, ch);
373 return Lstream_write (stream, str, len);
374 }
375
376 void
377 Lstream_funget_emchar (Lstream *stream, Emchar ch)
378 {
379 Intbyte str[MAX_EMCHAR_LEN];
380 Bytecount len = set_charptr_emchar (str, ch);
381 Lstream_unread (stream, str, len);
382 }
383
384 117
385 /************************************************************************/ 118 /************************************************************************/
386 /* charset object */ 119 /* charset object */
387 /************************************************************************/ 120 /************************************************************************/
388 121
401 134
402 static void 135 static void
403 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) 136 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
404 { 137 {
405 Lisp_Charset *cs = XCHARSET (obj); 138 Lisp_Charset *cs = XCHARSET (obj);
406 char buf[200];
407 139
408 if (print_readably) 140 if (print_readably)
409 printing_unreadable_object ("#<charset %s 0x%x>", 141 printing_unreadable_object ("#<charset %s 0x%x>",
410 string_data (XSYMBOL (CHARSET_NAME (cs))-> 142 string_data (XSYMBOL (CHARSET_NAME (cs))->
411 name), 143 name),
412 cs->header.uid); 144 cs->header.uid);
413 145
414 write_c_string ("#<charset ", printcharfun); 146 write_fmt_string_lisp (printcharfun, "#<charset %s %S %S %S", 4,
415 print_internal (CHARSET_NAME (cs), printcharfun, 0); 147 CHARSET_NAME (cs), CHARSET_SHORT_NAME (cs),
416 write_c_string (" ", printcharfun); 148 CHARSET_LONG_NAME (cs), CHARSET_DOC_STRING (cs));
417 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1); 149 write_fmt_string (printcharfun, " %s %s cols=%d g%d final='%c' reg=",
418 write_c_string (" ", printcharfun); 150 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
419 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1); 151 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
420 write_c_string (" ", printcharfun); 152 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
421 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1); 153 "96x96",
422 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=", 154 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" :
423 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" : 155 "r2l",
424 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" : 156 CHARSET_COLUMNS (cs),
425 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" : 157 CHARSET_GRAPHIC (cs),
426 "96x96", 158 CHARSET_FINAL (cs));
427 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
428 CHARSET_COLUMNS (cs),
429 CHARSET_GRAPHIC (cs),
430 CHARSET_FINAL (cs));
431 write_c_string (buf, printcharfun);
432 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0); 159 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
433 sprintf (buf, " 0x%x>", cs->header.uid); 160 write_fmt_string (printcharfun, " 0x%x>", cs->header.uid);
434 write_c_string (buf, printcharfun); 161 }
162
163 static void
164 finalize_charset (void *header, int for_disksave)
165 {
166 /* See mule-charset.h, definition of Lisp_Charset. */
167 Lisp_Object charset = wrap_charset ((Lisp_Charset *) header);
168 if (for_disksave && XCHARSET_TO_UNICODE_TABLE (charset))
169 {
170 /* Control-1, ASCII, Composite don't have tables */
171 free_charset_unicode_tables (charset);
172 XCHARSET_TO_UNICODE_TABLE (charset) = 0;
173 XCHARSET_FROM_UNICODE_TABLE (charset) = 0;
174 }
435 } 175 }
436 176
437 static const struct lrecord_description charset_description[] = { 177 static const struct lrecord_description charset_description[] = {
438 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) }, 178 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
439 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) }, 179 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
440 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) }, 180 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
441 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) }, 181 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
442 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) }, 182 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
443 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) }, 183 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
444 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) }, 184 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
185 #if 0
186 /* #### XD_UNION not yet implemented! pdump version of XEmacs will
187 not work! */
188 { XD_UNION, offsetof (Lisp_Charset, to_unicode_table),
189 XD_INDIRECT (offsetof (Lisp_Charset, dimension), 0),
190 to_unicode_description },
191 { XD_UNION, offsetof (Lisp_Charset, from_unicode_table),
192 XD_INDIRECT (offsetof (Lisp_Charset, from_unicode_levels), 0),
193 from_unicode_description },
194 #endif
445 { XD_END } 195 { XD_END }
446 }; 196 };
447 197
448 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset, 198 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
449 mark_charset, print_charset, 0, 0, 0, charset_description, 199 mark_charset, print_charset, finalize_charset,
450 Lisp_Charset); 200 0, 0, charset_description, Lisp_Charset);
451 201
452 /* Make a new charset. */ 202 /* Make a new charset. */
453 /* #### SJT Should generic properties be allowed? */ 203 /* #### SJT Should generic properties be allowed? */
454 static Lisp_Object 204 static Lisp_Object
455 make_charset (int id, Lisp_Object name, unsigned char rep_bytes, 205 make_charset (int id, Lisp_Object name, int rep_bytes,
456 unsigned char type, unsigned char columns, unsigned char graphic, 206 int type, int columns, int graphic,
457 Intbyte final, unsigned char direction, Lisp_Object short_name, 207 Intbyte final, int direction, Lisp_Object short_name,
458 Lisp_Object long_name, Lisp_Object doc, 208 Lisp_Object long_name, Lisp_Object doc,
459 Lisp_Object reg) 209 Lisp_Object reg, int overwrite)
460 { 210 {
461 Lisp_Object obj; 211 Lisp_Object obj;
462 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset); 212 Lisp_Charset *cs;
463 213
464 zero_lcrecord (cs); 214 if (!overwrite)
465 215 {
466 XSETCHARSET (obj, cs); 216 cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
217 zero_lcrecord (cs);
218 XSETCHARSET (obj, cs);
219
220 if (final)
221 {
222 /* some charsets do not have final characters. This includes
223 ASCII, Control-1, Composite, and the two faux private
224 charsets. */
225 assert (NILP (chlook->
226 charset_by_attributes[type][final][direction]));
227 chlook->charset_by_attributes[type][final][direction] = obj;
228 }
229
230 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
231 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
232 }
233 else
234 {
235 Lisp_Object ret;
236 /* Actually overwrite the properties of the existing charset.
237 We do this because until now charsets could never be "deleted",
238 so parts of the code don't bother to GC charsets. */
239 obj = chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE];
240 cs = XCHARSET (obj);
241 assert (EQ (chlook->charset_by_attributes[type][final][direction],
242 obj));
243
244 ret = Fremhash (XCHARSET_NAME (obj), Vcharset_hash_table);
245 assert (!NILP (ret));
246 }
467 247
468 CHARSET_ID (cs) = id; 248 CHARSET_ID (cs) = id;
469 CHARSET_NAME (cs) = name; 249 CHARSET_NAME (cs) = name;
470 CHARSET_SHORT_NAME (cs) = short_name; 250 CHARSET_SHORT_NAME (cs) = short_name;
471 CHARSET_LONG_NAME (cs) = long_name; 251 CHARSET_LONG_NAME (cs) = long_name;
478 CHARSET_DOC_STRING (cs) = doc; 258 CHARSET_DOC_STRING (cs) = doc;
479 CHARSET_REGISTRY (cs) = reg; 259 CHARSET_REGISTRY (cs) = reg;
480 CHARSET_CCL_PROGRAM (cs) = Qnil; 260 CHARSET_CCL_PROGRAM (cs) = Qnil;
481 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; 261 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
482 262
483 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || 263 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
484 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2; 264 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
485 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || 265 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
486 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96; 266 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
487 267
488 if (final) 268 if (id == LEADING_BYTE_ASCII || id == LEADING_BYTE_CONTROL_1
269 #ifdef ENABLE_COMPOSITE_CHARS
270 || id == LEADING_BYTE_COMPOSITE
271 #endif
272 )
273 assert (!overwrite);
274 else
489 { 275 {
490 /* some charsets do not have final characters. This includes 276 if (overwrite)
491 ASCII, Control-1, Composite, and the two faux private 277 free_charset_unicode_tables (obj);
492 charsets. */ 278 init_charset_unicode_tables (obj);
493 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
494 chlook->charset_by_attributes[type][final][direction] = obj;
495 } 279 }
496
497 assert (NILP (chlook->charset_by_leading_byte[id - 128]));
498 chlook->charset_by_leading_byte[id - 128] = obj;
499 280
500 /* Some charsets are "faux" and don't have names or really exist at 281 /* Some charsets are "faux" and don't have names or really exist at
501 all except in the leading-byte table. */ 282 all except in the leading-byte table. */
502 if (!NILP (name)) 283 if (!NILP (name))
503 Fputhash (name, obj, Vcharset_hash_table); 284 {
285 assert (NILP (Fgethash (name, Vcharset_hash_table, Qnil)));
286 Fputhash (name, obj, Vcharset_hash_table);
287 }
288
289 recalculate_unicode_precedence ();
504 return obj; 290 return obj;
505 } 291 }
506 292
507 static int 293 static int
508 get_unallocated_leading_byte (int dimension) 294 get_unallocated_leading_byte (int dimension)
509 { 295 {
510 int lb; 296 int lb;
511 297
512 if (dimension == 1) 298 if (dimension == 1)
513 { 299 {
514 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1) 300 if (chlook->next_allocated_1_byte_leading_byte >
301 MAX_LEADING_BYTE_PRIVATE_1)
515 lb = 0; 302 lb = 0;
516 else 303 else
517 lb = chlook->next_allocated_1_byte_leading_byte++; 304 lb = chlook->next_allocated_1_byte_leading_byte++;
518 } 305 }
519 else 306 else
520 { 307 {
521 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2) 308 if (chlook->next_allocated_2_byte_leading_byte >
309 MAX_LEADING_BYTE_PRIVATE_2)
522 lb = 0; 310 lb = 0;
523 else 311 else
524 lb = chlook->next_allocated_2_byte_leading_byte++; 312 lb = chlook->next_allocated_2_byte_leading_byte++;
525 } 313 }
526 314
527 if (!lb) 315 if (!lb)
528 invalid_operation 316 invalid_operation
529 ("No more character sets free for this dimension", 317 ("No more character sets free for this dimension", make_int (dimension));
530 make_int (dimension));
531 318
532 return lb; 319 return lb;
533 } 320 }
534 321
535 322
669 bit cleared and set depending upon whether the value 456 bit cleared and set depending upon whether the value
670 of the 'graphic property is 0 or 1. 457 of the 'graphic property is 0 or 1.
671 */ 458 */
672 (name, doc_string, props)) 459 (name, doc_string, props))
673 { 460 {
674 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1; 461 int id, dimension = 1, chars = 94, graphic = 0, columns = -1;
462 Intbyte final = 0;
675 int direction = CHARSET_LEFT_TO_RIGHT; 463 int direction = CHARSET_LEFT_TO_RIGHT;
676 int type; 464 int type;
677 Lisp_Object registry = Qnil; 465 Lisp_Object registry = Qnil;
678 Lisp_Object charset; 466 Lisp_Object charset = Qnil;
679 Lisp_Object ccl_program = Qnil; 467 Lisp_Object ccl_program = Qnil;
680 Lisp_Object short_name = Qnil, long_name = Qnil; 468 Lisp_Object short_name = Qnil, long_name = Qnil;
681 469 Lisp_Object existing_charset;
682 CHECK_SYMBOL (name); 470 int temporary = UNBOUNDP (name);
471
472 /* NOTE: name == Qunbound is a directive from the iso2022 code to
473 create a temporary charset for an unknown final. We allow the final
474 to be overwritten with a real charset later on. */
475
683 if (!NILP (doc_string)) 476 if (!NILP (doc_string))
684 CHECK_STRING (doc_string); 477 CHECK_STRING (doc_string);
685 478 if (!UNBOUNDP (name))
686 charset = Ffind_charset (name); 479 {
687 if (!NILP (charset)) 480 CHECK_SYMBOL (name);
688 invalid_operation ("Cannot redefine existing charset", name); 481
482 charset = Ffind_charset (name);
483 if (!NILP (charset))
484 invalid_operation ("Cannot redefine existing charset", name);
485 }
689 486
690 { 487 {
691 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props) 488 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
692 { 489 {
693 if (EQ (keyword, Qshort_name)) 490 if (EQ (keyword, Qshort_name))
764 561
765 if (setup_ccl_program (&test_ccl, value) < 0) 562 if (setup_ccl_program (&test_ccl, value) < 0)
766 invalid_argument ("Invalid value for 'ccl-program", value); 563 invalid_argument ("Invalid value for 'ccl-program", value);
767 ccl_program = value; 564 ccl_program = value;
768 } 565 }
769
770 else 566 else
771 invalid_constant ("Unrecognized property", keyword); 567 invalid_constant ("Unrecognized property", keyword);
772 } 568 }
773 } 569 }
774 570
782 if (dimension == 1) 578 if (dimension == 1)
783 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96; 579 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
784 else 580 else
785 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96; 581 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
786 582
787 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) || 583 existing_charset = CHARSET_BY_ATTRIBUTES (type, final,
788 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT))) 584 CHARSET_LEFT_TO_RIGHT);
585 if (NILP (existing_charset))
586 existing_charset = CHARSET_BY_ATTRIBUTES (type, final,
587 CHARSET_RIGHT_TO_LEFT);
588
589 if (!NILP (existing_charset) && !XCHARSET (existing_charset)->temporary)
789 invalid_argument 590 invalid_argument
790 ("Character set already defined for this DIMENSION/CHARS/FINAL combo", 591 ("Character set already defined for this DIMENSION/CHARS/FINAL combo",
791 Qunbound); 592 existing_charset);
792 593
793 id = get_unallocated_leading_byte (dimension); 594 if (!NILP (existing_charset))
794 595 /* Reuse same leading byte */
596 id = XCHARSET_ID (existing_charset);
597 else
598 id = get_unallocated_leading_byte (dimension);
599
600 if (temporary)
601 {
602 Intbyte tempname[80];
603
604 qxesprintf (tempname, "___temporary___%d__", id);
605 name = intern_int (tempname);
606 }
795 if (NILP (doc_string)) 607 if (NILP (doc_string))
796 doc_string = build_string (""); 608 doc_string = build_string ("");
797
798 if (NILP (registry)) 609 if (NILP (registry))
799 registry = build_string (""); 610 registry = build_string ("");
800
801 if (NILP (short_name)) 611 if (NILP (short_name))
802 XSETSTRING (short_name, XSYMBOL (name)->name); 612 XSETSTRING (short_name, XSYMBOL (name)->name);
803
804 if (NILP (long_name)) 613 if (NILP (long_name))
805 long_name = doc_string; 614 long_name = doc_string;
806
807 if (columns == -1) 615 if (columns == -1)
808 columns = dimension; 616 columns = dimension;
617
809 charset = make_charset (id, name, dimension + 2, type, columns, graphic, 618 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
810 final, direction, short_name, long_name, doc_string, registry); 619 final, direction, short_name, long_name,
620 doc_string, registry, !NILP (existing_charset));
621
622 XCHARSET (charset)->temporary = temporary;
811 if (!NILP (ccl_program)) 623 if (!NILP (ccl_program))
812 XCHARSET_CCL_PROGRAM (charset) = ccl_program; 624 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
625
813 return charset; 626 return charset;
814 } 627 }
815 628
816 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset, 629 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
817 2, 2, 0, /* 630 2, 2, 0, /*
819 NEW-NAME is the name of the new charset. Return the new charset. 632 NEW-NAME is the name of the new charset. Return the new charset.
820 */ 633 */
821 (charset, new_name)) 634 (charset, new_name))
822 { 635 {
823 Lisp_Object new_charset = Qnil; 636 Lisp_Object new_charset = Qnil;
824 int id, dimension, columns, graphic, final; 637 int id, dimension, columns, graphic;
638 Intbyte final;
825 int direction, type; 639 int direction, type;
826 Lisp_Object registry, doc_string, short_name, long_name; 640 Lisp_Object registry, doc_string, short_name, long_name;
827 Lisp_Charset *cs; 641 Lisp_Charset *cs;
828 642
829 charset = Fget_charset (charset); 643 charset = Fget_charset (charset);
852 long_name = CHARSET_LONG_NAME (cs); 666 long_name = CHARSET_LONG_NAME (cs);
853 registry = CHARSET_REGISTRY (cs); 667 registry = CHARSET_REGISTRY (cs);
854 668
855 new_charset = make_charset (id, new_name, dimension + 2, type, columns, 669 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
856 graphic, final, direction, short_name, long_name, 670 graphic, final, direction, short_name, long_name,
857 doc_string, registry); 671 doc_string, registry, 0);
858 672
859 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset; 673 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
860 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset; 674 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
861 675
862 return new_charset; 676 return new_charset;
1056 return Qnil; 870 return Qnil;
1057 } 871 }
1058 872
1059 873
1060 /************************************************************************/ 874 /************************************************************************/
1061 /* Lisp primitives for working with characters */ 875 /* memory usage */
1062 /************************************************************************/ 876 /************************************************************************/
1063 877
1064 DEFUN ("make-char", Fmake_char, 2, 3, 0, /* 878 #ifdef MEMORY_USAGE_STATS
1065 Make a character from CHARSET and octets ARG1 and ARG2. 879
1066 ARG2 is required only for characters from two-dimensional charsets. 880 struct charset_stats
1067 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2 881 {
1068 character s with caron. 882 int from_unicode;
1069 */ 883 int to_unicode;
1070 (charset, arg1, arg2)) 884 int other;
1071 { 885 };
1072 Lisp_Charset *cs; 886
1073 int a1, a2; 887 static void
1074 int lowlim, highlim; 888 compute_charset_usage (Lisp_Object charset, struct charset_stats *stats,
889 struct overhead_stats *ovstats)
890 {
891 struct Lisp_Charset *c = XCHARSET (charset);
892 xzero (*stats);
893 stats->other += malloced_storage_size (c, sizeof (*c), ovstats);
894 stats->from_unicode += compute_from_unicode_table_size (charset, ovstats);
895 stats->to_unicode += compute_to_unicode_table_size (charset, ovstats);
896 }
897
898 DEFUN ("charset-memory-usage", Fcharset_memory_usage, 1, 1, 0, /*
899 Return stats about the memory usage of charset CHARSET.
900 The values returned are in the form of an alist of usage types and
901 byte counts. The byte counts attempt to encompass all the memory used
902 by the charset (separate from the memory logically associated with a
903 charset or frame), including internal structures and any malloc()
904 overhead associated with them. In practice, the byte counts are
905 underestimated for various reasons, e.g. because certain memory usage
906 is very hard to determine \(e.g. the amount of memory used inside the
907 Xt library or inside the X server).
908
909 Multiple slices of the total memory usage may be returned, separated
910 by a nil. Each slice represents a particular view of the memory, a
911 particular way of partitioning it into groups. Within a slice, there
912 is no overlap between the groups of memory, and each slice collectively
913 represents all the memory concerned.
914 */
915 (charset))
916 {
917 struct charset_stats stats;
918 struct overhead_stats ovstats;
919 Lisp_Object val = Qnil;
1075 920
1076 charset = Fget_charset (charset); 921 charset = Fget_charset (charset);
1077 cs = XCHARSET (charset); 922 xzero (ovstats);
1078 923 compute_charset_usage (charset, &stats, &ovstats);
1079 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127; 924
1080 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31; 925 val = acons (Qfrom_unicode, make_int (stats.from_unicode), val);
1081 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126; 926 val = acons (Qto_unicode, make_int (stats.to_unicode), val);
1082 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127; 927 val = Fcons (Qnil, val);
1083 928 val = acons (Qactually_requested, make_int (ovstats.was_requested), val);
1084 CHECK_INT (arg1); 929 val = acons (Qmalloc_overhead, make_int (ovstats.malloc_overhead), val);
1085 /* It is useful (and safe, according to Olivier Galibert) to strip 930 val = acons (Qgap_overhead, make_int (ovstats.gap_overhead), val);
1086 the 8th bit off ARG1 and ARG2 because it allows programmers to 931 val = acons (Qdynarr_overhead, make_int (ovstats.dynarr_overhead), val);
1087 write (make-char 'latin-iso8859-2 CODE) where code is the actual 932
1088 Latin 2 code of the character. */ 933 return Fnreverse (val);
1089 a1 = XINT (arg1) & 0x7f; 934 }
1090 if (a1 < lowlim || a1 > highlim) 935
1091 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim)); 936 #endif /* MEMORY_USAGE_STATS */
1092
1093 if (CHARSET_DIMENSION (cs) == 1)
1094 {
1095 if (!NILP (arg2))
1096 invalid_argument
1097 ("Charset is of dimension one; second octet must be nil", arg2);
1098 return make_char (MAKE_CHAR (charset, a1, 0));
1099 }
1100
1101 CHECK_INT (arg2);
1102 a2 = XINT (arg2) & 0x7f;
1103 if (a2 < lowlim || a2 > highlim)
1104 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1105
1106 return make_char (MAKE_CHAR (charset, a1, a2));
1107 }
1108
1109 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1110 Return the character set of CHARACTER.
1111 */
1112 (character))
1113 {
1114 CHECK_CHAR_COERCE_INT (character);
1115
1116 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1117 (CHAR_LEADING_BYTE (XCHAR (character))));
1118 }
1119
1120 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1121 Return the octet numbered N (should be 0 or 1) of CHARACTER.
1122 N defaults to 0 if omitted.
1123 */
1124 (character, n))
1125 {
1126 Lisp_Object charset;
1127 int octet0, octet1;
1128
1129 CHECK_CHAR_COERCE_INT (character);
1130
1131 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
1132
1133 if (NILP (n) || EQ (n, Qzero))
1134 return make_int (octet0);
1135 else if (EQ (n, make_int (1)))
1136 return make_int (octet1);
1137 else
1138 invalid_constant ("Octet number must be 0 or 1", n);
1139 }
1140
1141 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1142 Return list of charset and one or two position-codes of CHARACTER.
1143 */
1144 (character))
1145 {
1146 /* This function can GC */
1147 struct gcpro gcpro1, gcpro2;
1148 Lisp_Object charset = Qnil;
1149 Lisp_Object rc = Qnil;
1150 int c1, c2;
1151
1152 GCPRO2 (charset, rc);
1153 CHECK_CHAR_COERCE_INT (character);
1154
1155 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1156
1157 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1158 {
1159 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1160 }
1161 else
1162 {
1163 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1164 }
1165 UNGCPRO;
1166
1167 return rc;
1168 }
1169
1170
1171 #ifdef ENABLE_COMPOSITE_CHARS
1172 /************************************************************************/
1173 /* composite character functions */
1174 /************************************************************************/
1175
1176 Emchar
1177 lookup_composite_char (Intbyte *str, int len)
1178 {
1179 Lisp_Object lispstr = make_string (str, len);
1180 Lisp_Object ch = Fgethash (lispstr,
1181 Vcomposite_char_string2char_hash_table,
1182 Qunbound);
1183 Emchar emch;
1184
1185 if (UNBOUNDP (ch))
1186 {
1187 if (composite_char_row_next >= 128)
1188 invalid_operation ("No more composite chars available", lispstr);
1189 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1190 composite_char_col_next);
1191 Fputhash (make_char (emch), lispstr,
1192 Vcomposite_char_char2string_hash_table);
1193 Fputhash (lispstr, make_char (emch),
1194 Vcomposite_char_string2char_hash_table);
1195 composite_char_col_next++;
1196 if (composite_char_col_next >= 128)
1197 {
1198 composite_char_col_next = 32;
1199 composite_char_row_next++;
1200 }
1201 }
1202 else
1203 emch = XCHAR (ch);
1204 return emch;
1205 }
1206
1207 Lisp_Object
1208 composite_char_string (Emchar ch)
1209 {
1210 Lisp_Object str = Fgethash (make_char (ch),
1211 Vcomposite_char_char2string_hash_table,
1212 Qunbound);
1213 assert (!UNBOUNDP (str));
1214 return str;
1215 }
1216
1217 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1218 Convert a string into a single composite character.
1219 The character is the result of overstriking all the characters in
1220 the string.
1221 */
1222 (string))
1223 {
1224 CHECK_STRING (string);
1225 return make_char (lookup_composite_char (XSTRING_DATA (string),
1226 XSTRING_LENGTH (string)));
1227 }
1228
1229 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1230 Return a string of the characters comprising a composite character.
1231 */
1232 (ch))
1233 {
1234 Emchar emch;
1235
1236 CHECK_CHAR (ch);
1237 emch = XCHAR (ch);
1238 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1239 invalid_argument ("Must be composite char", ch);
1240 return composite_char_string (emch);
1241 }
1242 #endif /* ENABLE_COMPOSITE_CHARS */
1243 937
1244 938
1245 /************************************************************************/ 939 /************************************************************************/
1246 /* initialization */ 940 /* initialization */
1247 /************************************************************************/ 941 /************************************************************************/
1267 DEFSUBR (Fcharset_property); 961 DEFSUBR (Fcharset_property);
1268 DEFSUBR (Fcharset_id); 962 DEFSUBR (Fcharset_id);
1269 DEFSUBR (Fset_charset_ccl_program); 963 DEFSUBR (Fset_charset_ccl_program);
1270 DEFSUBR (Fset_charset_registry); 964 DEFSUBR (Fset_charset_registry);
1271 965
1272 DEFSUBR (Fmake_char); 966 #ifdef MEMORY_USAGE_STATS
1273 DEFSUBR (Fchar_charset); 967 DEFSUBR (Fcharset_memory_usage);
1274 DEFSUBR (Fchar_octet);
1275 DEFSUBR (Fsplit_char);
1276
1277 #ifdef ENABLE_COMPOSITE_CHARS
1278 DEFSUBR (Fmake_composite_char);
1279 DEFSUBR (Fcomposite_char_string);
1280 #endif 968 #endif
1281 969
1282 DEFSYMBOL (Qcharsetp); 970 DEFSYMBOL (Qcharsetp);
1283 DEFSYMBOL (Qregistry); 971 DEFSYMBOL (Qregistry);
1284 DEFSYMBOL (Qfinal); 972 DEFSYMBOL (Qfinal);
1286 DEFSYMBOL (Qdirection); 974 DEFSYMBOL (Qdirection);
1287 DEFSYMBOL (Qreverse_direction_charset); 975 DEFSYMBOL (Qreverse_direction_charset);
1288 DEFSYMBOL (Qshort_name); 976 DEFSYMBOL (Qshort_name);
1289 DEFSYMBOL (Qlong_name); 977 DEFSYMBOL (Qlong_name);
1290 978
979 DEFSYMBOL (Qfrom_unicode);
980 DEFSYMBOL (Qto_unicode);
981
1291 DEFSYMBOL (Ql2r); 982 DEFSYMBOL (Ql2r);
1292 DEFSYMBOL (Qr2l); 983 DEFSYMBOL (Qr2l);
1293 984
1294 /* Charsets, compatible with FSF 20.3 985 /* Charsets, compatible with FSF 20.3
1295 Naming convention is Script-Charset[-Edition] */ 986 Naming convention is Script-Charset[-Edition] */
1296 DEFSYMBOL (Qascii);
1297 DEFSYMBOL (Qcontrol_1);
1298 DEFSYMBOL (Qlatin_iso8859_1); 987 DEFSYMBOL (Qlatin_iso8859_1);
1299 DEFSYMBOL (Qlatin_iso8859_2); 988 DEFSYMBOL (Qlatin_iso8859_2);
1300 DEFSYMBOL (Qlatin_iso8859_3); 989 DEFSYMBOL (Qlatin_iso8859_3);
1301 DEFSYMBOL (Qlatin_iso8859_4); 990 DEFSYMBOL (Qlatin_iso8859_4);
1302 DEFSYMBOL (Qthai_tis620); 991 DEFSYMBOL (Qthai_tis620);
1319 DEFSYMBOL (Qchinese_big5_2); 1008 DEFSYMBOL (Qchinese_big5_2);
1320 1009
1321 DEFSYMBOL (Qcomposite); 1010 DEFSYMBOL (Qcomposite);
1322 } 1011 }
1323 1012
1013 static int
1014 init_charset_unicode_tables_mapper (Lisp_Object key, Lisp_Object value,
1015 void *closure)
1016 {
1017 init_charset_unicode_tables (value);
1018 return 0;
1019 }
1020
1021 void
1022 init_mule_charset (void)
1023 {
1024 /* See mule-charset.h, definition of Lisp_Charset. */
1025 if (initialized)
1026 elisp_maphash (init_charset_unicode_tables_mapper, Vcharset_hash_table,
1027 0);
1028 }
1029
1324 void 1030 void
1325 vars_of_mule_charset (void) 1031 vars_of_mule_charset (void)
1326 { 1032 {
1327 int i, j, k; 1033 int i, j, k;
1328 1034
1339 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++) 1045 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
1340 chlook->charset_by_attributes[i][j][k] = Qnil; 1046 chlook->charset_by_attributes[i][j][k] = Qnil;
1341 1047
1342 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; 1048 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1343 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; 1049 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1344 } 1050
1345
1346 void
1347 complex_vars_of_mule_charset (void)
1348 {
1349 staticpro (&Vcharset_hash_table); 1051 staticpro (&Vcharset_hash_table);
1350 Vcharset_hash_table = 1052 Vcharset_hash_table =
1351 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); 1053 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1352 1054 }
1055
1056 void
1057 complex_vars_of_mule_charset (void)
1058 {
1353 /* Predefined character sets. We store them into variables for 1059 /* Predefined character sets. We store them into variables for
1354 ease of access. */ 1060 ease of access. */
1355 1061
1356 staticpro (&Vcharset_ascii); 1062 staticpro (&Vcharset_ascii);
1357 Vcharset_ascii = 1063 Vcharset_ascii =
1358 make_charset (LEADING_BYTE_ASCII, Qascii, 1, 1064 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1359 CHARSET_TYPE_94, 1, 0, 'B', 1065 CHARSET_TYPE_94, 1, 0, 'B',
1360 CHARSET_LEFT_TO_RIGHT, 1066 CHARSET_LEFT_TO_RIGHT,
1361 build_string ("ASCII"), 1067 build_string ("ASCII"),
1362 build_string ("ASCII)"), 1068 build_msg_string ("ASCII"),
1363 build_string ("ASCII (ISO646 IRV)"), 1069 build_msg_string ("ASCII (ISO646 IRV)"),
1364 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)")); 1070 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"), 0);
1365 staticpro (&Vcharset_control_1); 1071 staticpro (&Vcharset_control_1);
1366 Vcharset_control_1 = 1072 Vcharset_control_1 =
1367 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2, 1073 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1368 CHARSET_TYPE_94, 1, 1, 0, 1074 CHARSET_TYPE_94, 1, 1, 0,
1369 CHARSET_LEFT_TO_RIGHT, 1075 CHARSET_LEFT_TO_RIGHT,
1370 build_string ("C1"), 1076 build_string ("C1"),
1371 build_string ("Control characters"), 1077 build_msg_string ("Control characters"),
1372 build_string ("Control characters 128-191"), 1078 build_msg_string ("Control characters 128-191"),
1373 build_string ("")); 1079 build_string (""), 0);
1374 staticpro (&Vcharset_latin_iso8859_1); 1080 staticpro (&Vcharset_latin_iso8859_1);
1375 Vcharset_latin_iso8859_1 = 1081 Vcharset_latin_iso8859_1 =
1376 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2, 1082 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1377 CHARSET_TYPE_96, 1, 1, 'A', 1083 CHARSET_TYPE_96, 1, 1, 'A',
1378 CHARSET_LEFT_TO_RIGHT, 1084 CHARSET_LEFT_TO_RIGHT,
1379 build_string ("Latin-1"), 1085 build_string ("Latin-1"),
1380 build_string ("ISO8859-1 (Latin-1)"), 1086 build_msg_string ("ISO8859-1 (Latin-1)"),
1381 build_string ("ISO8859-1 (Latin-1)"), 1087 build_msg_string ("ISO8859-1 (Latin-1)"),
1382 build_string ("iso8859-1")); 1088 build_string ("iso8859-1"), 0);
1383 staticpro (&Vcharset_latin_iso8859_2); 1089 staticpro (&Vcharset_latin_iso8859_2);
1384 Vcharset_latin_iso8859_2 = 1090 Vcharset_latin_iso8859_2 =
1385 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2, 1091 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1386 CHARSET_TYPE_96, 1, 1, 'B', 1092 CHARSET_TYPE_96, 1, 1, 'B',
1387 CHARSET_LEFT_TO_RIGHT, 1093 CHARSET_LEFT_TO_RIGHT,
1388 build_string ("Latin-2"), 1094 build_string ("Latin-2"),
1389 build_string ("ISO8859-2 (Latin-2)"), 1095 build_msg_string ("ISO8859-2 (Latin-2)"),
1390 build_string ("ISO8859-2 (Latin-2)"), 1096 build_msg_string ("ISO8859-2 (Latin-2)"),
1391 build_string ("iso8859-2")); 1097 build_string ("iso8859-2"), 0);
1392 staticpro (&Vcharset_latin_iso8859_3); 1098 staticpro (&Vcharset_latin_iso8859_3);
1393 Vcharset_latin_iso8859_3 = 1099 Vcharset_latin_iso8859_3 =
1394 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2, 1100 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1395 CHARSET_TYPE_96, 1, 1, 'C', 1101 CHARSET_TYPE_96, 1, 1, 'C',
1396 CHARSET_LEFT_TO_RIGHT, 1102 CHARSET_LEFT_TO_RIGHT,
1397 build_string ("Latin-3"), 1103 build_string ("Latin-3"),
1398 build_string ("ISO8859-3 (Latin-3)"), 1104 build_msg_string ("ISO8859-3 (Latin-3)"),
1399 build_string ("ISO8859-3 (Latin-3)"), 1105 build_msg_string ("ISO8859-3 (Latin-3)"),
1400 build_string ("iso8859-3")); 1106 build_string ("iso8859-3"), 0);
1401 staticpro (&Vcharset_latin_iso8859_4); 1107 staticpro (&Vcharset_latin_iso8859_4);
1402 Vcharset_latin_iso8859_4 = 1108 Vcharset_latin_iso8859_4 =
1403 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2, 1109 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1404 CHARSET_TYPE_96, 1, 1, 'D', 1110 CHARSET_TYPE_96, 1, 1, 'D',
1405 CHARSET_LEFT_TO_RIGHT, 1111 CHARSET_LEFT_TO_RIGHT,
1406 build_string ("Latin-4"), 1112 build_string ("Latin-4"),
1407 build_string ("ISO8859-4 (Latin-4)"), 1113 build_msg_string ("ISO8859-4 (Latin-4)"),
1408 build_string ("ISO8859-4 (Latin-4)"), 1114 build_msg_string ("ISO8859-4 (Latin-4)"),
1409 build_string ("iso8859-4")); 1115 build_string ("iso8859-4"), 0);
1410 staticpro (&Vcharset_thai_tis620); 1116 staticpro (&Vcharset_thai_tis620);
1411 Vcharset_thai_tis620 = 1117 Vcharset_thai_tis620 =
1412 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2, 1118 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1413 CHARSET_TYPE_96, 1, 1, 'T', 1119 CHARSET_TYPE_96, 1, 1, 'T',
1414 CHARSET_LEFT_TO_RIGHT, 1120 CHARSET_LEFT_TO_RIGHT,
1415 build_string ("TIS620"), 1121 build_string ("TIS620"),
1416 build_string ("TIS620 (Thai)"), 1122 build_msg_string ("TIS620 (Thai)"),
1417 build_string ("TIS620.2529 (Thai)"), 1123 build_msg_string ("TIS620.2529 (Thai)"),
1418 build_string ("tis620")); 1124 build_string ("tis620"),0);
1419 staticpro (&Vcharset_greek_iso8859_7); 1125 staticpro (&Vcharset_greek_iso8859_7);
1420 Vcharset_greek_iso8859_7 = 1126 Vcharset_greek_iso8859_7 =
1421 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2, 1127 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1422 CHARSET_TYPE_96, 1, 1, 'F', 1128 CHARSET_TYPE_96, 1, 1, 'F',
1423 CHARSET_LEFT_TO_RIGHT, 1129 CHARSET_LEFT_TO_RIGHT,
1424 build_string ("ISO8859-7"), 1130 build_string ("ISO8859-7"),
1425 build_string ("ISO8859-7 (Greek)"), 1131 build_msg_string ("ISO8859-7 (Greek)"),
1426 build_string ("ISO8859-7 (Greek)"), 1132 build_msg_string ("ISO8859-7 (Greek)"),
1427 build_string ("iso8859-7")); 1133 build_string ("iso8859-7"), 0);
1428 staticpro (&Vcharset_arabic_iso8859_6); 1134 staticpro (&Vcharset_arabic_iso8859_6);
1429 Vcharset_arabic_iso8859_6 = 1135 Vcharset_arabic_iso8859_6 =
1430 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2, 1136 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1431 CHARSET_TYPE_96, 1, 1, 'G', 1137 CHARSET_TYPE_96, 1, 1, 'G',
1432 CHARSET_RIGHT_TO_LEFT, 1138 CHARSET_RIGHT_TO_LEFT,
1433 build_string ("ISO8859-6"), 1139 build_string ("ISO8859-6"),
1434 build_string ("ISO8859-6 (Arabic)"), 1140 build_msg_string ("ISO8859-6 (Arabic)"),
1435 build_string ("ISO8859-6 (Arabic)"), 1141 build_msg_string ("ISO8859-6 (Arabic)"),
1436 build_string ("iso8859-6")); 1142 build_string ("iso8859-6"), 0);
1437 staticpro (&Vcharset_hebrew_iso8859_8); 1143 staticpro (&Vcharset_hebrew_iso8859_8);
1438 Vcharset_hebrew_iso8859_8 = 1144 Vcharset_hebrew_iso8859_8 =
1439 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2, 1145 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1440 CHARSET_TYPE_96, 1, 1, 'H', 1146 CHARSET_TYPE_96, 1, 1, 'H',
1441 CHARSET_RIGHT_TO_LEFT, 1147 CHARSET_RIGHT_TO_LEFT,
1442 build_string ("ISO8859-8"), 1148 build_string ("ISO8859-8"),
1443 build_string ("ISO8859-8 (Hebrew)"), 1149 build_msg_string ("ISO8859-8 (Hebrew)"),
1444 build_string ("ISO8859-8 (Hebrew)"), 1150 build_msg_string ("ISO8859-8 (Hebrew)"),
1445 build_string ("iso8859-8")); 1151 build_string ("iso8859-8"), 0);
1446 staticpro (&Vcharset_katakana_jisx0201); 1152 staticpro (&Vcharset_katakana_jisx0201);
1447 Vcharset_katakana_jisx0201 = 1153 Vcharset_katakana_jisx0201 =
1448 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2, 1154 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1449 CHARSET_TYPE_94, 1, 1, 'I', 1155 CHARSET_TYPE_94, 1, 1, 'I',
1450 CHARSET_LEFT_TO_RIGHT, 1156 CHARSET_LEFT_TO_RIGHT,
1451 build_string ("JISX0201 Kana"), 1157 build_string ("JISX0201 Kana"),
1452 build_string ("JISX0201.1976 (Japanese Kana)"), 1158 build_msg_string ("JISX0201.1976 (Japanese Kana)"),
1453 build_string ("JISX0201.1976 Japanese Kana"), 1159 build_msg_string ("JISX0201.1976 Japanese Kana"),
1454 build_string ("jisx0201.1976")); 1160 build_string ("jisx0201.1976"), 0);
1455 staticpro (&Vcharset_latin_jisx0201); 1161 staticpro (&Vcharset_latin_jisx0201);
1456 Vcharset_latin_jisx0201 = 1162 Vcharset_latin_jisx0201 =
1457 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2, 1163 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1458 CHARSET_TYPE_94, 1, 0, 'J', 1164 CHARSET_TYPE_94, 1, 0, 'J',
1459 CHARSET_LEFT_TO_RIGHT, 1165 CHARSET_LEFT_TO_RIGHT,
1460 build_string ("JISX0201 Roman"), 1166 build_string ("JISX0201 Roman"),
1461 build_string ("JISX0201.1976 (Japanese Roman)"), 1167 build_msg_string ("JISX0201.1976 (Japanese Roman)"),
1462 build_string ("JISX0201.1976 Japanese Roman"), 1168 build_msg_string ("JISX0201.1976 Japanese Roman"),
1463 build_string ("jisx0201.1976")); 1169 build_string ("jisx0201.1976"), 0);
1464 staticpro (&Vcharset_cyrillic_iso8859_5); 1170 staticpro (&Vcharset_cyrillic_iso8859_5);
1465 Vcharset_cyrillic_iso8859_5 = 1171 Vcharset_cyrillic_iso8859_5 =
1466 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2, 1172 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1467 CHARSET_TYPE_96, 1, 1, 'L', 1173 CHARSET_TYPE_96, 1, 1, 'L',
1468 CHARSET_LEFT_TO_RIGHT, 1174 CHARSET_LEFT_TO_RIGHT,
1469 build_string ("ISO8859-5"), 1175 build_string ("ISO8859-5"),
1470 build_string ("ISO8859-5 (Cyrillic)"), 1176 build_msg_string ("ISO8859-5 (Cyrillic)"),
1471 build_string ("ISO8859-5 (Cyrillic)"), 1177 build_msg_string ("ISO8859-5 (Cyrillic)"),
1472 build_string ("iso8859-5")); 1178 build_string ("iso8859-5"), 0);
1473 staticpro (&Vcharset_latin_iso8859_9); 1179 staticpro (&Vcharset_latin_iso8859_9);
1474 Vcharset_latin_iso8859_9 = 1180 Vcharset_latin_iso8859_9 =
1475 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2, 1181 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1476 CHARSET_TYPE_96, 1, 1, 'M', 1182 CHARSET_TYPE_96, 1, 1, 'M',
1477 CHARSET_LEFT_TO_RIGHT, 1183 CHARSET_LEFT_TO_RIGHT,
1478 build_string ("Latin-5"), 1184 build_string ("Latin-5"),
1479 build_string ("ISO8859-9 (Latin-5)"), 1185 build_msg_string ("ISO8859-9 (Latin-5)"),
1480 build_string ("ISO8859-9 (Latin-5)"), 1186 build_msg_string ("ISO8859-9 (Latin-5)"),
1481 build_string ("iso8859-9")); 1187 build_string ("iso8859-9"), 0);
1482 staticpro (&Vcharset_latin_iso8859_15); 1188 staticpro (&Vcharset_latin_iso8859_15);
1483 Vcharset_latin_iso8859_15 = 1189 Vcharset_latin_iso8859_15 =
1484 make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2, 1190 make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2,
1485 CHARSET_TYPE_96, 1, 1, 'b', 1191 CHARSET_TYPE_96, 1, 1, 'b',
1486 CHARSET_LEFT_TO_RIGHT, 1192 CHARSET_LEFT_TO_RIGHT,
1487 build_string ("Latin-9"), 1193 build_string ("Latin-9"),
1488 build_string ("ISO8859-15 (Latin-9)"), 1194 build_msg_string ("ISO8859-15 (Latin-9)"),
1489 build_string ("ISO8859-15 (Latin-9)"), 1195 build_msg_string ("ISO8859-15 (Latin-9)"),
1490 build_string ("iso8859-15")); 1196 build_string ("iso8859-15"), 0);
1491 staticpro (&Vcharset_japanese_jisx0208_1978); 1197 staticpro (&Vcharset_japanese_jisx0208_1978);
1492 Vcharset_japanese_jisx0208_1978 = 1198 Vcharset_japanese_jisx0208_1978 =
1493 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3, 1199 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1494 CHARSET_TYPE_94X94, 2, 0, '@', 1200 CHARSET_TYPE_94X94, 2, 0, '@',
1495 CHARSET_LEFT_TO_RIGHT, 1201 CHARSET_LEFT_TO_RIGHT,
1496 build_string ("JISX0208.1978"), 1202 build_string ("JISX0208.1978"),
1497 build_string ("JISX0208.1978 (Japanese)"), 1203 build_msg_string ("JISX0208.1978 (Japanese)"),
1498 build_string 1204 build_msg_string
1499 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"), 1205 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1500 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978")); 1206 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), 0);
1501 staticpro (&Vcharset_chinese_gb2312); 1207 staticpro (&Vcharset_chinese_gb2312);
1502 Vcharset_chinese_gb2312 = 1208 Vcharset_chinese_gb2312 =
1503 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3, 1209 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1504 CHARSET_TYPE_94X94, 2, 0, 'A', 1210 CHARSET_TYPE_94X94, 2, 0, 'A',
1505 CHARSET_LEFT_TO_RIGHT, 1211 CHARSET_LEFT_TO_RIGHT,
1506 build_string ("GB2312"), 1212 build_string ("GB2312"),
1507 build_string ("GB2312)"), 1213 build_msg_string ("GB2312)"),
1508 build_string ("GB2312 Chinese simplified"), 1214 build_msg_string ("GB2312 Chinese simplified"),
1509 build_string ("gb2312")); 1215 build_string ("gb2312"), 0);
1510 staticpro (&Vcharset_japanese_jisx0208); 1216 staticpro (&Vcharset_japanese_jisx0208);
1511 Vcharset_japanese_jisx0208 = 1217 Vcharset_japanese_jisx0208 =
1512 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3, 1218 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1513 CHARSET_TYPE_94X94, 2, 0, 'B', 1219 CHARSET_TYPE_94X94, 2, 0, 'B',
1514 CHARSET_LEFT_TO_RIGHT, 1220 CHARSET_LEFT_TO_RIGHT,
1515 build_string ("JISX0208"), 1221 build_string ("JISX0208"),
1516 build_string ("JISX0208.1983/1990 (Japanese)"), 1222 build_msg_string ("JISX0208.1983/1990 (Japanese)"),
1517 build_string ("JISX0208.1983/1990 Japanese Kanji"), 1223 build_msg_string ("JISX0208.1983/1990 Japanese Kanji"),
1518 build_string ("jisx0208.19\\(83\\|90\\)")); 1224 build_string ("jisx0208.19\\(83\\|90\\)"), 0);
1519 staticpro (&Vcharset_korean_ksc5601); 1225 staticpro (&Vcharset_korean_ksc5601);
1520 Vcharset_korean_ksc5601 = 1226 Vcharset_korean_ksc5601 =
1521 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3, 1227 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1522 CHARSET_TYPE_94X94, 2, 0, 'C', 1228 CHARSET_TYPE_94X94, 2, 0, 'C',
1523 CHARSET_LEFT_TO_RIGHT, 1229 CHARSET_LEFT_TO_RIGHT,
1524 build_string ("KSC5601"), 1230 build_string ("KSC5601"),
1525 build_string ("KSC5601 (Korean"), 1231 build_msg_string ("KSC5601 (Korean"),
1526 build_string ("KSC5601 Korean Hangul and Hanja"), 1232 build_msg_string ("KSC5601 Korean Hangul and Hanja"),
1527 build_string ("ksc5601")); 1233 build_string ("ksc5601"), 0);
1528 staticpro (&Vcharset_japanese_jisx0212); 1234 staticpro (&Vcharset_japanese_jisx0212);
1529 Vcharset_japanese_jisx0212 = 1235 Vcharset_japanese_jisx0212 =
1530 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3, 1236 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1531 CHARSET_TYPE_94X94, 2, 0, 'D', 1237 CHARSET_TYPE_94X94, 2, 0, 'D',
1532 CHARSET_LEFT_TO_RIGHT, 1238 CHARSET_LEFT_TO_RIGHT,
1533 build_string ("JISX0212"), 1239 build_string ("JISX0212"),
1534 build_string ("JISX0212 (Japanese)"), 1240 build_msg_string ("JISX0212 (Japanese)"),
1535 build_string ("JISX0212 Japanese Supplement"), 1241 build_msg_string ("JISX0212 Japanese Supplement"),
1536 build_string ("jisx0212")); 1242 build_string ("jisx0212"), 0);
1537 1243
1538 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$" 1244 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1539 staticpro (&Vcharset_chinese_cns11643_1); 1245 staticpro (&Vcharset_chinese_cns11643_1);
1540 Vcharset_chinese_cns11643_1 = 1246 Vcharset_chinese_cns11643_1 =
1541 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3, 1247 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1542 CHARSET_TYPE_94X94, 2, 0, 'G', 1248 CHARSET_TYPE_94X94, 2, 0, 'G',
1543 CHARSET_LEFT_TO_RIGHT, 1249 CHARSET_LEFT_TO_RIGHT,
1544 build_string ("CNS11643-1"), 1250 build_string ("CNS11643-1"),
1545 build_string ("CNS11643-1 (Chinese traditional)"), 1251 build_msg_string ("CNS11643-1 (Chinese traditional)"),
1546 build_string 1252 build_msg_string
1547 ("CNS 11643 Plane 1 Chinese traditional"), 1253 ("CNS 11643 Plane 1 Chinese traditional"),
1548 build_string (CHINESE_CNS_PLANE_RE("1"))); 1254 build_string (CHINESE_CNS_PLANE_RE("1")), 0);
1549 staticpro (&Vcharset_chinese_cns11643_2); 1255 staticpro (&Vcharset_chinese_cns11643_2);
1550 Vcharset_chinese_cns11643_2 = 1256 Vcharset_chinese_cns11643_2 =
1551 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3, 1257 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1552 CHARSET_TYPE_94X94, 2, 0, 'H', 1258 CHARSET_TYPE_94X94, 2, 0, 'H',
1553 CHARSET_LEFT_TO_RIGHT, 1259 CHARSET_LEFT_TO_RIGHT,
1554 build_string ("CNS11643-2"), 1260 build_string ("CNS11643-2"),
1555 build_string ("CNS11643-2 (Chinese traditional)"), 1261 build_msg_string ("CNS11643-2 (Chinese traditional)"),
1556 build_string 1262 build_msg_string
1557 ("CNS 11643 Plane 2 Chinese traditional"), 1263 ("CNS 11643 Plane 2 Chinese traditional"),
1558 build_string (CHINESE_CNS_PLANE_RE("2"))); 1264 build_string (CHINESE_CNS_PLANE_RE("2")), 0);
1559 staticpro (&Vcharset_chinese_big5_1); 1265 staticpro (&Vcharset_chinese_big5_1);
1560 Vcharset_chinese_big5_1 = 1266 Vcharset_chinese_big5_1 =
1561 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3, 1267 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1562 CHARSET_TYPE_94X94, 2, 0, '0', 1268 CHARSET_TYPE_94X94, 2, 0, '0',
1563 CHARSET_LEFT_TO_RIGHT, 1269 CHARSET_LEFT_TO_RIGHT,
1564 build_string ("Big5"), 1270 build_string ("Big5"),
1565 build_string ("Big5 (Level-1)"), 1271 build_msg_string ("Big5 (Level-1)"),
1566 build_string 1272 build_msg_string
1567 ("Big5 Level-1 Chinese traditional"), 1273 ("Big5 Level-1 Chinese traditional"),
1568 build_string ("big5")); 1274 build_string ("big5"), 0);
1569 staticpro (&Vcharset_chinese_big5_2); 1275 staticpro (&Vcharset_chinese_big5_2);
1570 Vcharset_chinese_big5_2 = 1276 Vcharset_chinese_big5_2 =
1571 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3, 1277 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1572 CHARSET_TYPE_94X94, 2, 0, '1', 1278 CHARSET_TYPE_94X94, 2, 0, '1',
1573 CHARSET_LEFT_TO_RIGHT, 1279 CHARSET_LEFT_TO_RIGHT,
1574 build_string ("Big5"), 1280 build_string ("Big5"),
1575 build_string ("Big5 (Level-2)"), 1281 build_msg_string ("Big5 (Level-2)"),
1576 build_string 1282 build_msg_string
1577 ("Big5 Level-2 Chinese traditional"), 1283 ("Big5 Level-2 Chinese traditional"),
1578 build_string ("big5")); 1284 build_string ("big5"), 0);
1579 1285
1580 1286
1581 #ifdef ENABLE_COMPOSITE_CHARS 1287 #ifdef ENABLE_COMPOSITE_CHARS
1582 /* #### For simplicity, we put composite chars into a 96x96 charset. 1288 /* #### For simplicity, we put composite chars into a 96x96 charset.
1583 This is going to lead to problems because you can run out of 1289 This is going to lead to problems because you can run out of
1586 Vcharset_composite = 1292 Vcharset_composite =
1587 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3, 1293 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1588 CHARSET_TYPE_96X96, 2, 0, 0, 1294 CHARSET_TYPE_96X96, 2, 0, 0,
1589 CHARSET_LEFT_TO_RIGHT, 1295 CHARSET_LEFT_TO_RIGHT,
1590 build_string ("Composite"), 1296 build_string ("Composite"),
1591 build_string ("Composite characters"), 1297 build_msg_string ("Composite characters"),
1592 build_string ("Composite characters"), 1298 build_msg_string ("Composite characters"),
1593 build_string ("")); 1299 build_string (""), 0);
1594 1300 #else
1595 /* #### not dumped properly */ 1301 /* We create a hack so that we have a way of storing ESC 0 and ESC 1
1596 composite_char_row_next = 32; 1302 sequences as "characters", so that they will be output correctly. */
1597 composite_char_col_next = 32; 1303 staticpro (&Vcharset_composite);
1598 1304 Vcharset_composite =
1599 Vcomposite_char_string2char_hash_table = 1305 make_charset (LEADING_BYTE_COMPOSITE_REPLACEMENT, Qcomposite, 2,
1600 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL); 1306 CHARSET_TYPE_96, 1, 1, '|',
1601 Vcomposite_char_char2string_hash_table = 1307 CHARSET_LEFT_TO_RIGHT,
1602 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); 1308 build_string ("Composite hack"),
1603 staticpro (&Vcomposite_char_string2char_hash_table); 1309 build_msg_string ("Composite characters hack"),
1604 staticpro (&Vcomposite_char_char2string_hash_table); 1310 build_msg_string ("Composite characters hack"),
1311 build_string (""), 0);
1605 #endif /* ENABLE_COMPOSITE_CHARS */ 1312 #endif /* ENABLE_COMPOSITE_CHARS */
1606 1313 }
1607 }