Mercurial > hg > xemacs-beta
comparison src/mule-charset.c @ 771:943eaba38521
[xemacs-hg @ 2002-03-13 08:51:24 by ben]
The big ben-mule-21-5 check-in!
Various files were added and deleted. See CHANGES-ben-mule.
There are still some test suite failures. No crashes, though.
Many of the failures have to do with problems in the test suite itself
rather than in the actual code. I'll be addressing these in the next
day or so -- none of the test suite failures are at all critical.
Meanwhile I'll be trying to address the biggest issues -- i.e. build
or run failures, which will almost certainly happen on various platforms.
All comments should be sent to ben@xemacs.org -- use a Cc: if necessary
when sending to mailing lists. There will be pre- and post-merge tags,
named something like
pre-ben-mule-21-5-merge-in and
post-ben-mule-21-5-merge-in.
author | ben |
---|---|
date | Wed, 13 Mar 2002 08:54:06 +0000 |
parents | 4d00488244c1 |
children | 026c5bf9c134 |
comparison
equal
deleted
inserted
replaced
770:336a418893b5 | 771:943eaba38521 |
---|---|
1 /* Functions to handle multilingual characters. | 1 /* Functions to handle multilingual characters. |
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc. | 2 Copyright (C) 1992, 1995 Free Software Foundation, Inc. |
3 Copyright (C) 1995 Sun Microsystems, Inc. | 3 Copyright (C) 1995 Sun Microsystems, Inc. |
4 Copyright (C) 2001, 2002 Ben Wing. | |
4 | 5 |
5 This file is part of XEmacs. | 6 This file is part of XEmacs. |
6 | 7 |
7 XEmacs is free software; you can redistribute it and/or modify it | 8 XEmacs is free software; you can redistribute it and/or modify it |
8 under the terms of the GNU General Public License as published by the | 9 under the terms of the GNU General Public License as published by the |
27 #include "lisp.h" | 28 #include "lisp.h" |
28 | 29 |
29 #include "buffer.h" | 30 #include "buffer.h" |
30 #include "chartab.h" | 31 #include "chartab.h" |
31 #include "elhash.h" | 32 #include "elhash.h" |
32 #include "lstream.h" | |
33 #include "device.h" | 33 #include "device.h" |
34 #include "faces.h" | 34 #include "faces.h" |
35 #include "lstream.h" | |
35 #include "mule-ccl.h" | 36 #include "mule-ccl.h" |
36 | 37 |
37 /* The various pre-defined charsets. */ | 38 /* The various pre-defined charsets. */ |
38 | 39 |
39 Lisp_Object Vcharset_ascii; | 40 Lisp_Object Vcharset_ascii; |
58 Lisp_Object Vcharset_japanese_jisx0212; | 59 Lisp_Object Vcharset_japanese_jisx0212; |
59 Lisp_Object Vcharset_chinese_cns11643_1; | 60 Lisp_Object Vcharset_chinese_cns11643_1; |
60 Lisp_Object Vcharset_chinese_cns11643_2; | 61 Lisp_Object Vcharset_chinese_cns11643_2; |
61 Lisp_Object Vcharset_chinese_big5_1; | 62 Lisp_Object Vcharset_chinese_big5_1; |
62 Lisp_Object Vcharset_chinese_big5_2; | 63 Lisp_Object Vcharset_chinese_big5_2; |
63 | |
64 #ifdef ENABLE_COMPOSITE_CHARS | |
65 Lisp_Object Vcharset_composite; | 64 Lisp_Object Vcharset_composite; |
66 | 65 |
67 /* Hash tables for composite chars. One maps string representing | |
68 composed chars to their equivalent chars; one goes the | |
69 other way. */ | |
70 Lisp_Object Vcomposite_char_char2string_hash_table; | |
71 Lisp_Object Vcomposite_char_string2char_hash_table; | |
72 | |
73 static int composite_char_row_next; | |
74 static int composite_char_col_next; | |
75 | |
76 #endif /* ENABLE_COMPOSITE_CHARS */ | |
77 | |
78 struct charset_lookup *chlook; | 66 struct charset_lookup *chlook; |
79 | 67 |
80 static const struct lrecord_description charset_lookup_description_1[] = { | 68 static const struct lrecord_description charset_lookup_description_1[] = { |
81 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), 128+4*128*2 }, | 69 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), NUM_LEADING_BYTES+4*128*2 }, |
82 { XD_END } | 70 { XD_END } |
83 }; | 71 }; |
84 | 72 |
85 static const struct struct_description charset_lookup_description = { | 73 static const struct struct_description charset_lookup_description = { |
86 sizeof (struct charset_lookup), | 74 sizeof (struct charset_lookup), |
87 charset_lookup_description_1 | 75 charset_lookup_description_1 |
88 }; | 76 }; |
89 | 77 |
90 /* Table of number of bytes in the string representation of a character | |
91 indexed by the first byte of that representation. | |
92 | |
93 rep_bytes_by_first_byte(c) is more efficient than the equivalent | |
94 canonical computation: | |
95 | |
96 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */ | |
97 | |
98 const Bytecount rep_bytes_by_first_byte[0xA0] = | |
99 { /* 0x00 - 0x7f are for straight ASCII */ | |
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
108 /* 0x80 - 0x8f are for Dimension-1 official charsets */ | |
109 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
110 /* 0x90 - 0x9d are for Dimension-2 official charsets */ | |
111 /* 0x9e is for Dimension-1 private charsets */ | |
112 /* 0x9f is for Dimension-2 private charsets */ | |
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4 | |
114 }; | |
115 | |
116 Lisp_Object Qcharsetp; | 78 Lisp_Object Qcharsetp; |
117 | 79 |
118 /* Qdoc_string, Qdimension, Qchars defined in general.c */ | 80 /* Qdoc_string, Qdimension, Qchars defined in general.c */ |
119 Lisp_Object Qregistry, Qfinal, Qgraphic; | 81 Lisp_Object Qregistry, Qfinal, Qgraphic; |
120 Lisp_Object Qdirection; | 82 Lisp_Object Qdirection; |
121 Lisp_Object Qreverse_direction_charset; | 83 Lisp_Object Qreverse_direction_charset; |
122 Lisp_Object Qleading_byte; | |
123 Lisp_Object Qshort_name, Qlong_name; | 84 Lisp_Object Qshort_name, Qlong_name; |
124 | 85 |
125 Lisp_Object Qascii, | 86 Lisp_Object Qfrom_unicode, Qto_unicode; |
126 Qcontrol_1, | 87 |
88 Lisp_Object | |
127 Qlatin_iso8859_1, | 89 Qlatin_iso8859_1, |
128 Qlatin_iso8859_2, | 90 Qlatin_iso8859_2, |
129 Qlatin_iso8859_3, | 91 Qlatin_iso8859_3, |
130 Qlatin_iso8859_4, | 92 Qlatin_iso8859_4, |
131 Qthai_tis620, | 93 Qthai_tis620, |
150 | 112 |
151 Lisp_Object Ql2r, Qr2l; | 113 Lisp_Object Ql2r, Qr2l; |
152 | 114 |
153 Lisp_Object Vcharset_hash_table; | 115 Lisp_Object Vcharset_hash_table; |
154 | 116 |
155 /* Composite characters are characters constructed by overstriking two | |
156 or more regular characters. | |
157 | |
158 1) The old Mule implementation involves storing composite characters | |
159 in a buffer as a tag followed by all of the actual characters | |
160 used to make up the composite character. I think this is a bad | |
161 idea; it greatly complicates code that wants to handle strings | |
162 one character at a time because it has to deal with the possibility | |
163 of great big ungainly characters. It's much more reasonable to | |
164 simply store an index into a table of composite characters. | |
165 | |
166 2) The current implementation only allows for 16,384 separate | |
167 composite characters over the lifetime of the XEmacs process. | |
168 This could become a potential problem if the user | |
169 edited lots of different files that use composite characters. | |
170 Due to FSF bogosity, increasing the number of allowable | |
171 composite characters under Mule would decrease the number | |
172 of possible faces that can exist. Mule already has shrunk | |
173 this to 2048, and further shrinkage would become uncomfortable. | |
174 No such problems exist in XEmacs. | |
175 | |
176 Composite characters could be represented as 0x80 C1 C2 C3, | |
177 where each C[1-3] is in the range 0xA0 - 0xFF. This allows | |
178 for slightly under 2^20 (one million) composite characters | |
179 over the XEmacs process lifetime, and you only need to | |
180 increase the size of a Mule character from 19 to 21 bits. | |
181 Or you could use 0x80 C1 C2 C3 C4, allowing for about | |
182 85 million (slightly over 2^26) composite characters. */ | |
183 | |
184 | |
185 /************************************************************************/ | |
186 /* Basic Emchar functions */ | |
187 /************************************************************************/ | |
188 | |
189 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded | |
190 string in STR. Returns the number of bytes stored. | |
191 Do not call this directly. Use the macro set_charptr_emchar() instead. | |
192 */ | |
193 | |
194 Bytecount | |
195 non_ascii_set_charptr_emchar (Intbyte *str, Emchar c) | |
196 { | |
197 Intbyte *p; | |
198 Intbyte lb; | |
199 int c1, c2; | |
200 Lisp_Object charset; | |
201 | |
202 p = str; | |
203 BREAKUP_CHAR (c, charset, c1, c2); | |
204 lb = CHAR_LEADING_BYTE (c); | |
205 if (LEADING_BYTE_PRIVATE_P (lb)) | |
206 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb); | |
207 *p++ = lb; | |
208 if (EQ (charset, Vcharset_control_1)) | |
209 c1 += 0x20; | |
210 *p++ = c1 | 0x80; | |
211 if (c2) | |
212 *p++ = c2 | 0x80; | |
213 | |
214 return (p - str); | |
215 } | |
216 | |
217 /* Return the first character from a Mule-encoded string in STR, | |
218 assuming it's non-ASCII. Do not call this directly. | |
219 Use the macro charptr_emchar() instead. */ | |
220 | |
221 Emchar | |
222 non_ascii_charptr_emchar (const Intbyte *str) | |
223 { | |
224 Intbyte i0 = *str, i1, i2 = 0; | |
225 Lisp_Object charset; | |
226 | |
227 if (i0 == LEADING_BYTE_CONTROL_1) | |
228 return (Emchar) (*++str - 0x20); | |
229 | |
230 if (LEADING_BYTE_PREFIX_P (i0)) | |
231 i0 = *++str; | |
232 | |
233 i1 = *++str & 0x7F; | |
234 | |
235 charset = CHARSET_BY_LEADING_BYTE (i0); | |
236 if (XCHARSET_DIMENSION (charset) == 2) | |
237 i2 = *++str & 0x7F; | |
238 | |
239 return MAKE_CHAR (charset, i1, i2); | |
240 } | |
241 | |
242 /* Return whether CH is a valid Emchar, assuming it's non-ASCII. | |
243 Do not call this directly. Use the macro valid_char_p() instead. */ | |
244 | |
245 int | |
246 non_ascii_valid_char_p (Emchar ch) | |
247 { | |
248 int f1, f2, f3; | |
249 | |
250 /* Must have only lowest 19 bits set */ | |
251 if (ch & ~0x7FFFF) | |
252 return 0; | |
253 | |
254 f1 = CHAR_FIELD1 (ch); | |
255 f2 = CHAR_FIELD2 (ch); | |
256 f3 = CHAR_FIELD3 (ch); | |
257 | |
258 if (f1 == 0) | |
259 { | |
260 Lisp_Object charset; | |
261 | |
262 if (f2 < MIN_CHAR_FIELD2_OFFICIAL || | |
263 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) || | |
264 f2 > MAX_CHAR_FIELD2_PRIVATE) | |
265 return 0; | |
266 if (f3 < 0x20) | |
267 return 0; | |
268 | |
269 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE && | |
270 f2 <= MAX_CHAR_FIELD2_PRIVATE)) | |
271 return 1; | |
272 | |
273 /* | |
274 NOTE: This takes advantage of the fact that | |
275 FIELD2_TO_OFFICIAL_LEADING_BYTE and | |
276 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. | |
277 */ | |
278 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE); | |
279 if (EQ (charset, Qnil)) | |
280 return 0; | |
281 return (XCHARSET_CHARS (charset) == 96); | |
282 } | |
283 else | |
284 { | |
285 Lisp_Object charset; | |
286 | |
287 if (f1 < MIN_CHAR_FIELD1_OFFICIAL || | |
288 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) || | |
289 f1 > MAX_CHAR_FIELD1_PRIVATE) | |
290 return 0; | |
291 if (f2 < 0x20 || f3 < 0x20) | |
292 return 0; | |
293 | |
294 #ifdef ENABLE_COMPOSITE_CHARS | |
295 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE) | |
296 { | |
297 if (UNBOUNDP (Fgethash (make_int (ch), | |
298 Vcomposite_char_char2string_hash_table, | |
299 Qunbound))) | |
300 return 0; | |
301 return 1; | |
302 } | |
303 #endif /* ENABLE_COMPOSITE_CHARS */ | |
304 | |
305 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F | |
306 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE)) | |
307 return 1; | |
308 | |
309 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL) | |
310 charset = | |
311 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE); | |
312 else | |
313 charset = | |
314 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE); | |
315 | |
316 if (EQ (charset, Qnil)) | |
317 return 0; | |
318 return (XCHARSET_CHARS (charset) == 96); | |
319 } | |
320 } | |
321 | |
322 | |
323 /************************************************************************/ | |
324 /* Basic string functions */ | |
325 /************************************************************************/ | |
326 | |
327 /* Copy the character pointed to by SRC into DST. Do not call this | |
328 directly. Use the macro charptr_copy_char() instead. | |
329 Return the number of bytes copied. */ | |
330 | |
331 Bytecount | |
332 non_ascii_charptr_copy_char (const Intbyte *src, Intbyte *dst) | |
333 { | |
334 Bytecount bytes = REP_BYTES_BY_FIRST_BYTE (*src); | |
335 Bytecount i; | |
336 for (i = bytes; i; i--, dst++, src++) | |
337 *dst = *src; | |
338 return bytes; | |
339 } | |
340 | |
341 | |
342 /************************************************************************/ | |
343 /* streams of Emchars */ | |
344 /************************************************************************/ | |
345 | |
346 /* Treat a stream as a stream of Emchar's rather than a stream of bytes. | |
347 The functions below are not meant to be called directly; use | |
348 the macros in insdel.h. */ | |
349 | |
350 Emchar | |
351 Lstream_get_emchar_1 (Lstream *stream, int ch) | |
352 { | |
353 Intbyte str[MAX_EMCHAR_LEN]; | |
354 Intbyte *strptr = str; | |
355 Bytecount bytes; | |
356 | |
357 str[0] = (Intbyte) ch; | |
358 | |
359 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--) | |
360 { | |
361 int c = Lstream_getc (stream); | |
362 charbpos_checking_assert (c >= 0); | |
363 *++strptr = (Intbyte) c; | |
364 } | |
365 return charptr_emchar (str); | |
366 } | |
367 | |
368 int | |
369 Lstream_fput_emchar (Lstream *stream, Emchar ch) | |
370 { | |
371 Intbyte str[MAX_EMCHAR_LEN]; | |
372 Bytecount len = set_charptr_emchar (str, ch); | |
373 return Lstream_write (stream, str, len); | |
374 } | |
375 | |
376 void | |
377 Lstream_funget_emchar (Lstream *stream, Emchar ch) | |
378 { | |
379 Intbyte str[MAX_EMCHAR_LEN]; | |
380 Bytecount len = set_charptr_emchar (str, ch); | |
381 Lstream_unread (stream, str, len); | |
382 } | |
383 | |
384 | 117 |
385 /************************************************************************/ | 118 /************************************************************************/ |
386 /* charset object */ | 119 /* charset object */ |
387 /************************************************************************/ | 120 /************************************************************************/ |
388 | 121 |
401 | 134 |
402 static void | 135 static void |
403 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) | 136 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) |
404 { | 137 { |
405 Lisp_Charset *cs = XCHARSET (obj); | 138 Lisp_Charset *cs = XCHARSET (obj); |
406 char buf[200]; | |
407 | 139 |
408 if (print_readably) | 140 if (print_readably) |
409 printing_unreadable_object ("#<charset %s 0x%x>", | 141 printing_unreadable_object ("#<charset %s 0x%x>", |
410 string_data (XSYMBOL (CHARSET_NAME (cs))-> | 142 string_data (XSYMBOL (CHARSET_NAME (cs))-> |
411 name), | 143 name), |
412 cs->header.uid); | 144 cs->header.uid); |
413 | 145 |
414 write_c_string ("#<charset ", printcharfun); | 146 write_fmt_string_lisp (printcharfun, "#<charset %s %S %S %S", 4, |
415 print_internal (CHARSET_NAME (cs), printcharfun, 0); | 147 CHARSET_NAME (cs), CHARSET_SHORT_NAME (cs), |
416 write_c_string (" ", printcharfun); | 148 CHARSET_LONG_NAME (cs), CHARSET_DOC_STRING (cs)); |
417 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1); | 149 write_fmt_string (printcharfun, " %s %s cols=%d g%d final='%c' reg=", |
418 write_c_string (" ", printcharfun); | 150 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" : |
419 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1); | 151 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" : |
420 write_c_string (" ", printcharfun); | 152 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" : |
421 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1); | 153 "96x96", |
422 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=", | 154 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : |
423 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" : | 155 "r2l", |
424 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" : | 156 CHARSET_COLUMNS (cs), |
425 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" : | 157 CHARSET_GRAPHIC (cs), |
426 "96x96", | 158 CHARSET_FINAL (cs)); |
427 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l", | |
428 CHARSET_COLUMNS (cs), | |
429 CHARSET_GRAPHIC (cs), | |
430 CHARSET_FINAL (cs)); | |
431 write_c_string (buf, printcharfun); | |
432 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0); | 159 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0); |
433 sprintf (buf, " 0x%x>", cs->header.uid); | 160 write_fmt_string (printcharfun, " 0x%x>", cs->header.uid); |
434 write_c_string (buf, printcharfun); | 161 } |
162 | |
163 static void | |
164 finalize_charset (void *header, int for_disksave) | |
165 { | |
166 /* See mule-charset.h, definition of Lisp_Charset. */ | |
167 Lisp_Object charset = wrap_charset ((Lisp_Charset *) header); | |
168 if (for_disksave && XCHARSET_TO_UNICODE_TABLE (charset)) | |
169 { | |
170 /* Control-1, ASCII, Composite don't have tables */ | |
171 free_charset_unicode_tables (charset); | |
172 XCHARSET_TO_UNICODE_TABLE (charset) = 0; | |
173 XCHARSET_FROM_UNICODE_TABLE (charset) = 0; | |
174 } | |
435 } | 175 } |
436 | 176 |
437 static const struct lrecord_description charset_description[] = { | 177 static const struct lrecord_description charset_description[] = { |
438 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) }, | 178 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) }, |
439 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) }, | 179 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) }, |
440 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) }, | 180 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) }, |
441 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) }, | 181 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) }, |
442 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) }, | 182 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) }, |
443 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) }, | 183 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) }, |
444 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) }, | 184 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) }, |
185 #if 0 | |
186 /* #### XD_UNION not yet implemented! pdump version of XEmacs will | |
187 not work! */ | |
188 { XD_UNION, offsetof (Lisp_Charset, to_unicode_table), | |
189 XD_INDIRECT (offsetof (Lisp_Charset, dimension), 0), | |
190 to_unicode_description }, | |
191 { XD_UNION, offsetof (Lisp_Charset, from_unicode_table), | |
192 XD_INDIRECT (offsetof (Lisp_Charset, from_unicode_levels), 0), | |
193 from_unicode_description }, | |
194 #endif | |
445 { XD_END } | 195 { XD_END } |
446 }; | 196 }; |
447 | 197 |
448 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset, | 198 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset, |
449 mark_charset, print_charset, 0, 0, 0, charset_description, | 199 mark_charset, print_charset, finalize_charset, |
450 Lisp_Charset); | 200 0, 0, charset_description, Lisp_Charset); |
451 | 201 |
452 /* Make a new charset. */ | 202 /* Make a new charset. */ |
453 /* #### SJT Should generic properties be allowed? */ | 203 /* #### SJT Should generic properties be allowed? */ |
454 static Lisp_Object | 204 static Lisp_Object |
455 make_charset (int id, Lisp_Object name, unsigned char rep_bytes, | 205 make_charset (int id, Lisp_Object name, int rep_bytes, |
456 unsigned char type, unsigned char columns, unsigned char graphic, | 206 int type, int columns, int graphic, |
457 Intbyte final, unsigned char direction, Lisp_Object short_name, | 207 Intbyte final, int direction, Lisp_Object short_name, |
458 Lisp_Object long_name, Lisp_Object doc, | 208 Lisp_Object long_name, Lisp_Object doc, |
459 Lisp_Object reg) | 209 Lisp_Object reg, int overwrite) |
460 { | 210 { |
461 Lisp_Object obj; | 211 Lisp_Object obj; |
462 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset); | 212 Lisp_Charset *cs; |
463 | 213 |
464 zero_lcrecord (cs); | 214 if (!overwrite) |
465 | 215 { |
466 XSETCHARSET (obj, cs); | 216 cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset); |
217 zero_lcrecord (cs); | |
218 XSETCHARSET (obj, cs); | |
219 | |
220 if (final) | |
221 { | |
222 /* some charsets do not have final characters. This includes | |
223 ASCII, Control-1, Composite, and the two faux private | |
224 charsets. */ | |
225 assert (NILP (chlook-> | |
226 charset_by_attributes[type][final][direction])); | |
227 chlook->charset_by_attributes[type][final][direction] = obj; | |
228 } | |
229 | |
230 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE])); | |
231 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj; | |
232 } | |
233 else | |
234 { | |
235 Lisp_Object ret; | |
236 /* Actually overwrite the properties of the existing charset. | |
237 We do this because until now charsets could never be "deleted", | |
238 so parts of the code don't bother to GC charsets. */ | |
239 obj = chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]; | |
240 cs = XCHARSET (obj); | |
241 assert (EQ (chlook->charset_by_attributes[type][final][direction], | |
242 obj)); | |
243 | |
244 ret = Fremhash (XCHARSET_NAME (obj), Vcharset_hash_table); | |
245 assert (!NILP (ret)); | |
246 } | |
467 | 247 |
468 CHARSET_ID (cs) = id; | 248 CHARSET_ID (cs) = id; |
469 CHARSET_NAME (cs) = name; | 249 CHARSET_NAME (cs) = name; |
470 CHARSET_SHORT_NAME (cs) = short_name; | 250 CHARSET_SHORT_NAME (cs) = short_name; |
471 CHARSET_LONG_NAME (cs) = long_name; | 251 CHARSET_LONG_NAME (cs) = long_name; |
478 CHARSET_DOC_STRING (cs) = doc; | 258 CHARSET_DOC_STRING (cs) = doc; |
479 CHARSET_REGISTRY (cs) = reg; | 259 CHARSET_REGISTRY (cs) = reg; |
480 CHARSET_CCL_PROGRAM (cs) = Qnil; | 260 CHARSET_CCL_PROGRAM (cs) = Qnil; |
481 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; | 261 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; |
482 | 262 |
483 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || | 263 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || |
484 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2; | 264 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2; |
485 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || | 265 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || |
486 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96; | 266 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96; |
487 | 267 |
488 if (final) | 268 if (id == LEADING_BYTE_ASCII || id == LEADING_BYTE_CONTROL_1 |
269 #ifdef ENABLE_COMPOSITE_CHARS | |
270 || id == LEADING_BYTE_COMPOSITE | |
271 #endif | |
272 ) | |
273 assert (!overwrite); | |
274 else | |
489 { | 275 { |
490 /* some charsets do not have final characters. This includes | 276 if (overwrite) |
491 ASCII, Control-1, Composite, and the two faux private | 277 free_charset_unicode_tables (obj); |
492 charsets. */ | 278 init_charset_unicode_tables (obj); |
493 assert (NILP (chlook->charset_by_attributes[type][final][direction])); | |
494 chlook->charset_by_attributes[type][final][direction] = obj; | |
495 } | 279 } |
496 | |
497 assert (NILP (chlook->charset_by_leading_byte[id - 128])); | |
498 chlook->charset_by_leading_byte[id - 128] = obj; | |
499 | 280 |
500 /* Some charsets are "faux" and don't have names or really exist at | 281 /* Some charsets are "faux" and don't have names or really exist at |
501 all except in the leading-byte table. */ | 282 all except in the leading-byte table. */ |
502 if (!NILP (name)) | 283 if (!NILP (name)) |
503 Fputhash (name, obj, Vcharset_hash_table); | 284 { |
285 assert (NILP (Fgethash (name, Vcharset_hash_table, Qnil))); | |
286 Fputhash (name, obj, Vcharset_hash_table); | |
287 } | |
288 | |
289 recalculate_unicode_precedence (); | |
504 return obj; | 290 return obj; |
505 } | 291 } |
506 | 292 |
507 static int | 293 static int |
508 get_unallocated_leading_byte (int dimension) | 294 get_unallocated_leading_byte (int dimension) |
509 { | 295 { |
510 int lb; | 296 int lb; |
511 | 297 |
512 if (dimension == 1) | 298 if (dimension == 1) |
513 { | 299 { |
514 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1) | 300 if (chlook->next_allocated_1_byte_leading_byte > |
301 MAX_LEADING_BYTE_PRIVATE_1) | |
515 lb = 0; | 302 lb = 0; |
516 else | 303 else |
517 lb = chlook->next_allocated_1_byte_leading_byte++; | 304 lb = chlook->next_allocated_1_byte_leading_byte++; |
518 } | 305 } |
519 else | 306 else |
520 { | 307 { |
521 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2) | 308 if (chlook->next_allocated_2_byte_leading_byte > |
309 MAX_LEADING_BYTE_PRIVATE_2) | |
522 lb = 0; | 310 lb = 0; |
523 else | 311 else |
524 lb = chlook->next_allocated_2_byte_leading_byte++; | 312 lb = chlook->next_allocated_2_byte_leading_byte++; |
525 } | 313 } |
526 | 314 |
527 if (!lb) | 315 if (!lb) |
528 invalid_operation | 316 invalid_operation |
529 ("No more character sets free for this dimension", | 317 ("No more character sets free for this dimension", make_int (dimension)); |
530 make_int (dimension)); | |
531 | 318 |
532 return lb; | 319 return lb; |
533 } | 320 } |
534 | 321 |
535 | 322 |
669 bit cleared and set depending upon whether the value | 456 bit cleared and set depending upon whether the value |
670 of the 'graphic property is 0 or 1. | 457 of the 'graphic property is 0 or 1. |
671 */ | 458 */ |
672 (name, doc_string, props)) | 459 (name, doc_string, props)) |
673 { | 460 { |
674 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1; | 461 int id, dimension = 1, chars = 94, graphic = 0, columns = -1; |
462 Intbyte final = 0; | |
675 int direction = CHARSET_LEFT_TO_RIGHT; | 463 int direction = CHARSET_LEFT_TO_RIGHT; |
676 int type; | 464 int type; |
677 Lisp_Object registry = Qnil; | 465 Lisp_Object registry = Qnil; |
678 Lisp_Object charset; | 466 Lisp_Object charset = Qnil; |
679 Lisp_Object ccl_program = Qnil; | 467 Lisp_Object ccl_program = Qnil; |
680 Lisp_Object short_name = Qnil, long_name = Qnil; | 468 Lisp_Object short_name = Qnil, long_name = Qnil; |
681 | 469 Lisp_Object existing_charset; |
682 CHECK_SYMBOL (name); | 470 int temporary = UNBOUNDP (name); |
471 | |
472 /* NOTE: name == Qunbound is a directive from the iso2022 code to | |
473 create a temporary charset for an unknown final. We allow the final | |
474 to be overwritten with a real charset later on. */ | |
475 | |
683 if (!NILP (doc_string)) | 476 if (!NILP (doc_string)) |
684 CHECK_STRING (doc_string); | 477 CHECK_STRING (doc_string); |
685 | 478 if (!UNBOUNDP (name)) |
686 charset = Ffind_charset (name); | 479 { |
687 if (!NILP (charset)) | 480 CHECK_SYMBOL (name); |
688 invalid_operation ("Cannot redefine existing charset", name); | 481 |
482 charset = Ffind_charset (name); | |
483 if (!NILP (charset)) | |
484 invalid_operation ("Cannot redefine existing charset", name); | |
485 } | |
689 | 486 |
690 { | 487 { |
691 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props) | 488 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props) |
692 { | 489 { |
693 if (EQ (keyword, Qshort_name)) | 490 if (EQ (keyword, Qshort_name)) |
764 | 561 |
765 if (setup_ccl_program (&test_ccl, value) < 0) | 562 if (setup_ccl_program (&test_ccl, value) < 0) |
766 invalid_argument ("Invalid value for 'ccl-program", value); | 563 invalid_argument ("Invalid value for 'ccl-program", value); |
767 ccl_program = value; | 564 ccl_program = value; |
768 } | 565 } |
769 | |
770 else | 566 else |
771 invalid_constant ("Unrecognized property", keyword); | 567 invalid_constant ("Unrecognized property", keyword); |
772 } | 568 } |
773 } | 569 } |
774 | 570 |
782 if (dimension == 1) | 578 if (dimension == 1) |
783 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96; | 579 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96; |
784 else | 580 else |
785 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96; | 581 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96; |
786 | 582 |
787 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) || | 583 existing_charset = CHARSET_BY_ATTRIBUTES (type, final, |
788 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT))) | 584 CHARSET_LEFT_TO_RIGHT); |
585 if (NILP (existing_charset)) | |
586 existing_charset = CHARSET_BY_ATTRIBUTES (type, final, | |
587 CHARSET_RIGHT_TO_LEFT); | |
588 | |
589 if (!NILP (existing_charset) && !XCHARSET (existing_charset)->temporary) | |
789 invalid_argument | 590 invalid_argument |
790 ("Character set already defined for this DIMENSION/CHARS/FINAL combo", | 591 ("Character set already defined for this DIMENSION/CHARS/FINAL combo", |
791 Qunbound); | 592 existing_charset); |
792 | 593 |
793 id = get_unallocated_leading_byte (dimension); | 594 if (!NILP (existing_charset)) |
794 | 595 /* Reuse same leading byte */ |
596 id = XCHARSET_ID (existing_charset); | |
597 else | |
598 id = get_unallocated_leading_byte (dimension); | |
599 | |
600 if (temporary) | |
601 { | |
602 Intbyte tempname[80]; | |
603 | |
604 qxesprintf (tempname, "___temporary___%d__", id); | |
605 name = intern_int (tempname); | |
606 } | |
795 if (NILP (doc_string)) | 607 if (NILP (doc_string)) |
796 doc_string = build_string (""); | 608 doc_string = build_string (""); |
797 | |
798 if (NILP (registry)) | 609 if (NILP (registry)) |
799 registry = build_string (""); | 610 registry = build_string (""); |
800 | |
801 if (NILP (short_name)) | 611 if (NILP (short_name)) |
802 XSETSTRING (short_name, XSYMBOL (name)->name); | 612 XSETSTRING (short_name, XSYMBOL (name)->name); |
803 | |
804 if (NILP (long_name)) | 613 if (NILP (long_name)) |
805 long_name = doc_string; | 614 long_name = doc_string; |
806 | |
807 if (columns == -1) | 615 if (columns == -1) |
808 columns = dimension; | 616 columns = dimension; |
617 | |
809 charset = make_charset (id, name, dimension + 2, type, columns, graphic, | 618 charset = make_charset (id, name, dimension + 2, type, columns, graphic, |
810 final, direction, short_name, long_name, doc_string, registry); | 619 final, direction, short_name, long_name, |
620 doc_string, registry, !NILP (existing_charset)); | |
621 | |
622 XCHARSET (charset)->temporary = temporary; | |
811 if (!NILP (ccl_program)) | 623 if (!NILP (ccl_program)) |
812 XCHARSET_CCL_PROGRAM (charset) = ccl_program; | 624 XCHARSET_CCL_PROGRAM (charset) = ccl_program; |
625 | |
813 return charset; | 626 return charset; |
814 } | 627 } |
815 | 628 |
816 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset, | 629 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset, |
817 2, 2, 0, /* | 630 2, 2, 0, /* |
819 NEW-NAME is the name of the new charset. Return the new charset. | 632 NEW-NAME is the name of the new charset. Return the new charset. |
820 */ | 633 */ |
821 (charset, new_name)) | 634 (charset, new_name)) |
822 { | 635 { |
823 Lisp_Object new_charset = Qnil; | 636 Lisp_Object new_charset = Qnil; |
824 int id, dimension, columns, graphic, final; | 637 int id, dimension, columns, graphic; |
638 Intbyte final; | |
825 int direction, type; | 639 int direction, type; |
826 Lisp_Object registry, doc_string, short_name, long_name; | 640 Lisp_Object registry, doc_string, short_name, long_name; |
827 Lisp_Charset *cs; | 641 Lisp_Charset *cs; |
828 | 642 |
829 charset = Fget_charset (charset); | 643 charset = Fget_charset (charset); |
852 long_name = CHARSET_LONG_NAME (cs); | 666 long_name = CHARSET_LONG_NAME (cs); |
853 registry = CHARSET_REGISTRY (cs); | 667 registry = CHARSET_REGISTRY (cs); |
854 | 668 |
855 new_charset = make_charset (id, new_name, dimension + 2, type, columns, | 669 new_charset = make_charset (id, new_name, dimension + 2, type, columns, |
856 graphic, final, direction, short_name, long_name, | 670 graphic, final, direction, short_name, long_name, |
857 doc_string, registry); | 671 doc_string, registry, 0); |
858 | 672 |
859 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset; | 673 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset; |
860 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset; | 674 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset; |
861 | 675 |
862 return new_charset; | 676 return new_charset; |
1056 return Qnil; | 870 return Qnil; |
1057 } | 871 } |
1058 | 872 |
1059 | 873 |
1060 /************************************************************************/ | 874 /************************************************************************/ |
1061 /* Lisp primitives for working with characters */ | 875 /* memory usage */ |
1062 /************************************************************************/ | 876 /************************************************************************/ |
1063 | 877 |
1064 DEFUN ("make-char", Fmake_char, 2, 3, 0, /* | 878 #ifdef MEMORY_USAGE_STATS |
1065 Make a character from CHARSET and octets ARG1 and ARG2. | 879 |
1066 ARG2 is required only for characters from two-dimensional charsets. | 880 struct charset_stats |
1067 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2 | 881 { |
1068 character s with caron. | 882 int from_unicode; |
1069 */ | 883 int to_unicode; |
1070 (charset, arg1, arg2)) | 884 int other; |
1071 { | 885 }; |
1072 Lisp_Charset *cs; | 886 |
1073 int a1, a2; | 887 static void |
1074 int lowlim, highlim; | 888 compute_charset_usage (Lisp_Object charset, struct charset_stats *stats, |
889 struct overhead_stats *ovstats) | |
890 { | |
891 struct Lisp_Charset *c = XCHARSET (charset); | |
892 xzero (*stats); | |
893 stats->other += malloced_storage_size (c, sizeof (*c), ovstats); | |
894 stats->from_unicode += compute_from_unicode_table_size (charset, ovstats); | |
895 stats->to_unicode += compute_to_unicode_table_size (charset, ovstats); | |
896 } | |
897 | |
898 DEFUN ("charset-memory-usage", Fcharset_memory_usage, 1, 1, 0, /* | |
899 Return stats about the memory usage of charset CHARSET. | |
900 The values returned are in the form of an alist of usage types and | |
901 byte counts. The byte counts attempt to encompass all the memory used | |
902 by the charset (separate from the memory logically associated with a | |
903 charset or frame), including internal structures and any malloc() | |
904 overhead associated with them. In practice, the byte counts are | |
905 underestimated for various reasons, e.g. because certain memory usage | |
906 is very hard to determine \(e.g. the amount of memory used inside the | |
907 Xt library or inside the X server). | |
908 | |
909 Multiple slices of the total memory usage may be returned, separated | |
910 by a nil. Each slice represents a particular view of the memory, a | |
911 particular way of partitioning it into groups. Within a slice, there | |
912 is no overlap between the groups of memory, and each slice collectively | |
913 represents all the memory concerned. | |
914 */ | |
915 (charset)) | |
916 { | |
917 struct charset_stats stats; | |
918 struct overhead_stats ovstats; | |
919 Lisp_Object val = Qnil; | |
1075 | 920 |
1076 charset = Fget_charset (charset); | 921 charset = Fget_charset (charset); |
1077 cs = XCHARSET (charset); | 922 xzero (ovstats); |
1078 | 923 compute_charset_usage (charset, &stats, &ovstats); |
1079 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127; | 924 |
1080 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31; | 925 val = acons (Qfrom_unicode, make_int (stats.from_unicode), val); |
1081 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126; | 926 val = acons (Qto_unicode, make_int (stats.to_unicode), val); |
1082 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127; | 927 val = Fcons (Qnil, val); |
1083 | 928 val = acons (Qactually_requested, make_int (ovstats.was_requested), val); |
1084 CHECK_INT (arg1); | 929 val = acons (Qmalloc_overhead, make_int (ovstats.malloc_overhead), val); |
1085 /* It is useful (and safe, according to Olivier Galibert) to strip | 930 val = acons (Qgap_overhead, make_int (ovstats.gap_overhead), val); |
1086 the 8th bit off ARG1 and ARG2 because it allows programmers to | 931 val = acons (Qdynarr_overhead, make_int (ovstats.dynarr_overhead), val); |
1087 write (make-char 'latin-iso8859-2 CODE) where code is the actual | 932 |
1088 Latin 2 code of the character. */ | 933 return Fnreverse (val); |
1089 a1 = XINT (arg1) & 0x7f; | 934 } |
1090 if (a1 < lowlim || a1 > highlim) | 935 |
1091 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim)); | 936 #endif /* MEMORY_USAGE_STATS */ |
1092 | |
1093 if (CHARSET_DIMENSION (cs) == 1) | |
1094 { | |
1095 if (!NILP (arg2)) | |
1096 invalid_argument | |
1097 ("Charset is of dimension one; second octet must be nil", arg2); | |
1098 return make_char (MAKE_CHAR (charset, a1, 0)); | |
1099 } | |
1100 | |
1101 CHECK_INT (arg2); | |
1102 a2 = XINT (arg2) & 0x7f; | |
1103 if (a2 < lowlim || a2 > highlim) | |
1104 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim)); | |
1105 | |
1106 return make_char (MAKE_CHAR (charset, a1, a2)); | |
1107 } | |
1108 | |
1109 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /* | |
1110 Return the character set of CHARACTER. | |
1111 */ | |
1112 (character)) | |
1113 { | |
1114 CHECK_CHAR_COERCE_INT (character); | |
1115 | |
1116 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE | |
1117 (CHAR_LEADING_BYTE (XCHAR (character)))); | |
1118 } | |
1119 | |
1120 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /* | |
1121 Return the octet numbered N (should be 0 or 1) of CHARACTER. | |
1122 N defaults to 0 if omitted. | |
1123 */ | |
1124 (character, n)) | |
1125 { | |
1126 Lisp_Object charset; | |
1127 int octet0, octet1; | |
1128 | |
1129 CHECK_CHAR_COERCE_INT (character); | |
1130 | |
1131 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1); | |
1132 | |
1133 if (NILP (n) || EQ (n, Qzero)) | |
1134 return make_int (octet0); | |
1135 else if (EQ (n, make_int (1))) | |
1136 return make_int (octet1); | |
1137 else | |
1138 invalid_constant ("Octet number must be 0 or 1", n); | |
1139 } | |
1140 | |
1141 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /* | |
1142 Return list of charset and one or two position-codes of CHARACTER. | |
1143 */ | |
1144 (character)) | |
1145 { | |
1146 /* This function can GC */ | |
1147 struct gcpro gcpro1, gcpro2; | |
1148 Lisp_Object charset = Qnil; | |
1149 Lisp_Object rc = Qnil; | |
1150 int c1, c2; | |
1151 | |
1152 GCPRO2 (charset, rc); | |
1153 CHECK_CHAR_COERCE_INT (character); | |
1154 | |
1155 BREAKUP_CHAR (XCHAR (character), charset, c1, c2); | |
1156 | |
1157 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2) | |
1158 { | |
1159 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2)); | |
1160 } | |
1161 else | |
1162 { | |
1163 rc = list2 (XCHARSET_NAME (charset), make_int (c1)); | |
1164 } | |
1165 UNGCPRO; | |
1166 | |
1167 return rc; | |
1168 } | |
1169 | |
1170 | |
1171 #ifdef ENABLE_COMPOSITE_CHARS | |
1172 /************************************************************************/ | |
1173 /* composite character functions */ | |
1174 /************************************************************************/ | |
1175 | |
1176 Emchar | |
1177 lookup_composite_char (Intbyte *str, int len) | |
1178 { | |
1179 Lisp_Object lispstr = make_string (str, len); | |
1180 Lisp_Object ch = Fgethash (lispstr, | |
1181 Vcomposite_char_string2char_hash_table, | |
1182 Qunbound); | |
1183 Emchar emch; | |
1184 | |
1185 if (UNBOUNDP (ch)) | |
1186 { | |
1187 if (composite_char_row_next >= 128) | |
1188 invalid_operation ("No more composite chars available", lispstr); | |
1189 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next, | |
1190 composite_char_col_next); | |
1191 Fputhash (make_char (emch), lispstr, | |
1192 Vcomposite_char_char2string_hash_table); | |
1193 Fputhash (lispstr, make_char (emch), | |
1194 Vcomposite_char_string2char_hash_table); | |
1195 composite_char_col_next++; | |
1196 if (composite_char_col_next >= 128) | |
1197 { | |
1198 composite_char_col_next = 32; | |
1199 composite_char_row_next++; | |
1200 } | |
1201 } | |
1202 else | |
1203 emch = XCHAR (ch); | |
1204 return emch; | |
1205 } | |
1206 | |
1207 Lisp_Object | |
1208 composite_char_string (Emchar ch) | |
1209 { | |
1210 Lisp_Object str = Fgethash (make_char (ch), | |
1211 Vcomposite_char_char2string_hash_table, | |
1212 Qunbound); | |
1213 assert (!UNBOUNDP (str)); | |
1214 return str; | |
1215 } | |
1216 | |
1217 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /* | |
1218 Convert a string into a single composite character. | |
1219 The character is the result of overstriking all the characters in | |
1220 the string. | |
1221 */ | |
1222 (string)) | |
1223 { | |
1224 CHECK_STRING (string); | |
1225 return make_char (lookup_composite_char (XSTRING_DATA (string), | |
1226 XSTRING_LENGTH (string))); | |
1227 } | |
1228 | |
1229 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /* | |
1230 Return a string of the characters comprising a composite character. | |
1231 */ | |
1232 (ch)) | |
1233 { | |
1234 Emchar emch; | |
1235 | |
1236 CHECK_CHAR (ch); | |
1237 emch = XCHAR (ch); | |
1238 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE) | |
1239 invalid_argument ("Must be composite char", ch); | |
1240 return composite_char_string (emch); | |
1241 } | |
1242 #endif /* ENABLE_COMPOSITE_CHARS */ | |
1243 | 937 |
1244 | 938 |
1245 /************************************************************************/ | 939 /************************************************************************/ |
1246 /* initialization */ | 940 /* initialization */ |
1247 /************************************************************************/ | 941 /************************************************************************/ |
1267 DEFSUBR (Fcharset_property); | 961 DEFSUBR (Fcharset_property); |
1268 DEFSUBR (Fcharset_id); | 962 DEFSUBR (Fcharset_id); |
1269 DEFSUBR (Fset_charset_ccl_program); | 963 DEFSUBR (Fset_charset_ccl_program); |
1270 DEFSUBR (Fset_charset_registry); | 964 DEFSUBR (Fset_charset_registry); |
1271 | 965 |
1272 DEFSUBR (Fmake_char); | 966 #ifdef MEMORY_USAGE_STATS |
1273 DEFSUBR (Fchar_charset); | 967 DEFSUBR (Fcharset_memory_usage); |
1274 DEFSUBR (Fchar_octet); | |
1275 DEFSUBR (Fsplit_char); | |
1276 | |
1277 #ifdef ENABLE_COMPOSITE_CHARS | |
1278 DEFSUBR (Fmake_composite_char); | |
1279 DEFSUBR (Fcomposite_char_string); | |
1280 #endif | 968 #endif |
1281 | 969 |
1282 DEFSYMBOL (Qcharsetp); | 970 DEFSYMBOL (Qcharsetp); |
1283 DEFSYMBOL (Qregistry); | 971 DEFSYMBOL (Qregistry); |
1284 DEFSYMBOL (Qfinal); | 972 DEFSYMBOL (Qfinal); |
1286 DEFSYMBOL (Qdirection); | 974 DEFSYMBOL (Qdirection); |
1287 DEFSYMBOL (Qreverse_direction_charset); | 975 DEFSYMBOL (Qreverse_direction_charset); |
1288 DEFSYMBOL (Qshort_name); | 976 DEFSYMBOL (Qshort_name); |
1289 DEFSYMBOL (Qlong_name); | 977 DEFSYMBOL (Qlong_name); |
1290 | 978 |
979 DEFSYMBOL (Qfrom_unicode); | |
980 DEFSYMBOL (Qto_unicode); | |
981 | |
1291 DEFSYMBOL (Ql2r); | 982 DEFSYMBOL (Ql2r); |
1292 DEFSYMBOL (Qr2l); | 983 DEFSYMBOL (Qr2l); |
1293 | 984 |
1294 /* Charsets, compatible with FSF 20.3 | 985 /* Charsets, compatible with FSF 20.3 |
1295 Naming convention is Script-Charset[-Edition] */ | 986 Naming convention is Script-Charset[-Edition] */ |
1296 DEFSYMBOL (Qascii); | |
1297 DEFSYMBOL (Qcontrol_1); | |
1298 DEFSYMBOL (Qlatin_iso8859_1); | 987 DEFSYMBOL (Qlatin_iso8859_1); |
1299 DEFSYMBOL (Qlatin_iso8859_2); | 988 DEFSYMBOL (Qlatin_iso8859_2); |
1300 DEFSYMBOL (Qlatin_iso8859_3); | 989 DEFSYMBOL (Qlatin_iso8859_3); |
1301 DEFSYMBOL (Qlatin_iso8859_4); | 990 DEFSYMBOL (Qlatin_iso8859_4); |
1302 DEFSYMBOL (Qthai_tis620); | 991 DEFSYMBOL (Qthai_tis620); |
1319 DEFSYMBOL (Qchinese_big5_2); | 1008 DEFSYMBOL (Qchinese_big5_2); |
1320 | 1009 |
1321 DEFSYMBOL (Qcomposite); | 1010 DEFSYMBOL (Qcomposite); |
1322 } | 1011 } |
1323 | 1012 |
1013 static int | |
1014 init_charset_unicode_tables_mapper (Lisp_Object key, Lisp_Object value, | |
1015 void *closure) | |
1016 { | |
1017 init_charset_unicode_tables (value); | |
1018 return 0; | |
1019 } | |
1020 | |
1021 void | |
1022 init_mule_charset (void) | |
1023 { | |
1024 /* See mule-charset.h, definition of Lisp_Charset. */ | |
1025 if (initialized) | |
1026 elisp_maphash (init_charset_unicode_tables_mapper, Vcharset_hash_table, | |
1027 0); | |
1028 } | |
1029 | |
1324 void | 1030 void |
1325 vars_of_mule_charset (void) | 1031 vars_of_mule_charset (void) |
1326 { | 1032 { |
1327 int i, j, k; | 1033 int i, j, k; |
1328 | 1034 |
1339 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++) | 1045 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++) |
1340 chlook->charset_by_attributes[i][j][k] = Qnil; | 1046 chlook->charset_by_attributes[i][j][k] = Qnil; |
1341 | 1047 |
1342 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; | 1048 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; |
1343 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; | 1049 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; |
1344 } | 1050 |
1345 | |
1346 void | |
1347 complex_vars_of_mule_charset (void) | |
1348 { | |
1349 staticpro (&Vcharset_hash_table); | 1051 staticpro (&Vcharset_hash_table); |
1350 Vcharset_hash_table = | 1052 Vcharset_hash_table = |
1351 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); | 1053 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); |
1352 | 1054 } |
1055 | |
1056 void | |
1057 complex_vars_of_mule_charset (void) | |
1058 { | |
1353 /* Predefined character sets. We store them into variables for | 1059 /* Predefined character sets. We store them into variables for |
1354 ease of access. */ | 1060 ease of access. */ |
1355 | 1061 |
1356 staticpro (&Vcharset_ascii); | 1062 staticpro (&Vcharset_ascii); |
1357 Vcharset_ascii = | 1063 Vcharset_ascii = |
1358 make_charset (LEADING_BYTE_ASCII, Qascii, 1, | 1064 make_charset (LEADING_BYTE_ASCII, Qascii, 1, |
1359 CHARSET_TYPE_94, 1, 0, 'B', | 1065 CHARSET_TYPE_94, 1, 0, 'B', |
1360 CHARSET_LEFT_TO_RIGHT, | 1066 CHARSET_LEFT_TO_RIGHT, |
1361 build_string ("ASCII"), | 1067 build_string ("ASCII"), |
1362 build_string ("ASCII)"), | 1068 build_msg_string ("ASCII"), |
1363 build_string ("ASCII (ISO646 IRV)"), | 1069 build_msg_string ("ASCII (ISO646 IRV)"), |
1364 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)")); | 1070 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"), 0); |
1365 staticpro (&Vcharset_control_1); | 1071 staticpro (&Vcharset_control_1); |
1366 Vcharset_control_1 = | 1072 Vcharset_control_1 = |
1367 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2, | 1073 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2, |
1368 CHARSET_TYPE_94, 1, 1, 0, | 1074 CHARSET_TYPE_94, 1, 1, 0, |
1369 CHARSET_LEFT_TO_RIGHT, | 1075 CHARSET_LEFT_TO_RIGHT, |
1370 build_string ("C1"), | 1076 build_string ("C1"), |
1371 build_string ("Control characters"), | 1077 build_msg_string ("Control characters"), |
1372 build_string ("Control characters 128-191"), | 1078 build_msg_string ("Control characters 128-191"), |
1373 build_string ("")); | 1079 build_string (""), 0); |
1374 staticpro (&Vcharset_latin_iso8859_1); | 1080 staticpro (&Vcharset_latin_iso8859_1); |
1375 Vcharset_latin_iso8859_1 = | 1081 Vcharset_latin_iso8859_1 = |
1376 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2, | 1082 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2, |
1377 CHARSET_TYPE_96, 1, 1, 'A', | 1083 CHARSET_TYPE_96, 1, 1, 'A', |
1378 CHARSET_LEFT_TO_RIGHT, | 1084 CHARSET_LEFT_TO_RIGHT, |
1379 build_string ("Latin-1"), | 1085 build_string ("Latin-1"), |
1380 build_string ("ISO8859-1 (Latin-1)"), | 1086 build_msg_string ("ISO8859-1 (Latin-1)"), |
1381 build_string ("ISO8859-1 (Latin-1)"), | 1087 build_msg_string ("ISO8859-1 (Latin-1)"), |
1382 build_string ("iso8859-1")); | 1088 build_string ("iso8859-1"), 0); |
1383 staticpro (&Vcharset_latin_iso8859_2); | 1089 staticpro (&Vcharset_latin_iso8859_2); |
1384 Vcharset_latin_iso8859_2 = | 1090 Vcharset_latin_iso8859_2 = |
1385 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2, | 1091 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2, |
1386 CHARSET_TYPE_96, 1, 1, 'B', | 1092 CHARSET_TYPE_96, 1, 1, 'B', |
1387 CHARSET_LEFT_TO_RIGHT, | 1093 CHARSET_LEFT_TO_RIGHT, |
1388 build_string ("Latin-2"), | 1094 build_string ("Latin-2"), |
1389 build_string ("ISO8859-2 (Latin-2)"), | 1095 build_msg_string ("ISO8859-2 (Latin-2)"), |
1390 build_string ("ISO8859-2 (Latin-2)"), | 1096 build_msg_string ("ISO8859-2 (Latin-2)"), |
1391 build_string ("iso8859-2")); | 1097 build_string ("iso8859-2"), 0); |
1392 staticpro (&Vcharset_latin_iso8859_3); | 1098 staticpro (&Vcharset_latin_iso8859_3); |
1393 Vcharset_latin_iso8859_3 = | 1099 Vcharset_latin_iso8859_3 = |
1394 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2, | 1100 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2, |
1395 CHARSET_TYPE_96, 1, 1, 'C', | 1101 CHARSET_TYPE_96, 1, 1, 'C', |
1396 CHARSET_LEFT_TO_RIGHT, | 1102 CHARSET_LEFT_TO_RIGHT, |
1397 build_string ("Latin-3"), | 1103 build_string ("Latin-3"), |
1398 build_string ("ISO8859-3 (Latin-3)"), | 1104 build_msg_string ("ISO8859-3 (Latin-3)"), |
1399 build_string ("ISO8859-3 (Latin-3)"), | 1105 build_msg_string ("ISO8859-3 (Latin-3)"), |
1400 build_string ("iso8859-3")); | 1106 build_string ("iso8859-3"), 0); |
1401 staticpro (&Vcharset_latin_iso8859_4); | 1107 staticpro (&Vcharset_latin_iso8859_4); |
1402 Vcharset_latin_iso8859_4 = | 1108 Vcharset_latin_iso8859_4 = |
1403 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2, | 1109 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2, |
1404 CHARSET_TYPE_96, 1, 1, 'D', | 1110 CHARSET_TYPE_96, 1, 1, 'D', |
1405 CHARSET_LEFT_TO_RIGHT, | 1111 CHARSET_LEFT_TO_RIGHT, |
1406 build_string ("Latin-4"), | 1112 build_string ("Latin-4"), |
1407 build_string ("ISO8859-4 (Latin-4)"), | 1113 build_msg_string ("ISO8859-4 (Latin-4)"), |
1408 build_string ("ISO8859-4 (Latin-4)"), | 1114 build_msg_string ("ISO8859-4 (Latin-4)"), |
1409 build_string ("iso8859-4")); | 1115 build_string ("iso8859-4"), 0); |
1410 staticpro (&Vcharset_thai_tis620); | 1116 staticpro (&Vcharset_thai_tis620); |
1411 Vcharset_thai_tis620 = | 1117 Vcharset_thai_tis620 = |
1412 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2, | 1118 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2, |
1413 CHARSET_TYPE_96, 1, 1, 'T', | 1119 CHARSET_TYPE_96, 1, 1, 'T', |
1414 CHARSET_LEFT_TO_RIGHT, | 1120 CHARSET_LEFT_TO_RIGHT, |
1415 build_string ("TIS620"), | 1121 build_string ("TIS620"), |
1416 build_string ("TIS620 (Thai)"), | 1122 build_msg_string ("TIS620 (Thai)"), |
1417 build_string ("TIS620.2529 (Thai)"), | 1123 build_msg_string ("TIS620.2529 (Thai)"), |
1418 build_string ("tis620")); | 1124 build_string ("tis620"),0); |
1419 staticpro (&Vcharset_greek_iso8859_7); | 1125 staticpro (&Vcharset_greek_iso8859_7); |
1420 Vcharset_greek_iso8859_7 = | 1126 Vcharset_greek_iso8859_7 = |
1421 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2, | 1127 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2, |
1422 CHARSET_TYPE_96, 1, 1, 'F', | 1128 CHARSET_TYPE_96, 1, 1, 'F', |
1423 CHARSET_LEFT_TO_RIGHT, | 1129 CHARSET_LEFT_TO_RIGHT, |
1424 build_string ("ISO8859-7"), | 1130 build_string ("ISO8859-7"), |
1425 build_string ("ISO8859-7 (Greek)"), | 1131 build_msg_string ("ISO8859-7 (Greek)"), |
1426 build_string ("ISO8859-7 (Greek)"), | 1132 build_msg_string ("ISO8859-7 (Greek)"), |
1427 build_string ("iso8859-7")); | 1133 build_string ("iso8859-7"), 0); |
1428 staticpro (&Vcharset_arabic_iso8859_6); | 1134 staticpro (&Vcharset_arabic_iso8859_6); |
1429 Vcharset_arabic_iso8859_6 = | 1135 Vcharset_arabic_iso8859_6 = |
1430 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2, | 1136 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2, |
1431 CHARSET_TYPE_96, 1, 1, 'G', | 1137 CHARSET_TYPE_96, 1, 1, 'G', |
1432 CHARSET_RIGHT_TO_LEFT, | 1138 CHARSET_RIGHT_TO_LEFT, |
1433 build_string ("ISO8859-6"), | 1139 build_string ("ISO8859-6"), |
1434 build_string ("ISO8859-6 (Arabic)"), | 1140 build_msg_string ("ISO8859-6 (Arabic)"), |
1435 build_string ("ISO8859-6 (Arabic)"), | 1141 build_msg_string ("ISO8859-6 (Arabic)"), |
1436 build_string ("iso8859-6")); | 1142 build_string ("iso8859-6"), 0); |
1437 staticpro (&Vcharset_hebrew_iso8859_8); | 1143 staticpro (&Vcharset_hebrew_iso8859_8); |
1438 Vcharset_hebrew_iso8859_8 = | 1144 Vcharset_hebrew_iso8859_8 = |
1439 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2, | 1145 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2, |
1440 CHARSET_TYPE_96, 1, 1, 'H', | 1146 CHARSET_TYPE_96, 1, 1, 'H', |
1441 CHARSET_RIGHT_TO_LEFT, | 1147 CHARSET_RIGHT_TO_LEFT, |
1442 build_string ("ISO8859-8"), | 1148 build_string ("ISO8859-8"), |
1443 build_string ("ISO8859-8 (Hebrew)"), | 1149 build_msg_string ("ISO8859-8 (Hebrew)"), |
1444 build_string ("ISO8859-8 (Hebrew)"), | 1150 build_msg_string ("ISO8859-8 (Hebrew)"), |
1445 build_string ("iso8859-8")); | 1151 build_string ("iso8859-8"), 0); |
1446 staticpro (&Vcharset_katakana_jisx0201); | 1152 staticpro (&Vcharset_katakana_jisx0201); |
1447 Vcharset_katakana_jisx0201 = | 1153 Vcharset_katakana_jisx0201 = |
1448 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2, | 1154 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2, |
1449 CHARSET_TYPE_94, 1, 1, 'I', | 1155 CHARSET_TYPE_94, 1, 1, 'I', |
1450 CHARSET_LEFT_TO_RIGHT, | 1156 CHARSET_LEFT_TO_RIGHT, |
1451 build_string ("JISX0201 Kana"), | 1157 build_string ("JISX0201 Kana"), |
1452 build_string ("JISX0201.1976 (Japanese Kana)"), | 1158 build_msg_string ("JISX0201.1976 (Japanese Kana)"), |
1453 build_string ("JISX0201.1976 Japanese Kana"), | 1159 build_msg_string ("JISX0201.1976 Japanese Kana"), |
1454 build_string ("jisx0201.1976")); | 1160 build_string ("jisx0201.1976"), 0); |
1455 staticpro (&Vcharset_latin_jisx0201); | 1161 staticpro (&Vcharset_latin_jisx0201); |
1456 Vcharset_latin_jisx0201 = | 1162 Vcharset_latin_jisx0201 = |
1457 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2, | 1163 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2, |
1458 CHARSET_TYPE_94, 1, 0, 'J', | 1164 CHARSET_TYPE_94, 1, 0, 'J', |
1459 CHARSET_LEFT_TO_RIGHT, | 1165 CHARSET_LEFT_TO_RIGHT, |
1460 build_string ("JISX0201 Roman"), | 1166 build_string ("JISX0201 Roman"), |
1461 build_string ("JISX0201.1976 (Japanese Roman)"), | 1167 build_msg_string ("JISX0201.1976 (Japanese Roman)"), |
1462 build_string ("JISX0201.1976 Japanese Roman"), | 1168 build_msg_string ("JISX0201.1976 Japanese Roman"), |
1463 build_string ("jisx0201.1976")); | 1169 build_string ("jisx0201.1976"), 0); |
1464 staticpro (&Vcharset_cyrillic_iso8859_5); | 1170 staticpro (&Vcharset_cyrillic_iso8859_5); |
1465 Vcharset_cyrillic_iso8859_5 = | 1171 Vcharset_cyrillic_iso8859_5 = |
1466 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2, | 1172 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2, |
1467 CHARSET_TYPE_96, 1, 1, 'L', | 1173 CHARSET_TYPE_96, 1, 1, 'L', |
1468 CHARSET_LEFT_TO_RIGHT, | 1174 CHARSET_LEFT_TO_RIGHT, |
1469 build_string ("ISO8859-5"), | 1175 build_string ("ISO8859-5"), |
1470 build_string ("ISO8859-5 (Cyrillic)"), | 1176 build_msg_string ("ISO8859-5 (Cyrillic)"), |
1471 build_string ("ISO8859-5 (Cyrillic)"), | 1177 build_msg_string ("ISO8859-5 (Cyrillic)"), |
1472 build_string ("iso8859-5")); | 1178 build_string ("iso8859-5"), 0); |
1473 staticpro (&Vcharset_latin_iso8859_9); | 1179 staticpro (&Vcharset_latin_iso8859_9); |
1474 Vcharset_latin_iso8859_9 = | 1180 Vcharset_latin_iso8859_9 = |
1475 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2, | 1181 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2, |
1476 CHARSET_TYPE_96, 1, 1, 'M', | 1182 CHARSET_TYPE_96, 1, 1, 'M', |
1477 CHARSET_LEFT_TO_RIGHT, | 1183 CHARSET_LEFT_TO_RIGHT, |
1478 build_string ("Latin-5"), | 1184 build_string ("Latin-5"), |
1479 build_string ("ISO8859-9 (Latin-5)"), | 1185 build_msg_string ("ISO8859-9 (Latin-5)"), |
1480 build_string ("ISO8859-9 (Latin-5)"), | 1186 build_msg_string ("ISO8859-9 (Latin-5)"), |
1481 build_string ("iso8859-9")); | 1187 build_string ("iso8859-9"), 0); |
1482 staticpro (&Vcharset_latin_iso8859_15); | 1188 staticpro (&Vcharset_latin_iso8859_15); |
1483 Vcharset_latin_iso8859_15 = | 1189 Vcharset_latin_iso8859_15 = |
1484 make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2, | 1190 make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2, |
1485 CHARSET_TYPE_96, 1, 1, 'b', | 1191 CHARSET_TYPE_96, 1, 1, 'b', |
1486 CHARSET_LEFT_TO_RIGHT, | 1192 CHARSET_LEFT_TO_RIGHT, |
1487 build_string ("Latin-9"), | 1193 build_string ("Latin-9"), |
1488 build_string ("ISO8859-15 (Latin-9)"), | 1194 build_msg_string ("ISO8859-15 (Latin-9)"), |
1489 build_string ("ISO8859-15 (Latin-9)"), | 1195 build_msg_string ("ISO8859-15 (Latin-9)"), |
1490 build_string ("iso8859-15")); | 1196 build_string ("iso8859-15"), 0); |
1491 staticpro (&Vcharset_japanese_jisx0208_1978); | 1197 staticpro (&Vcharset_japanese_jisx0208_1978); |
1492 Vcharset_japanese_jisx0208_1978 = | 1198 Vcharset_japanese_jisx0208_1978 = |
1493 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3, | 1199 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3, |
1494 CHARSET_TYPE_94X94, 2, 0, '@', | 1200 CHARSET_TYPE_94X94, 2, 0, '@', |
1495 CHARSET_LEFT_TO_RIGHT, | 1201 CHARSET_LEFT_TO_RIGHT, |
1496 build_string ("JISX0208.1978"), | 1202 build_string ("JISX0208.1978"), |
1497 build_string ("JISX0208.1978 (Japanese)"), | 1203 build_msg_string ("JISX0208.1978 (Japanese)"), |
1498 build_string | 1204 build_msg_string |
1499 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"), | 1205 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"), |
1500 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978")); | 1206 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), 0); |
1501 staticpro (&Vcharset_chinese_gb2312); | 1207 staticpro (&Vcharset_chinese_gb2312); |
1502 Vcharset_chinese_gb2312 = | 1208 Vcharset_chinese_gb2312 = |
1503 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3, | 1209 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3, |
1504 CHARSET_TYPE_94X94, 2, 0, 'A', | 1210 CHARSET_TYPE_94X94, 2, 0, 'A', |
1505 CHARSET_LEFT_TO_RIGHT, | 1211 CHARSET_LEFT_TO_RIGHT, |
1506 build_string ("GB2312"), | 1212 build_string ("GB2312"), |
1507 build_string ("GB2312)"), | 1213 build_msg_string ("GB2312)"), |
1508 build_string ("GB2312 Chinese simplified"), | 1214 build_msg_string ("GB2312 Chinese simplified"), |
1509 build_string ("gb2312")); | 1215 build_string ("gb2312"), 0); |
1510 staticpro (&Vcharset_japanese_jisx0208); | 1216 staticpro (&Vcharset_japanese_jisx0208); |
1511 Vcharset_japanese_jisx0208 = | 1217 Vcharset_japanese_jisx0208 = |
1512 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3, | 1218 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3, |
1513 CHARSET_TYPE_94X94, 2, 0, 'B', | 1219 CHARSET_TYPE_94X94, 2, 0, 'B', |
1514 CHARSET_LEFT_TO_RIGHT, | 1220 CHARSET_LEFT_TO_RIGHT, |
1515 build_string ("JISX0208"), | 1221 build_string ("JISX0208"), |
1516 build_string ("JISX0208.1983/1990 (Japanese)"), | 1222 build_msg_string ("JISX0208.1983/1990 (Japanese)"), |
1517 build_string ("JISX0208.1983/1990 Japanese Kanji"), | 1223 build_msg_string ("JISX0208.1983/1990 Japanese Kanji"), |
1518 build_string ("jisx0208.19\\(83\\|90\\)")); | 1224 build_string ("jisx0208.19\\(83\\|90\\)"), 0); |
1519 staticpro (&Vcharset_korean_ksc5601); | 1225 staticpro (&Vcharset_korean_ksc5601); |
1520 Vcharset_korean_ksc5601 = | 1226 Vcharset_korean_ksc5601 = |
1521 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3, | 1227 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3, |
1522 CHARSET_TYPE_94X94, 2, 0, 'C', | 1228 CHARSET_TYPE_94X94, 2, 0, 'C', |
1523 CHARSET_LEFT_TO_RIGHT, | 1229 CHARSET_LEFT_TO_RIGHT, |
1524 build_string ("KSC5601"), | 1230 build_string ("KSC5601"), |
1525 build_string ("KSC5601 (Korean"), | 1231 build_msg_string ("KSC5601 (Korean"), |
1526 build_string ("KSC5601 Korean Hangul and Hanja"), | 1232 build_msg_string ("KSC5601 Korean Hangul and Hanja"), |
1527 build_string ("ksc5601")); | 1233 build_string ("ksc5601"), 0); |
1528 staticpro (&Vcharset_japanese_jisx0212); | 1234 staticpro (&Vcharset_japanese_jisx0212); |
1529 Vcharset_japanese_jisx0212 = | 1235 Vcharset_japanese_jisx0212 = |
1530 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3, | 1236 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3, |
1531 CHARSET_TYPE_94X94, 2, 0, 'D', | 1237 CHARSET_TYPE_94X94, 2, 0, 'D', |
1532 CHARSET_LEFT_TO_RIGHT, | 1238 CHARSET_LEFT_TO_RIGHT, |
1533 build_string ("JISX0212"), | 1239 build_string ("JISX0212"), |
1534 build_string ("JISX0212 (Japanese)"), | 1240 build_msg_string ("JISX0212 (Japanese)"), |
1535 build_string ("JISX0212 Japanese Supplement"), | 1241 build_msg_string ("JISX0212 Japanese Supplement"), |
1536 build_string ("jisx0212")); | 1242 build_string ("jisx0212"), 0); |
1537 | 1243 |
1538 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$" | 1244 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$" |
1539 staticpro (&Vcharset_chinese_cns11643_1); | 1245 staticpro (&Vcharset_chinese_cns11643_1); |
1540 Vcharset_chinese_cns11643_1 = | 1246 Vcharset_chinese_cns11643_1 = |
1541 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3, | 1247 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3, |
1542 CHARSET_TYPE_94X94, 2, 0, 'G', | 1248 CHARSET_TYPE_94X94, 2, 0, 'G', |
1543 CHARSET_LEFT_TO_RIGHT, | 1249 CHARSET_LEFT_TO_RIGHT, |
1544 build_string ("CNS11643-1"), | 1250 build_string ("CNS11643-1"), |
1545 build_string ("CNS11643-1 (Chinese traditional)"), | 1251 build_msg_string ("CNS11643-1 (Chinese traditional)"), |
1546 build_string | 1252 build_msg_string |
1547 ("CNS 11643 Plane 1 Chinese traditional"), | 1253 ("CNS 11643 Plane 1 Chinese traditional"), |
1548 build_string (CHINESE_CNS_PLANE_RE("1"))); | 1254 build_string (CHINESE_CNS_PLANE_RE("1")), 0); |
1549 staticpro (&Vcharset_chinese_cns11643_2); | 1255 staticpro (&Vcharset_chinese_cns11643_2); |
1550 Vcharset_chinese_cns11643_2 = | 1256 Vcharset_chinese_cns11643_2 = |
1551 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3, | 1257 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3, |
1552 CHARSET_TYPE_94X94, 2, 0, 'H', | 1258 CHARSET_TYPE_94X94, 2, 0, 'H', |
1553 CHARSET_LEFT_TO_RIGHT, | 1259 CHARSET_LEFT_TO_RIGHT, |
1554 build_string ("CNS11643-2"), | 1260 build_string ("CNS11643-2"), |
1555 build_string ("CNS11643-2 (Chinese traditional)"), | 1261 build_msg_string ("CNS11643-2 (Chinese traditional)"), |
1556 build_string | 1262 build_msg_string |
1557 ("CNS 11643 Plane 2 Chinese traditional"), | 1263 ("CNS 11643 Plane 2 Chinese traditional"), |
1558 build_string (CHINESE_CNS_PLANE_RE("2"))); | 1264 build_string (CHINESE_CNS_PLANE_RE("2")), 0); |
1559 staticpro (&Vcharset_chinese_big5_1); | 1265 staticpro (&Vcharset_chinese_big5_1); |
1560 Vcharset_chinese_big5_1 = | 1266 Vcharset_chinese_big5_1 = |
1561 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3, | 1267 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3, |
1562 CHARSET_TYPE_94X94, 2, 0, '0', | 1268 CHARSET_TYPE_94X94, 2, 0, '0', |
1563 CHARSET_LEFT_TO_RIGHT, | 1269 CHARSET_LEFT_TO_RIGHT, |
1564 build_string ("Big5"), | 1270 build_string ("Big5"), |
1565 build_string ("Big5 (Level-1)"), | 1271 build_msg_string ("Big5 (Level-1)"), |
1566 build_string | 1272 build_msg_string |
1567 ("Big5 Level-1 Chinese traditional"), | 1273 ("Big5 Level-1 Chinese traditional"), |
1568 build_string ("big5")); | 1274 build_string ("big5"), 0); |
1569 staticpro (&Vcharset_chinese_big5_2); | 1275 staticpro (&Vcharset_chinese_big5_2); |
1570 Vcharset_chinese_big5_2 = | 1276 Vcharset_chinese_big5_2 = |
1571 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3, | 1277 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3, |
1572 CHARSET_TYPE_94X94, 2, 0, '1', | 1278 CHARSET_TYPE_94X94, 2, 0, '1', |
1573 CHARSET_LEFT_TO_RIGHT, | 1279 CHARSET_LEFT_TO_RIGHT, |
1574 build_string ("Big5"), | 1280 build_string ("Big5"), |
1575 build_string ("Big5 (Level-2)"), | 1281 build_msg_string ("Big5 (Level-2)"), |
1576 build_string | 1282 build_msg_string |
1577 ("Big5 Level-2 Chinese traditional"), | 1283 ("Big5 Level-2 Chinese traditional"), |
1578 build_string ("big5")); | 1284 build_string ("big5"), 0); |
1579 | 1285 |
1580 | 1286 |
1581 #ifdef ENABLE_COMPOSITE_CHARS | 1287 #ifdef ENABLE_COMPOSITE_CHARS |
1582 /* #### For simplicity, we put composite chars into a 96x96 charset. | 1288 /* #### For simplicity, we put composite chars into a 96x96 charset. |
1583 This is going to lead to problems because you can run out of | 1289 This is going to lead to problems because you can run out of |
1586 Vcharset_composite = | 1292 Vcharset_composite = |
1587 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3, | 1293 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3, |
1588 CHARSET_TYPE_96X96, 2, 0, 0, | 1294 CHARSET_TYPE_96X96, 2, 0, 0, |
1589 CHARSET_LEFT_TO_RIGHT, | 1295 CHARSET_LEFT_TO_RIGHT, |
1590 build_string ("Composite"), | 1296 build_string ("Composite"), |
1591 build_string ("Composite characters"), | 1297 build_msg_string ("Composite characters"), |
1592 build_string ("Composite characters"), | 1298 build_msg_string ("Composite characters"), |
1593 build_string ("")); | 1299 build_string (""), 0); |
1594 | 1300 #else |
1595 /* #### not dumped properly */ | 1301 /* We create a hack so that we have a way of storing ESC 0 and ESC 1 |
1596 composite_char_row_next = 32; | 1302 sequences as "characters", so that they will be output correctly. */ |
1597 composite_char_col_next = 32; | 1303 staticpro (&Vcharset_composite); |
1598 | 1304 Vcharset_composite = |
1599 Vcomposite_char_string2char_hash_table = | 1305 make_charset (LEADING_BYTE_COMPOSITE_REPLACEMENT, Qcomposite, 2, |
1600 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL); | 1306 CHARSET_TYPE_96, 1, 1, '|', |
1601 Vcomposite_char_char2string_hash_table = | 1307 CHARSET_LEFT_TO_RIGHT, |
1602 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); | 1308 build_string ("Composite hack"), |
1603 staticpro (&Vcomposite_char_string2char_hash_table); | 1309 build_msg_string ("Composite characters hack"), |
1604 staticpro (&Vcomposite_char_char2string_hash_table); | 1310 build_msg_string ("Composite characters hack"), |
1311 build_string (""), 0); | |
1605 #endif /* ENABLE_COMPOSITE_CHARS */ | 1312 #endif /* ENABLE_COMPOSITE_CHARS */ |
1606 | 1313 } |
1607 } |