Mercurial > hg > xemacs-beta
annotate src/mule-charset.c @ 5117:3742ea8250b5 ben-lisp-object ben-lisp-object-final-ws-year-2005
Checking in final CVS version of workspace 'ben-lisp-object'
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Sat, 26 Dec 2009 00:20:27 -0600 |
parents | facf3239ba30 |
children | e0db3c197671 |
rev | line source |
---|---|
428 | 1 /* Functions to handle multilingual characters. |
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
3025 | 4 Copyright (C) 2001, 2002, 2004, 2005 Ben Wing. |
428 | 5 |
6 This file is part of XEmacs. | |
7 | |
8 XEmacs is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
10 Free Software Foundation; either version 2, or (at your option) any | |
11 later version. | |
12 | |
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with XEmacs; see the file COPYING. If not, write to | |
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 Boston, MA 02111-1307, USA. */ | |
22 | |
23 /* Synched up with: FSF 20.3. Not in FSF. */ | |
24 | |
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */ | |
26 | |
27 #include <config.h> | |
28 #include "lisp.h" | |
29 | |
30 #include "buffer.h" | |
31 #include "chartab.h" | |
32 #include "elhash.h" | |
33 #include "device.h" | |
34 #include "faces.h" | |
771 | 35 #include "lstream.h" |
428 | 36 #include "mule-ccl.h" |
872 | 37 #include "objects.h" |
428 | 38 |
39 /* The various pre-defined charsets. */ | |
40 | |
41 Lisp_Object Vcharset_ascii; | |
42 Lisp_Object Vcharset_control_1; | |
43 Lisp_Object Vcharset_latin_iso8859_1; | |
44 Lisp_Object Vcharset_latin_iso8859_2; | |
45 Lisp_Object Vcharset_latin_iso8859_3; | |
46 Lisp_Object Vcharset_latin_iso8859_4; | |
47 Lisp_Object Vcharset_thai_tis620; | |
48 Lisp_Object Vcharset_greek_iso8859_7; | |
49 Lisp_Object Vcharset_arabic_iso8859_6; | |
50 Lisp_Object Vcharset_hebrew_iso8859_8; | |
51 Lisp_Object Vcharset_katakana_jisx0201; | |
52 Lisp_Object Vcharset_latin_jisx0201; | |
53 Lisp_Object Vcharset_cyrillic_iso8859_5; | |
54 Lisp_Object Vcharset_latin_iso8859_9; | |
728 | 55 Lisp_Object Vcharset_latin_iso8859_15; |
428 | 56 Lisp_Object Vcharset_japanese_jisx0208_1978; |
57 Lisp_Object Vcharset_chinese_gb2312; | |
58 Lisp_Object Vcharset_japanese_jisx0208; | |
59 Lisp_Object Vcharset_korean_ksc5601; | |
60 Lisp_Object Vcharset_japanese_jisx0212; | |
61 Lisp_Object Vcharset_chinese_cns11643_1; | |
62 Lisp_Object Vcharset_chinese_cns11643_2; | |
63 Lisp_Object Vcharset_chinese_big5_1; | |
64 Lisp_Object Vcharset_chinese_big5_2; | |
65 Lisp_Object Vcharset_composite; | |
66 | |
67 struct charset_lookup *chlook; | |
68 | |
1204 | 69 static const struct memory_description charset_lookup_description_1[] = { |
771 | 70 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), NUM_LEADING_BYTES+4*128*2 }, |
428 | 71 { XD_END } |
72 }; | |
73 | |
1204 | 74 static const struct sized_memory_description charset_lookup_description = { |
440 | 75 sizeof (struct charset_lookup), |
428 | 76 charset_lookup_description_1 |
77 }; | |
78 | |
79 Lisp_Object Qcharsetp; | |
80 | |
81 /* Qdoc_string, Qdimension, Qchars defined in general.c */ | |
82 Lisp_Object Qregistry, Qfinal, Qgraphic; | |
83 Lisp_Object Qdirection; | |
84 Lisp_Object Qreverse_direction_charset; | |
85 Lisp_Object Qshort_name, Qlong_name; | |
86 | |
771 | 87 Lisp_Object Qfrom_unicode, Qto_unicode; |
88 | |
89 Lisp_Object | |
428 | 90 Qlatin_iso8859_1, |
91 Qlatin_iso8859_2, | |
92 Qlatin_iso8859_3, | |
93 Qlatin_iso8859_4, | |
94 Qthai_tis620, | |
95 Qgreek_iso8859_7, | |
96 Qarabic_iso8859_6, | |
97 Qhebrew_iso8859_8, | |
98 Qkatakana_jisx0201, | |
99 Qlatin_jisx0201, | |
100 Qcyrillic_iso8859_5, | |
101 Qlatin_iso8859_9, | |
728 | 102 Qlatin_iso8859_15, |
428 | 103 Qjapanese_jisx0208_1978, |
104 Qchinese_gb2312, | |
105 Qjapanese_jisx0208, | |
106 Qkorean_ksc5601, | |
107 Qjapanese_jisx0212, | |
108 Qchinese_cns11643_1, | |
109 Qchinese_cns11643_2, | |
110 Qchinese_big5_1, | |
111 Qchinese_big5_2, | |
112 Qcomposite; | |
113 | |
114 Lisp_Object Ql2r, Qr2l; | |
115 | |
116 Lisp_Object Vcharset_hash_table; | |
117 | |
118 | |
119 /************************************************************************/ | |
120 /* charset object */ | |
121 /************************************************************************/ | |
122 | |
123 static Lisp_Object | |
124 mark_charset (Lisp_Object obj) | |
125 { | |
440 | 126 Lisp_Charset *cs = XCHARSET (obj); |
428 | 127 |
128 mark_object (cs->short_name); | |
129 mark_object (cs->long_name); | |
130 mark_object (cs->doc_string); | |
131 mark_object (cs->registry); | |
132 mark_object (cs->ccl_program); | |
133 return cs->name; | |
134 } | |
135 | |
136 static void | |
2286 | 137 print_charset (Lisp_Object obj, Lisp_Object printcharfun, |
138 int UNUSED (escapeflag)) | |
428 | 139 { |
440 | 140 Lisp_Charset *cs = XCHARSET (obj); |
428 | 141 |
142 if (print_readably) | |
563 | 143 printing_unreadable_object ("#<charset %s 0x%x>", |
793 | 144 XSTRING_DATA (XSYMBOL (CHARSET_NAME (cs))-> |
563 | 145 name), |
146 cs->header.uid); | |
428 | 147 |
771 | 148 write_fmt_string_lisp (printcharfun, "#<charset %s %S %S %S", 4, |
149 CHARSET_NAME (cs), CHARSET_SHORT_NAME (cs), | |
150 CHARSET_LONG_NAME (cs), CHARSET_DOC_STRING (cs)); | |
151 write_fmt_string (printcharfun, " %s %s cols=%d g%d final='%c' reg=", | |
152 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" : | |
153 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" : | |
154 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" : | |
155 "96x96", | |
156 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : | |
157 "r2l", | |
158 CHARSET_COLUMNS (cs), | |
159 CHARSET_GRAPHIC (cs), | |
160 CHARSET_FINAL (cs)); | |
428 | 161 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0); |
771 | 162 write_fmt_string (printcharfun, " 0x%x>", cs->header.uid); |
163 } | |
164 | |
1204 | 165 static const struct memory_description charset_description[] = { |
166 { XD_INT, offsetof (Lisp_Charset, dimension) }, | |
167 { XD_INT, offsetof (Lisp_Charset, from_unicode_levels) }, | |
440 | 168 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) }, |
169 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) }, | |
170 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) }, | |
171 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) }, | |
172 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) }, | |
173 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) }, | |
174 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) }, | |
771 | 175 { XD_UNION, offsetof (Lisp_Charset, to_unicode_table), |
2775 | 176 XD_INDIRECT (0, 0), { &to_unicode_description }, XD_FLAG_NO_KKCC }, |
771 | 177 { XD_UNION, offsetof (Lisp_Charset, from_unicode_table), |
2775 | 178 XD_INDIRECT (1, 0), { &from_unicode_description }, XD_FLAG_NO_KKCC }, |
428 | 179 { XD_END } |
180 }; | |
181 | |
5117
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
182 DEFINE_LISP_OBJECT ("charset", charset, |
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
183 mark_charset, print_charset, 0, |
934 | 184 0, 0, charset_description, Lisp_Charset); |
428 | 185 /* Make a new charset. */ |
446 | 186 /* #### SJT Should generic properties be allowed? */ |
428 | 187 static Lisp_Object |
771 | 188 make_charset (int id, Lisp_Object name, int rep_bytes, |
189 int type, int columns, int graphic, | |
867 | 190 Ibyte final, int direction, Lisp_Object short_name, |
428 | 191 Lisp_Object long_name, Lisp_Object doc, |
771 | 192 Lisp_Object reg, int overwrite) |
428 | 193 { |
194 Lisp_Object obj; | |
771 | 195 Lisp_Charset *cs; |
196 | |
197 if (!overwrite) | |
198 { | |
5117
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
199 obj = ALLOC_LISP_OBJECT (charset); |
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
200 cs = XCHARSET (obj); |
771 | 201 |
202 if (final) | |
203 { | |
204 /* some charsets do not have final characters. This includes | |
205 ASCII, Control-1, Composite, and the two faux private | |
206 charsets. */ | |
207 assert (NILP (chlook-> | |
208 charset_by_attributes[type][final][direction])); | |
209 chlook->charset_by_attributes[type][final][direction] = obj; | |
210 } | |
440 | 211 |
771 | 212 assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE])); |
213 chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj; | |
214 } | |
215 else | |
216 { | |
217 Lisp_Object ret; | |
218 /* Actually overwrite the properties of the existing charset. | |
219 We do this because until now charsets could never be "deleted", | |
220 so parts of the code don't bother to GC charsets. */ | |
221 obj = chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]; | |
222 cs = XCHARSET (obj); | |
223 assert (EQ (chlook->charset_by_attributes[type][final][direction], | |
224 obj)); | |
225 | |
226 ret = Fremhash (XCHARSET_NAME (obj), Vcharset_hash_table); | |
227 assert (!NILP (ret)); | |
228 } | |
428 | 229 |
230 CHARSET_ID (cs) = id; | |
231 CHARSET_NAME (cs) = name; | |
232 CHARSET_SHORT_NAME (cs) = short_name; | |
233 CHARSET_LONG_NAME (cs) = long_name; | |
234 CHARSET_REP_BYTES (cs) = rep_bytes; | |
235 CHARSET_DIRECTION (cs) = direction; | |
236 CHARSET_TYPE (cs) = type; | |
237 CHARSET_COLUMNS (cs) = columns; | |
238 CHARSET_GRAPHIC (cs) = graphic; | |
239 CHARSET_FINAL (cs) = final; | |
240 CHARSET_DOC_STRING (cs) = doc; | |
241 CHARSET_REGISTRY (cs) = reg; | |
242 CHARSET_CCL_PROGRAM (cs) = Qnil; | |
243 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; | |
244 | |
771 | 245 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || |
246 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2; | |
247 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || | |
248 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96; | |
428 | 249 |
771 | 250 if (id == LEADING_BYTE_ASCII || id == LEADING_BYTE_CONTROL_1 |
251 #ifdef ENABLE_COMPOSITE_CHARS | |
252 || id == LEADING_BYTE_COMPOSITE | |
253 #endif | |
254 ) | |
255 assert (!overwrite); | |
256 else | |
428 | 257 { |
771 | 258 if (overwrite) |
259 free_charset_unicode_tables (obj); | |
260 init_charset_unicode_tables (obj); | |
428 | 261 } |
262 | |
263 /* Some charsets are "faux" and don't have names or really exist at | |
264 all except in the leading-byte table. */ | |
265 if (!NILP (name)) | |
771 | 266 { |
267 assert (NILP (Fgethash (name, Vcharset_hash_table, Qnil))); | |
268 Fputhash (name, obj, Vcharset_hash_table); | |
269 } | |
270 | |
271 recalculate_unicode_precedence (); | |
428 | 272 return obj; |
273 } | |
274 | |
275 static int | |
276 get_unallocated_leading_byte (int dimension) | |
277 { | |
278 int lb; | |
279 | |
280 if (dimension == 1) | |
281 { | |
771 | 282 if (chlook->next_allocated_1_byte_leading_byte > |
283 MAX_LEADING_BYTE_PRIVATE_1) | |
428 | 284 lb = 0; |
285 else | |
442 | 286 lb = chlook->next_allocated_1_byte_leading_byte++; |
428 | 287 } |
288 else | |
289 { | |
1747 | 290 /* awfully fragile, but correct */ |
291 #if MAX_LEADING_BYTE_PRIVATE_2 == 255 | |
292 if (chlook->next_allocated_2_byte_leading_byte == 0) | |
1749 | 293 #else |
771 | 294 if (chlook->next_allocated_2_byte_leading_byte > |
295 MAX_LEADING_BYTE_PRIVATE_2) | |
1747 | 296 #endif |
428 | 297 lb = 0; |
298 else | |
442 | 299 lb = chlook->next_allocated_2_byte_leading_byte++; |
428 | 300 } |
301 | |
302 if (!lb) | |
563 | 303 invalid_operation |
771 | 304 ("No more character sets free for this dimension", make_int (dimension)); |
428 | 305 |
306 return lb; | |
307 } | |
308 | |
309 | |
310 /************************************************************************/ | |
311 /* Basic charset Lisp functions */ | |
312 /************************************************************************/ | |
313 | |
788 | 314 void |
315 get_charset_limits (Lisp_Object charset, int *low, int *high) | |
316 { | |
317 Lisp_Charset *cs = XCHARSET (charset); | |
318 | |
319 if (EQ (charset, Vcharset_ascii)) *low = 0, *high = 127; | |
320 else if (EQ (charset, Vcharset_control_1)) *low = 0, *high = 31; | |
321 else if (CHARSET_CHARS (cs) == 94) *low = 33, *high = 126; | |
322 else /* CHARSET_CHARS (cs) == 96) */ *low = 32, *high = 127; | |
323 } | |
324 | |
428 | 325 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /* |
326 Return non-nil if OBJECT is a charset. | |
327 */ | |
328 (object)) | |
329 { | |
330 return CHARSETP (object) ? Qt : Qnil; | |
331 } | |
332 | |
333 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /* | |
334 Retrieve the charset of the given name. | |
335 If CHARSET-OR-NAME is a charset object, it is simply returned. | |
336 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset, | |
337 nil is returned. Otherwise the associated charset object is returned. | |
338 */ | |
339 (charset_or_name)) | |
340 { | |
341 if (CHARSETP (charset_or_name)) | |
342 return charset_or_name; | |
343 | |
344 CHECK_SYMBOL (charset_or_name); | |
345 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil); | |
346 } | |
347 | |
348 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /* | |
349 Retrieve the charset of the given name. | |
350 Same as `find-charset' except an error is signalled if there is no such | |
351 charset instead of returning nil. | |
352 */ | |
353 (name)) | |
354 { | |
355 Lisp_Object charset = Ffind_charset (name); | |
356 | |
357 if (NILP (charset)) | |
563 | 358 invalid_argument ("No such charset", name); |
428 | 359 return charset; |
360 } | |
361 | |
362 /* We store the charsets in hash tables with the names as the key and the | |
363 actual charset object as the value. Occasionally we need to use them | |
364 in a list format. These routines provide us with that. */ | |
365 struct charset_list_closure | |
366 { | |
367 Lisp_Object *charset_list; | |
368 }; | |
369 | |
370 static int | |
2286 | 371 add_charset_to_list_mapper (Lisp_Object UNUSED (key), Lisp_Object value, |
428 | 372 void *charset_list_closure) |
373 { | |
374 /* This function can GC */ | |
375 struct charset_list_closure *chcl = | |
376 (struct charset_list_closure*) charset_list_closure; | |
377 Lisp_Object *charset_list = chcl->charset_list; | |
378 | |
379 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list); | |
380 return 0; | |
381 } | |
382 | |
383 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /* | |
384 Return a list of the names of all defined charsets. | |
385 */ | |
386 ()) | |
387 { | |
388 Lisp_Object charset_list = Qnil; | |
389 struct gcpro gcpro1; | |
390 struct charset_list_closure charset_list_closure; | |
391 | |
392 GCPRO1 (charset_list); | |
393 charset_list_closure.charset_list = &charset_list; | |
394 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table, | |
395 &charset_list_closure); | |
396 UNGCPRO; | |
397 | |
398 return charset_list; | |
399 } | |
400 | |
401 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /* | |
444 | 402 Return the name of charset CHARSET. |
428 | 403 */ |
404 (charset)) | |
405 { | |
406 return XCHARSET_NAME (Fget_charset (charset)); | |
407 } | |
408 | |
446 | 409 /* #### SJT Should generic properties be allowed? */ |
428 | 410 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /* |
411 Define a new character set. | |
412 This function is for use with Mule support. | |
413 NAME is a symbol, the name by which the character set is normally referred. | |
414 DOC-STRING is a string describing the character set. | |
415 PROPS is a property list, describing the specific nature of the | |
416 character set. Recognized properties are: | |
417 | |
3025 | 418 `short-name' Short version of the charset name (ex: Latin-1) |
419 `long-name' Long version of the charset name (ex: ISO8859-1 (Latin-1)) | |
420 `registry' A regular expression matching the font registry field for | |
428 | 421 this character set. |
3025 | 422 `dimension' Number of octets used to index a character in this charset. |
428 | 423 Either 1 or 2. Defaults to 1. |
3025 | 424 `columns' Number of columns used to display a character in this charset. |
428 | 425 Only used in TTY mode. (Under X, the actual width of a |
426 character can be derived from the font used to display the | |
427 characters.) If unspecified, defaults to the dimension | |
428 (this is almost always the correct value). | |
3025 | 429 `chars' Number of characters in each dimension (94 or 96). |
428 | 430 Defaults to 94. Note that if the dimension is 2, the |
431 character set thus described is 94x94 or 96x96. | |
3025 | 432 `final' Final byte of ISO 2022 escape sequence. Must be |
428 | 433 supplied. Each combination of (DIMENSION, CHARS) defines a |
434 separate namespace for final bytes. Note that ISO | |
435 2022 restricts the final byte to the range | |
436 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if | |
437 dimension == 2. Note also that final bytes in the range | |
438 0x30 - 0x3F are reserved for user-defined (not official) | |
439 character sets. | |
3025 | 440 `graphic' 0 (use left half of font on output) or 1 (use right half |
428 | 441 of font on output). Defaults to 0. For example, for |
442 a font whose registry is ISO8859-1, the left half | |
443 (octets 0x20 - 0x7F) is the `ascii' character set, while | |
444 the right half (octets 0xA0 - 0xFF) is the `latin-1' | |
3025 | 445 character set. With `graphic' set to 0, the octets |
428 | 446 will have their high bit cleared; with it set to 1, |
447 the octets will have their high bit set. | |
3025 | 448 `direction' `l2r' (left-to-right) or `r2l' (right-to-left). |
449 Defaults to `l2r'. | |
450 `ccl-program' A compiled CCL program used to convert a character in | |
428 | 451 this charset into an index into the font. This is in |
3025 | 452 addition to the `graphic' property. The CCL program |
428 | 453 is passed the octets of the character, with the high |
454 bit cleared and set depending upon whether the value | |
3025 | 455 of the `graphic' property is 0 or 1. |
428 | 456 */ |
457 (name, doc_string, props)) | |
458 { | |
771 | 459 int id, dimension = 1, chars = 94, graphic = 0, columns = -1; |
867 | 460 Ibyte final = 0; |
428 | 461 int direction = CHARSET_LEFT_TO_RIGHT; |
462 int type; | |
463 Lisp_Object registry = Qnil; | |
771 | 464 Lisp_Object charset = Qnil; |
428 | 465 Lisp_Object ccl_program = Qnil; |
466 Lisp_Object short_name = Qnil, long_name = Qnil; | |
771 | 467 Lisp_Object existing_charset; |
468 int temporary = UNBOUNDP (name); | |
428 | 469 |
771 | 470 /* NOTE: name == Qunbound is a directive from the iso2022 code to |
471 create a temporary charset for an unknown final. We allow the final | |
472 to be overwritten with a real charset later on. */ | |
473 | |
428 | 474 if (!NILP (doc_string)) |
475 CHECK_STRING (doc_string); | |
771 | 476 if (!UNBOUNDP (name)) |
477 { | |
478 CHECK_SYMBOL (name); | |
428 | 479 |
771 | 480 charset = Ffind_charset (name); |
481 if (!NILP (charset)) | |
482 invalid_operation ("Cannot redefine existing charset", name); | |
483 } | |
428 | 484 |
442 | 485 { |
486 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props) | |
487 { | |
488 if (EQ (keyword, Qshort_name)) | |
489 { | |
490 CHECK_STRING (value); | |
491 short_name = value; | |
492 } | |
428 | 493 |
519 | 494 else if (EQ (keyword, Qlong_name)) |
442 | 495 { |
496 CHECK_STRING (value); | |
497 long_name = value; | |
498 } | |
428 | 499 |
442 | 500 else if (EQ (keyword, Qdimension)) |
501 { | |
502 CHECK_INT (value); | |
503 dimension = XINT (value); | |
504 if (dimension < 1 || dimension > 2) | |
3025 | 505 invalid_constant ("Invalid value for `dimension'", value); |
442 | 506 } |
428 | 507 |
442 | 508 else if (EQ (keyword, Qchars)) |
509 { | |
510 CHECK_INT (value); | |
511 chars = XINT (value); | |
512 if (chars != 94 && chars != 96) | |
3025 | 513 invalid_constant ("Invalid value for `chars'", value); |
442 | 514 } |
428 | 515 |
442 | 516 else if (EQ (keyword, Qcolumns)) |
517 { | |
518 CHECK_INT (value); | |
519 columns = XINT (value); | |
520 if (columns != 1 && columns != 2) | |
3025 | 521 invalid_constant ("Invalid value for `columns'", value); |
442 | 522 } |
428 | 523 |
442 | 524 else if (EQ (keyword, Qgraphic)) |
525 { | |
526 CHECK_INT (value); | |
527 graphic = XINT (value); | |
528 if (graphic < 0 || graphic > 1) | |
3025 | 529 invalid_constant ("Invalid value for `graphic'", value); |
442 | 530 } |
428 | 531 |
442 | 532 else if (EQ (keyword, Qregistry)) |
533 { | |
534 CHECK_STRING (value); | |
535 registry = value; | |
536 } | |
428 | 537 |
442 | 538 else if (EQ (keyword, Qdirection)) |
539 { | |
540 if (EQ (value, Ql2r)) | |
541 direction = CHARSET_LEFT_TO_RIGHT; | |
542 else if (EQ (value, Qr2l)) | |
543 direction = CHARSET_RIGHT_TO_LEFT; | |
544 else | |
3025 | 545 invalid_constant ("Invalid value for `direction'", value); |
442 | 546 } |
428 | 547 |
442 | 548 else if (EQ (keyword, Qfinal)) |
549 { | |
550 CHECK_CHAR_COERCE_INT (value); | |
551 final = XCHAR (value); | |
552 if (final < '0' || final > '~') | |
3025 | 553 invalid_constant ("Invalid value for `final'", value); |
442 | 554 } |
428 | 555 |
442 | 556 else if (EQ (keyword, Qccl_program)) |
557 { | |
444 | 558 struct ccl_program test_ccl; |
559 | |
560 if (setup_ccl_program (&test_ccl, value) < 0) | |
3025 | 561 invalid_argument ("Invalid value for `ccl-program'", value); |
442 | 562 ccl_program = value; |
563 } | |
564 else | |
563 | 565 invalid_constant ("Unrecognized property", keyword); |
442 | 566 } |
567 } | |
428 | 568 |
569 if (!final) | |
3025 | 570 invalid_argument ("`final' must be specified", Qunbound); |
428 | 571 if (dimension == 2 && final > 0x5F) |
563 | 572 invalid_constant |
428 | 573 ("Final must be in the range 0x30 - 0x5F for dimension == 2", |
574 make_char (final)); | |
575 | |
576 if (dimension == 1) | |
577 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96; | |
578 else | |
579 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96; | |
580 | |
826 | 581 existing_charset = charset_by_attributes (type, final, direction); |
771 | 582 |
583 if (!NILP (existing_charset) && !XCHARSET (existing_charset)->temporary) | |
578 | 584 invalid_argument |
793 | 585 ("Character set already defined for this DIMENSION/CHARS/FINAL/DIRECTION combo", |
771 | 586 existing_charset); |
587 | |
588 if (!NILP (existing_charset)) | |
589 /* Reuse same leading byte */ | |
590 id = XCHARSET_ID (existing_charset); | |
591 else | |
592 id = get_unallocated_leading_byte (dimension); | |
428 | 593 |
771 | 594 if (temporary) |
595 { | |
867 | 596 Ibyte tempname[80]; |
428 | 597 |
771 | 598 qxesprintf (tempname, "___temporary___%d__", id); |
599 name = intern_int (tempname); | |
600 } | |
428 | 601 if (NILP (doc_string)) |
602 doc_string = build_string (""); | |
603 if (NILP (registry)) | |
604 registry = build_string (""); | |
605 if (NILP (short_name)) | |
793 | 606 short_name = XSYMBOL (name)->name; |
428 | 607 if (NILP (long_name)) |
608 long_name = doc_string; | |
609 if (columns == -1) | |
610 columns = dimension; | |
771 | 611 |
428 | 612 charset = make_charset (id, name, dimension + 2, type, columns, graphic, |
771 | 613 final, direction, short_name, long_name, |
614 doc_string, registry, !NILP (existing_charset)); | |
615 | |
616 XCHARSET (charset)->temporary = temporary; | |
428 | 617 if (!NILP (ccl_program)) |
618 XCHARSET_CCL_PROGRAM (charset) = ccl_program; | |
771 | 619 |
793 | 620 { |
621 Lisp_Object revdircs = | |
826 | 622 charset_by_attributes (type, final, |
793 | 623 direction == CHARSET_LEFT_TO_RIGHT ? |
624 CHARSET_RIGHT_TO_LEFT : CHARSET_LEFT_TO_RIGHT); | |
625 if (!NILP (revdircs)) | |
626 { | |
627 XCHARSET_REVERSE_DIRECTION_CHARSET (revdircs) = charset; | |
628 XCHARSET_REVERSE_DIRECTION_CHARSET (charset) = revdircs; | |
629 } | |
630 } | |
631 | |
428 | 632 return charset; |
633 } | |
634 | |
635 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset, | |
636 2, 2, 0, /* | |
637 Make a charset equivalent to CHARSET but which goes in the opposite direction. | |
638 NEW-NAME is the name of the new charset. Return the new charset. | |
639 */ | |
640 (charset, new_name)) | |
641 { | |
642 Lisp_Object new_charset = Qnil; | |
771 | 643 int id, dimension, columns, graphic; |
867 | 644 Ibyte final; |
428 | 645 int direction, type; |
646 Lisp_Object registry, doc_string, short_name, long_name; | |
440 | 647 Lisp_Charset *cs; |
428 | 648 |
649 charset = Fget_charset (charset); | |
650 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset))) | |
563 | 651 invalid_operation ("Charset already has reverse-direction charset", |
793 | 652 charset); |
428 | 653 |
654 CHECK_SYMBOL (new_name); | |
655 if (!NILP (Ffind_charset (new_name))) | |
563 | 656 invalid_operation ("Cannot redefine existing charset", new_name); |
428 | 657 |
658 cs = XCHARSET (charset); | |
659 | |
660 type = CHARSET_TYPE (cs); | |
661 columns = CHARSET_COLUMNS (cs); | |
662 dimension = CHARSET_DIMENSION (cs); | |
663 id = get_unallocated_leading_byte (dimension); | |
664 | |
665 graphic = CHARSET_GRAPHIC (cs); | |
666 final = CHARSET_FINAL (cs); | |
667 direction = CHARSET_RIGHT_TO_LEFT; | |
668 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT) | |
669 direction = CHARSET_LEFT_TO_RIGHT; | |
670 doc_string = CHARSET_DOC_STRING (cs); | |
671 short_name = CHARSET_SHORT_NAME (cs); | |
672 long_name = CHARSET_LONG_NAME (cs); | |
673 registry = CHARSET_REGISTRY (cs); | |
674 | |
675 new_charset = make_charset (id, new_name, dimension + 2, type, columns, | |
676 graphic, final, direction, short_name, long_name, | |
771 | 677 doc_string, registry, 0); |
428 | 678 |
679 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset; | |
680 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset; | |
681 | |
682 return new_charset; | |
683 } | |
684 | |
685 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset, | |
686 1, 1, 0, /* | |
687 Return the reverse-direction charset parallel to CHARSET, if any. | |
688 This is the charset with the same properties (in particular, the same | |
689 dimension, number of characters per dimension, and final byte) as | |
690 CHARSET but whose characters are displayed in the opposite direction. | |
691 */ | |
692 (charset)) | |
693 { | |
694 charset = Fget_charset (charset); | |
695 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset); | |
696 } | |
697 | |
698 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /* | |
699 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION. | |
700 If DIRECTION is omitted, both directions will be checked (left-to-right | |
701 will be returned if character sets exist for both directions). | |
702 */ | |
703 (dimension, chars, final, direction)) | |
704 { | |
705 int dm, ch, fi, di = -1; | |
706 int type; | |
707 Lisp_Object obj = Qnil; | |
708 | |
709 CHECK_INT (dimension); | |
710 dm = XINT (dimension); | |
711 if (dm < 1 || dm > 2) | |
563 | 712 invalid_constant ("Invalid value for DIMENSION", dimension); |
428 | 713 |
714 CHECK_INT (chars); | |
715 ch = XINT (chars); | |
716 if (ch != 94 && ch != 96) | |
563 | 717 invalid_constant ("Invalid value for CHARS", chars); |
428 | 718 |
719 CHECK_CHAR_COERCE_INT (final); | |
720 fi = XCHAR (final); | |
721 if (fi < '0' || fi > '~') | |
563 | 722 invalid_constant ("Invalid value for FINAL", final); |
428 | 723 |
724 if (EQ (direction, Ql2r)) | |
725 di = CHARSET_LEFT_TO_RIGHT; | |
726 else if (EQ (direction, Qr2l)) | |
727 di = CHARSET_RIGHT_TO_LEFT; | |
728 else if (!NILP (direction)) | |
563 | 729 invalid_constant ("Invalid value for DIRECTION", direction); |
428 | 730 |
731 if (dm == 2 && fi > 0x5F) | |
563 | 732 invalid_constant |
428 | 733 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final); |
734 | |
735 if (dm == 1) | |
736 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96; | |
737 else | |
738 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96; | |
739 | |
740 if (di == -1) | |
741 { | |
826 | 742 obj = charset_by_attributes (type, fi, CHARSET_LEFT_TO_RIGHT); |
428 | 743 if (NILP (obj)) |
826 | 744 obj = charset_by_attributes (type, fi, CHARSET_RIGHT_TO_LEFT); |
428 | 745 } |
746 else | |
826 | 747 obj = charset_by_attributes (type, fi, di); |
428 | 748 |
749 if (CHARSETP (obj)) | |
750 return XCHARSET_NAME (obj); | |
751 return obj; | |
752 } | |
753 | |
754 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /* | |
755 Return short name of CHARSET. | |
756 */ | |
757 (charset)) | |
758 { | |
759 return XCHARSET_SHORT_NAME (Fget_charset (charset)); | |
760 } | |
761 | |
762 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /* | |
763 Return long name of CHARSET. | |
764 */ | |
765 (charset)) | |
766 { | |
767 return XCHARSET_LONG_NAME (Fget_charset (charset)); | |
768 } | |
769 | |
770 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /* | |
771 Return description of CHARSET. | |
772 */ | |
773 (charset)) | |
774 { | |
775 return XCHARSET_DOC_STRING (Fget_charset (charset)); | |
776 } | |
777 | |
778 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /* | |
779 Return dimension of CHARSET. | |
780 */ | |
781 (charset)) | |
782 { | |
783 return make_int (XCHARSET_DIMENSION (Fget_charset (charset))); | |
784 } | |
785 | |
786 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /* | |
446 | 787 Return property PROP of CHARSET, a charset object or symbol naming a charset. |
428 | 788 Recognized properties are those listed in `make-charset', as well as |
3025 | 789 `name' and `doc-string'. |
428 | 790 */ |
791 (charset, prop)) | |
792 { | |
440 | 793 Lisp_Charset *cs; |
428 | 794 |
795 charset = Fget_charset (charset); | |
796 cs = XCHARSET (charset); | |
797 | |
798 CHECK_SYMBOL (prop); | |
799 if (EQ (prop, Qname)) return CHARSET_NAME (cs); | |
800 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs); | |
801 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs); | |
802 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs); | |
803 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs)); | |
804 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs)); | |
805 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs)); | |
806 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs)); | |
807 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs)); | |
808 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs); | |
809 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs); | |
810 if (EQ (prop, Qdirection)) | |
811 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l; | |
812 if (EQ (prop, Qreverse_direction_charset)) | |
813 { | |
814 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs); | |
446 | 815 /* #### Is this translation OK? If so, error checking sufficient? */ |
816 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj; | |
428 | 817 } |
563 | 818 invalid_constant ("Unrecognized charset property name", prop); |
1204 | 819 RETURN_NOT_REACHED (Qnil); |
428 | 820 } |
821 | |
822 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /* | |
823 Return charset identification number of CHARSET. | |
824 */ | |
825 (charset)) | |
826 { | |
793 | 827 return make_int (XCHARSET_LEADING_BYTE (Fget_charset (charset))); |
428 | 828 } |
829 | |
830 /* #### We need to figure out which properties we really want to | |
831 allow to be set. */ | |
832 | |
833 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /* | |
3025 | 834 Set the `ccl-program' property of CHARSET to CCL-PROGRAM. |
428 | 835 */ |
836 (charset, ccl_program)) | |
837 { | |
444 | 838 struct ccl_program test_ccl; |
839 | |
428 | 840 charset = Fget_charset (charset); |
444 | 841 if (setup_ccl_program (&test_ccl, ccl_program) < 0) |
563 | 842 invalid_argument ("Invalid ccl-program", ccl_program); |
428 | 843 XCHARSET_CCL_PROGRAM (charset) = ccl_program; |
510 | 844 face_property_was_changed (Vdefault_face, Qfont, Qglobal); |
428 | 845 return Qnil; |
846 } | |
847 | |
848 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */ | |
849 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /* | |
3025 | 850 Set the `registry' property of CHARSET to REGISTRY. |
428 | 851 */ |
852 (charset, registry)) | |
853 { | |
854 charset = Fget_charset (charset); | |
855 CHECK_STRING (registry); | |
856 XCHARSET_REGISTRY (charset) = registry; | |
857 invalidate_charset_font_caches (charset); | |
858 face_property_was_changed (Vdefault_face, Qfont, Qglobal); | |
859 return Qnil; | |
860 } | |
861 | |
862 | |
863 /************************************************************************/ | |
771 | 864 /* memory usage */ |
428 | 865 /************************************************************************/ |
866 | |
771 | 867 #ifdef MEMORY_USAGE_STATS |
428 | 868 |
771 | 869 struct charset_stats |
870 { | |
871 int from_unicode; | |
872 int to_unicode; | |
873 int other; | |
874 }; | |
428 | 875 |
771 | 876 static void |
877 compute_charset_usage (Lisp_Object charset, struct charset_stats *stats, | |
878 struct overhead_stats *ovstats) | |
428 | 879 { |
771 | 880 struct Lisp_Charset *c = XCHARSET (charset); |
881 xzero (*stats); | |
3024 | 882 stats->other += LISPOBJ_STORAGE_SIZE (c, sizeof (*c), ovstats); |
771 | 883 stats->from_unicode += compute_from_unicode_table_size (charset, ovstats); |
884 stats->to_unicode += compute_to_unicode_table_size (charset, ovstats); | |
438 | 885 } |
886 | |
771 | 887 DEFUN ("charset-memory-usage", Fcharset_memory_usage, 1, 1, 0, /* |
888 Return stats about the memory usage of charset CHARSET. | |
889 The values returned are in the form of an alist of usage types and | |
890 byte counts. The byte counts attempt to encompass all the memory used | |
891 by the charset (separate from the memory logically associated with a | |
892 charset or frame), including internal structures and any malloc() | |
893 overhead associated with them. In practice, the byte counts are | |
894 underestimated for various reasons, e.g. because certain memory usage | |
895 is very hard to determine \(e.g. the amount of memory used inside the | |
896 Xt library or inside the X server). | |
428 | 897 |
771 | 898 Multiple slices of the total memory usage may be returned, separated |
899 by a nil. Each slice represents a particular view of the memory, a | |
900 particular way of partitioning it into groups. Within a slice, there | |
901 is no overlap between the groups of memory, and each slice collectively | |
902 represents all the memory concerned. | |
903 */ | |
904 (charset)) | |
905 { | |
906 struct charset_stats stats; | |
907 struct overhead_stats ovstats; | |
908 Lisp_Object val = Qnil; | |
428 | 909 |
771 | 910 charset = Fget_charset (charset); |
911 xzero (ovstats); | |
912 compute_charset_usage (charset, &stats, &ovstats); | |
428 | 913 |
771 | 914 val = acons (Qfrom_unicode, make_int (stats.from_unicode), val); |
915 val = acons (Qto_unicode, make_int (stats.to_unicode), val); | |
916 val = Fcons (Qnil, val); | |
917 val = acons (Qactually_requested, make_int (ovstats.was_requested), val); | |
918 val = acons (Qmalloc_overhead, make_int (ovstats.malloc_overhead), val); | |
919 val = acons (Qgap_overhead, make_int (ovstats.gap_overhead), val); | |
920 val = acons (Qdynarr_overhead, make_int (ovstats.dynarr_overhead), val); | |
921 | |
922 return Fnreverse (val); | |
428 | 923 } |
924 | |
771 | 925 #endif /* MEMORY_USAGE_STATS */ |
428 | 926 |
927 | |
928 /************************************************************************/ | |
929 /* initialization */ | |
930 /************************************************************************/ | |
931 | |
932 void | |
933 syms_of_mule_charset (void) | |
934 { | |
5117
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
935 INIT_LISP_OBJECT (charset); |
442 | 936 |
428 | 937 DEFSUBR (Fcharsetp); |
938 DEFSUBR (Ffind_charset); | |
939 DEFSUBR (Fget_charset); | |
940 DEFSUBR (Fcharset_list); | |
941 DEFSUBR (Fcharset_name); | |
942 DEFSUBR (Fmake_charset); | |
943 DEFSUBR (Fmake_reverse_direction_charset); | |
793 | 944 DEFSUBR (Fcharset_reverse_direction_charset); |
428 | 945 DEFSUBR (Fcharset_from_attributes); |
946 DEFSUBR (Fcharset_short_name); | |
947 DEFSUBR (Fcharset_long_name); | |
948 DEFSUBR (Fcharset_description); | |
949 DEFSUBR (Fcharset_dimension); | |
950 DEFSUBR (Fcharset_property); | |
951 DEFSUBR (Fcharset_id); | |
952 DEFSUBR (Fset_charset_ccl_program); | |
953 DEFSUBR (Fset_charset_registry); | |
954 | |
771 | 955 #ifdef MEMORY_USAGE_STATS |
956 DEFSUBR (Fcharset_memory_usage); | |
428 | 957 #endif |
958 | |
563 | 959 DEFSYMBOL (Qcharsetp); |
960 DEFSYMBOL (Qregistry); | |
961 DEFSYMBOL (Qfinal); | |
962 DEFSYMBOL (Qgraphic); | |
963 DEFSYMBOL (Qdirection); | |
964 DEFSYMBOL (Qreverse_direction_charset); | |
965 DEFSYMBOL (Qshort_name); | |
966 DEFSYMBOL (Qlong_name); | |
428 | 967 |
771 | 968 DEFSYMBOL (Qfrom_unicode); |
969 DEFSYMBOL (Qto_unicode); | |
970 | |
563 | 971 DEFSYMBOL (Ql2r); |
972 DEFSYMBOL (Qr2l); | |
428 | 973 |
974 /* Charsets, compatible with FSF 20.3 | |
975 Naming convention is Script-Charset[-Edition] */ | |
563 | 976 DEFSYMBOL (Qlatin_iso8859_1); |
977 DEFSYMBOL (Qlatin_iso8859_2); | |
978 DEFSYMBOL (Qlatin_iso8859_3); | |
979 DEFSYMBOL (Qlatin_iso8859_4); | |
980 DEFSYMBOL (Qthai_tis620); | |
981 DEFSYMBOL (Qgreek_iso8859_7); | |
982 DEFSYMBOL (Qarabic_iso8859_6); | |
983 DEFSYMBOL (Qhebrew_iso8859_8); | |
984 DEFSYMBOL (Qkatakana_jisx0201); | |
985 DEFSYMBOL (Qlatin_jisx0201); | |
986 DEFSYMBOL (Qcyrillic_iso8859_5); | |
987 DEFSYMBOL (Qlatin_iso8859_9); | |
728 | 988 DEFSYMBOL (Qlatin_iso8859_15); |
563 | 989 DEFSYMBOL (Qjapanese_jisx0208_1978); |
990 DEFSYMBOL (Qchinese_gb2312); | |
991 DEFSYMBOL (Qjapanese_jisx0208); | |
992 DEFSYMBOL (Qkorean_ksc5601); | |
993 DEFSYMBOL (Qjapanese_jisx0212); | |
994 DEFSYMBOL (Qchinese_cns11643_1); | |
995 DEFSYMBOL (Qchinese_cns11643_2); | |
996 DEFSYMBOL (Qchinese_big5_1); | |
997 DEFSYMBOL (Qchinese_big5_2); | |
428 | 998 |
563 | 999 DEFSYMBOL (Qcomposite); |
428 | 1000 } |
1001 | |
1002 void | |
1003 vars_of_mule_charset (void) | |
1004 { | |
1005 int i, j, k; | |
1006 | |
452 | 1007 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */ |
2367 | 1008 dump_add_root_block_ptr (&chlook, &charset_lookup_description); |
428 | 1009 |
1010 /* Table of charsets indexed by leading byte. */ | |
1011 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++) | |
1012 chlook->charset_by_leading_byte[i] = Qnil; | |
1013 | |
1014 /* Table of charsets indexed by type/final-byte/direction. */ | |
1015 for (i = 0; i < countof (chlook->charset_by_attributes); i++) | |
1016 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++) | |
1017 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++) | |
1018 chlook->charset_by_attributes[i][j][k] = Qnil; | |
1019 | |
442 | 1020 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; |
1021 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; | |
771 | 1022 |
1023 staticpro (&Vcharset_hash_table); | |
1024 Vcharset_hash_table = | |
1025 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); | |
428 | 1026 } |
1027 | |
1028 void | |
1029 complex_vars_of_mule_charset (void) | |
1030 { | |
1031 /* Predefined character sets. We store them into variables for | |
1032 ease of access. */ | |
1033 | |
1034 staticpro (&Vcharset_ascii); | |
1035 Vcharset_ascii = | |
1036 make_charset (LEADING_BYTE_ASCII, Qascii, 1, | |
1037 CHARSET_TYPE_94, 1, 0, 'B', | |
1038 CHARSET_LEFT_TO_RIGHT, | |
1039 build_string ("ASCII"), | |
771 | 1040 build_msg_string ("ASCII"), |
1041 build_msg_string ("ASCII (ISO646 IRV)"), | |
1042 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"), 0); | |
428 | 1043 staticpro (&Vcharset_control_1); |
1044 Vcharset_control_1 = | |
1045 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2, | |
1046 CHARSET_TYPE_94, 1, 1, 0, | |
1047 CHARSET_LEFT_TO_RIGHT, | |
1048 build_string ("C1"), | |
771 | 1049 build_msg_string ("Control characters"), |
1050 build_msg_string ("Control characters 128-191"), | |
1051 build_string (""), 0); | |
428 | 1052 staticpro (&Vcharset_latin_iso8859_1); |
1053 Vcharset_latin_iso8859_1 = | |
1054 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2, | |
1055 CHARSET_TYPE_96, 1, 1, 'A', | |
1056 CHARSET_LEFT_TO_RIGHT, | |
1057 build_string ("Latin-1"), | |
771 | 1058 build_msg_string ("ISO8859-1 (Latin-1)"), |
1059 build_msg_string ("ISO8859-1 (Latin-1)"), | |
1060 build_string ("iso8859-1"), 0); | |
428 | 1061 staticpro (&Vcharset_latin_iso8859_2); |
1062 Vcharset_latin_iso8859_2 = | |
1063 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2, | |
1064 CHARSET_TYPE_96, 1, 1, 'B', | |
1065 CHARSET_LEFT_TO_RIGHT, | |
1066 build_string ("Latin-2"), | |
771 | 1067 build_msg_string ("ISO8859-2 (Latin-2)"), |
1068 build_msg_string ("ISO8859-2 (Latin-2)"), | |
1069 build_string ("iso8859-2"), 0); | |
428 | 1070 staticpro (&Vcharset_latin_iso8859_3); |
1071 Vcharset_latin_iso8859_3 = | |
1072 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2, | |
1073 CHARSET_TYPE_96, 1, 1, 'C', | |
1074 CHARSET_LEFT_TO_RIGHT, | |
1075 build_string ("Latin-3"), | |
771 | 1076 build_msg_string ("ISO8859-3 (Latin-3)"), |
1077 build_msg_string ("ISO8859-3 (Latin-3)"), | |
1078 build_string ("iso8859-3"), 0); | |
428 | 1079 staticpro (&Vcharset_latin_iso8859_4); |
1080 Vcharset_latin_iso8859_4 = | |
1081 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2, | |
1082 CHARSET_TYPE_96, 1, 1, 'D', | |
1083 CHARSET_LEFT_TO_RIGHT, | |
1084 build_string ("Latin-4"), | |
771 | 1085 build_msg_string ("ISO8859-4 (Latin-4)"), |
1086 build_msg_string ("ISO8859-4 (Latin-4)"), | |
1087 build_string ("iso8859-4"), 0); | |
428 | 1088 staticpro (&Vcharset_thai_tis620); |
1089 Vcharset_thai_tis620 = | |
1090 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2, | |
1091 CHARSET_TYPE_96, 1, 1, 'T', | |
1092 CHARSET_LEFT_TO_RIGHT, | |
1093 build_string ("TIS620"), | |
771 | 1094 build_msg_string ("TIS620 (Thai)"), |
1095 build_msg_string ("TIS620.2529 (Thai)"), | |
1096 build_string ("tis620"),0); | |
428 | 1097 staticpro (&Vcharset_greek_iso8859_7); |
1098 Vcharset_greek_iso8859_7 = | |
1099 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2, | |
1100 CHARSET_TYPE_96, 1, 1, 'F', | |
1101 CHARSET_LEFT_TO_RIGHT, | |
1102 build_string ("ISO8859-7"), | |
771 | 1103 build_msg_string ("ISO8859-7 (Greek)"), |
1104 build_msg_string ("ISO8859-7 (Greek)"), | |
1105 build_string ("iso8859-7"), 0); | |
428 | 1106 staticpro (&Vcharset_arabic_iso8859_6); |
1107 Vcharset_arabic_iso8859_6 = | |
1108 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2, | |
1109 CHARSET_TYPE_96, 1, 1, 'G', | |
1110 CHARSET_RIGHT_TO_LEFT, | |
1111 build_string ("ISO8859-6"), | |
771 | 1112 build_msg_string ("ISO8859-6 (Arabic)"), |
1113 build_msg_string ("ISO8859-6 (Arabic)"), | |
1114 build_string ("iso8859-6"), 0); | |
428 | 1115 staticpro (&Vcharset_hebrew_iso8859_8); |
1116 Vcharset_hebrew_iso8859_8 = | |
1117 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2, | |
1118 CHARSET_TYPE_96, 1, 1, 'H', | |
1119 CHARSET_RIGHT_TO_LEFT, | |
1120 build_string ("ISO8859-8"), | |
771 | 1121 build_msg_string ("ISO8859-8 (Hebrew)"), |
1122 build_msg_string ("ISO8859-8 (Hebrew)"), | |
1123 build_string ("iso8859-8"), 0); | |
428 | 1124 staticpro (&Vcharset_katakana_jisx0201); |
1125 Vcharset_katakana_jisx0201 = | |
1126 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2, | |
1127 CHARSET_TYPE_94, 1, 1, 'I', | |
1128 CHARSET_LEFT_TO_RIGHT, | |
1129 build_string ("JISX0201 Kana"), | |
771 | 1130 build_msg_string ("JISX0201.1976 (Japanese Kana)"), |
1131 build_msg_string ("JISX0201.1976 Japanese Kana"), | |
1132 build_string ("jisx0201.1976"), 0); | |
428 | 1133 staticpro (&Vcharset_latin_jisx0201); |
1134 Vcharset_latin_jisx0201 = | |
1135 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2, | |
1136 CHARSET_TYPE_94, 1, 0, 'J', | |
1137 CHARSET_LEFT_TO_RIGHT, | |
1138 build_string ("JISX0201 Roman"), | |
771 | 1139 build_msg_string ("JISX0201.1976 (Japanese Roman)"), |
1140 build_msg_string ("JISX0201.1976 Japanese Roman"), | |
1141 build_string ("jisx0201.1976"), 0); | |
428 | 1142 staticpro (&Vcharset_cyrillic_iso8859_5); |
1143 Vcharset_cyrillic_iso8859_5 = | |
1144 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2, | |
1145 CHARSET_TYPE_96, 1, 1, 'L', | |
1146 CHARSET_LEFT_TO_RIGHT, | |
1147 build_string ("ISO8859-5"), | |
771 | 1148 build_msg_string ("ISO8859-5 (Cyrillic)"), |
1149 build_msg_string ("ISO8859-5 (Cyrillic)"), | |
1150 build_string ("iso8859-5"), 0); | |
428 | 1151 staticpro (&Vcharset_latin_iso8859_9); |
1152 Vcharset_latin_iso8859_9 = | |
1153 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2, | |
1154 CHARSET_TYPE_96, 1, 1, 'M', | |
1155 CHARSET_LEFT_TO_RIGHT, | |
1156 build_string ("Latin-5"), | |
771 | 1157 build_msg_string ("ISO8859-9 (Latin-5)"), |
1158 build_msg_string ("ISO8859-9 (Latin-5)"), | |
1159 build_string ("iso8859-9"), 0); | |
728 | 1160 staticpro (&Vcharset_latin_iso8859_15); |
1161 Vcharset_latin_iso8859_15 = | |
1162 make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2, | |
1163 CHARSET_TYPE_96, 1, 1, 'b', | |
1164 CHARSET_LEFT_TO_RIGHT, | |
1165 build_string ("Latin-9"), | |
771 | 1166 build_msg_string ("ISO8859-15 (Latin-9)"), |
1167 build_msg_string ("ISO8859-15 (Latin-9)"), | |
1168 build_string ("iso8859-15"), 0); | |
428 | 1169 staticpro (&Vcharset_japanese_jisx0208_1978); |
1170 Vcharset_japanese_jisx0208_1978 = | |
1171 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3, | |
1172 CHARSET_TYPE_94X94, 2, 0, '@', | |
1173 CHARSET_LEFT_TO_RIGHT, | |
1174 build_string ("JISX0208.1978"), | |
771 | 1175 build_msg_string ("JISX0208.1978 (Japanese)"), |
1176 build_msg_string | |
428 | 1177 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"), |
771 | 1178 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), 0); |
428 | 1179 staticpro (&Vcharset_chinese_gb2312); |
1180 Vcharset_chinese_gb2312 = | |
1181 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3, | |
1182 CHARSET_TYPE_94X94, 2, 0, 'A', | |
1183 CHARSET_LEFT_TO_RIGHT, | |
1184 build_string ("GB2312"), | |
771 | 1185 build_msg_string ("GB2312)"), |
1186 build_msg_string ("GB2312 Chinese simplified"), | |
1187 build_string ("gb2312"), 0); | |
428 | 1188 staticpro (&Vcharset_japanese_jisx0208); |
1189 Vcharset_japanese_jisx0208 = | |
1190 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3, | |
1191 CHARSET_TYPE_94X94, 2, 0, 'B', | |
1192 CHARSET_LEFT_TO_RIGHT, | |
1193 build_string ("JISX0208"), | |
771 | 1194 build_msg_string ("JISX0208.1983/1990 (Japanese)"), |
1195 build_msg_string ("JISX0208.1983/1990 Japanese Kanji"), | |
1196 build_string ("jisx0208.19\\(83\\|90\\)"), 0); | |
428 | 1197 staticpro (&Vcharset_korean_ksc5601); |
1198 Vcharset_korean_ksc5601 = | |
1199 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3, | |
1200 CHARSET_TYPE_94X94, 2, 0, 'C', | |
1201 CHARSET_LEFT_TO_RIGHT, | |
1202 build_string ("KSC5601"), | |
771 | 1203 build_msg_string ("KSC5601 (Korean"), |
1204 build_msg_string ("KSC5601 Korean Hangul and Hanja"), | |
1205 build_string ("ksc5601"), 0); | |
428 | 1206 staticpro (&Vcharset_japanese_jisx0212); |
1207 Vcharset_japanese_jisx0212 = | |
1208 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3, | |
1209 CHARSET_TYPE_94X94, 2, 0, 'D', | |
1210 CHARSET_LEFT_TO_RIGHT, | |
1211 build_string ("JISX0212"), | |
771 | 1212 build_msg_string ("JISX0212 (Japanese)"), |
1213 build_msg_string ("JISX0212 Japanese Supplement"), | |
1214 build_string ("jisx0212"), 0); | |
428 | 1215 |
1216 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$" | |
1217 staticpro (&Vcharset_chinese_cns11643_1); | |
1218 Vcharset_chinese_cns11643_1 = | |
1219 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3, | |
1220 CHARSET_TYPE_94X94, 2, 0, 'G', | |
1221 CHARSET_LEFT_TO_RIGHT, | |
1222 build_string ("CNS11643-1"), | |
771 | 1223 build_msg_string ("CNS11643-1 (Chinese traditional)"), |
1224 build_msg_string | |
428 | 1225 ("CNS 11643 Plane 1 Chinese traditional"), |
771 | 1226 build_string (CHINESE_CNS_PLANE_RE("1")), 0); |
428 | 1227 staticpro (&Vcharset_chinese_cns11643_2); |
1228 Vcharset_chinese_cns11643_2 = | |
1229 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3, | |
1230 CHARSET_TYPE_94X94, 2, 0, 'H', | |
1231 CHARSET_LEFT_TO_RIGHT, | |
1232 build_string ("CNS11643-2"), | |
771 | 1233 build_msg_string ("CNS11643-2 (Chinese traditional)"), |
1234 build_msg_string | |
428 | 1235 ("CNS 11643 Plane 2 Chinese traditional"), |
771 | 1236 build_string (CHINESE_CNS_PLANE_RE("2")), 0); |
428 | 1237 staticpro (&Vcharset_chinese_big5_1); |
1238 Vcharset_chinese_big5_1 = | |
1239 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3, | |
1240 CHARSET_TYPE_94X94, 2, 0, '0', | |
1241 CHARSET_LEFT_TO_RIGHT, | |
1242 build_string ("Big5"), | |
771 | 1243 build_msg_string ("Big5 (Level-1)"), |
1244 build_msg_string | |
428 | 1245 ("Big5 Level-1 Chinese traditional"), |
771 | 1246 build_string ("big5"), 0); |
428 | 1247 staticpro (&Vcharset_chinese_big5_2); |
1248 Vcharset_chinese_big5_2 = | |
1249 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3, | |
1250 CHARSET_TYPE_94X94, 2, 0, '1', | |
1251 CHARSET_LEFT_TO_RIGHT, | |
1252 build_string ("Big5"), | |
771 | 1253 build_msg_string ("Big5 (Level-2)"), |
1254 build_msg_string | |
428 | 1255 ("Big5 Level-2 Chinese traditional"), |
771 | 1256 build_string ("big5"), 0); |
428 | 1257 |
1258 | |
1259 #ifdef ENABLE_COMPOSITE_CHARS | |
1260 /* #### For simplicity, we put composite chars into a 96x96 charset. | |
1261 This is going to lead to problems because you can run out of | |
1262 room, esp. as we don't yet recycle numbers. */ | |
1263 staticpro (&Vcharset_composite); | |
1264 Vcharset_composite = | |
1265 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3, | |
1266 CHARSET_TYPE_96X96, 2, 0, 0, | |
1267 CHARSET_LEFT_TO_RIGHT, | |
1268 build_string ("Composite"), | |
771 | 1269 build_msg_string ("Composite characters"), |
1270 build_msg_string ("Composite characters"), | |
1271 build_string (""), 0); | |
1272 #else | |
1273 /* We create a hack so that we have a way of storing ESC 0 and ESC 1 | |
1274 sequences as "characters", so that they will be output correctly. */ | |
1275 staticpro (&Vcharset_composite); | |
1276 Vcharset_composite = | |
1277 make_charset (LEADING_BYTE_COMPOSITE_REPLACEMENT, Qcomposite, 2, | |
1278 CHARSET_TYPE_96, 1, 1, '|', | |
1279 CHARSET_LEFT_TO_RIGHT, | |
1280 build_string ("Composite hack"), | |
1281 build_msg_string ("Composite characters hack"), | |
1282 build_msg_string ("Composite characters hack"), | |
1283 build_string (""), 0); | |
428 | 1284 #endif /* ENABLE_COMPOSITE_CHARS */ |
1285 } |