comparison src/mule-charset.c @ 428:3ecd8885ac67 r21-2-22

Import from CVS: tag r21-2-22
author cvs
date Mon, 13 Aug 2007 11:28:15 +0200
parents
children 84b14dcb0985
comparison
equal deleted inserted replaced
427:0a0253eac470 428:3ecd8885ac67
1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4
5 This file is part of XEmacs.
6
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
10 later version.
11
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 /* Synched up with: FSF 20.3. Not in FSF. */
23
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25
26 #include <config.h>
27 #include "lisp.h"
28
29 #include "buffer.h"
30 #include "chartab.h"
31 #include "elhash.h"
32 #include "lstream.h"
33 #include "device.h"
34 #include "faces.h"
35 #include "mule-ccl.h"
36
37 /* The various pre-defined charsets. */
38
39 Lisp_Object Vcharset_ascii;
40 Lisp_Object Vcharset_control_1;
41 Lisp_Object Vcharset_latin_iso8859_1;
42 Lisp_Object Vcharset_latin_iso8859_2;
43 Lisp_Object Vcharset_latin_iso8859_3;
44 Lisp_Object Vcharset_latin_iso8859_4;
45 Lisp_Object Vcharset_thai_tis620;
46 Lisp_Object Vcharset_greek_iso8859_7;
47 Lisp_Object Vcharset_arabic_iso8859_6;
48 Lisp_Object Vcharset_hebrew_iso8859_8;
49 Lisp_Object Vcharset_katakana_jisx0201;
50 Lisp_Object Vcharset_latin_jisx0201;
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
52 Lisp_Object Vcharset_latin_iso8859_9;
53 Lisp_Object Vcharset_japanese_jisx0208_1978;
54 Lisp_Object Vcharset_chinese_gb2312;
55 Lisp_Object Vcharset_japanese_jisx0208;
56 Lisp_Object Vcharset_korean_ksc5601;
57 Lisp_Object Vcharset_japanese_jisx0212;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_chinese_big5_1;
61 Lisp_Object Vcharset_chinese_big5_2;
62
63 #ifdef ENABLE_COMPOSITE_CHARS
64 Lisp_Object Vcharset_composite;
65
66 /* Hash tables for composite chars. One maps string representing
67 composed chars to their equivalent chars; one goes the
68 other way. */
69 Lisp_Object Vcomposite_char_char2string_hash_table;
70 Lisp_Object Vcomposite_char_string2char_hash_table;
71
72 static int composite_char_row_next;
73 static int composite_char_col_next;
74
75 #endif /* ENABLE_COMPOSITE_CHARS */
76
77 struct charset_lookup *chlook;
78
79 static const struct lrecord_description charset_lookup_description_1[] = {
80 { XD_LISP_OBJECT, offsetof(struct charset_lookup, charset_by_leading_byte), 128+4*128*2 },
81 { XD_END }
82 };
83
84 static const struct struct_description charset_lookup_description = {
85 sizeof(struct charset_lookup),
86 charset_lookup_description_1
87 };
88
89 /* Table of number of bytes in the string representation of a character
90 indexed by the first byte of that representation.
91
92 rep_bytes_by_first_byte(c) is more efficient than the equivalent
93 canonical computation:
94
95 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
96
97 Bytecount rep_bytes_by_first_byte[0xA0] =
98 { /* 0x00 - 0x7f are for straight ASCII */
99 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
107 /* 0x80 - 0x8f are for Dimension-1 official charsets */
108 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 /* 0x90 - 0x9d are for Dimension-2 official charsets */
110 /* 0x9e is for Dimension-1 private charsets */
111 /* 0x9f is for Dimension-2 private charsets */
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
113 };
114
115 Lisp_Object Qcharsetp;
116
117 /* Qdoc_string, Qdimension, Qchars defined in general.c */
118 Lisp_Object Qregistry, Qfinal, Qgraphic;
119 Lisp_Object Qdirection;
120 Lisp_Object Qreverse_direction_charset;
121 Lisp_Object Qleading_byte;
122 Lisp_Object Qshort_name, Qlong_name;
123
124 Lisp_Object Qascii,
125 Qcontrol_1,
126 Qlatin_iso8859_1,
127 Qlatin_iso8859_2,
128 Qlatin_iso8859_3,
129 Qlatin_iso8859_4,
130 Qthai_tis620,
131 Qgreek_iso8859_7,
132 Qarabic_iso8859_6,
133 Qhebrew_iso8859_8,
134 Qkatakana_jisx0201,
135 Qlatin_jisx0201,
136 Qcyrillic_iso8859_5,
137 Qlatin_iso8859_9,
138 Qjapanese_jisx0208_1978,
139 Qchinese_gb2312,
140 Qjapanese_jisx0208,
141 Qkorean_ksc5601,
142 Qjapanese_jisx0212,
143 Qchinese_cns11643_1,
144 Qchinese_cns11643_2,
145 Qchinese_big5_1,
146 Qchinese_big5_2,
147 Qcomposite;
148
149 Lisp_Object Ql2r, Qr2l;
150
151 Lisp_Object Vcharset_hash_table;
152
153 static Bufbyte next_allocated_1_byte_leading_byte;
154 static Bufbyte next_allocated_2_byte_leading_byte;
155
156 /* Composite characters are characters constructed by overstriking two
157 or more regular characters.
158
159 1) The old Mule implementation involves storing composite characters
160 in a buffer as a tag followed by all of the actual characters
161 used to make up the composite character. I think this is a bad
162 idea; it greatly complicates code that wants to handle strings
163 one character at a time because it has to deal with the possibility
164 of great big ungainly characters. It's much more reasonable to
165 simply store an index into a table of composite characters.
166
167 2) The current implementation only allows for 16,384 separate
168 composite characters over the lifetime of the XEmacs process.
169 This could become a potential problem if the user
170 edited lots of different files that use composite characters.
171 Due to FSF bogosity, increasing the number of allowable
172 composite characters under Mule would decrease the number
173 of possible faces that can exist. Mule already has shrunk
174 this to 2048, and further shrinkage would become uncomfortable.
175 No such problems exist in XEmacs.
176
177 Composite characters could be represented as 0x80 C1 C2 C3,
178 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
179 for slightly under 2^20 (one million) composite characters
180 over the XEmacs process lifetime, and you only need to
181 increase the size of a Mule character from 19 to 21 bits.
182 Or you could use 0x80 C1 C2 C3 C4, allowing for about
183 85 million (slightly over 2^26) composite characters. */
184
185
186 /************************************************************************/
187 /* Basic Emchar functions */
188 /************************************************************************/
189
190 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
191 string in STR. Returns the number of bytes stored.
192 Do not call this directly. Use the macro set_charptr_emchar() instead.
193 */
194
195 Bytecount
196 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
197 {
198 Bufbyte *p;
199 Bufbyte lb;
200 int c1, c2;
201 Lisp_Object charset;
202
203 p = str;
204 BREAKUP_CHAR (c, charset, c1, c2);
205 lb = CHAR_LEADING_BYTE (c);
206 if (LEADING_BYTE_PRIVATE_P (lb))
207 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
208 *p++ = lb;
209 if (EQ (charset, Vcharset_control_1))
210 c1 += 0x20;
211 *p++ = c1 | 0x80;
212 if (c2)
213 *p++ = c2 | 0x80;
214
215 return (p - str);
216 }
217
218 /* Return the first character from a Mule-encoded string in STR,
219 assuming it's non-ASCII. Do not call this directly.
220 Use the macro charptr_emchar() instead. */
221
222 Emchar
223 non_ascii_charptr_emchar (CONST Bufbyte *str)
224 {
225 Bufbyte i0 = *str, i1, i2 = 0;
226 Lisp_Object charset;
227
228 if (i0 == LEADING_BYTE_CONTROL_1)
229 return (Emchar) (*++str - 0x20);
230
231 if (LEADING_BYTE_PREFIX_P (i0))
232 i0 = *++str;
233
234 i1 = *++str & 0x7F;
235
236 charset = CHARSET_BY_LEADING_BYTE (i0);
237 if (XCHARSET_DIMENSION (charset) == 2)
238 i2 = *++str & 0x7F;
239
240 return MAKE_CHAR (charset, i1, i2);
241 }
242
243 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
244 Do not call this directly. Use the macro valid_char_p() instead. */
245
246 int
247 non_ascii_valid_char_p (Emchar ch)
248 {
249 int f1, f2, f3;
250
251 /* Must have only lowest 19 bits set */
252 if (ch & ~0x7FFFF)
253 return 0;
254
255 f1 = CHAR_FIELD1 (ch);
256 f2 = CHAR_FIELD2 (ch);
257 f3 = CHAR_FIELD3 (ch);
258
259 if (f1 == 0)
260 {
261 Lisp_Object charset;
262
263 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
264 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
265 f2 > MAX_CHAR_FIELD2_PRIVATE)
266 return 0;
267 if (f3 < 0x20)
268 return 0;
269
270 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
271 f2 <= MAX_CHAR_FIELD2_PRIVATE))
272 return 1;
273
274 /*
275 NOTE: This takes advantage of the fact that
276 FIELD2_TO_OFFICIAL_LEADING_BYTE and
277 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
278 */
279 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
280 if (EQ (charset, Qnil))
281 return 0;
282 return (XCHARSET_CHARS (charset) == 96);
283 }
284 else
285 {
286 Lisp_Object charset;
287
288 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
289 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
290 f1 > MAX_CHAR_FIELD1_PRIVATE)
291 return 0;
292 if (f2 < 0x20 || f3 < 0x20)
293 return 0;
294
295 #ifdef ENABLE_COMPOSITE_CHARS
296 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
297 {
298 if (UNBOUNDP (Fgethash (make_int (ch),
299 Vcomposite_char_char2string_hash_table,
300 Qunbound)))
301 return 0;
302 return 1;
303 }
304 #endif /* ENABLE_COMPOSITE_CHARS */
305
306 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
307 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
308 return 1;
309
310 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
311 charset =
312 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
313 else
314 charset =
315 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
316
317 if (EQ (charset, Qnil))
318 return 0;
319 return (XCHARSET_CHARS (charset) == 96);
320 }
321 }
322
323
324 /************************************************************************/
325 /* Basic string functions */
326 /************************************************************************/
327
328 /* Copy the character pointed to by PTR into STR, assuming it's
329 non-ASCII. Do not call this directly. Use the macro
330 charptr_copy_char() instead. */
331
332 Bytecount
333 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
334 {
335 Bufbyte *strptr = str;
336 *strptr = *ptr++;
337 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
338 {
339 /* Notice fallthrough. */
340 case 4: *++strptr = *ptr++;
341 case 3: *++strptr = *ptr++;
342 case 2: *++strptr = *ptr;
343 break;
344 default:
345 abort ();
346 }
347 return strptr + 1 - str;
348 }
349
350
351 /************************************************************************/
352 /* streams of Emchars */
353 /************************************************************************/
354
355 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
356 The functions below are not meant to be called directly; use
357 the macros in insdel.h. */
358
359 Emchar
360 Lstream_get_emchar_1 (Lstream *stream, int ch)
361 {
362 Bufbyte str[MAX_EMCHAR_LEN];
363 Bufbyte *strptr = str;
364
365 str[0] = (Bufbyte) ch;
366 switch (REP_BYTES_BY_FIRST_BYTE (ch))
367 {
368 /* Notice fallthrough. */
369 case 4:
370 ch = Lstream_getc (stream);
371 assert (ch >= 0);
372 *++strptr = (Bufbyte) ch;
373 case 3:
374 ch = Lstream_getc (stream);
375 assert (ch >= 0);
376 *++strptr = (Bufbyte) ch;
377 case 2:
378 ch = Lstream_getc (stream);
379 assert (ch >= 0);
380 *++strptr = (Bufbyte) ch;
381 break;
382 default:
383 abort ();
384 }
385 return charptr_emchar (str);
386 }
387
388 int
389 Lstream_fput_emchar (Lstream *stream, Emchar ch)
390 {
391 Bufbyte str[MAX_EMCHAR_LEN];
392 Bytecount len = set_charptr_emchar (str, ch);
393 return Lstream_write (stream, str, len);
394 }
395
396 void
397 Lstream_funget_emchar (Lstream *stream, Emchar ch)
398 {
399 Bufbyte str[MAX_EMCHAR_LEN];
400 Bytecount len = set_charptr_emchar (str, ch);
401 Lstream_unread (stream, str, len);
402 }
403
404
405 /************************************************************************/
406 /* charset object */
407 /************************************************************************/
408
409 static Lisp_Object
410 mark_charset (Lisp_Object obj)
411 {
412 struct Lisp_Charset *cs = XCHARSET (obj);
413
414 mark_object (cs->short_name);
415 mark_object (cs->long_name);
416 mark_object (cs->doc_string);
417 mark_object (cs->registry);
418 mark_object (cs->ccl_program);
419 return cs->name;
420 }
421
422 static void
423 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
424 {
425 struct Lisp_Charset *cs = XCHARSET (obj);
426 char buf[200];
427
428 if (print_readably)
429 error ("printing unreadable object #<charset %s 0x%x>",
430 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
431 cs->header.uid);
432
433 write_c_string ("#<charset ", printcharfun);
434 print_internal (CHARSET_NAME (cs), printcharfun, 0);
435 write_c_string (" ", printcharfun);
436 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
437 write_c_string (" ", printcharfun);
438 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
439 write_c_string (" ", printcharfun);
440 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
441 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
442 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
443 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
444 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
445 "96x96",
446 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
447 CHARSET_COLUMNS (cs),
448 CHARSET_GRAPHIC (cs),
449 CHARSET_FINAL (cs));
450 write_c_string (buf, printcharfun);
451 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
452 sprintf (buf, " 0x%x>", cs->header.uid);
453 write_c_string (buf, printcharfun);
454 }
455
456 static const struct lrecord_description charset_description[] = {
457 { XD_LISP_OBJECT, offsetof(struct Lisp_Charset, name), 7 },
458 { XD_END }
459 };
460
461 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
462 mark_charset, print_charset, 0, 0, 0, charset_description,
463 struct Lisp_Charset);
464 /* Make a new charset. */
465
466 static Lisp_Object
467 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
468 unsigned char type, unsigned char columns, unsigned char graphic,
469 Bufbyte final, unsigned char direction, Lisp_Object short_name,
470 Lisp_Object long_name, Lisp_Object doc,
471 Lisp_Object reg)
472 {
473 Lisp_Object obj;
474 struct Lisp_Charset *cs =
475 alloc_lcrecord_type (struct Lisp_Charset, &lrecord_charset);
476 XSETCHARSET (obj, cs);
477
478 CHARSET_ID (cs) = id;
479 CHARSET_NAME (cs) = name;
480 CHARSET_SHORT_NAME (cs) = short_name;
481 CHARSET_LONG_NAME (cs) = long_name;
482 CHARSET_REP_BYTES (cs) = rep_bytes;
483 CHARSET_DIRECTION (cs) = direction;
484 CHARSET_TYPE (cs) = type;
485 CHARSET_COLUMNS (cs) = columns;
486 CHARSET_GRAPHIC (cs) = graphic;
487 CHARSET_FINAL (cs) = final;
488 CHARSET_DOC_STRING (cs) = doc;
489 CHARSET_REGISTRY (cs) = reg;
490 CHARSET_CCL_PROGRAM (cs) = Qnil;
491 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
492
493 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
494 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
495 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
496 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
497
498 if (final)
499 {
500 /* some charsets do not have final characters. This includes
501 ASCII, Control-1, Composite, and the two faux private
502 charsets. */
503 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
504 chlook->charset_by_attributes[type][final][direction] = obj;
505 }
506
507 assert (NILP (chlook->charset_by_leading_byte[id - 128]));
508 chlook->charset_by_leading_byte[id - 128] = obj;
509 if (id < 0xA0)
510 /* official leading byte */
511 rep_bytes_by_first_byte[id] = rep_bytes;
512
513 /* Some charsets are "faux" and don't have names or really exist at
514 all except in the leading-byte table. */
515 if (!NILP (name))
516 Fputhash (name, obj, Vcharset_hash_table);
517 return obj;
518 }
519
520 static int
521 get_unallocated_leading_byte (int dimension)
522 {
523 int lb;
524
525 if (dimension == 1)
526 {
527 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
528 lb = 0;
529 else
530 lb = next_allocated_1_byte_leading_byte++;
531 }
532 else
533 {
534 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
535 lb = 0;
536 else
537 lb = next_allocated_2_byte_leading_byte++;
538 }
539
540 if (!lb)
541 signal_simple_error
542 ("No more character sets free for this dimension",
543 make_int (dimension));
544
545 return lb;
546 }
547
548
549 /************************************************************************/
550 /* Basic charset Lisp functions */
551 /************************************************************************/
552
553 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
554 Return non-nil if OBJECT is a charset.
555 */
556 (object))
557 {
558 return CHARSETP (object) ? Qt : Qnil;
559 }
560
561 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
562 Retrieve the charset of the given name.
563 If CHARSET-OR-NAME is a charset object, it is simply returned.
564 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
565 nil is returned. Otherwise the associated charset object is returned.
566 */
567 (charset_or_name))
568 {
569 if (CHARSETP (charset_or_name))
570 return charset_or_name;
571
572 CHECK_SYMBOL (charset_or_name);
573 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
574 }
575
576 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
577 Retrieve the charset of the given name.
578 Same as `find-charset' except an error is signalled if there is no such
579 charset instead of returning nil.
580 */
581 (name))
582 {
583 Lisp_Object charset = Ffind_charset (name);
584
585 if (NILP (charset))
586 signal_simple_error ("No such charset", name);
587 return charset;
588 }
589
590 /* We store the charsets in hash tables with the names as the key and the
591 actual charset object as the value. Occasionally we need to use them
592 in a list format. These routines provide us with that. */
593 struct charset_list_closure
594 {
595 Lisp_Object *charset_list;
596 };
597
598 static int
599 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
600 void *charset_list_closure)
601 {
602 /* This function can GC */
603 struct charset_list_closure *chcl =
604 (struct charset_list_closure*) charset_list_closure;
605 Lisp_Object *charset_list = chcl->charset_list;
606
607 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
608 return 0;
609 }
610
611 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
612 Return a list of the names of all defined charsets.
613 */
614 ())
615 {
616 Lisp_Object charset_list = Qnil;
617 struct gcpro gcpro1;
618 struct charset_list_closure charset_list_closure;
619
620 GCPRO1 (charset_list);
621 charset_list_closure.charset_list = &charset_list;
622 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
623 &charset_list_closure);
624 UNGCPRO;
625
626 return charset_list;
627 }
628
629 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
630 Return the name of the given charset.
631 */
632 (charset))
633 {
634 return XCHARSET_NAME (Fget_charset (charset));
635 }
636
637 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
638 Define a new character set.
639 This function is for use with Mule support.
640 NAME is a symbol, the name by which the character set is normally referred.
641 DOC-STRING is a string describing the character set.
642 PROPS is a property list, describing the specific nature of the
643 character set. Recognized properties are:
644
645 'short-name Short version of the charset name (ex: Latin-1)
646 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
647 'registry A regular expression matching the font registry field for
648 this character set.
649 'dimension Number of octets used to index a character in this charset.
650 Either 1 or 2. Defaults to 1.
651 'columns Number of columns used to display a character in this charset.
652 Only used in TTY mode. (Under X, the actual width of a
653 character can be derived from the font used to display the
654 characters.) If unspecified, defaults to the dimension
655 (this is almost always the correct value).
656 'chars Number of characters in each dimension (94 or 96).
657 Defaults to 94. Note that if the dimension is 2, the
658 character set thus described is 94x94 or 96x96.
659 'final Final byte of ISO 2022 escape sequence. Must be
660 supplied. Each combination of (DIMENSION, CHARS) defines a
661 separate namespace for final bytes. Note that ISO
662 2022 restricts the final byte to the range
663 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
664 dimension == 2. Note also that final bytes in the range
665 0x30 - 0x3F are reserved for user-defined (not official)
666 character sets.
667 'graphic 0 (use left half of font on output) or 1 (use right half
668 of font on output). Defaults to 0. For example, for
669 a font whose registry is ISO8859-1, the left half
670 (octets 0x20 - 0x7F) is the `ascii' character set, while
671 the right half (octets 0xA0 - 0xFF) is the `latin-1'
672 character set. With 'graphic set to 0, the octets
673 will have their high bit cleared; with it set to 1,
674 the octets will have their high bit set.
675 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
676 Defaults to 'l2r.
677 'ccl-program A compiled CCL program used to convert a character in
678 this charset into an index into the font. This is in
679 addition to the 'graphic property. The CCL program
680 is passed the octets of the character, with the high
681 bit cleared and set depending upon whether the value
682 of the 'graphic property is 0 or 1.
683 */
684 (name, doc_string, props))
685 {
686 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
687 int direction = CHARSET_LEFT_TO_RIGHT;
688 int type;
689 Lisp_Object registry = Qnil;
690 Lisp_Object charset;
691 Lisp_Object rest, keyword, value;
692 Lisp_Object ccl_program = Qnil;
693 Lisp_Object short_name = Qnil, long_name = Qnil;
694
695 CHECK_SYMBOL (name);
696 if (!NILP (doc_string))
697 CHECK_STRING (doc_string);
698
699 charset = Ffind_charset (name);
700 if (!NILP (charset))
701 signal_simple_error ("Cannot redefine existing charset", name);
702
703 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
704 {
705 if (EQ (keyword, Qshort_name))
706 {
707 CHECK_STRING (value);
708 short_name = value;
709 }
710
711 if (EQ (keyword, Qlong_name))
712 {
713 CHECK_STRING (value);
714 long_name = value;
715 }
716
717 else if (EQ (keyword, Qdimension))
718 {
719 CHECK_INT (value);
720 dimension = XINT (value);
721 if (dimension < 1 || dimension > 2)
722 signal_simple_error ("Invalid value for 'dimension", value);
723 }
724
725 else if (EQ (keyword, Qchars))
726 {
727 CHECK_INT (value);
728 chars = XINT (value);
729 if (chars != 94 && chars != 96)
730 signal_simple_error ("Invalid value for 'chars", value);
731 }
732
733 else if (EQ (keyword, Qcolumns))
734 {
735 CHECK_INT (value);
736 columns = XINT (value);
737 if (columns != 1 && columns != 2)
738 signal_simple_error ("Invalid value for 'columns", value);
739 }
740
741 else if (EQ (keyword, Qgraphic))
742 {
743 CHECK_INT (value);
744 graphic = XINT (value);
745 if (graphic < 0 || graphic > 1)
746 signal_simple_error ("Invalid value for 'graphic", value);
747 }
748
749 else if (EQ (keyword, Qregistry))
750 {
751 CHECK_STRING (value);
752 registry = value;
753 }
754
755 else if (EQ (keyword, Qdirection))
756 {
757 if (EQ (value, Ql2r))
758 direction = CHARSET_LEFT_TO_RIGHT;
759 else if (EQ (value, Qr2l))
760 direction = CHARSET_RIGHT_TO_LEFT;
761 else
762 signal_simple_error ("Invalid value for 'direction", value);
763 }
764
765 else if (EQ (keyword, Qfinal))
766 {
767 CHECK_CHAR_COERCE_INT (value);
768 final = XCHAR (value);
769 if (final < '0' || final > '~')
770 signal_simple_error ("Invalid value for 'final", value);
771 }
772
773 else if (EQ (keyword, Qccl_program))
774 {
775 CHECK_VECTOR (value);
776 ccl_program = value;
777 }
778
779 else
780 signal_simple_error ("Unrecognized property", keyword);
781 }
782
783 if (!final)
784 error ("'final must be specified");
785 if (dimension == 2 && final > 0x5F)
786 signal_simple_error
787 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
788 make_char (final));
789
790 if (dimension == 1)
791 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
792 else
793 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
794
795 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
796 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
797 error
798 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
799
800 id = get_unallocated_leading_byte (dimension);
801
802 if (NILP (doc_string))
803 doc_string = build_string ("");
804
805 if (NILP (registry))
806 registry = build_string ("");
807
808 if (NILP (short_name))
809 XSETSTRING (short_name, XSYMBOL (name)->name);
810
811 if (NILP (long_name))
812 long_name = doc_string;
813
814 if (columns == -1)
815 columns = dimension;
816 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
817 final, direction, short_name, long_name, doc_string, registry);
818 if (!NILP (ccl_program))
819 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
820 return charset;
821 }
822
823 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
824 2, 2, 0, /*
825 Make a charset equivalent to CHARSET but which goes in the opposite direction.
826 NEW-NAME is the name of the new charset. Return the new charset.
827 */
828 (charset, new_name))
829 {
830 Lisp_Object new_charset = Qnil;
831 int id, dimension, columns, graphic, final;
832 int direction, type;
833 Lisp_Object registry, doc_string, short_name, long_name;
834 struct Lisp_Charset *cs;
835
836 charset = Fget_charset (charset);
837 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
838 signal_simple_error ("Charset already has reverse-direction charset",
839 charset);
840
841 CHECK_SYMBOL (new_name);
842 if (!NILP (Ffind_charset (new_name)))
843 signal_simple_error ("Cannot redefine existing charset", new_name);
844
845 cs = XCHARSET (charset);
846
847 type = CHARSET_TYPE (cs);
848 columns = CHARSET_COLUMNS (cs);
849 dimension = CHARSET_DIMENSION (cs);
850 id = get_unallocated_leading_byte (dimension);
851
852 graphic = CHARSET_GRAPHIC (cs);
853 final = CHARSET_FINAL (cs);
854 direction = CHARSET_RIGHT_TO_LEFT;
855 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
856 direction = CHARSET_LEFT_TO_RIGHT;
857 doc_string = CHARSET_DOC_STRING (cs);
858 short_name = CHARSET_SHORT_NAME (cs);
859 long_name = CHARSET_LONG_NAME (cs);
860 registry = CHARSET_REGISTRY (cs);
861
862 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
863 graphic, final, direction, short_name, long_name,
864 doc_string, registry);
865
866 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
867 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
868
869 return new_charset;
870 }
871
872 /* #### Reverse direction charsets not yet implemented. */
873 #if 0
874 DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
875 1, 1, 0, /*
876 Return the reverse-direction charset parallel to CHARSET, if any.
877 This is the charset with the same properties (in particular, the same
878 dimension, number of characters per dimension, and final byte) as
879 CHARSET but whose characters are displayed in the opposite direction.
880 */
881 (charset))
882 {
883 charset = Fget_charset (charset);
884 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
885 }
886 #endif
887
888 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
889 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
890 If DIRECTION is omitted, both directions will be checked (left-to-right
891 will be returned if character sets exist for both directions).
892 */
893 (dimension, chars, final, direction))
894 {
895 int dm, ch, fi, di = -1;
896 int type;
897 Lisp_Object obj = Qnil;
898
899 CHECK_INT (dimension);
900 dm = XINT (dimension);
901 if (dm < 1 || dm > 2)
902 signal_simple_error ("Invalid value for DIMENSION", dimension);
903
904 CHECK_INT (chars);
905 ch = XINT (chars);
906 if (ch != 94 && ch != 96)
907 signal_simple_error ("Invalid value for CHARS", chars);
908
909 CHECK_CHAR_COERCE_INT (final);
910 fi = XCHAR (final);
911 if (fi < '0' || fi > '~')
912 signal_simple_error ("Invalid value for FINAL", final);
913
914 if (EQ (direction, Ql2r))
915 di = CHARSET_LEFT_TO_RIGHT;
916 else if (EQ (direction, Qr2l))
917 di = CHARSET_RIGHT_TO_LEFT;
918 else if (!NILP (direction))
919 signal_simple_error ("Invalid value for DIRECTION", direction);
920
921 if (dm == 2 && fi > 0x5F)
922 signal_simple_error
923 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
924
925 if (dm == 1)
926 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
927 else
928 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
929
930 if (di == -1)
931 {
932 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
933 if (NILP (obj))
934 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
935 }
936 else
937 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
938
939 if (CHARSETP (obj))
940 return XCHARSET_NAME (obj);
941 return obj;
942 }
943
944 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
945 Return short name of CHARSET.
946 */
947 (charset))
948 {
949 return XCHARSET_SHORT_NAME (Fget_charset (charset));
950 }
951
952 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
953 Return long name of CHARSET.
954 */
955 (charset))
956 {
957 return XCHARSET_LONG_NAME (Fget_charset (charset));
958 }
959
960 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
961 Return description of CHARSET.
962 */
963 (charset))
964 {
965 return XCHARSET_DOC_STRING (Fget_charset (charset));
966 }
967
968 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
969 Return dimension of CHARSET.
970 */
971 (charset))
972 {
973 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
974 }
975
976 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
977 Return property PROP of CHARSET.
978 Recognized properties are those listed in `make-charset', as well as
979 'name and 'doc-string.
980 */
981 (charset, prop))
982 {
983 struct Lisp_Charset *cs;
984
985 charset = Fget_charset (charset);
986 cs = XCHARSET (charset);
987
988 CHECK_SYMBOL (prop);
989 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
990 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
991 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
992 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
993 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
994 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
995 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
996 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
997 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
998 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
999 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
1000 if (EQ (prop, Qdirection))
1001 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
1002 if (EQ (prop, Qreverse_direction_charset))
1003 {
1004 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
1005 if (NILP (obj))
1006 return Qnil;
1007 else
1008 return XCHARSET_NAME (obj);
1009 }
1010 signal_simple_error ("Unrecognized charset property name", prop);
1011 return Qnil; /* not reached */
1012 }
1013
1014 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
1015 Return charset identification number of CHARSET.
1016 */
1017 (charset))
1018 {
1019 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
1020 }
1021
1022 /* #### We need to figure out which properties we really want to
1023 allow to be set. */
1024
1025 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
1026 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
1027 */
1028 (charset, ccl_program))
1029 {
1030 charset = Fget_charset (charset);
1031 CHECK_VECTOR (ccl_program);
1032 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
1033 return Qnil;
1034 }
1035
1036 static void
1037 invalidate_charset_font_caches (Lisp_Object charset)
1038 {
1039 /* Invalidate font cache entries for charset on all devices. */
1040 Lisp_Object devcons, concons, hash_table;
1041 DEVICE_LOOP_NO_BREAK (devcons, concons)
1042 {
1043 struct device *d = XDEVICE (XCAR (devcons));
1044 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
1045 if (!UNBOUNDP (hash_table))
1046 Fclrhash (hash_table);
1047 }
1048 }
1049
1050 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
1051 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
1052 Set the 'registry property of CHARSET to REGISTRY.
1053 */
1054 (charset, registry))
1055 {
1056 charset = Fget_charset (charset);
1057 CHECK_STRING (registry);
1058 XCHARSET_REGISTRY (charset) = registry;
1059 invalidate_charset_font_caches (charset);
1060 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
1061 return Qnil;
1062 }
1063
1064
1065 /************************************************************************/
1066 /* Lisp primitives for working with characters */
1067 /************************************************************************/
1068
1069 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
1070 Make a character from CHARSET and octets ARG1 and ARG2.
1071 ARG2 is required only for characters from two-dimensional charsets.
1072 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
1073 character s with caron.
1074 */
1075 (charset, arg1, arg2))
1076 {
1077 struct Lisp_Charset *cs;
1078 int a1, a2;
1079 int lowlim, highlim;
1080
1081 charset = Fget_charset (charset);
1082 cs = XCHARSET (charset);
1083
1084 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
1085 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1086 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1087 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1088
1089 CHECK_INT (arg1);
1090 /* It is useful (and safe, according to Olivier Galibert) to strip
1091 the 8th bit off ARG1 and ARG2 becaue it allows programmers to
1092 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1093 Latin 2 code of the character. */
1094 a1 = XINT (arg1) & 0x7f;
1095 if (a1 < lowlim || a1 > highlim)
1096 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1097
1098 if (CHARSET_DIMENSION (cs) == 1)
1099 {
1100 if (!NILP (arg2))
1101 signal_simple_error
1102 ("Charset is of dimension one; second octet must be nil", arg2);
1103 return make_char (MAKE_CHAR (charset, a1, 0));
1104 }
1105
1106 CHECK_INT (arg2);
1107 a2 = XINT (arg2) & 0x7f;
1108 if (a2 < lowlim || a2 > highlim)
1109 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1110
1111 return make_char (MAKE_CHAR (charset, a1, a2));
1112 }
1113
1114 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1115 Return the character set of char CH.
1116 */
1117 (ch))
1118 {
1119 CHECK_CHAR_COERCE_INT (ch);
1120
1121 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1122 (CHAR_LEADING_BYTE (XCHAR (ch))));
1123 }
1124
1125 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
1126 Return list of charset and one or two position-codes of CHAR.
1127 */
1128 (character))
1129 {
1130 /* This function can GC */
1131 struct gcpro gcpro1, gcpro2;
1132 Lisp_Object charset = Qnil;
1133 Lisp_Object rc = Qnil;
1134 int c1, c2;
1135
1136 GCPRO2 (charset, rc);
1137 CHECK_CHAR_COERCE_INT (character);
1138
1139 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
1140
1141 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
1142 {
1143 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
1144 }
1145 else
1146 {
1147 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
1148 }
1149 UNGCPRO;
1150
1151 return rc;
1152 }
1153
1154
1155 #ifdef ENABLE_COMPOSITE_CHARS
1156 /************************************************************************/
1157 /* composite character functions */
1158 /************************************************************************/
1159
1160 Emchar
1161 lookup_composite_char (Bufbyte *str, int len)
1162 {
1163 Lisp_Object lispstr = make_string (str, len);
1164 Lisp_Object ch = Fgethash (lispstr,
1165 Vcomposite_char_string2char_hash_table,
1166 Qunbound);
1167 Emchar emch;
1168
1169 if (UNBOUNDP (ch))
1170 {
1171 if (composite_char_row_next >= 128)
1172 signal_simple_error ("No more composite chars available", lispstr);
1173 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1174 composite_char_col_next);
1175 Fputhash (make_char (emch), lispstr,
1176 Vcomposite_char_char2string_hash_table);
1177 Fputhash (lispstr, make_char (emch),
1178 Vcomposite_char_string2char_hash_table);
1179 composite_char_col_next++;
1180 if (composite_char_col_next >= 128)
1181 {
1182 composite_char_col_next = 32;
1183 composite_char_row_next++;
1184 }
1185 }
1186 else
1187 emch = XCHAR (ch);
1188 return emch;
1189 }
1190
1191 Lisp_Object
1192 composite_char_string (Emchar ch)
1193 {
1194 Lisp_Object str = Fgethash (make_char (ch),
1195 Vcomposite_char_char2string_hash_table,
1196 Qunbound);
1197 assert (!UNBOUNDP (str));
1198 return str;
1199 }
1200
1201 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1202 Convert a string into a single composite character.
1203 The character is the result of overstriking all the characters in
1204 the string.
1205 */
1206 (string))
1207 {
1208 CHECK_STRING (string);
1209 return make_char (lookup_composite_char (XSTRING_DATA (string),
1210 XSTRING_LENGTH (string)));
1211 }
1212
1213 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1214 Return a string of the characters comprising a composite character.
1215 */
1216 (ch))
1217 {
1218 Emchar emch;
1219
1220 CHECK_CHAR (ch);
1221 emch = XCHAR (ch);
1222 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1223 signal_simple_error ("Must be composite char", ch);
1224 return composite_char_string (emch);
1225 }
1226 #endif /* ENABLE_COMPOSITE_CHARS */
1227
1228
1229 /************************************************************************/
1230 /* initialization */
1231 /************************************************************************/
1232
1233 void
1234 syms_of_mule_charset (void)
1235 {
1236 DEFSUBR (Fcharsetp);
1237 DEFSUBR (Ffind_charset);
1238 DEFSUBR (Fget_charset);
1239 DEFSUBR (Fcharset_list);
1240 DEFSUBR (Fcharset_name);
1241 DEFSUBR (Fmake_charset);
1242 DEFSUBR (Fmake_reverse_direction_charset);
1243 /* DEFSUBR (Freverse_direction_charset); */
1244 DEFSUBR (Fcharset_from_attributes);
1245 DEFSUBR (Fcharset_short_name);
1246 DEFSUBR (Fcharset_long_name);
1247 DEFSUBR (Fcharset_description);
1248 DEFSUBR (Fcharset_dimension);
1249 DEFSUBR (Fcharset_property);
1250 DEFSUBR (Fcharset_id);
1251 DEFSUBR (Fset_charset_ccl_program);
1252 DEFSUBR (Fset_charset_registry);
1253
1254 DEFSUBR (Fmake_char);
1255 DEFSUBR (Fchar_charset);
1256 DEFSUBR (Fsplit_char);
1257
1258 #ifdef ENABLE_COMPOSITE_CHARS
1259 DEFSUBR (Fmake_composite_char);
1260 DEFSUBR (Fcomposite_char_string);
1261 #endif
1262
1263 defsymbol (&Qcharsetp, "charsetp");
1264 defsymbol (&Qregistry, "registry");
1265 defsymbol (&Qfinal, "final");
1266 defsymbol (&Qgraphic, "graphic");
1267 defsymbol (&Qdirection, "direction");
1268 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1269 defsymbol (&Qshort_name, "short-name");
1270 defsymbol (&Qlong_name, "long-name");
1271
1272 defsymbol (&Ql2r, "l2r");
1273 defsymbol (&Qr2l, "r2l");
1274
1275 /* Charsets, compatible with FSF 20.3
1276 Naming convention is Script-Charset[-Edition] */
1277 defsymbol (&Qascii, "ascii");
1278 defsymbol (&Qcontrol_1, "control-1");
1279 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
1280 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
1281 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
1282 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
1283 defsymbol (&Qthai_tis620, "thai-tis620");
1284 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1285 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1286 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1287 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1288 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1289 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
1290 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1291 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1292 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1293 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1294 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1295 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1296 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1297 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1298 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1299 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1300
1301 defsymbol (&Qcomposite, "composite");
1302 }
1303
1304 void
1305 vars_of_mule_charset (void)
1306 {
1307 int i, j, k;
1308
1309 chlook = xnew (struct charset_lookup);
1310 dumpstruct (&chlook, &charset_lookup_description);
1311
1312 /* Table of charsets indexed by leading byte. */
1313 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
1314 chlook->charset_by_leading_byte[i] = Qnil;
1315
1316 /* Table of charsets indexed by type/final-byte/direction. */
1317 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
1318 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
1319 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
1320 chlook->charset_by_attributes[i][j][k] = Qnil;
1321
1322 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1323 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1324 }
1325
1326 void
1327 complex_vars_of_mule_charset (void)
1328 {
1329 staticpro (&Vcharset_hash_table);
1330 Vcharset_hash_table =
1331 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1332
1333 /* Predefined character sets. We store them into variables for
1334 ease of access. */
1335
1336 staticpro (&Vcharset_ascii);
1337 Vcharset_ascii =
1338 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
1339 CHARSET_TYPE_94, 1, 0, 'B',
1340 CHARSET_LEFT_TO_RIGHT,
1341 build_string ("ASCII"),
1342 build_string ("ASCII)"),
1343 build_string ("ASCII (ISO646 IRV)"),
1344 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
1345 staticpro (&Vcharset_control_1);
1346 Vcharset_control_1 =
1347 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
1348 CHARSET_TYPE_94, 1, 1, 0,
1349 CHARSET_LEFT_TO_RIGHT,
1350 build_string ("C1"),
1351 build_string ("Control characters"),
1352 build_string ("Control characters 128-191"),
1353 build_string (""));
1354 staticpro (&Vcharset_latin_iso8859_1);
1355 Vcharset_latin_iso8859_1 =
1356 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
1357 CHARSET_TYPE_96, 1, 1, 'A',
1358 CHARSET_LEFT_TO_RIGHT,
1359 build_string ("Latin-1"),
1360 build_string ("ISO8859-1 (Latin-1)"),
1361 build_string ("ISO8859-1 (Latin-1)"),
1362 build_string ("iso8859-1"));
1363 staticpro (&Vcharset_latin_iso8859_2);
1364 Vcharset_latin_iso8859_2 =
1365 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
1366 CHARSET_TYPE_96, 1, 1, 'B',
1367 CHARSET_LEFT_TO_RIGHT,
1368 build_string ("Latin-2"),
1369 build_string ("ISO8859-2 (Latin-2)"),
1370 build_string ("ISO8859-2 (Latin-2)"),
1371 build_string ("iso8859-2"));
1372 staticpro (&Vcharset_latin_iso8859_3);
1373 Vcharset_latin_iso8859_3 =
1374 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
1375 CHARSET_TYPE_96, 1, 1, 'C',
1376 CHARSET_LEFT_TO_RIGHT,
1377 build_string ("Latin-3"),
1378 build_string ("ISO8859-3 (Latin-3)"),
1379 build_string ("ISO8859-3 (Latin-3)"),
1380 build_string ("iso8859-3"));
1381 staticpro (&Vcharset_latin_iso8859_4);
1382 Vcharset_latin_iso8859_4 =
1383 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
1384 CHARSET_TYPE_96, 1, 1, 'D',
1385 CHARSET_LEFT_TO_RIGHT,
1386 build_string ("Latin-4"),
1387 build_string ("ISO8859-4 (Latin-4)"),
1388 build_string ("ISO8859-4 (Latin-4)"),
1389 build_string ("iso8859-4"));
1390 staticpro (&Vcharset_thai_tis620);
1391 Vcharset_thai_tis620 =
1392 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
1393 CHARSET_TYPE_96, 1, 1, 'T',
1394 CHARSET_LEFT_TO_RIGHT,
1395 build_string ("TIS620"),
1396 build_string ("TIS620 (Thai)"),
1397 build_string ("TIS620.2529 (Thai)"),
1398 build_string ("tis620"));
1399 staticpro (&Vcharset_greek_iso8859_7);
1400 Vcharset_greek_iso8859_7 =
1401 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
1402 CHARSET_TYPE_96, 1, 1, 'F',
1403 CHARSET_LEFT_TO_RIGHT,
1404 build_string ("ISO8859-7"),
1405 build_string ("ISO8859-7 (Greek)"),
1406 build_string ("ISO8859-7 (Greek)"),
1407 build_string ("iso8859-7"));
1408 staticpro (&Vcharset_arabic_iso8859_6);
1409 Vcharset_arabic_iso8859_6 =
1410 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
1411 CHARSET_TYPE_96, 1, 1, 'G',
1412 CHARSET_RIGHT_TO_LEFT,
1413 build_string ("ISO8859-6"),
1414 build_string ("ISO8859-6 (Arabic)"),
1415 build_string ("ISO8859-6 (Arabic)"),
1416 build_string ("iso8859-6"));
1417 staticpro (&Vcharset_hebrew_iso8859_8);
1418 Vcharset_hebrew_iso8859_8 =
1419 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
1420 CHARSET_TYPE_96, 1, 1, 'H',
1421 CHARSET_RIGHT_TO_LEFT,
1422 build_string ("ISO8859-8"),
1423 build_string ("ISO8859-8 (Hebrew)"),
1424 build_string ("ISO8859-8 (Hebrew)"),
1425 build_string ("iso8859-8"));
1426 staticpro (&Vcharset_katakana_jisx0201);
1427 Vcharset_katakana_jisx0201 =
1428 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
1429 CHARSET_TYPE_94, 1, 1, 'I',
1430 CHARSET_LEFT_TO_RIGHT,
1431 build_string ("JISX0201 Kana"),
1432 build_string ("JISX0201.1976 (Japanese Kana)"),
1433 build_string ("JISX0201.1976 Japanese Kana"),
1434 build_string ("jisx0201.1976"));
1435 staticpro (&Vcharset_latin_jisx0201);
1436 Vcharset_latin_jisx0201 =
1437 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
1438 CHARSET_TYPE_94, 1, 0, 'J',
1439 CHARSET_LEFT_TO_RIGHT,
1440 build_string ("JISX0201 Roman"),
1441 build_string ("JISX0201.1976 (Japanese Roman)"),
1442 build_string ("JISX0201.1976 Japanese Roman"),
1443 build_string ("jisx0201.1976"));
1444 staticpro (&Vcharset_cyrillic_iso8859_5);
1445 Vcharset_cyrillic_iso8859_5 =
1446 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
1447 CHARSET_TYPE_96, 1, 1, 'L',
1448 CHARSET_LEFT_TO_RIGHT,
1449 build_string ("ISO8859-5"),
1450 build_string ("ISO8859-5 (Cyrillic)"),
1451 build_string ("ISO8859-5 (Cyrillic)"),
1452 build_string ("iso8859-5"));
1453 staticpro (&Vcharset_latin_iso8859_9);
1454 Vcharset_latin_iso8859_9 =
1455 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
1456 CHARSET_TYPE_96, 1, 1, 'M',
1457 CHARSET_LEFT_TO_RIGHT,
1458 build_string ("Latin-5"),
1459 build_string ("ISO8859-9 (Latin-5)"),
1460 build_string ("ISO8859-9 (Latin-5)"),
1461 build_string ("iso8859-9"));
1462 staticpro (&Vcharset_japanese_jisx0208_1978);
1463 Vcharset_japanese_jisx0208_1978 =
1464 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
1465 CHARSET_TYPE_94X94, 2, 0, '@',
1466 CHARSET_LEFT_TO_RIGHT,
1467 build_string ("JISX0208.1978"),
1468 build_string ("JISX0208.1978 (Japanese)"),
1469 build_string
1470 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
1471 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
1472 staticpro (&Vcharset_chinese_gb2312);
1473 Vcharset_chinese_gb2312 =
1474 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
1475 CHARSET_TYPE_94X94, 2, 0, 'A',
1476 CHARSET_LEFT_TO_RIGHT,
1477 build_string ("GB2312"),
1478 build_string ("GB2312)"),
1479 build_string ("GB2312 Chinese simplified"),
1480 build_string ("gb2312"));
1481 staticpro (&Vcharset_japanese_jisx0208);
1482 Vcharset_japanese_jisx0208 =
1483 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
1484 CHARSET_TYPE_94X94, 2, 0, 'B',
1485 CHARSET_LEFT_TO_RIGHT,
1486 build_string ("JISX0208"),
1487 build_string ("JISX0208.1983/1990 (Japanese)"),
1488 build_string ("JISX0208.1983/1990 Japanese Kanji"),
1489 build_string ("jisx0208.19\\(83\\|90\\)"));
1490 staticpro (&Vcharset_korean_ksc5601);
1491 Vcharset_korean_ksc5601 =
1492 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
1493 CHARSET_TYPE_94X94, 2, 0, 'C',
1494 CHARSET_LEFT_TO_RIGHT,
1495 build_string ("KSC5601"),
1496 build_string ("KSC5601 (Korean"),
1497 build_string ("KSC5601 Korean Hangul and Hanja"),
1498 build_string ("ksc5601"));
1499 staticpro (&Vcharset_japanese_jisx0212);
1500 Vcharset_japanese_jisx0212 =
1501 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
1502 CHARSET_TYPE_94X94, 2, 0, 'D',
1503 CHARSET_LEFT_TO_RIGHT,
1504 build_string ("JISX0212"),
1505 build_string ("JISX0212 (Japanese)"),
1506 build_string ("JISX0212 Japanese Supplement"),
1507 build_string ("jisx0212"));
1508
1509 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
1510 staticpro (&Vcharset_chinese_cns11643_1);
1511 Vcharset_chinese_cns11643_1 =
1512 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
1513 CHARSET_TYPE_94X94, 2, 0, 'G',
1514 CHARSET_LEFT_TO_RIGHT,
1515 build_string ("CNS11643-1"),
1516 build_string ("CNS11643-1 (Chinese traditional)"),
1517 build_string
1518 ("CNS 11643 Plane 1 Chinese traditional"),
1519 build_string (CHINESE_CNS_PLANE_RE("1")));
1520 staticpro (&Vcharset_chinese_cns11643_2);
1521 Vcharset_chinese_cns11643_2 =
1522 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
1523 CHARSET_TYPE_94X94, 2, 0, 'H',
1524 CHARSET_LEFT_TO_RIGHT,
1525 build_string ("CNS11643-2"),
1526 build_string ("CNS11643-2 (Chinese traditional)"),
1527 build_string
1528 ("CNS 11643 Plane 2 Chinese traditional"),
1529 build_string (CHINESE_CNS_PLANE_RE("2")));
1530 staticpro (&Vcharset_chinese_big5_1);
1531 Vcharset_chinese_big5_1 =
1532 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
1533 CHARSET_TYPE_94X94, 2, 0, '0',
1534 CHARSET_LEFT_TO_RIGHT,
1535 build_string ("Big5"),
1536 build_string ("Big5 (Level-1)"),
1537 build_string
1538 ("Big5 Level-1 Chinese traditional"),
1539 build_string ("big5"));
1540 staticpro (&Vcharset_chinese_big5_2);
1541 Vcharset_chinese_big5_2 =
1542 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
1543 CHARSET_TYPE_94X94, 2, 0, '1',
1544 CHARSET_LEFT_TO_RIGHT,
1545 build_string ("Big5"),
1546 build_string ("Big5 (Level-2)"),
1547 build_string
1548 ("Big5 Level-2 Chinese traditional"),
1549 build_string ("big5"));
1550
1551
1552 #ifdef ENABLE_COMPOSITE_CHARS
1553 /* #### For simplicity, we put composite chars into a 96x96 charset.
1554 This is going to lead to problems because you can run out of
1555 room, esp. as we don't yet recycle numbers. */
1556 staticpro (&Vcharset_composite);
1557 Vcharset_composite =
1558 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
1559 CHARSET_TYPE_96X96, 2, 0, 0,
1560 CHARSET_LEFT_TO_RIGHT,
1561 build_string ("Composite"),
1562 build_string ("Composite characters"),
1563 build_string ("Composite characters"),
1564 build_string (""));
1565
1566 /* #### not dumped properly */
1567 composite_char_row_next = 32;
1568 composite_char_col_next = 32;
1569
1570 Vcomposite_char_string2char_hash_table =
1571 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
1572 Vcomposite_char_char2string_hash_table =
1573 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
1574 staticpro (&Vcomposite_char_string2char_hash_table);
1575 staticpro (&Vcomposite_char_char2string_hash_table);
1576 #endif /* ENABLE_COMPOSITE_CHARS */
1577
1578 }