comparison src/mule-charset.c @ 70:131b0175ea99 r20-0b30

Import from CVS: tag r20-0b30
author cvs
date Mon, 13 Aug 2007 09:02:59 +0200
parents
children 54cc21c15cbb
comparison
equal deleted inserted replaced
69:804d1389bcd6 70:131b0175ea99
1 /* Functions to handle multilingual characters.
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4
5 This file is part of XEmacs.
6
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
10 later version.
11
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 /* Synched up with: Mule 2.3. Not in FSF. */
23
24 /* Rewritten by Ben Wing <wing@666.com>. */
25
26 #include <config.h>
27 #include "lisp.h"
28
29 #include "buffer.h"
30 #include "chartab.h"
31 #include "elhash.h"
32 #include "lstream.h"
33 #include "device.h"
34 #include "faces.h"
35
36 /* The various pre-defined charsets. */
37
38 Lisp_Object Vcharset_ascii;
39 Lisp_Object Vcharset_control_1;
40 Lisp_Object Vcharset_latin_1;
41 Lisp_Object Vcharset_latin_2;
42 Lisp_Object Vcharset_latin_3;
43 Lisp_Object Vcharset_latin_4;
44 Lisp_Object Vcharset_cyrillic;
45 Lisp_Object Vcharset_arabic;
46 Lisp_Object Vcharset_greek;
47 Lisp_Object Vcharset_hebrew;
48 Lisp_Object Vcharset_latin_5;
49 Lisp_Object Vcharset_thai;
50 Lisp_Object Vcharset_japanese_jisx0201_kana;
51 Lisp_Object Vcharset_japanese_jisx0201_roman;
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
53 Lisp_Object Vcharset_japanese_jisx0208;
54 Lisp_Object Vcharset_japanese_jisx0212;
55 Lisp_Object Vcharset_chinese_gb;
56 Lisp_Object Vcharset_chinese_big5_1;
57 Lisp_Object Vcharset_chinese_big5_2;
58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_korean_ksc5601;
61 Lisp_Object Vcharset_composite;
62
63 /* Hashtables for composite chars. One maps string representing
64 composed chars to their equivalent chars; one goes the
65 other way. */
66 Lisp_Object Vcomposite_char_char2string_hashtable;
67 Lisp_Object Vcomposite_char_string2char_hashtable;
68
69 /* Table of charsets indexed by leading byte. */
70 Lisp_Object charset_by_leading_byte[128];
71
72 /* Table of charsets indexed by type/final-byte/direction. */
73 Lisp_Object charset_by_attributes[4][128][2];
74
75 static int composite_char_row_next;
76 static int composite_char_col_next;
77
78 /* Table of number of bytes in the string representation of a character
79 indexed by the first byte of that representation.
80
81 This value can be derived other ways -- e.g. something like
82
83 (BYTE_ASCII_P (first_byte) ? 1 :
84 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte)))
85
86 but it's faster this way. */
87
88 Bytecount rep_bytes_by_first_byte[0xA0] =
89 { /* 16 x 8 ones for ASCII */
90 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
91 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
98 /* 1 x 8 for Dimension-1 official Mule charsets */
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100 /* 0x90 - 0x9d are Dimension-2 official */
101 /* 0x9e is Dimension-1 private */
102 /* 0x9f is Dimension-2 private */
103 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
104 };
105
106 Lisp_Object Qcharsetp;
107
108 /* Qdoc_string, Qdimension, Qchars defined in general.c */
109 Lisp_Object Qregistry, Qfinal, Qgraphic;
110 Lisp_Object Qdirection;
111 Lisp_Object Qreverse_direction_charset;
112 Lisp_Object Qccl_program;
113
114 Lisp_Object Qascii, Qcontrol_1, Qlatin_1, Qlatin_2, Qlatin_3, Qlatin_4, Qlatin_5,
115 Qthai, Qarabic, Qhebrew, Qgreek, Qcyrillic,
116
117 Qjapanese_jisx0201_kana,
118 Qjapanese_jisx0201_roman,
119 Qjapanese_jisx0208_1978,
120 Qjapanese_jisx0208,
121 Qjapanese_jisx0212,
122
123 Qchinese_gb, Qchinese_big5_1, Qchinese_big5_2,
124 Qchinese_cns11643_1, Qchinese_cns11643_2,
125
126 Qkorean_ksc5601, Qcomposite;
127
128 Lisp_Object Ql2r, Qr2l;
129
130 Lisp_Object Vcharset_hashtable;
131
132 static Bufbyte next_allocated_1_byte_leading_byte;
133 static Bufbyte next_allocated_2_byte_leading_byte;
134
135 /* Composite characters are characters constructed by overstriking two
136 or more regular characters.
137
138 1) The old Mule implementation involves storing composite characters
139 in a buffer as a tag followed by all of the actual characters
140 used to make up the composite character. I think this is a bad
141 idea; it greatly complicates code that wants to handle strings
142 one character at a time because it has to deal with the possibility
143 of great big ungainly characters. It's much more reasonable to
144 simply store an index into a table of composite characters.
145
146 2) The current implementation only allows for 16,384 separate
147 composite characters over the lifetime of the XEmacs process.
148 This could become a potential problem if the user
149 edited lots of different files that use composite characters.
150 Due to FSF bogosity, increasing the number of allowable
151 composite characters under Mule would decrease the number
152 of possible faces that can exist. Mule already has shrunk
153 this to 2048, and further shrinkage would become uncomfortable.
154 No such problems exist in XEmacs.
155
156 Composite characters could be represented as 0x80 C1 C2 C3,
157 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
158 for slightly under 2^20 (one million) composite characters
159 over the XEmacs process lifetime, and you only need to
160 increase the size of a Mule character from 19 to 21 bits.
161 Or you could use 0x80 C1 C2 C3 C4, allowing for about
162 85 million (slightly over 2^26) composite characters. */
163
164
165 /************************************************************************/
166 /* Basic Emchar functions */
167 /************************************************************************/
168
169 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
170 string in STR. Returns the number of bytes stored.
171 Do not call this directly. Use the macro set_charptr_emchar() instead.
172 */
173
174 Bytecount
175 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
176 {
177 Bufbyte *p;
178 Bufbyte lb;
179 int c1, c2;
180 Lisp_Object charset;
181
182 p = str;
183 BREAKUP_CHAR (c, charset, c1, c2);
184 lb = CHAR_LEADING_BYTE (c);
185 if (LEADING_BYTE_PRIVATE_P (lb))
186 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
187 *p++ = lb;
188 if (EQ (charset, Vcharset_control_1))
189 c1 += 0x20;
190 *p++ = c1 | 0x80;
191 if (c2)
192 *p++ = c2 | 0x80;
193
194 return (p - str);
195 }
196
197 /* Return the first character from a Mule-encoded string in STR,
198 assuming it's non-ASCII. Do not call this directly.
199 Use the macro charptr_emchar() instead. */
200
201 Emchar
202 non_ascii_charptr_emchar (CONST Bufbyte *str)
203 {
204 Bufbyte i0 = *str, i1, i2 = 0;
205 Lisp_Object charset;
206
207 if (i0 == LEADING_BYTE_CONTROL_1)
208 return (Emchar) (*++str - 0x20);
209
210 if (LEADING_BYTE_PREFIX_P (i0))
211 i0 = *++str;
212
213 i1 = *++str & 0x7F;
214
215 charset = CHARSET_BY_LEADING_BYTE (i0);
216 if (XCHARSET_DIMENSION (charset) == 2)
217 i2 = *++str & 0x7F;
218
219 return MAKE_CHAR (charset, i1, i2);
220 }
221
222 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
223 Do not call this directly. Use the macro valid_char_p() instead. */
224
225 int
226 non_ascii_valid_char_p (Emchar ch)
227 {
228 int f1, f2, f3;
229
230 /* Must have only lowest 19 bits set */
231 if (ch & ~0x7FFFF)
232 return 0;
233
234 f1 = CHAR_FIELD1 (ch);
235 f2 = CHAR_FIELD2 (ch);
236 f3 = CHAR_FIELD3 (ch);
237
238 if (f1 == 0)
239 {
240 Lisp_Object charset;
241
242 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
243 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
244 f2 > MAX_CHAR_FIELD2_PRIVATE)
245 return 0;
246 if (f3 < 0x20)
247 return 0;
248
249 if (f3 != 0x20 && f3 != 0x7F)
250 return 1;
251
252 /*
253 NOTE: This takes advantage of the fact that
254 FIELD2_TO_OFFICIAL_LEADING_BYTE and
255 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
256 */
257 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
258 return (XCHARSET_CHARS (charset) == 96);
259 }
260 else
261 {
262 Lisp_Object charset;
263
264 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
265 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
266 f1 > MAX_CHAR_FIELD1_PRIVATE)
267 return 0;
268 if (f2 < 0x20 || f3 < 0x20)
269 return 0;
270
271 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
272 {
273 if (UNBOUNDP (Fgethash (make_int (ch),
274 Vcomposite_char_char2string_hashtable,
275 Qunbound)))
276 return 0;
277 return 1;
278 }
279
280 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
281 return 1;
282
283 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
284 charset =
285 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
286 else
287 charset =
288 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
289
290 return (XCHARSET_CHARS (charset) == 96);
291 }
292 }
293
294
295 /************************************************************************/
296 /* Basic string functions */
297 /************************************************************************/
298
299 /* Copy the character pointed to by PTR into STR, assuming it's
300 non-ASCII. Do not call this directly. Use the macro
301 charptr_copy_char() instead. */
302
303 Bytecount
304 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
305 {
306 Bufbyte *strptr = str;
307 *strptr = *ptr++;
308 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
309 {
310 /* Notice fallthrough. */
311 case 4: *++strptr = *ptr++;
312 case 3: *++strptr = *ptr++;
313 case 2: *++strptr = *ptr;
314 break;
315 default:
316 abort ();
317 }
318 return strptr + 1 - str;
319 }
320
321
322 /************************************************************************/
323 /* streams of Emchars */
324 /************************************************************************/
325
326 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
327 The functions below are not meant to be called directly; use
328 the macros in insdel.h. */
329
330 Emchar
331 Lstream_get_emchar_1 (Lstream *stream, int ch)
332 {
333 Bufbyte str[MAX_EMCHAR_LEN];
334 Bufbyte *strptr = str;
335
336 str[0] = (Bufbyte) ch;
337 switch (REP_BYTES_BY_FIRST_BYTE (ch))
338 {
339 /* Notice fallthrough. */
340 case 4:
341 ch = Lstream_getc (stream);
342 assert (ch >= 0);
343 *++strptr = (Bufbyte) ch;
344 case 3:
345 ch = Lstream_getc (stream);
346 assert (ch >= 0);
347 *++strptr = (Bufbyte) ch;
348 case 2:
349 ch = Lstream_getc (stream);
350 assert (ch >= 0);
351 *++strptr = (Bufbyte) ch;
352 break;
353 default:
354 abort ();
355 }
356 return charptr_emchar (str);
357 }
358
359 int
360 Lstream_fput_emchar (Lstream *stream, Emchar ch)
361 {
362 Bufbyte str[MAX_EMCHAR_LEN];
363 Bytecount len = set_charptr_emchar (str, ch);
364 return Lstream_write (stream, str, len);
365 }
366
367 void
368 Lstream_funget_emchar (Lstream *stream, Emchar ch)
369 {
370 Bufbyte str[MAX_EMCHAR_LEN];
371 Bytecount len = set_charptr_emchar (str, ch);
372 Lstream_unread (stream, str, len);
373 }
374
375
376 /************************************************************************/
377 /* charset object */
378 /************************************************************************/
379
380 static Lisp_Object mark_charset (Lisp_Object, void (*) (Lisp_Object));
381 static void print_charset (Lisp_Object, Lisp_Object, int);
382 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
383 mark_charset, print_charset, 0, 0, 0,
384 struct Lisp_Charset);
385
386 static Lisp_Object
387 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
388 {
389 struct Lisp_Charset *cs = XCHARSET (obj);
390
391 (markobj) (cs->doc_string);
392 (markobj) (cs->registry);
393 (markobj) (cs->ccl_program);
394 return cs->name;
395 }
396
397 static void
398 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
399 {
400 struct Lisp_Charset *cs = XCHARSET (obj);
401 char buf[200];
402
403 if (print_readably)
404 error ("printing unreadable object #<charset %s 0x%x>",
405 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
406 cs->header.uid);
407
408 write_c_string ("#<charset ", printcharfun);
409 print_internal (CHARSET_NAME (cs), printcharfun, 0);
410 write_c_string (" ", printcharfun);
411 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
412 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
413 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
414 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
415 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
416 "96x96",
417 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
418 CHARSET_COLUMNS (cs),
419 CHARSET_GRAPHIC (cs),
420 CHARSET_FINAL (cs));
421 write_c_string (buf, printcharfun);
422 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
423 sprintf (buf, " 0x%x>", cs->header.uid);
424 write_c_string (buf, printcharfun);
425 }
426
427 /* Make a new charset. */
428
429 static Lisp_Object
430 make_charset (Lisp_Object name, Bufbyte leading_byte, unsigned char rep_bytes,
431 unsigned char type, unsigned char columns, unsigned char graphic,
432 Bufbyte final, unsigned char direction, Lisp_Object doc,
433 Lisp_Object reg)
434 {
435 struct Lisp_Charset *cs;
436 Lisp_Object obj = Qnil;
437
438 cs = (struct Lisp_Charset *) alloc_lcrecord (sizeof (struct Lisp_Charset),
439 lrecord_charset);
440 XSETCHARSET (obj, cs);
441
442 CHARSET_NAME (cs) = name;
443 CHARSET_LEADING_BYTE (cs) = leading_byte;
444 CHARSET_REP_BYTES (cs) = rep_bytes;
445 CHARSET_DIRECTION (cs) = direction;
446 CHARSET_TYPE (cs) = type;
447 CHARSET_COLUMNS (cs) = columns;
448 CHARSET_GRAPHIC (cs) = graphic;
449 CHARSET_FINAL (cs) = final;
450 CHARSET_DOC_STRING (cs) = doc;
451 CHARSET_REGISTRY (cs) = reg;
452 CHARSET_CCL_PROGRAM (cs) = Qnil;
453 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
454
455 if (final)
456 {
457 /* some charsets do not have final characters. This includes
458 ASCII, Control-1, Composite, and the two faux private
459 charsets. */
460 assert (NILP (charset_by_attributes[type][final][direction]));
461 charset_by_attributes[type][final][direction] = obj;
462 }
463
464 assert (NILP (charset_by_leading_byte[leading_byte - 128]));
465 charset_by_leading_byte[leading_byte - 128] = obj;
466 if (leading_byte < 0xA0)
467 /* official leading byte */
468 rep_bytes_by_first_byte[leading_byte] = rep_bytes;
469
470 /* Some charsets are "faux" and don't have names or really exist at
471 all except in the leading-byte table. */
472 if (!NILP (name))
473 Fputhash (name, obj, Vcharset_hashtable);
474 return obj;
475 }
476
477 static int
478 get_unallocated_leading_byte (int dimension)
479 {
480 int lb;
481
482 if (dimension == 1)
483 {
484 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
485 lb = 0;
486 else
487 lb = next_allocated_1_byte_leading_byte++;
488 }
489 else
490 {
491 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
492 lb = 0;
493 else
494 lb = next_allocated_2_byte_leading_byte++;
495 }
496
497 if (!lb)
498 signal_simple_error
499 ("No more character sets free for this dimension",
500 make_int (dimension));
501
502 return lb;
503 }
504
505
506 /************************************************************************/
507 /* Basic charset Lisp functions */
508 /************************************************************************/
509
510 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
511 Return non-nil if OBJECT is a charset.
512 */
513 (object))
514 {
515 return (CHARSETP (object) ? Qt : Qnil);
516 }
517
518 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
519 Retrieve the charset of the given name.
520 If CHARSET-OR-NAME is a charset object, it is simply returned.
521 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
522 nil is returned. Otherwise the associated charset object is returned.
523 */
524 (charset_or_name))
525 {
526 if (CHARSETP (charset_or_name))
527 return charset_or_name;
528 CHECK_SYMBOL (charset_or_name);
529
530 return Fgethash (charset_or_name, Vcharset_hashtable, Qnil);
531 }
532
533 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
534 Retrieve the charset of the given name.
535 Same as `find-charset' except an error is signalled if there is no such
536 charset instead of returning nil.
537 */
538 (name))
539 {
540 Lisp_Object charset = Ffind_charset (name);
541
542 if (NILP (charset))
543 signal_simple_error ("No such charset", name);
544 return charset;
545 }
546
547 /* We store the charsets in hash tables with the names as the key and the
548 actual charset object as the value. Occasionally we need to use them
549 in a list format. These routines provide us with that. */
550 struct charset_list_closure
551 {
552 Lisp_Object *charset_list;
553 };
554
555 static void
556 add_charset_to_list_mapper (CONST void *hash_key, void *hash_contents,
557 void *charset_list_closure)
558 {
559 /* This function can GC */
560 Lisp_Object key, contents;
561 Lisp_Object *charset_list;
562 struct charset_list_closure *chcl = charset_list_closure;
563 CVOID_TO_LISP (key, hash_key);
564 VOID_TO_LISP (contents, hash_contents);
565 charset_list = chcl->charset_list;
566
567 *charset_list = Fcons (XCHARSET_NAME (contents), *charset_list);
568 }
569
570 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
571 Return a list of the names of all defined charsets.
572 */
573 ())
574 {
575 Lisp_Object charset_list = Qnil;
576 struct gcpro gcpro1;
577 struct charset_list_closure charset_list_closure;
578
579 GCPRO1 (charset_list);
580 charset_list_closure.charset_list = &charset_list;
581 elisp_maphash (add_charset_to_list_mapper, Vcharset_hashtable,
582 &charset_list_closure);
583 UNGCPRO;
584
585 return charset_list;
586 }
587
588 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
589 Return the name of the given charset.
590 */
591 (charset))
592 {
593 return (XCHARSET_NAME (Fget_charset (charset)));
594 }
595
596 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
597 Define a new character set.
598 This function is for use with Mule support.
599 NAME is a symbol, the name by which the character set is normally referred.
600 DOC-STRING is a string describing the character set.
601 PROPS is a property list, describing the specific nature of the
602 character set. Recognized properties are:
603
604 'registry A regular expression matching the font registry field for
605 this character set.
606 'dimension Number of octets used to index a character in this charset.
607 Either 1 or 2. Defaults to 1.
608 'columns Number of columns used to display a character in this charset.
609 Only used in TTY mode. (Under X, the actual width of a
610 character can be derived from the font used to display the
611 characters.) If unspecified, defaults to the dimension
612 (this is almost always the correct value).
613 'chars Number of characters in each dimension (94 or 96).
614 Defaults to 94. Note that if the dimension is 2, the
615 character set thus described is 94x94 or 96x96.
616 'final Final byte of ISO 2022 escape sequence. Must be
617 supplied. Each combination of (DIMENSION, CHARS) defines a
618 separate namespace for final bytes. Note that ISO
619 2022 restricts the final byte to the range
620 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
621 dimension == 2. Note also that final bytes in the range
622 0x30 - 0x3F are reserved for user-defined (not official)
623 character sets.
624 'graphic 0 (use left half of font on output) or 1 (use right half
625 of font on output). Defaults to 0. For example, for
626 a font whose registry is ISO8859-1, the left half
627 (octets 0x20 - 0x7F) is the `ascii' character set, while
628 the right half (octets 0xA0 - 0xFF) is the `latin-1'
629 character set. With 'graphic set to 0, the octets
630 will have their high bit cleared; with it set to 1,
631 the octets will have their high bit set.
632 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
633 Defaults to 'l2r.
634 'ccl-program A compiled CCL program used to convert a character in
635 this charset into an index into the font. This is in
636 addition to the 'graphic property. The CCL program
637 is passed the octets of the character, with the high
638 bit cleared and set depending upon whether the value
639 of the 'graphic property is 0 or 1.
640 */
641 (name, doc_string, props))
642 {
643 int lb, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
644 int direction = CHARSET_LEFT_TO_RIGHT;
645 int type;
646 Lisp_Object registry = Qnil;
647 Lisp_Object charset;
648 Lisp_Object rest, keyword, value;
649 Lisp_Object ccl_program = Qnil;
650
651 CHECK_SYMBOL (name);
652 if (!NILP (doc_string))
653 CHECK_STRING (doc_string);
654
655 charset = Ffind_charset (name);
656 if (!NILP (charset))
657 signal_simple_error ("Cannot redefine existing charset", name);
658
659 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
660 {
661 if (EQ (keyword, Qdimension))
662 {
663 CHECK_INT (value);
664 dimension = XINT (value);
665 if (dimension < 1 || dimension > 2)
666 signal_simple_error ("Invalid value for 'dimension", value);
667 }
668
669 else if (EQ (keyword, Qchars))
670 {
671 CHECK_INT (value);
672 chars = XINT (value);
673 if (chars != 94 && chars != 96)
674 signal_simple_error ("Invalid value for 'chars", value);
675 }
676
677 else if (EQ (keyword, Qcolumns))
678 {
679 CHECK_INT (value);
680 columns = XINT (value);
681 if (columns != 1 && columns != 2)
682 signal_simple_error ("Invalid value for 'columns", value);
683 }
684
685 else if (EQ (keyword, Qgraphic))
686 {
687 CHECK_INT (value);
688 graphic = XINT (value);
689 if (graphic < 0 || graphic > 1)
690 signal_simple_error ("Invalid value for 'graphic", value);
691 }
692
693 else if (EQ (keyword, Qregistry))
694 {
695 CHECK_STRING (value);
696 registry = value;
697 }
698
699 else if (EQ (keyword, Qdirection))
700 {
701 if (EQ (value, Ql2r))
702 direction = CHARSET_LEFT_TO_RIGHT;
703 else if (EQ (value, Qr2l))
704 direction = CHARSET_RIGHT_TO_LEFT;
705 else
706 signal_simple_error ("Invalid value for 'direction", value);
707 }
708
709 else if (EQ (keyword, Qfinal))
710 {
711 CHECK_CHAR_COERCE_INT (value);
712 final = XCHAR (value);
713 if (final < '0' || final > '~')
714 signal_simple_error ("Invalid value for 'final", value);
715 }
716
717 else if (EQ (keyword, Qccl_program))
718 {
719 CHECK_VECTOR (value);
720 ccl_program = value;
721 }
722
723 else
724 signal_simple_error ("Unrecognized property", keyword);
725 }
726
727 if (!final)
728 error ("'final must be specified");
729 if (dimension == 2 && final > 0x5F)
730 signal_simple_error
731 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
732 make_char (final));
733
734 if (dimension == 1)
735 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
736 else
737 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
738
739 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
740 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
741 error
742 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
743
744 lb = get_unallocated_leading_byte (dimension);
745
746 if (NILP (doc_string))
747 doc_string = build_string ("");
748
749 if (NILP (registry))
750 registry = build_string ("");
751
752 if (columns == -1)
753 columns = dimension;
754 charset = make_charset (name, lb, dimension + 2, type, columns, graphic,
755 final, direction, doc_string, registry);
756 if (!NILP (ccl_program))
757 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
758 return charset;
759 }
760
761 DEFUN ("make-reverse-direction-charset",
762 Fmake_reverse_direction_charset, 2, 2, 0, /*
763 Make a charset equivalent to CHARSET but which goes in the opposite direction.
764 NEW-NAME is the name of the new charset. Return the new charset.
765 */
766 (charset, new_name))
767 {
768 Lisp_Object new_charset = Qnil;
769 int lb, dimension, columns, graphic, final;
770 int direction, type;
771 Lisp_Object registry, doc_string;
772 struct Lisp_Charset *cs;
773
774 charset = Fget_charset (charset);
775 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
776 signal_simple_error ("Charset already has reverse-direction charset",
777 charset);
778
779 CHECK_SYMBOL (new_name);
780 if (!NILP (Ffind_charset (new_name)))
781 signal_simple_error ("Cannot redefine existing charset", new_name);
782
783 cs = XCHARSET (charset);
784
785 type = CHARSET_TYPE (cs);
786 columns = CHARSET_COLUMNS (cs);
787 dimension = CHARSET_DIMENSION (cs);
788 lb = get_unallocated_leading_byte (dimension);
789
790 graphic = CHARSET_GRAPHIC (cs);
791 final = CHARSET_FINAL (cs);
792 direction = CHARSET_RIGHT_TO_LEFT;
793 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
794 direction = CHARSET_LEFT_TO_RIGHT;
795 doc_string = CHARSET_DOC_STRING (cs);
796 registry = CHARSET_REGISTRY (cs);
797
798 new_charset = make_charset (new_name, lb, dimension + 2, type, columns,
799 graphic, final, direction, doc_string, registry);
800
801 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
802 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
803
804 return new_charset;
805 }
806
807 /* #### The defsubr for this is commented out at the moment but no
808 reason why is given. */
809 #if 0
810 DEFUN ("charset-reverse-direction-charset",
811 Fcharset_reverse_direction_charset, 1, 1, 0, /*
812 Return the reverse-direction charset parallel to CHARSET, if any.
813 This is the charset with the same properties (in particular, the same
814 dimension, number of characters per dimension, and final byte) as
815 CHARSET but whose characters are displayed in the opposite direction.
816 */
817 (charset))
818 {
819 charset = Fget_charset (charset);
820 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
821 }
822 #endif
823
824 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
825 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
826 If DIRECTION is omitted, both directions will be checked (left-to-right
827 will be returned if character sets exist for both directions).
828 */
829 (dimension, chars, final, direction))
830 {
831 int dm, ch, fi, di = -1;
832 int type;
833 Lisp_Object obj = Qnil;
834
835 CHECK_INT (dimension);
836 dm = XINT (dimension);
837 if (dm < 1 || dm > 2)
838 signal_simple_error ("Invalid value for DIMENSION", dimension);
839
840 CHECK_INT (chars);
841 ch = XINT (chars);
842 if (ch != 94 && ch != 96)
843 signal_simple_error ("Invalid value for CHARS", chars);
844
845 CHECK_CHAR_COERCE_INT (final);
846 fi = XCHAR (final);
847 if (fi < '0' || fi > '~')
848 signal_simple_error ("Invalid value for FINAL", final);
849
850 if (EQ (direction, Ql2r))
851 di = CHARSET_LEFT_TO_RIGHT;
852 else if (EQ (direction, Qr2l))
853 di = CHARSET_RIGHT_TO_LEFT;
854 else if (!NILP (direction))
855 signal_simple_error ("Invalid value for DIRECTION", direction);
856
857 if (dm == 2 && fi > 0x5F)
858 signal_simple_error
859 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
860
861 if (dm == 1)
862 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
863 else
864 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
865
866 if (di == -1)
867 {
868 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
869 if (NILP (obj))
870 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
871 }
872 else
873 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
874
875 if (CHARSETP (obj))
876 return XCHARSET_NAME (obj);
877 return obj;
878 }
879
880 DEFUN ("charset-doc-string", Fcharset_doc_string, 1, 1, 0, /*
881 Return doc string of CHARSET.
882 */
883 (charset))
884 {
885 return XCHARSET_DOC_STRING (Fget_charset (charset));
886 }
887
888 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
889 Return dimension of CHARSET.
890 */
891 (charset))
892 {
893 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
894 }
895
896 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
897 Return property PROP of CHARSET.
898 Recognized properties are those listed in `make-charset', as well as
899 'name and 'doc-string.
900 */
901 (charset, prop))
902 {
903 struct Lisp_Charset *cs;
904
905 charset = Fget_charset (charset);
906 cs = XCHARSET (charset);
907
908 CHECK_SYMBOL (prop);
909 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
910 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
911 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
912 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
913 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
914 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
915 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
916 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
917 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
918 if (EQ (prop, Qdirection))
919 return (CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l);
920 if (EQ (prop, Qreverse_direction_charset))
921 {
922 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
923 if (NILP (obj))
924 return Qnil;
925 else
926 return XCHARSET_NAME (obj);
927 }
928 signal_simple_error ("Unrecognized charset property name", prop);
929 return Qnil; /* not reached */
930 }
931
932 /* #### We need to figure out which properties we really want to
933 allow to be set. */
934
935 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
936 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
937 */
938 (charset, ccl_program))
939 {
940 charset = Fget_charset (charset);
941 CHECK_VECTOR (ccl_program);
942 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
943 return Qnil;
944 }
945
946 static void
947 invalidate_charset_font_caches (Lisp_Object charset)
948 {
949 /* Invalidate font cache entries for charset on all devices. */
950 Lisp_Object devcons, concons, hashtab;
951 DEVICE_LOOP_NO_BREAK (devcons, concons)
952 {
953 struct device *d = XDEVICE (XCAR (devcons));
954 hashtab = Fgethash (charset, d->charset_font_cache, Qunbound);
955 if (!UNBOUNDP (hashtab))
956 Fclrhash (hashtab);
957 }
958 }
959
960 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
961 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
962 Set the 'registry property of CHARSET to REGISTRY.
963 */
964 (charset, registry))
965 {
966 charset = Fget_charset (charset);
967 CHECK_STRING (registry);
968 XCHARSET_REGISTRY (charset) = registry;
969 invalidate_charset_font_caches (charset);
970 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
971 return Qnil;
972 }
973
974
975 /************************************************************************/
976 /* Lisp primitives for working with characters */
977 /************************************************************************/
978
979 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
980 Make a multi-byte character from CHARSET and octets ARG1 and ARG2.
981 */
982 (charset, arg1, arg2))
983 {
984 struct Lisp_Charset *cs;
985 int a1, a2;
986 int lowlim, highlim;
987
988 charset = Fget_charset (charset);
989 cs = XCHARSET (charset);
990
991 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
992 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
993 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
994 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
995
996 CHECK_INT (arg1);
997 a1 = XINT (arg1);
998 if (a1 < lowlim || a1 > highlim)
999 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1000
1001 if (CHARSET_DIMENSION (cs) == 1)
1002 {
1003 if (!NILP (arg2))
1004 signal_simple_error
1005 ("Charset is of dimension one; second octet must be nil", arg2);
1006 return make_char (MAKE_CHAR (charset, a1, 0));
1007 }
1008
1009 CHECK_INT (arg2);
1010 a2 = XINT (arg2);
1011 if (a2 < lowlim || a2 > highlim)
1012 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1013
1014 return make_char (MAKE_CHAR (charset, a1, a2));
1015 }
1016
1017 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
1018 Return the character set of char CH.
1019 */
1020 (ch))
1021 {
1022 CHECK_CHAR_COERCE_INT (ch);
1023
1024 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
1025 (CHAR_LEADING_BYTE (XCHAR (ch))));
1026 }
1027
1028 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
1029 Return the octet numbered N (should be 0 or 1) of char CH.
1030 N defaults to 0 if omitted.
1031 */
1032 (ch, n))
1033 {
1034 Lisp_Object charset;
1035 int c1, c2;
1036
1037 CHECK_CHAR_COERCE_INT (ch);
1038 if (NILP (n))
1039 n = Qzero;
1040 else
1041 {
1042 CHECK_INT (n);
1043 if (XINT (n) != 0 && XINT (n) != 1)
1044 signal_simple_error ("Octet number must be 0 or 1", n);
1045 }
1046 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
1047 if (XINT (n) == 0)
1048 return make_int (c1);
1049 else
1050 return make_int (c2);
1051 }
1052
1053
1054 /************************************************************************/
1055 /* composite character functions */
1056 /************************************************************************/
1057
1058 Emchar
1059 lookup_composite_char (Bufbyte *str, int len)
1060 {
1061 Lisp_Object lispstr = make_string (str, len);
1062 Lisp_Object ch = Fgethash (lispstr,
1063 Vcomposite_char_string2char_hashtable,
1064 Qunbound);
1065 Emchar emch;
1066
1067 if (UNBOUNDP (ch))
1068 {
1069 if (composite_char_row_next >= 128)
1070 signal_simple_error ("No more composite chars available", lispstr);
1071 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
1072 composite_char_col_next);
1073 Fputhash (make_char (emch), lispstr,
1074 Vcomposite_char_char2string_hashtable);
1075 Fputhash (lispstr, make_char (emch),
1076 Vcomposite_char_string2char_hashtable);
1077 composite_char_col_next++;
1078 if (composite_char_col_next >= 128)
1079 {
1080 composite_char_col_next = 32;
1081 composite_char_row_next++;
1082 }
1083 }
1084 else
1085 emch = XCHAR (ch);
1086 return emch;
1087 }
1088
1089 Lisp_Object
1090 composite_char_string (Emchar ch)
1091 {
1092 Lisp_Object str = Fgethash (make_char (ch),
1093 Vcomposite_char_char2string_hashtable,
1094 Qunbound);
1095 assert (!UNBOUNDP (str));
1096 return str;
1097 }
1098
1099 DEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1100 Convert a string into a single composite character.
1101 The character is the result of overstriking all the characters in
1102 the string.
1103 */
1104 (string))
1105 {
1106 CHECK_STRING (string);
1107 return make_char (lookup_composite_char (XSTRING_DATA (string),
1108 XSTRING_LENGTH (string)));
1109 }
1110
1111 DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1112 Return a string of the characters comprising a composite character.
1113 */
1114 (ch))
1115 {
1116 Emchar emch;
1117
1118 CHECK_CHAR (ch);
1119 emch = XCHAR (ch);
1120 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1121 signal_simple_error ("Must be composite char", ch);
1122 return composite_char_string (emch);
1123 }
1124
1125
1126 /************************************************************************/
1127 /* initialization */
1128 /************************************************************************/
1129
1130 void
1131 syms_of_mule_charset (void)
1132 {
1133 DEFSUBR (Fcharsetp);
1134 DEFSUBR (Ffind_charset);
1135 DEFSUBR (Fget_charset);
1136 DEFSUBR (Fcharset_list);
1137 DEFSUBR (Fcharset_name);
1138 DEFSUBR (Fmake_charset);
1139 DEFSUBR (Fmake_reverse_direction_charset);
1140 /* DEFSUBR (Freverse_direction_charset); */
1141 DEFSUBR (Fcharset_from_attributes);
1142 DEFSUBR (Fcharset_doc_string);
1143 DEFSUBR (Fcharset_dimension);
1144 DEFSUBR (Fcharset_property);
1145 DEFSUBR (Fset_charset_ccl_program);
1146 DEFSUBR (Fset_charset_registry);
1147
1148 DEFSUBR (Fmake_char);
1149 DEFSUBR (Fchar_charset);
1150 DEFSUBR (Fchar_octet);
1151
1152 DEFSUBR (Fmake_composite_char);
1153 DEFSUBR (Fcomposite_char_string);
1154
1155 defsymbol (&Qcharsetp, "charsetp");
1156 defsymbol (&Qregistry, "registry");
1157 defsymbol (&Qfinal, "final");
1158 defsymbol (&Qgraphic, "graphic");
1159 defsymbol (&Qdirection, "direction");
1160 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
1161 defsymbol (&Qccl_program, "ccl-program");
1162
1163 defsymbol (&Ql2r, "l2r");
1164 defsymbol (&Qr2l, "r2l");
1165
1166 /* Charsets */
1167 defsymbol (&Qascii, "ascii");
1168 defsymbol (&Qcontrol_1, "control-1");
1169 defsymbol (&Qlatin_1, "latin-1");
1170 defsymbol (&Qlatin_2, "latin-2");
1171 defsymbol (&Qlatin_3, "latin-3");
1172 defsymbol (&Qlatin_4, "latin-4");
1173 defsymbol (&Qcyrillic, "cyrillic");
1174 defsymbol (&Qarabic, "arabic");
1175 defsymbol (&Qgreek, "greek");
1176 defsymbol (&Qhebrew, "hebrew");
1177 defsymbol (&Qlatin_5, "latin-5");
1178 defsymbol (&Qthai, "thai");
1179
1180 defsymbol (&Qjapanese_jisx0201_kana, "japanese-jisx0201-kana");
1181 defsymbol (&Qjapanese_jisx0201_roman, "japanese-jisx0201-roman");
1182 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1183 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1184 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1185
1186 defsymbol (&Qchinese_gb, "chinese-gb");
1187 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1188 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1189 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1190 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1191
1192 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1193 defsymbol (&Qcomposite, "composite");
1194 }
1195
1196 void
1197 vars_of_mule_charset (void)
1198 {
1199 int i, j, k;
1200
1201 for (i = 0; i < 128; i++)
1202 charset_by_leading_byte[i] = Qnil;
1203
1204 for (i = 0; i < 4; i++)
1205 for (j = 0; j < 128; j++)
1206 for (k = 0; k < 2; k ++)
1207 charset_by_attributes[i][j][k] = Qnil;
1208
1209 /* Now done at compile time
1210 for (i = 0; i < 128; i++)
1211 rep_bytes_by_first_byte[i] = 1;
1212 */
1213
1214 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1215 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1216 }
1217
1218 void
1219 complex_vars_of_mule_charset (void)
1220 {
1221 staticpro (&Vcharset_hashtable);
1222 Vcharset_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK,
1223 HASHTABLE_EQ);
1224
1225 /* Predefined character sets. We store them into variables for
1226 ease of access. */
1227
1228 Vcharset_ascii =
1229 make_charset (Qascii, LEADING_BYTE_ASCII, 1,
1230 CHARSET_TYPE_94, 1, 0, 'B',
1231 CHARSET_LEFT_TO_RIGHT,
1232 build_string ("ASCII"),
1233 build_string ("ISO8859-1"));
1234 Vcharset_control_1 =
1235 make_charset (Qcontrol_1, LEADING_BYTE_CONTROL_1, 2,
1236 CHARSET_TYPE_94, 1, 0, 0,
1237 CHARSET_LEFT_TO_RIGHT,
1238 build_string ("Control characters"),
1239 build_string (""));
1240 Vcharset_latin_1 =
1241 make_charset (Qlatin_1, LEADING_BYTE_LATIN_1, 2,
1242 CHARSET_TYPE_96, 1, 1, 'A',
1243 CHARSET_LEFT_TO_RIGHT,
1244 build_string ("Latin-1"),
1245 build_string ("ISO8859-1"));
1246 Vcharset_latin_2 =
1247 make_charset (Qlatin_2, LEADING_BYTE_LATIN_2, 2,
1248 CHARSET_TYPE_96, 1, 1, 'B',
1249 CHARSET_LEFT_TO_RIGHT,
1250 build_string ("Latin-2"),
1251 build_string ("ISO8859-2"));
1252 Vcharset_latin_3 =
1253 make_charset (Qlatin_3, LEADING_BYTE_LATIN_3, 2,
1254 CHARSET_TYPE_96, 1, 1, 'C',
1255 CHARSET_LEFT_TO_RIGHT,
1256 build_string ("Latin-3"),
1257 build_string ("ISO8859-3"));
1258 Vcharset_latin_4 =
1259 make_charset (Qlatin_4, LEADING_BYTE_LATIN_4, 2,
1260 CHARSET_TYPE_96, 1, 1, 'D',
1261 CHARSET_LEFT_TO_RIGHT,
1262 build_string ("Latin-4"),
1263 build_string ("ISO8859-4"));
1264 Vcharset_cyrillic =
1265 make_charset (Qcyrillic, LEADING_BYTE_CYRILLIC, 2,
1266 CHARSET_TYPE_96, 1, 1, 'L',
1267 CHARSET_LEFT_TO_RIGHT,
1268 build_string ("Cyrillic"),
1269 build_string ("ISO8859-5"));
1270 Vcharset_arabic =
1271 make_charset (Qarabic, LEADING_BYTE_ARABIC, 2,
1272 CHARSET_TYPE_96, 1, 1, 'G',
1273 CHARSET_RIGHT_TO_LEFT,
1274 build_string ("Arabic"),
1275 build_string ("ISO8859-6"));
1276 Vcharset_greek =
1277 make_charset (Qgreek, LEADING_BYTE_GREEK, 2,
1278 CHARSET_TYPE_96, 1, 1, 'F',
1279 CHARSET_LEFT_TO_RIGHT,
1280 build_string ("Greek"),
1281 build_string ("ISO8859-7"));
1282 Vcharset_hebrew =
1283 make_charset (Qhebrew, LEADING_BYTE_HEBREW, 2,
1284 CHARSET_TYPE_96, 1, 1, 'H',
1285 CHARSET_RIGHT_TO_LEFT,
1286 build_string ("Hebrew"),
1287 build_string ("ISO8859-8"));
1288 Vcharset_latin_5 =
1289 make_charset (Qlatin_5, LEADING_BYTE_LATIN_5, 2,
1290 CHARSET_TYPE_96, 1, 1, 'M',
1291 CHARSET_LEFT_TO_RIGHT,
1292 build_string ("Latin-5"),
1293 build_string ("ISO8859-9"));
1294 Vcharset_thai =
1295 make_charset (Qthai, LEADING_BYTE_THAI, 2,
1296 CHARSET_TYPE_96, 1, 1, 'T',
1297 CHARSET_LEFT_TO_RIGHT,
1298 build_string ("Thai"),
1299 build_string ("TIS620"));
1300
1301 /* Japanese */
1302 Vcharset_japanese_jisx0201_kana =
1303 make_charset (Qjapanese_jisx0201_kana,
1304 LEADING_BYTE_JAPANESE_JISX0201_KANA, 2,
1305 CHARSET_TYPE_94, 1, 1, 'I',
1306 CHARSET_LEFT_TO_RIGHT,
1307 build_string ("Japanese Katakana"),
1308 build_string ("JISX0201.1976"));
1309 Vcharset_japanese_jisx0201_roman =
1310 make_charset (Qjapanese_jisx0201_roman,
1311 LEADING_BYTE_JAPANESE_JISX0201_ROMAN, 2,
1312 CHARSET_TYPE_94, 1, 0, 'J',
1313 CHARSET_LEFT_TO_RIGHT,
1314 build_string ("Japanese Roman"),
1315 build_string ("JISX0201.1976"));
1316 Vcharset_japanese_jisx0208_1978 =
1317 make_charset (Qjapanese_jisx0208_1978,
1318 LEADING_BYTE_JAPANESE_JISX0208_1978, 3,
1319 CHARSET_TYPE_94X94, 2, 0, '@',
1320 CHARSET_LEFT_TO_RIGHT,
1321 build_string ("Japanese Old"),
1322 build_string ("\\(JISX0208\\|jisc6226\\).1978"));
1323 Vcharset_japanese_jisx0208 =
1324 make_charset (Qjapanese_jisx0208,
1325 LEADING_BYTE_JAPANESE_JISX0208, 3,
1326 CHARSET_TYPE_94X94, 2, 0, 'B',
1327 CHARSET_LEFT_TO_RIGHT,
1328 build_string ("Japanese"),
1329 build_string ("JISX0208.\\(1983\\|1990\\)"));
1330 Vcharset_japanese_jisx0212 =
1331 make_charset (Qjapanese_jisx0212,
1332 LEADING_BYTE_JAPANESE_JISX0212, 3,
1333 CHARSET_TYPE_94X94, 2, 0, 'D',
1334 CHARSET_LEFT_TO_RIGHT,
1335 build_string ("Japanese Supplement"),
1336 build_string ("JISX0212"));
1337
1338 /* Chinese */
1339 Vcharset_chinese_gb =
1340 make_charset (Qchinese_gb, LEADING_BYTE_CHINESE_GB, 3,
1341 CHARSET_TYPE_94X94, 2, 0, 'A',
1342 CHARSET_LEFT_TO_RIGHT,
1343 build_string ("Chinese GB"),
1344 build_string ("GB2312"));
1345 #define CHINESE_CNS_PLANE_RE(n) "CNS11643[.-]\\(.*[.-]\\)?" n "$"
1346 Vcharset_chinese_cns11643_1 =
1347 make_charset (Qchinese_cns11643_1,
1348 LEADING_BYTE_CHINESE_CNS11643_1, 3,
1349 CHARSET_TYPE_94X94, 2, 0, 'G',
1350 CHARSET_LEFT_TO_RIGHT,
1351 build_string ("Chinese CNS Plane 1"),
1352 build_string (CHINESE_CNS_PLANE_RE("1")));
1353 Vcharset_chinese_cns11643_2 =
1354 make_charset (Qchinese_cns11643_2,
1355 LEADING_BYTE_CHINESE_CNS11643_2, 3,
1356 CHARSET_TYPE_94X94, 2, 0, 'H',
1357 CHARSET_LEFT_TO_RIGHT,
1358 build_string ("Chinese CNS Plane 2"),
1359 build_string (CHINESE_CNS_PLANE_RE("2")));
1360 Vcharset_chinese_big5_1 =
1361 make_charset (Qchinese_big5_1, LEADING_BYTE_CHINESE_BIG5_1, 3,
1362 CHARSET_TYPE_94X94, 2, 0, '0',
1363 CHARSET_LEFT_TO_RIGHT,
1364 build_string ("Chinese Big5 Level 1"),
1365 build_string ("Big5"));
1366 Vcharset_chinese_big5_2 =
1367 make_charset (Qchinese_big5_2, LEADING_BYTE_CHINESE_BIG5_2, 3,
1368 CHARSET_TYPE_94X94, 2, 0, '1',
1369 CHARSET_LEFT_TO_RIGHT,
1370 build_string ("Chinese Big5 Level 2"),
1371 build_string ("Big5"));
1372
1373 Vcharset_korean_ksc5601 =
1374 make_charset (Qkorean_ksc5601, LEADING_BYTE_KOREAN_KSC5601, 3,
1375 CHARSET_TYPE_94X94, 2, 0, 'C',
1376 CHARSET_LEFT_TO_RIGHT,
1377 build_string ("Korean"),
1378 build_string ("KSC5601"));
1379 /* #### For simplicity, we put composite chars into a 96x96 charset.
1380 This is going to lead to problems because you can run out of
1381 room, esp. as we don't yet recycle numbers. */
1382 Vcharset_composite =
1383 make_charset (Qcomposite, LEADING_BYTE_COMPOSITE, 3,
1384 CHARSET_TYPE_96X96, 2, 0, 0,
1385 CHARSET_LEFT_TO_RIGHT,
1386 build_string ("Composite characters"),
1387 build_string (""));
1388
1389 composite_char_row_next = 32;
1390 composite_char_col_next = 32;
1391
1392 Vcomposite_char_string2char_hashtable =
1393 make_lisp_hashtable (500, HASHTABLE_NONWEAK,
1394 HASHTABLE_EQUAL);
1395 Vcomposite_char_char2string_hashtable =
1396 make_lisp_hashtable (500, HASHTABLE_NONWEAK,
1397 HASHTABLE_EQ);
1398 staticpro (&Vcomposite_char_string2char_hashtable);
1399 staticpro (&Vcomposite_char_char2string_hashtable);
1400
1401 /* Faux charsets used only for convenience in retrieving the
1402 number of rep bytes associated with a leading byte. */
1403
1404 make_charset (Qnil, PRE_LEADING_BYTE_PRIVATE_1, 3, 0, 0, 0, 0,
1405 0, Qnil, Qnil);
1406 make_charset (Qnil, PRE_LEADING_BYTE_PRIVATE_2, 4, 0, 0, 0, 0,
1407 0, Qnil, Qnil);
1408 }