Mercurial > hg > xemacs-beta
diff src/mule-charset.c @ 70:131b0175ea99 r20-0b30
Import from CVS: tag r20-0b30
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:02:59 +0200 |
parents | |
children | 54cc21c15cbb |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/mule-charset.c Mon Aug 13 09:02:59 2007 +0200 @@ -0,0 +1,1408 @@ +/* Functions to handle multilingual characters. + Copyright (C) 1992, 1995 Free Software Foundation, Inc. + Copyright (C) 1995 Sun Microsystems, Inc. + +This file is part of XEmacs. + +XEmacs is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +XEmacs is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with XEmacs; see the file COPYING. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Synched up with: Mule 2.3. Not in FSF. */ + +/* Rewritten by Ben Wing <wing@666.com>. */ + +#include <config.h> +#include "lisp.h" + +#include "buffer.h" +#include "chartab.h" +#include "elhash.h" +#include "lstream.h" +#include "device.h" +#include "faces.h" + +/* The various pre-defined charsets. 
*/ + +Lisp_Object Vcharset_ascii; +Lisp_Object Vcharset_control_1; +Lisp_Object Vcharset_latin_1; +Lisp_Object Vcharset_latin_2; +Lisp_Object Vcharset_latin_3; +Lisp_Object Vcharset_latin_4; +Lisp_Object Vcharset_cyrillic; +Lisp_Object Vcharset_arabic; +Lisp_Object Vcharset_greek; +Lisp_Object Vcharset_hebrew; +Lisp_Object Vcharset_latin_5; +Lisp_Object Vcharset_thai; +Lisp_Object Vcharset_japanese_jisx0201_kana; +Lisp_Object Vcharset_japanese_jisx0201_roman; +Lisp_Object Vcharset_japanese_jisx0208_1978; +Lisp_Object Vcharset_japanese_jisx0208; +Lisp_Object Vcharset_japanese_jisx0212; +Lisp_Object Vcharset_chinese_gb; +Lisp_Object Vcharset_chinese_big5_1; +Lisp_Object Vcharset_chinese_big5_2; +Lisp_Object Vcharset_chinese_cns11643_1; +Lisp_Object Vcharset_chinese_cns11643_2; +Lisp_Object Vcharset_korean_ksc5601; +Lisp_Object Vcharset_composite; + +/* Hashtables for composite chars. One maps a string representing + composed chars to the equivalent char; one goes the + other way. */ +Lisp_Object Vcomposite_char_char2string_hashtable; +Lisp_Object Vcomposite_char_string2char_hashtable; + +/* Table of charsets indexed by leading byte. make_charset() stores each entry at index (leading byte - 128). */ +Lisp_Object charset_by_leading_byte[128]; + +/* Table of charsets indexed by type/final-byte/direction. */ +Lisp_Object charset_by_attributes[4][128][2]; + +static int composite_char_row_next; /* first octet lookup_composite_char() will assign next */ +static int composite_char_col_next; /* second octet lookup_composite_char() will assign next */ + +/* Table of number of bytes in the string representation of a character + indexed by the first byte of that representation. + + This value can be derived other ways -- e.g. something like + + (BYTE_ASCII_P (first_byte) ? 1 : + XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte))) + + but it's faster this way. 
*/ + +Bytecount rep_bytes_by_first_byte[0xA0] = +{ /* 16 x 8 ones for ASCII (first bytes 0x00 - 0x7f) */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* 16 x 1 twos: 0x80 - 0x8f are Dimension-1 official Mule charsets */ + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + /* 0x90 - 0x9d are Dimension-2 official */ + /* 0x9e is Dimension-1 private */ + /* 0x9f is Dimension-2 private */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4 +}; + +Lisp_Object Qcharsetp; + +/* Qdoc_string, Qdimension, Qchars defined in general.c */ +Lisp_Object Qregistry, Qfinal, Qgraphic; +Lisp_Object Qdirection; +Lisp_Object Qreverse_direction_charset; +Lisp_Object Qccl_program; + +Lisp_Object Qascii, Qcontrol_1, Qlatin_1, Qlatin_2, Qlatin_3, Qlatin_4, Qlatin_5, + Qthai, Qarabic, Qhebrew, Qgreek, Qcyrillic, + + Qjapanese_jisx0201_kana, + Qjapanese_jisx0201_roman, + Qjapanese_jisx0208_1978, + Qjapanese_jisx0208, + Qjapanese_jisx0212, + + Qchinese_gb, Qchinese_big5_1, Qchinese_big5_2, + Qchinese_cns11643_1, Qchinese_cns11643_2, + + Qkorean_ksc5601, Qcomposite; + +Lisp_Object Ql2r, Qr2l; + +/* Maps charset name symbols to charset objects; filled in by make_charset(). */ +Lisp_Object Vcharset_hashtable; + +/* Next private leading byte get_unallocated_leading_byte() will hand out for each dimension. */ +static Bufbyte next_allocated_1_byte_leading_byte; +static Bufbyte next_allocated_2_byte_leading_byte; + +/* Composite characters are characters constructed by overstriking two + or more regular characters. + + 1) The old Mule implementation involves storing composite characters + in a buffer as a tag followed by all of the actual characters + used to make up the composite character. 
I think this is a bad + idea; it greatly complicates code that wants to handle strings + one character at a time because it has to deal with the possibility + of great big ungainly characters. It's much more reasonable to + simply store an index into a table of composite characters. + + 2) The current implementation only allows for 16,384 separate + composite characters over the lifetime of the XEmacs process. + This could become a potential problem if the user + edited lots of different files that use composite characters. + Due to FSF bogosity, increasing the number of allowable + composite characters under Mule would decrease the number + of possible faces that can exist. Mule already has shrunk + this to 2048, and further shrinkage would become uncomfortable. + No such problems exist in XEmacs. + + Composite characters could be represented as 0x80 C1 C2 C3, + where each C[1-3] is in the range 0xA0 - 0xFF. This allows + for slightly under 2^20 (one million) composite characters + over the XEmacs process lifetime, and you only need to + increase the size of a Mule character from 19 to 21 bits. + Or you could use 0x80 C1 C2 C3 C4, allowing for about + 85 million (slightly over 2^26) composite characters. */ + + +/************************************************************************/ +/* Basic Emchar functions */ +/************************************************************************/ + +/* Convert a non-ASCII Mule character C into a one-character Mule-encoded + string in STR. Returns the number of bytes stored. + Do not call this directly. Use the macro set_charptr_emchar() instead. 
+ */ + +Bytecount +non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c) +{ + Bufbyte *p; + Bufbyte lb; + int c1, c2; + Lisp_Object charset; + + p = str; + BREAKUP_CHAR (c, charset, c1, c2); + lb = CHAR_LEADING_BYTE (c); + if (LEADING_BYTE_PRIVATE_P (lb)) + *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb); + *p++ = lb; + if (EQ (charset, Vcharset_control_1)) + c1 += 0x20; + *p++ = c1 | 0x80; + if (c2) + *p++ = c2 | 0x80; + + return (p - str); +} + +/* Return the first character from a Mule-encoded string in STR, + assuming it's non-ASCII. Do not call this directly. + Use the macro charptr_emchar() instead. */ + +Emchar +non_ascii_charptr_emchar (CONST Bufbyte *str) +{ + Bufbyte i0 = *str, i1, i2 = 0; + Lisp_Object charset; + + if (i0 == LEADING_BYTE_CONTROL_1) + return (Emchar) (*++str - 0x20); + + if (LEADING_BYTE_PREFIX_P (i0)) + i0 = *++str; + + i1 = *++str & 0x7F; + + charset = CHARSET_BY_LEADING_BYTE (i0); + if (XCHARSET_DIMENSION (charset) == 2) + i2 = *++str & 0x7F; + + return MAKE_CHAR (charset, i1, i2); +} + +/* Return whether CH is a valid Emchar, assuming it's non-ASCII. + Do not call this directly. Use the macro valid_char_p() instead. */ + +int +non_ascii_valid_char_p (Emchar ch) +{ + int f1, f2, f3; + + /* Must have only lowest 19 bits set */ + if (ch & ~0x7FFFF) + return 0; + + f1 = CHAR_FIELD1 (ch); + f2 = CHAR_FIELD2 (ch); + f3 = CHAR_FIELD3 (ch); + + if (f1 == 0) + { + Lisp_Object charset; + + if (f2 < MIN_CHAR_FIELD2_OFFICIAL || + (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) || + f2 > MAX_CHAR_FIELD2_PRIVATE) + return 0; + if (f3 < 0x20) + return 0; + + if (f3 != 0x20 && f3 != 0x7F) + return 1; + + /* + NOTE: This takes advantage of the fact that + FIELD2_TO_OFFICIAL_LEADING_BYTE and + FIELD2_TO_PRIVATE_LEADING_BYTE are the same. 
+ */ + charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE); + return (XCHARSET_CHARS (charset) == 96); + } + else + { + Lisp_Object charset; + + if (f1 < MIN_CHAR_FIELD1_OFFICIAL || + (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) || + f1 > MAX_CHAR_FIELD1_PRIVATE) + return 0; + if (f2 < 0x20 || f3 < 0x20) + return 0; + + if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE) + { + if (UNBOUNDP (Fgethash (make_int (ch), + Vcomposite_char_char2string_hashtable, + Qunbound))) + return 0; + return 1; + } + + if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F) + return 1; + + if (f1 <= MAX_CHAR_FIELD1_OFFICIAL) + charset = + CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE); + else + charset = + CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE); + + return (XCHARSET_CHARS (charset) == 96); + } +} + + +/************************************************************************/ +/* Basic string functions */ +/************************************************************************/ + +/* Copy the character pointed to by PTR into STR, assuming it's + non-ASCII. Do not call this directly. Use the macro + charptr_copy_char() instead. */ + +Bytecount +non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str) +{ + Bufbyte *strptr = str; + *strptr = *ptr++; + switch (REP_BYTES_BY_FIRST_BYTE (*strptr)) + { + /* Notice fallthrough. */ + case 4: *++strptr = *ptr++; + case 3: *++strptr = *ptr++; + case 2: *++strptr = *ptr; + break; + default: + abort (); + } + return strptr + 1 - str; +} + + +/************************************************************************/ +/* streams of Emchars */ +/************************************************************************/ + +/* Treat a stream as a stream of Emchar's rather than a stream of bytes. + The functions below are not meant to be called directly; use + the macros in insdel.h. 
*/ + +Emchar +Lstream_get_emchar_1 (Lstream *stream, int ch) +{ + Bufbyte str[MAX_EMCHAR_LEN]; + Bufbyte *strptr = str; + + str[0] = (Bufbyte) ch; + switch (REP_BYTES_BY_FIRST_BYTE (ch)) + { + /* Notice fallthrough. */ + case 4: + ch = Lstream_getc (stream); + assert (ch >= 0); + *++strptr = (Bufbyte) ch; + case 3: + ch = Lstream_getc (stream); + assert (ch >= 0); + *++strptr = (Bufbyte) ch; + case 2: + ch = Lstream_getc (stream); + assert (ch >= 0); + *++strptr = (Bufbyte) ch; + break; + default: + abort (); + } + return charptr_emchar (str); +} + +int +Lstream_fput_emchar (Lstream *stream, Emchar ch) +{ + Bufbyte str[MAX_EMCHAR_LEN]; + Bytecount len = set_charptr_emchar (str, ch); + return Lstream_write (stream, str, len); +} + +void +Lstream_funget_emchar (Lstream *stream, Emchar ch) +{ + Bufbyte str[MAX_EMCHAR_LEN]; + Bytecount len = set_charptr_emchar (str, ch); + Lstream_unread (stream, str, len); +} + + +/************************************************************************/ +/* charset object */ +/************************************************************************/ + +static Lisp_Object mark_charset (Lisp_Object, void (*) (Lisp_Object)); +static void print_charset (Lisp_Object, Lisp_Object, int); +DEFINE_LRECORD_IMPLEMENTATION ("charset", charset, + mark_charset, print_charset, 0, 0, 0, + struct Lisp_Charset); + +static Lisp_Object +mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object)) +{ + struct Lisp_Charset *cs = XCHARSET (obj); + + (markobj) (cs->doc_string); + (markobj) (cs->registry); + (markobj) (cs->ccl_program); + return cs->name; +} + +static void +print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) +{ + struct Lisp_Charset *cs = XCHARSET (obj); + char buf[200]; + + if (print_readably) + error ("printing unreadable object #<charset %s 0x%x>", + string_data (XSYMBOL (CHARSET_NAME (cs))->name), + cs->header.uid); + + write_c_string ("#<charset ", printcharfun); + print_internal (CHARSET_NAME (cs), printcharfun, 
0); + write_c_string (" ", printcharfun); + print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1); + sprintf (buf, " %s %s cols=%d g%d final='%c' reg=", + CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" : + CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" : + CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" : + "96x96", + CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l", + CHARSET_COLUMNS (cs), + CHARSET_GRAPHIC (cs), + CHARSET_FINAL (cs)); + write_c_string (buf, printcharfun); + print_internal (CHARSET_REGISTRY (cs), printcharfun, 0); + sprintf (buf, " 0x%x>", cs->header.uid); + write_c_string (buf, printcharfun); +} + +/* Make a new charset. */ + +static Lisp_Object +make_charset (Lisp_Object name, Bufbyte leading_byte, unsigned char rep_bytes, + unsigned char type, unsigned char columns, unsigned char graphic, + Bufbyte final, unsigned char direction, Lisp_Object doc, + Lisp_Object reg) +{ + struct Lisp_Charset *cs; + Lisp_Object obj = Qnil; + + cs = (struct Lisp_Charset *) alloc_lcrecord (sizeof (struct Lisp_Charset), + lrecord_charset); + XSETCHARSET (obj, cs); + + CHARSET_NAME (cs) = name; + CHARSET_LEADING_BYTE (cs) = leading_byte; + CHARSET_REP_BYTES (cs) = rep_bytes; + CHARSET_DIRECTION (cs) = direction; + CHARSET_TYPE (cs) = type; + CHARSET_COLUMNS (cs) = columns; + CHARSET_GRAPHIC (cs) = graphic; + CHARSET_FINAL (cs) = final; + CHARSET_DOC_STRING (cs) = doc; + CHARSET_REGISTRY (cs) = reg; + CHARSET_CCL_PROGRAM (cs) = Qnil; + CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; + + if (final) + { + /* some charsets do not have final characters. This includes + ASCII, Control-1, Composite, and the two faux private + charsets. 
*/ + assert (NILP (charset_by_attributes[type][final][direction])); + charset_by_attributes[type][final][direction] = obj; + } + + assert (NILP (charset_by_leading_byte[leading_byte - 128])); + charset_by_leading_byte[leading_byte - 128] = obj; + if (leading_byte < 0xA0) + /* official leading byte */ + rep_bytes_by_first_byte[leading_byte] = rep_bytes; + + /* Some charsets are "faux" and don't have names or really exist at + all except in the leading-byte table. */ + if (!NILP (name)) + Fputhash (name, obj, Vcharset_hashtable); + return obj; +} + +static int +get_unallocated_leading_byte (int dimension) +{ + int lb; + + if (dimension == 1) + { + if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1) + lb = 0; + else + lb = next_allocated_1_byte_leading_byte++; + } + else + { + if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2) + lb = 0; + else + lb = next_allocated_2_byte_leading_byte++; + } + + if (!lb) + signal_simple_error + ("No more character sets free for this dimension", + make_int (dimension)); + + return lb; +} + + +/************************************************************************/ +/* Basic charset Lisp functions */ +/************************************************************************/ + +DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /* +Return non-nil if OBJECT is a charset. +*/ + (object)) +{ + return (CHARSETP (object) ? Qt : Qnil); +} + +DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /* +Retrieve the charset of the given name. +If CHARSET-OR-NAME is a charset object, it is simply returned. +Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset, +nil is returned. Otherwise the associated charset object is returned. +*/ + (charset_or_name)) +{ + if (CHARSETP (charset_or_name)) + return charset_or_name; + CHECK_SYMBOL (charset_or_name); + + return Fgethash (charset_or_name, Vcharset_hashtable, Qnil); +} + +DEFUN ("get-charset", Fget_charset, 1, 1, 0, /* +Retrieve the charset of the given name. 
+Same as `find-charset' except an error is signalled if there is no such +charset instead of returning nil. +*/ + (name)) +{ + Lisp_Object charset = Ffind_charset (name); + + if (NILP (charset)) + signal_simple_error ("No such charset", name); + return charset; +} + +/* We store the charsets in hash tables with the names as the key and the + actual charset object as the value. Occasionally we need to use them + in a list format. These routines provide us with that. */ +struct charset_list_closure +{ + Lisp_Object *charset_list; +}; + +static void +add_charset_to_list_mapper (CONST void *hash_key, void *hash_contents, + void *charset_list_closure) +{ + /* This function can GC */ + Lisp_Object key, contents; + Lisp_Object *charset_list; + struct charset_list_closure *chcl = charset_list_closure; + CVOID_TO_LISP (key, hash_key); + VOID_TO_LISP (contents, hash_contents); + charset_list = chcl->charset_list; + + *charset_list = Fcons (XCHARSET_NAME (contents), *charset_list); +} + +DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /* +Return a list of the names of all defined charsets. +*/ + ()) +{ + Lisp_Object charset_list = Qnil; + struct gcpro gcpro1; + struct charset_list_closure charset_list_closure; + + GCPRO1 (charset_list); + charset_list_closure.charset_list = &charset_list; + elisp_maphash (add_charset_to_list_mapper, Vcharset_hashtable, + &charset_list_closure); + UNGCPRO; + + return charset_list; +} + +DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /* +Return the name of the given charset. +*/ + (charset)) +{ + return (XCHARSET_NAME (Fget_charset (charset))); +} + +DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /* +Define a new character set. +This function is for use with Mule support. +NAME is a symbol, the name by which the character set is normally referred. +DOC-STRING is a string describing the character set. +PROPS is a property list, describing the specific nature of the +character set. 
Recognized properties are: + +'registry A regular expression matching the font registry field for + this character set. +'dimension Number of octets used to index a character in this charset. + Either 1 or 2. Defaults to 1. +'columns Number of columns used to display a character in this charset. + Only used in TTY mode. (Under X, the actual width of a + character can be derived from the font used to display the + characters.) If unspecified, defaults to the dimension + (this is almost always the correct value). +'chars Number of characters in each dimension (94 or 96). + Defaults to 94. Note that if the dimension is 2, the + character set thus described is 94x94 or 96x96. +'final Final byte of ISO 2022 escape sequence. Must be + supplied. Each combination of (DIMENSION, CHARS) defines a + separate namespace for final bytes. Note that ISO + 2022 restricts the final byte to the range + 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if + dimension == 2. Note also that final bytes in the range + 0x30 - 0x3F are reserved for user-defined (not official) + character sets. +'graphic 0 (use left half of font on output) or 1 (use right half + of font on output). Defaults to 0. For example, for + a font whose registry is ISO8859-1, the left half + (octets 0x20 - 0x7F) is the `ascii' character set, while + the right half (octets 0xA0 - 0xFF) is the `latin-1' + character set. With 'graphic set to 0, the octets + will have their high bit cleared; with it set to 1, + the octets will have their high bit set. +'direction 'l2r (left-to-right) or 'r2l (right-to-left). + Defaults to 'l2r. +'ccl-program A compiled CCL program used to convert a character in + this charset into an index into the font. This is in + addition to the 'graphic property. The CCL program + is passed the octets of the character, with the high + bit cleared and set depending upon whether the value + of the 'graphic property is 0 or 1. 
+*/ + (name, doc_string, props)) +{ + int lb, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1; + int direction = CHARSET_LEFT_TO_RIGHT; + int type; + Lisp_Object registry = Qnil; + Lisp_Object charset; + Lisp_Object rest, keyword, value; + Lisp_Object ccl_program = Qnil; + + CHECK_SYMBOL (name); + if (!NILP (doc_string)) + CHECK_STRING (doc_string); + + charset = Ffind_charset (name); + if (!NILP (charset)) + signal_simple_error ("Cannot redefine existing charset", name); + + EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props) + { + if (EQ (keyword, Qdimension)) + { + CHECK_INT (value); + dimension = XINT (value); + if (dimension < 1 || dimension > 2) + signal_simple_error ("Invalid value for 'dimension", value); + } + + else if (EQ (keyword, Qchars)) + { + CHECK_INT (value); + chars = XINT (value); + if (chars != 94 && chars != 96) + signal_simple_error ("Invalid value for 'chars", value); + } + + else if (EQ (keyword, Qcolumns)) + { + CHECK_INT (value); + columns = XINT (value); + if (columns != 1 && columns != 2) + signal_simple_error ("Invalid value for 'columns", value); + } + + else if (EQ (keyword, Qgraphic)) + { + CHECK_INT (value); + graphic = XINT (value); + if (graphic < 0 || graphic > 1) + signal_simple_error ("Invalid value for 'graphic", value); + } + + else if (EQ (keyword, Qregistry)) + { + CHECK_STRING (value); + registry = value; + } + + else if (EQ (keyword, Qdirection)) + { + if (EQ (value, Ql2r)) + direction = CHARSET_LEFT_TO_RIGHT; + else if (EQ (value, Qr2l)) + direction = CHARSET_RIGHT_TO_LEFT; + else + signal_simple_error ("Invalid value for 'direction", value); + } + + else if (EQ (keyword, Qfinal)) + { + CHECK_CHAR_COERCE_INT (value); + final = XCHAR (value); + if (final < '0' || final > '~') + signal_simple_error ("Invalid value for 'final", value); + } + + else if (EQ (keyword, Qccl_program)) + { + CHECK_VECTOR (value); + ccl_program = value; + } + + else + signal_simple_error ("Unrecognized property", keyword); + 
} + + if (!final) + error ("'final must be specified"); + if (dimension == 2 && final > 0x5F) + signal_simple_error + ("Final must be in the range 0x30 - 0x5F for dimension == 2", + make_char (final)); + + if (dimension == 1) + type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96; + else + type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96; + + if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) || + !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT))) + error + ("Character set already defined for this DIMENSION/CHARS/FINAL combo"); + + lb = get_unallocated_leading_byte (dimension); + + if (NILP (doc_string)) + doc_string = build_string (""); + + if (NILP (registry)) + registry = build_string (""); + + if (columns == -1) + columns = dimension; + charset = make_charset (name, lb, dimension + 2, type, columns, graphic, + final, direction, doc_string, registry); + if (!NILP (ccl_program)) + XCHARSET_CCL_PROGRAM (charset) = ccl_program; + return charset; +} + +DEFUN ("make-reverse-direction-charset", + Fmake_reverse_direction_charset, 2, 2, 0, /* +Make a charset equivalent to CHARSET but which goes in the opposite direction. +NEW-NAME is the name of the new charset. Return the new charset. 
+*/ + (charset, new_name)) +{ + Lisp_Object new_charset = Qnil; + int lb, dimension, columns, graphic, final; + int direction, type; + Lisp_Object registry, doc_string; + struct Lisp_Charset *cs; + + charset = Fget_charset (charset); + if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset))) + signal_simple_error ("Charset already has reverse-direction charset", + charset); + + CHECK_SYMBOL (new_name); + if (!NILP (Ffind_charset (new_name))) + signal_simple_error ("Cannot redefine existing charset", new_name); + + cs = XCHARSET (charset); + + type = CHARSET_TYPE (cs); + columns = CHARSET_COLUMNS (cs); + dimension = CHARSET_DIMENSION (cs); + lb = get_unallocated_leading_byte (dimension); + + graphic = CHARSET_GRAPHIC (cs); + final = CHARSET_FINAL (cs); + direction = CHARSET_RIGHT_TO_LEFT; + if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT) + direction = CHARSET_LEFT_TO_RIGHT; + doc_string = CHARSET_DOC_STRING (cs); + registry = CHARSET_REGISTRY (cs); + + new_charset = make_charset (new_name, lb, dimension + 2, type, columns, + graphic, final, direction, doc_string, registry); + + CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset; + XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset; + + return new_charset; +} + +/* #### The defsubr for this is commented out at the moment but no + reason why is given. */ +#if 0 +DEFUN ("charset-reverse-direction-charset", + Fcharset_reverse_direction_charset, 1, 1, 0, /* +Return the reverse-direction charset parallel to CHARSET, if any. +This is the charset with the same properties (in particular, the same +dimension, number of characters per dimension, and final byte) as +CHARSET but whose characters are displayed in the opposite direction. +*/ + (charset)) +{ + charset = Fget_charset (charset); + return XCHARSET_REVERSE_DIRECTION_CHARSET (charset); +} +#endif + +DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /* +Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION. 
+If DIRECTION is omitted, both directions will be checked (left-to-right +will be returned if character sets exist for both directions). +*/ + (dimension, chars, final, direction)) +{ + int dm, ch, fi, di = -1; + int type; + Lisp_Object obj = Qnil; + + CHECK_INT (dimension); + dm = XINT (dimension); + if (dm < 1 || dm > 2) + signal_simple_error ("Invalid value for DIMENSION", dimension); + + CHECK_INT (chars); + ch = XINT (chars); + if (ch != 94 && ch != 96) + signal_simple_error ("Invalid value for CHARS", chars); + + CHECK_CHAR_COERCE_INT (final); + fi = XCHAR (final); + if (fi < '0' || fi > '~') + signal_simple_error ("Invalid value for FINAL", final); + + if (EQ (direction, Ql2r)) + di = CHARSET_LEFT_TO_RIGHT; + else if (EQ (direction, Qr2l)) + di = CHARSET_RIGHT_TO_LEFT; + else if (!NILP (direction)) + signal_simple_error ("Invalid value for DIRECTION", direction); + + if (dm == 2 && fi > 0x5F) + signal_simple_error + ("Final must be in the range 0x30 - 0x5F for dimension == 2", final); + + if (dm == 1) + type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96; + else + type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96; + + if (di == -1) + { + obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT); + if (NILP (obj)) + obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT); + } + else + obj = CHARSET_BY_ATTRIBUTES (type, fi, di); + + if (CHARSETP (obj)) + return XCHARSET_NAME (obj); + return obj; +} + +DEFUN ("charset-doc-string", Fcharset_doc_string, 1, 1, 0, /* +Return doc string of CHARSET. +*/ + (charset)) +{ + return XCHARSET_DOC_STRING (Fget_charset (charset)); +} + +DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /* +Return dimension of CHARSET. +*/ + (charset)) +{ + return make_int (XCHARSET_DIMENSION (Fget_charset (charset))); +} + +DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /* +Return property PROP of CHARSET. +Recognized properties are those listed in `make-charset', as well as +'name and 'doc-string. 
+*/ + (charset, prop)) +{ + struct Lisp_Charset *cs; + + charset = Fget_charset (charset); + cs = XCHARSET (charset); + + CHECK_SYMBOL (prop); + if (EQ (prop, Qname)) return CHARSET_NAME (cs); + if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs); + if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs)); + if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs)); + if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs)); + if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs)); + if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs)); + if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs); + if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs); + if (EQ (prop, Qdirection)) + return (CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l); + if (EQ (prop, Qreverse_direction_charset)) + { + Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs); + if (NILP (obj)) + return Qnil; + else + return XCHARSET_NAME (obj); + } + signal_simple_error ("Unrecognized charset property name", prop); + return Qnil; /* not reached */ +} + +/* #### We need to figure out which properties we really want to + allow to be set. */ + +DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /* +Set the 'ccl-program property of CHARSET to CCL-PROGRAM. +*/ + (charset, ccl_program)) +{ + charset = Fget_charset (charset); + CHECK_VECTOR (ccl_program); + XCHARSET_CCL_PROGRAM (charset) = ccl_program; + return Qnil; +} + +static void +invalidate_charset_font_caches (Lisp_Object charset) +{ + /* Invalidate font cache entries for charset on all devices. 
*/ + Lisp_Object devcons, concons, hashtab; + DEVICE_LOOP_NO_BREAK (devcons, concons) + { + struct device *d = XDEVICE (XCAR (devcons)); + hashtab = Fgethash (charset, d->charset_font_cache, Qunbound); + if (!UNBOUNDP (hashtab)) + Fclrhash (hashtab); + } +} + +/* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */ +DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /* +Set the 'registry property of CHARSET to REGISTRY. +*/ + (charset, registry)) +{ + charset = Fget_charset (charset); + CHECK_STRING (registry); + XCHARSET_REGISTRY (charset) = registry; + invalidate_charset_font_caches (charset); + face_property_was_changed (Vdefault_face, Qfont, Qglobal); + return Qnil; +} + + +/************************************************************************/ +/* Lisp primitives for working with characters */ +/************************************************************************/ + +DEFUN ("make-char", Fmake_char, 2, 3, 0, /* +Make a multi-byte character from CHARSET and octets ARG1 and ARG2. 
+*/ + (charset, arg1, arg2)) +{ + struct Lisp_Charset *cs; + int a1, a2; + int lowlim, highlim; + + charset = Fget_charset (charset); + cs = XCHARSET (charset); + + if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127; + else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31; + else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126; + else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127; + + CHECK_INT (arg1); + a1 = XINT (arg1); + if (a1 < lowlim || a1 > highlim) + args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim)); + + if (CHARSET_DIMENSION (cs) == 1) + { + if (!NILP (arg2)) + signal_simple_error + ("Charset is of dimension one; second octet must be nil", arg2); + return make_char (MAKE_CHAR (charset, a1, 0)); + } + + CHECK_INT (arg2); + a2 = XINT (arg2); + if (a2 < lowlim || a2 > highlim) + args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim)); + + return make_char (MAKE_CHAR (charset, a1, a2)); +} + +DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /* +Return the character set of char CH. +*/ + (ch)) +{ + CHECK_CHAR_COERCE_INT (ch); + + return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE + (CHAR_LEADING_BYTE (XCHAR (ch)))); +} + +DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /* +Return the octet numbered N (should be 0 or 1) of char CH. +N defaults to 0 if omitted. 
+*/ + (ch, n)) +{ + Lisp_Object charset; + int c1, c2; + + CHECK_CHAR_COERCE_INT (ch); + if (NILP (n)) + n = Qzero; + else + { + CHECK_INT (n); + if (XINT (n) != 0 && XINT (n) != 1) + signal_simple_error ("Octet number must be 0 or 1", n); + } + BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); + if (XINT (n) == 0) + return make_int (c1); + else + return make_int (c2); +} + + +/************************************************************************/ +/* composite character functions */ +/************************************************************************/ + +Emchar +lookup_composite_char (Bufbyte *str, int len) +{ + Lisp_Object lispstr = make_string (str, len); + Lisp_Object ch = Fgethash (lispstr, + Vcomposite_char_string2char_hashtable, + Qunbound); + Emchar emch; + + if (UNBOUNDP (ch)) + { + if (composite_char_row_next >= 128) + signal_simple_error ("No more composite chars available", lispstr); + emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next, + composite_char_col_next); + Fputhash (make_char (emch), lispstr, + Vcomposite_char_char2string_hashtable); + Fputhash (lispstr, make_char (emch), + Vcomposite_char_string2char_hashtable); + composite_char_col_next++; + if (composite_char_col_next >= 128) + { + composite_char_col_next = 32; + composite_char_row_next++; + } + } + else + emch = XCHAR (ch); + return emch; +} + +Lisp_Object +composite_char_string (Emchar ch) +{ + Lisp_Object str = Fgethash (make_char (ch), + Vcomposite_char_char2string_hashtable, + Qunbound); + assert (!UNBOUNDP (str)); + return str; +} + +DEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /* +Convert a string into a single composite character. +The character is the result of overstriking all the characters in +the string. 
+*/ + (string)) +{ + CHECK_STRING (string); + return make_char (lookup_composite_char (XSTRING_DATA (string), + XSTRING_LENGTH (string))); +} + +DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /* +Return a string of the characters comprising a composite character. +*/ + (ch)) +{ + Emchar emch; + + CHECK_CHAR (ch); + emch = XCHAR (ch); + if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE) + signal_simple_error ("Must be composite char", ch); + return composite_char_string (emch); +} + + +/************************************************************************/ +/* initialization */ +/************************************************************************/ + +void +syms_of_mule_charset (void) +{ + DEFSUBR (Fcharsetp); + DEFSUBR (Ffind_charset); + DEFSUBR (Fget_charset); + DEFSUBR (Fcharset_list); + DEFSUBR (Fcharset_name); + DEFSUBR (Fmake_charset); + DEFSUBR (Fmake_reverse_direction_charset); + /* DEFSUBR (Freverse_direction_charset); */ + DEFSUBR (Fcharset_from_attributes); + DEFSUBR (Fcharset_doc_string); + DEFSUBR (Fcharset_dimension); + DEFSUBR (Fcharset_property); + DEFSUBR (Fset_charset_ccl_program); + DEFSUBR (Fset_charset_registry); + + DEFSUBR (Fmake_char); + DEFSUBR (Fchar_charset); + DEFSUBR (Fchar_octet); + + DEFSUBR (Fmake_composite_char); + DEFSUBR (Fcomposite_char_string); + + defsymbol (&Qcharsetp, "charsetp"); + defsymbol (&Qregistry, "registry"); + defsymbol (&Qfinal, "final"); + defsymbol (&Qgraphic, "graphic"); + defsymbol (&Qdirection, "direction"); + defsymbol (&Qreverse_direction_charset, "reverse-direction-charset"); + defsymbol (&Qccl_program, "ccl-program"); + + defsymbol (&Ql2r, "l2r"); + defsymbol (&Qr2l, "r2l"); + + /* Charsets */ + defsymbol (&Qascii, "ascii"); + defsymbol (&Qcontrol_1, "control-1"); + defsymbol (&Qlatin_1, "latin-1"); + defsymbol (&Qlatin_2, "latin-2"); + defsymbol (&Qlatin_3, "latin-3"); + defsymbol (&Qlatin_4, "latin-4"); + defsymbol (&Qcyrillic, "cyrillic"); + defsymbol (&Qarabic, "arabic"); + 
defsymbol (&Qgreek, "greek"); + defsymbol (&Qhebrew, "hebrew"); + defsymbol (&Qlatin_5, "latin-5"); + defsymbol (&Qthai, "thai"); + + defsymbol (&Qjapanese_jisx0201_kana, "japanese-jisx0201-kana"); + defsymbol (&Qjapanese_jisx0201_roman, "japanese-jisx0201-roman"); + defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978"); + defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208"); + defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212"); + + defsymbol (&Qchinese_gb, "chinese-gb"); + defsymbol (&Qchinese_big5_1, "chinese-big5-1"); + defsymbol (&Qchinese_big5_2, "chinese-big5-2"); + defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1"); + defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2"); + + defsymbol (&Qkorean_ksc5601, "korean-ksc5601"); + defsymbol (&Qcomposite, "composite"); +} + +void +vars_of_mule_charset (void) +{ + int i, j, k; + + for (i = 0; i < 128; i++) + charset_by_leading_byte[i] = Qnil; + + for (i = 0; i < 4; i++) + for (j = 0; j < 128; j++) + for (k = 0; k < 2; k ++) + charset_by_attributes[i][j][k] = Qnil; + + /* Now done at compile time + for (i = 0; i < 128; i++) + rep_bytes_by_first_byte[i] = 1; + */ + + next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; + next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; +} + +void +complex_vars_of_mule_charset (void) +{ + staticpro (&Vcharset_hashtable); + Vcharset_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK, + HASHTABLE_EQ); + + /* Predefined character sets. We store them into variables for + ease of access. 
*/ + + Vcharset_ascii = + make_charset (Qascii, LEADING_BYTE_ASCII, 1, + CHARSET_TYPE_94, 1, 0, 'B', + CHARSET_LEFT_TO_RIGHT, + build_string ("ASCII"), + build_string ("ISO8859-1")); + Vcharset_control_1 = + make_charset (Qcontrol_1, LEADING_BYTE_CONTROL_1, 2, + CHARSET_TYPE_94, 1, 0, 0, + CHARSET_LEFT_TO_RIGHT, + build_string ("Control characters"), + build_string ("")); + Vcharset_latin_1 = + make_charset (Qlatin_1, LEADING_BYTE_LATIN_1, 2, + CHARSET_TYPE_96, 1, 1, 'A', + CHARSET_LEFT_TO_RIGHT, + build_string ("Latin-1"), + build_string ("ISO8859-1")); + Vcharset_latin_2 = + make_charset (Qlatin_2, LEADING_BYTE_LATIN_2, 2, + CHARSET_TYPE_96, 1, 1, 'B', + CHARSET_LEFT_TO_RIGHT, + build_string ("Latin-2"), + build_string ("ISO8859-2")); + Vcharset_latin_3 = + make_charset (Qlatin_3, LEADING_BYTE_LATIN_3, 2, + CHARSET_TYPE_96, 1, 1, 'C', + CHARSET_LEFT_TO_RIGHT, + build_string ("Latin-3"), + build_string ("ISO8859-3")); + Vcharset_latin_4 = + make_charset (Qlatin_4, LEADING_BYTE_LATIN_4, 2, + CHARSET_TYPE_96, 1, 1, 'D', + CHARSET_LEFT_TO_RIGHT, + build_string ("Latin-4"), + build_string ("ISO8859-4")); + Vcharset_cyrillic = + make_charset (Qcyrillic, LEADING_BYTE_CYRILLIC, 2, + CHARSET_TYPE_96, 1, 1, 'L', + CHARSET_LEFT_TO_RIGHT, + build_string ("Cyrillic"), + build_string ("ISO8859-5")); + Vcharset_arabic = + make_charset (Qarabic, LEADING_BYTE_ARABIC, 2, + CHARSET_TYPE_96, 1, 1, 'G', + CHARSET_RIGHT_TO_LEFT, + build_string ("Arabic"), + build_string ("ISO8859-6")); + Vcharset_greek = + make_charset (Qgreek, LEADING_BYTE_GREEK, 2, + CHARSET_TYPE_96, 1, 1, 'F', + CHARSET_LEFT_TO_RIGHT, + build_string ("Greek"), + build_string ("ISO8859-7")); + Vcharset_hebrew = + make_charset (Qhebrew, LEADING_BYTE_HEBREW, 2, + CHARSET_TYPE_96, 1, 1, 'H', + CHARSET_RIGHT_TO_LEFT, + build_string ("Hebrew"), + build_string ("ISO8859-8")); + Vcharset_latin_5 = + make_charset (Qlatin_5, LEADING_BYTE_LATIN_5, 2, + CHARSET_TYPE_96, 1, 1, 'M', + CHARSET_LEFT_TO_RIGHT, + build_string 
("Latin-5"), + build_string ("ISO8859-9")); + Vcharset_thai = + make_charset (Qthai, LEADING_BYTE_THAI, 2, + CHARSET_TYPE_96, 1, 1, 'T', + CHARSET_LEFT_TO_RIGHT, + build_string ("Thai"), + build_string ("TIS620")); + + /* Japanese */ + Vcharset_japanese_jisx0201_kana = + make_charset (Qjapanese_jisx0201_kana, + LEADING_BYTE_JAPANESE_JISX0201_KANA, 2, + CHARSET_TYPE_94, 1, 1, 'I', + CHARSET_LEFT_TO_RIGHT, + build_string ("Japanese Katakana"), + build_string ("JISX0201.1976")); + Vcharset_japanese_jisx0201_roman = + make_charset (Qjapanese_jisx0201_roman, + LEADING_BYTE_JAPANESE_JISX0201_ROMAN, 2, + CHARSET_TYPE_94, 1, 0, 'J', + CHARSET_LEFT_TO_RIGHT, + build_string ("Japanese Roman"), + build_string ("JISX0201.1976")); + Vcharset_japanese_jisx0208_1978 = + make_charset (Qjapanese_jisx0208_1978, + LEADING_BYTE_JAPANESE_JISX0208_1978, 3, + CHARSET_TYPE_94X94, 2, 0, '@', + CHARSET_LEFT_TO_RIGHT, + build_string ("Japanese Old"), + build_string ("\\(JISX0208\\|jisc6226\\).1978")); + Vcharset_japanese_jisx0208 = + make_charset (Qjapanese_jisx0208, + LEADING_BYTE_JAPANESE_JISX0208, 3, + CHARSET_TYPE_94X94, 2, 0, 'B', + CHARSET_LEFT_TO_RIGHT, + build_string ("Japanese"), + build_string ("JISX0208.\\(1983\\|1990\\)")); + Vcharset_japanese_jisx0212 = + make_charset (Qjapanese_jisx0212, + LEADING_BYTE_JAPANESE_JISX0212, 3, + CHARSET_TYPE_94X94, 2, 0, 'D', + CHARSET_LEFT_TO_RIGHT, + build_string ("Japanese Supplement"), + build_string ("JISX0212")); + + /* Chinese */ + Vcharset_chinese_gb = + make_charset (Qchinese_gb, LEADING_BYTE_CHINESE_GB, 3, + CHARSET_TYPE_94X94, 2, 0, 'A', + CHARSET_LEFT_TO_RIGHT, + build_string ("Chinese GB"), + build_string ("GB2312")); +#define CHINESE_CNS_PLANE_RE(n) "CNS11643[.-]\\(.*[.-]\\)?" 
n "$" + Vcharset_chinese_cns11643_1 = + make_charset (Qchinese_cns11643_1, + LEADING_BYTE_CHINESE_CNS11643_1, 3, + CHARSET_TYPE_94X94, 2, 0, 'G', + CHARSET_LEFT_TO_RIGHT, + build_string ("Chinese CNS Plane 1"), + build_string (CHINESE_CNS_PLANE_RE("1"))); + Vcharset_chinese_cns11643_2 = + make_charset (Qchinese_cns11643_2, + LEADING_BYTE_CHINESE_CNS11643_2, 3, + CHARSET_TYPE_94X94, 2, 0, 'H', + CHARSET_LEFT_TO_RIGHT, + build_string ("Chinese CNS Plane 2"), + build_string (CHINESE_CNS_PLANE_RE("2"))); + Vcharset_chinese_big5_1 = + make_charset (Qchinese_big5_1, LEADING_BYTE_CHINESE_BIG5_1, 3, + CHARSET_TYPE_94X94, 2, 0, '0', + CHARSET_LEFT_TO_RIGHT, + build_string ("Chinese Big5 Level 1"), + build_string ("Big5")); + Vcharset_chinese_big5_2 = + make_charset (Qchinese_big5_2, LEADING_BYTE_CHINESE_BIG5_2, 3, + CHARSET_TYPE_94X94, 2, 0, '1', + CHARSET_LEFT_TO_RIGHT, + build_string ("Chinese Big5 Level 2"), + build_string ("Big5")); + + Vcharset_korean_ksc5601 = + make_charset (Qkorean_ksc5601, LEADING_BYTE_KOREAN_KSC5601, 3, + CHARSET_TYPE_94X94, 2, 0, 'C', + CHARSET_LEFT_TO_RIGHT, + build_string ("Korean"), + build_string ("KSC5601")); + /* #### For simplicity, we put composite chars into a 96x96 charset. + This is going to lead to problems because you can run out of + room, esp. as we don't yet recycle numbers. 
*/ + Vcharset_composite = + make_charset (Qcomposite, LEADING_BYTE_COMPOSITE, 3, + CHARSET_TYPE_96X96, 2, 0, 0, + CHARSET_LEFT_TO_RIGHT, + build_string ("Composite characters"), + build_string ("")); + + composite_char_row_next = 32; + composite_char_col_next = 32; + + Vcomposite_char_string2char_hashtable = + make_lisp_hashtable (500, HASHTABLE_NONWEAK, + HASHTABLE_EQUAL); + Vcomposite_char_char2string_hashtable = + make_lisp_hashtable (500, HASHTABLE_NONWEAK, + HASHTABLE_EQ); + staticpro (&Vcomposite_char_string2char_hashtable); + staticpro (&Vcomposite_char_char2string_hashtable); + + /* Faux charsets used only for convenience in retrieving the + number of rep bytes associated with a leading byte. */ + + make_charset (Qnil, PRE_LEADING_BYTE_PRIVATE_1, 3, 0, 0, 0, 0, + 0, Qnil, Qnil); + make_charset (Qnil, PRE_LEADING_BYTE_PRIVATE_2, 4, 0, 0, 0, 0, + 0, Qnil, Qnil); +}