771
+ − 1 /* Header for charsets.
+ − 2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
+ − 3 Copyright (C) 1995 Sun Microsystems, Inc.
+ − 4 Copyright (C) 2001, 2002 Ben Wing.
+ − 5
+ − 6 This file is part of XEmacs.
+ − 7
+ − 8 XEmacs is free software; you can redistribute it and/or modify it
+ − 9 under the terms of the GNU General Public License as published by the
+ − 10 Free Software Foundation; either version 2, or (at your option) any
+ − 11 later version.
+ − 12
+ − 13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
+ − 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ − 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ − 16 for more details.
+ − 17
+ − 18 You should have received a copy of the GNU General Public License
+ − 19 along with XEmacs; see the file COPYING. If not, write to
+ − 20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ − 21 Boston, MA 02111-1307, USA. */
+ − 22
+ − 23 /* Synched up with: Mule 2.3. Not in FSF. */
+ − 24
+ − 25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
+ − 26
+ − 27 #ifndef INCLUDED_charset_h_
+ − 28 #define INCLUDED_charset_h_
+ − 29
+ − 30
+ − 31
+ − 32 #ifndef MULE
+ − 33
+ − 34 /************************************************************************/
+ − 35 /* fake charset defs */
+ − 36 /************************************************************************/
+ − 37
+ − 38 /* used when MULE is not defined, so that Charset-type stuff can still
+ − 39 be done */
+ − 40
/* Stand-ins for the charset API when MULE is not defined.  There is only
   one charset, ASCII, and it is represented by Qnil; every macro below
   ignores its charset/character argument except BREAKUP_ICHAR. */
#define Vcharset_ascii Qnil

/* Per-character queries: every character is ASCII and one byte long. */
#define ichar_charset(ch) Vcharset_ascii
#define ichar_leading_byte(ch) LEADING_BYTE_ASCII
#define ichar_len(ch) 1
#define ichar_len_fmt(ch, fmt) 1

/* Leading-byte constants for the single-charset case. */
#define LEADING_BYTE_ASCII 0x80
#define NUM_LEADING_BYTES 1
#define MIN_LEADING_BYTE 0x80

/* Charset-object queries. */
#define CHARSETP(cs) 1
#define charset_by_leading_byte(lb) Vcharset_ascii
#define XCHARSET_LEADING_BYTE(cs) LEADING_BYTE_ASCII
/* Parenthesized so that the negative constant remains a single operand
   wherever the macro is expanded. */
#define XCHARSET_GRAPHIC(cs) (-1)
#define XCHARSET_COLUMNS(cs) 1
#define XCHARSET_DIMENSION(cs) 1

/* Split CH into its components: CHARSET is always Vcharset_ascii, BYTE1
   receives CH itself and BYTE2 receives 0.  Each argument is evaluated
   exactly once. */
#define BREAKUP_ICHAR(ch, charset, byte1, byte2) do {   \
  (charset) = Vcharset_ascii;                           \
  (byte1) = (ch);                                       \
  (byte2) = 0;                                          \
} while (0)

#define XCHARSET_CCL_PROGRAM(cs) Qnil
#define XCHARSET_NAME(cs) Qascii
771
+ − 63
+ − 64 #else /* MULE */
+ − 65
+ − 66
+ − 67 /************************************************************************/
+ − 68 /* Definition of leading bytes */
+ − 69 /************************************************************************/
+ − 70
/* Smallest leading-byte value; arrays indexed by leading byte are offset
   by this amount. */
#define MIN_LEADING_BYTE 0x7F

/** Leading bytes for 1-byte characters in an official charset.  The
    values are spelled out explicitly; they run consecutively from 0x80. **/
enum LEADING_BYTE_OFFICIAL_1
{
  MIN_LEADING_BYTE_OFFICIAL_1     = 0x80,
  /* LEADING_BYTE_LATIN_ISO8859_1 *MUST* be equal to
     MIN_LEADING_BYTE_OFFICIAL_1. */
  LEADING_BYTE_LATIN_ISO8859_1    = MIN_LEADING_BYTE_OFFICIAL_1,
                                          /* Right half of ISO 8859-1 */
  LEADING_BYTE_LATIN_ISO8859_2    = 0x81, /* Right half of ISO 8859-2 */
  LEADING_BYTE_LATIN_ISO8859_3    = 0x82, /* Right half of ISO 8859-3 */
  LEADING_BYTE_LATIN_ISO8859_4    = 0x83, /* Right half of ISO 8859-4 */
  LEADING_BYTE_THAI_TIS620        = 0x84, /* TIS620-2533 */
  LEADING_BYTE_GREEK_ISO8859_7    = 0x85, /* Right half of ISO 8859-7 */
  LEADING_BYTE_ARABIC_ISO8859_6   = 0x86, /* Right half of ISO 8859-6 */
  LEADING_BYTE_HEBREW_ISO8859_8   = 0x87, /* Right half of ISO 8859-8 */
  LEADING_BYTE_KATAKANA_JISX0201  = 0x88, /* Right half of JIS X0201-1976 */
  LEADING_BYTE_LATIN_JISX0201     = 0x89, /* Left half of JIS X0201-1976 */
  LEADING_BYTE_CYRILLIC_ISO8859_5 = 0x8A, /* Right half of ISO 8859-5 */
  LEADING_BYTE_LATIN_ISO8859_9    = 0x8B, /* Right half of ISO 8859-9 */
  LEADING_BYTE_LATIN_ISO8859_15   = 0x8C, /* Right half of ISO 8859-15 */
#ifdef ENABLE_COMPOSITE_CHARS
  LEADING_BYTE_COMPOSITE          = 0x8D, /* For a composite character */
  /* The composite leading byte is not counted as an official one. */
  MAX_LEADING_BYTE_OFFICIAL_1     = LEADING_BYTE_COMPOSITE - 1,
#else
  /* Does not need to be the last entry, but simplifies things */
  LEADING_BYTE_COMPOSITE_REPLACEMENT = 0x8D, /* Replaces ESC 0 - ESC 4 in
                                                a buffer */
  MAX_LEADING_BYTE_OFFICIAL_1     = LEADING_BYTE_COMPOSITE_REPLACEMENT,
#endif
  /* 0x8E Unused */
};
+ − 106
+ − 107 /* These next 2 + LEADING_BYTE_COMPOSITE need special treatment in a string
+ − 108 and/or character */
+ − 109
#define LEADING_BYTE_ASCII 0x7F     /* Not used except in arrays indexed
                                       by leading byte.  Equal to
                                       MIN_LEADING_BYTE, so ASCII takes
                                       index 0 of such arrays. */
#define LEADING_BYTE_CONTROL_1 0x8F /* Represents the normal characters
                                       0x80-0x9F */
+ − 113
/** Leading bytes for 2-byte characters in an official charset.  The
    values are spelled out explicitly; they run consecutively from 0x90. **/
enum LEADING_BYTE_OFFICIAL_2
{
  MIN_LEADING_BYTE_OFFICIAL_2         = 0x90,
  LEADING_BYTE_JAPANESE_JISX0208_1978 = MIN_LEADING_BYTE_OFFICIAL_2,
                                              /* Japanese JIS X0208-1978 */
  LEADING_BYTE_CHINESE_GB2312         = 0x91, /* Chinese Hanzi GB2312-1980 */
  LEADING_BYTE_JAPANESE_JISX0208      = 0x92, /* Japanese JIS X0208-1983 */
  LEADING_BYTE_KOREAN_KSC5601         = 0x93, /* Hangul KS C5601-1987 */
  LEADING_BYTE_JAPANESE_JISX0212      = 0x94, /* Japanese JIS X0212-1990 */
  LEADING_BYTE_CHINESE_CNS11643_1     = 0x95, /* Chinese CNS11643 Set 1 */
  LEADING_BYTE_CHINESE_CNS11643_2     = 0x96, /* Chinese CNS11643 Set 2 */
  LEADING_BYTE_CHINESE_BIG5_1         = 0x97, /* Big5 Level 1 */
  LEADING_BYTE_CHINESE_BIG5_2         = 0x98, /* Big5 Level 2 */
  MAX_LEADING_BYTE_OFFICIAL_2         = LEADING_BYTE_CHINESE_BIG5_2,

  /* 0x99 - 0x9D unused */
};
+ − 137
+ − 138
+ − 139 /** The following are for 1- and 2-byte characters in a private charset. **/
+ − 140
/* Prefix bytes stored in a string ahead of a private leading byte (see
   private_leading_byte_prefix ()). */
#define PRE_LEADING_BYTE_PRIVATE_1 0x9E /* 1-byte char-set */
#define PRE_LEADING_BYTE_PRIVATE_2 0x9F /* 2-byte char-set */

/* Leading-byte ranges for private charsets: 0xA0-0xC0 for 1-byte
   charsets and 0xC1-0xFF for 2-byte charsets. */
#define MIN_LEADING_BYTE_PRIVATE_1 0xA0
#define MAX_LEADING_BYTE_PRIVATE_1 0xC0
#define MIN_LEADING_BYTE_PRIVATE_2 0xC1
#define MAX_LEADING_BYTE_PRIVATE_2 0xFF

/* Number of leading-byte values, i.e. one for every value from
   MIN_LEADING_BYTE (0x7F) through 0xFF inclusive. */
#define NUM_LEADING_BYTES 129
+ − 150
+ − 151
+ − 152 /************************************************************************/
+ − 153 /* Operations on leading bytes */
+ − 154 /************************************************************************/
+ − 155
/* Is this leading byte for a private charset?  True for every value at
   or above MIN_LEADING_BYTE_PRIVATE_1, which covers both the 1-byte and
   the 2-byte private ranges.  LB is evaluated once. */

#define leading_byte_private_p(lb) ((lb) >= MIN_LEADING_BYTE_PRIVATE_1)
771
+ − 159
+ − 160 /* Is this a prefix for a private leading byte? */
+ − 161
826
+ − 162 DECLARE_INLINE_HEADER (
+ − 163 int
867
+ − 164 leading_byte_prefix_p (Ibyte lb)
826
+ − 165 )
771
+ − 166 {
+ − 167 return (lb == PRE_LEADING_BYTE_PRIVATE_1 ||
+ − 168 lb == PRE_LEADING_BYTE_PRIVATE_2);
+ − 169 }
+ − 170
/* Given a private leading byte, return the leading byte prefix stored
   in a string: PRE_LEADING_BYTE_PRIVATE_1 when LB is below
   MIN_LEADING_BYTE_PRIVATE_2, PRE_LEADING_BYTE_PRIVATE_2 otherwise.
   LB is converted to unsigned int before the comparison; the macro does
   not itself check that LB is a private leading byte. */

#define private_leading_byte_prefix(lb)                 \
  ((unsigned int) (lb) < MIN_LEADING_BYTE_PRIVATE_2 ?   \
   PRE_LEADING_BYTE_PRIVATE_1 :                         \
   PRE_LEADING_BYTE_PRIVATE_2)
+ − 178
+ − 179
+ − 180 /************************************************************************/
+ − 181 /* Information about a particular character set */
+ − 182 /************************************************************************/
+ − 183
/* Description of one character set; this is the structure that XCHARSET ()
   yields for a charset Lisp object. */
struct Lisp_Charset
{
  struct LCRECORD_HEADER header;

  /* Numeric id.  CHARSET_LEADING_BYTE () returns this same value cast to
     Ibyte, i.e. the id doubles as the leading byte. */
  int id;
  Lisp_Object name;
  Lisp_Object doc_string;
  Lisp_Object registries;
  Lisp_Object short_name;
  Lisp_Object long_name;

  /* NOTE(review): presumably the counterpart charset with the opposite
     direction -- confirm where this is set. */
  Lisp_Object reverse_direction_charset;

  Lisp_Object ccl_program;

  /* Unicode translation tables.  See unicode.c for the format of these
     tables, and discussion of how they are initialized.
  */
  void *to_unicode_table;
  void *from_unicode_table;
  int from_unicode_levels;

  /* Final byte of this character set in ISO2022 designating escape
     sequence */
  Ibyte final;

  /* Number of bytes (1 - 4) required in the internal representation
     for characters in this character set.  This is *not* the
     same as the dimension of the character set. */
  int rep_bytes;

  /* Number of columns a character in this charset takes up, on TTY
     devices.  Not used for X devices. */
  int columns;

  /* Direction of this character set */
  int direction;

  /* Type of this character set (94, 96, 94x94, 96x96); one of the
     CHARSET_TYPE_* constants. */
  int type;

  /* Number of bytes used in encoding of this character set (1 or 2) */
  int dimension;

  /* Number of chars in each dimension (usually 94 or 96) */
  int chars;

  /* Which half of font to be used to display this character set */
  int graphic;

  /* If set, this charset should be written out in ISO-2022-based coding
     systems using the escape sequence for UTF-8, not using our internal
     representation and the associated real ISO 2022 designation. */
  unsigned int encode_as_utf_8 :1;

  /* If set, this is a "temporary" charset created when we encounter
     an unknown final.  This is so that we can successfully compile
     and load such files.  We allow a real charset to be created on top
     of this temporary charset. */
  unsigned int temporary :1;
};
typedef struct Lisp_Charset Lisp_Charset;
+ − 246
/* Lisp-record declaration for charsets, followed by the conversion,
   predicate and checking macros built on the generic record macros. */
DECLARE_LRECORD (charset, Lisp_Charset);
#define XCHARSET(x) XRECORD (x, charset, Lisp_Charset)
#define wrap_charset(p) wrap_record (p, charset)
#define CHARSETP(x) RECORDP (x, charset)
#define CHECK_CHARSET(x) CHECK_RECORD (x, charset)
#define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset)

/* Charset types (see the `type' field of struct Lisp_Charset). */
#define CHARSET_TYPE_94    0 /* This charset includes 94    characters. */
#define CHARSET_TYPE_96    1 /* This charset includes 96    characters. */
#define CHARSET_TYPE_94X94 2 /* This charset includes 94x94 characters. */
#define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */

/* Charset directions.  NOTE(review): presumably the values stored in the
   `direction' field of struct Lisp_Charset -- confirm. */
#define CHARSET_LEFT_TO_RIGHT 0
#define CHARSET_RIGHT_TO_LEFT 1
+ − 261
/* Field accessors taking a `Lisp_Charset *'.  Apart from
   CHARSET_LEADING_BYTE and CHARSET_PRIVATE_P, each expands directly to
   the struct member. */

/* Leading byte and id have been regrouped. -- OG */
#define CHARSET_ID(cs)           ((cs)->id)
/* The leading byte is the id, cast to Ibyte. */
#define CHARSET_LEADING_BYTE(cs) ((Ibyte) CHARSET_ID (cs))
#define CHARSET_NAME(cs)         ((cs)->name)
#define CHARSET_SHORT_NAME(cs)   ((cs)->short_name)
#define CHARSET_LONG_NAME(cs)    ((cs)->long_name)
#define CHARSET_REP_BYTES(cs)    ((cs)->rep_bytes)
#define CHARSET_COLUMNS(cs)      ((cs)->columns)
#define CHARSET_GRAPHIC(cs)      ((cs)->graphic)
#define CHARSET_ENCODE_AS_UTF_8(cs) ((cs)->encode_as_utf_8)
#define CHARSET_TYPE(cs)         ((cs)->type)
#define CHARSET_DIRECTION(cs)    ((cs)->direction)
#define CHARSET_FINAL(cs)        ((cs)->final)
#define CHARSET_DOC_STRING(cs)   ((cs)->doc_string)
#define CHARSET_REGISTRIES(cs)   ((cs)->registries)
#define CHARSET_CCL_PROGRAM(cs)  ((cs)->ccl_program)
#define CHARSET_DIMENSION(cs)    ((cs)->dimension)
#define CHARSET_CHARS(cs)        ((cs)->chars)
#define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset)
#define CHARSET_TO_UNICODE_TABLE(cs) ((cs)->to_unicode_table)
#define CHARSET_FROM_UNICODE_TABLE(cs) ((cs)->from_unicode_table)
#define CHARSET_FROM_UNICODE_LEVELS(cs) ((cs)->from_unicode_levels)

/* Nonzero if the charset's leading byte lies in the private range. */
#define CHARSET_PRIVATE_P(cs) leading_byte_private_p (CHARSET_LEADING_BYTE (cs))
771
+ − 286
/* The same accessors for a charset given as a Lisp_Object: each one
   applies XCHARSET () to its argument and then the corresponding
   CHARSET_* macro. */
#define XCHARSET_ID(cs)           CHARSET_ID           (XCHARSET (cs))
#define XCHARSET_NAME(cs)         CHARSET_NAME         (XCHARSET (cs))
#define XCHARSET_SHORT_NAME(cs)   CHARSET_SHORT_NAME   (XCHARSET (cs))
#define XCHARSET_LONG_NAME(cs)    CHARSET_LONG_NAME    (XCHARSET (cs))
#define XCHARSET_REP_BYTES(cs)    CHARSET_REP_BYTES    (XCHARSET (cs))
#define XCHARSET_COLUMNS(cs)      CHARSET_COLUMNS      (XCHARSET (cs))
#define XCHARSET_GRAPHIC(cs)      CHARSET_GRAPHIC      (XCHARSET (cs))
#define XCHARSET_ENCODE_AS_UTF_8(cs) CHARSET_ENCODE_AS_UTF_8 (XCHARSET (cs))
#define XCHARSET_TYPE(cs)         CHARSET_TYPE         (XCHARSET (cs))
#define XCHARSET_DIRECTION(cs)    CHARSET_DIRECTION    (XCHARSET (cs))
#define XCHARSET_FINAL(cs)        CHARSET_FINAL        (XCHARSET (cs))
#define XCHARSET_DOC_STRING(cs)   CHARSET_DOC_STRING   (XCHARSET (cs))
#define XCHARSET_REGISTRIES(cs)   CHARSET_REGISTRIES   (XCHARSET (cs))
#define XCHARSET_LEADING_BYTE(cs) CHARSET_LEADING_BYTE (XCHARSET (cs))
#define XCHARSET_CCL_PROGRAM(cs)  CHARSET_CCL_PROGRAM  (XCHARSET (cs))
#define XCHARSET_DIMENSION(cs)    CHARSET_DIMENSION    (XCHARSET (cs))
#define XCHARSET_CHARS(cs)        CHARSET_CHARS        (XCHARSET (cs))

#define XCHARSET_PRIVATE_P(cs)    CHARSET_PRIVATE_P    (XCHARSET (cs))
#define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \
  CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs))
#define XCHARSET_TO_UNICODE_TABLE(cs) \
  CHARSET_TO_UNICODE_TABLE (XCHARSET (cs))
#define XCHARSET_FROM_UNICODE_TABLE(cs) \
  CHARSET_FROM_UNICODE_TABLE (XCHARSET (cs))
#define XCHARSET_FROM_UNICODE_LEVELS(cs) \
  CHARSET_FROM_UNICODE_LEVELS (XCHARSET (cs))
+ − 314
/* Tables for finding a charset object from its leading byte or from its
   type/final-byte/direction attributes. */
struct charset_lookup
{
  /* Table of charsets indexed by leading byte.  The index is the leading
     byte minus MIN_LEADING_BYTE (see charset_by_leading_byte ()). */
  Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];

  /* Table of charsets indexed by type/final-byte/direction. */
  Lisp_Object charset_by_attributes[4][128][2];

  /* NOTE(review): judging by the names, the next leading bytes to hand
     out to newly created 1-byte and 2-byte charsets -- confirm in the
     code that allocates leading bytes. */
  Ibyte next_allocated_1_byte_leading_byte;
  Ibyte next_allocated_2_byte_leading_byte;
};

/* The lookup tables read by charset_by_leading_byte () and
   charset_by_attributes (); defined outside this header. */
extern struct charset_lookup *chlook;
+ − 327
826
/* Return the entry of chlook->charset_by_leading_byte for leading byte
   LB.  The table index is LB - MIN_LEADING_BYTE.  LB is range-checked
   (MIN_LEADING_BYTE .. 0xFF) only when ERROR_CHECK_TEXT is defined. */
DECLARE_INLINE_HEADER (
Lisp_Object
charset_by_leading_byte (int lb)
)
{
#ifdef ERROR_CHECK_TEXT
  /* When error-checking is on, x86 GCC 2.95.2 -O3 miscompiles the
     following unless we introduce `tem'. */
  int tem = lb;
  text_checking_assert (tem >= MIN_LEADING_BYTE && tem <= 0xFF);
#endif
  return chlook->charset_by_leading_byte[lb - MIN_LEADING_BYTE];
}
+ − 341
826
/* Return the entry of chlook->charset_by_attributes selected by TYPE,
   FINAL and DIR.  Each index is compared against the extent of its table
   dimension (4, 128 and 2 respectively) by type_checking_assert before
   the table is read. */
DECLARE_INLINE_HEADER (
Lisp_Object
charset_by_attributes (int type, int final, int dir)
)
{
  type_checking_assert (type  < countof (chlook->charset_by_attributes) &&
                        final < countof (chlook->charset_by_attributes[0]) &&
                        dir   < countof (chlook->charset_by_attributes[0][0]));
  return chlook->charset_by_attributes[type][final][dir];
}
+ − 352
+ − 353
+ − 354 /************************************************************************/
+ − 355 /* Dealing with characters */
+ − 356 /************************************************************************/
+ − 357
/* The bit fields of character are divided into 3 parts:
   FIELD1(7bits):FIELD2(7bits):FIELD3(7bits)
   FIELD1 occupies bits 14-20, FIELD2 bits 7-13 and FIELD3 bits 0-6. */

#define ICHAR_FIELD1_MASK (0x7F << 14)
#define ICHAR_FIELD2_MASK (0x7F << 7)
#define ICHAR_FIELD3_MASK 0x7F

/* Macros to access each field of a character code of C.  Each returns
   the field shifted down to the range 0-0x7F. */

#define ichar_field1(c) (((c) & ICHAR_FIELD1_MASK) >> 14)
#define ichar_field2(c) (((c) & ICHAR_FIELD2_MASK) >> 7)
#define ichar_field3(c)  ((c) & ICHAR_FIELD3_MASK)
771
+ − 370
/* Field 1, if non-zero, usually holds a leading byte for a
   dimension-2 charset.  Field 2, if non-zero, usually holds a leading
   byte for a dimension-1 charset. */

/* Converting between field values and leading bytes: a leading byte is
   the field value plus the matching FIELDn_TO_..._LEADING_BYTE offset.
   With the leading-byte definitions in this header the official offsets
   are 0x7F (field 2) and 0x8F (field 1). */

#define FIELD2_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_1 - 1)
#define FIELD2_TO_PRIVATE_LEADING_BYTE 0x80

#define FIELD1_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_2 - 1)
#define FIELD1_TO_PRIVATE_LEADING_BYTE 0x80

/* Minimum and maximum allowed values for the fields, obtained by
   subtracting the conversion offset from the first and last leading byte
   of each range. */

#define MIN_ICHAR_FIELD2_OFFICIAL \
  (MIN_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE)
#define MAX_ICHAR_FIELD2_OFFICIAL \
  (MAX_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE)

#define MIN_ICHAR_FIELD1_OFFICIAL \
  (MIN_LEADING_BYTE_OFFICIAL_2 - FIELD1_TO_OFFICIAL_LEADING_BYTE)
#define MAX_ICHAR_FIELD1_OFFICIAL \
  (MAX_LEADING_BYTE_OFFICIAL_2 - FIELD1_TO_OFFICIAL_LEADING_BYTE)

#define MIN_ICHAR_FIELD2_PRIVATE \
  (MIN_LEADING_BYTE_PRIVATE_1 - FIELD2_TO_PRIVATE_LEADING_BYTE)
#define MAX_ICHAR_FIELD2_PRIVATE \
  (MAX_LEADING_BYTE_PRIVATE_1 - FIELD2_TO_PRIVATE_LEADING_BYTE)

#define MIN_ICHAR_FIELD1_PRIVATE \
  (MIN_LEADING_BYTE_PRIVATE_2 - FIELD1_TO_PRIVATE_LEADING_BYTE)
#define MAX_ICHAR_FIELD1_PRIVATE \
  (MAX_LEADING_BYTE_PRIVATE_2 - FIELD1_TO_PRIVATE_LEADING_BYTE)

/* Minimum character code of each <type> character.  With the values in
   this header these evaluate to 0x80, 0x1000, 0x4000, 0x104000 and
   0x1FC000 respectively. */

#define MIN_CHAR_OFFICIAL_TYPE9N    (MIN_ICHAR_FIELD2_OFFICIAL << 7)
#define MIN_CHAR_PRIVATE_TYPE9N     (MIN_ICHAR_FIELD2_PRIVATE << 7)
#define MIN_CHAR_OFFICIAL_TYPE9NX9N (MIN_ICHAR_FIELD1_OFFICIAL << 14)
#define MIN_CHAR_PRIVATE_TYPE9NX9N  (MIN_ICHAR_FIELD1_PRIVATE << 14)
#define MIN_CHAR_COMPOSITION        (0x7F << 14)
771
+ − 412
+ − 413 /* Leading byte of a character.
+ − 414
+ − 415 NOTE: This takes advantage of the fact that
+ − 416 FIELD2_TO_OFFICIAL_LEADING_BYTE and
+ − 417 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
+ − 418 */
+ − 419
826
+ − 420 DECLARE_INLINE_HEADER (
867
+ − 421 Ibyte
+ − 422 ichar_leading_byte (Ichar c)
826
+ − 423 )
771
+ − 424 {
867
+ − 425 if (ichar_ascii_p (c))
771
+ − 426 return LEADING_BYTE_ASCII;
+ − 427 else if (c < 0xA0)
+ − 428 return LEADING_BYTE_CONTROL_1;
+ − 429 else if (c < MIN_CHAR_OFFICIAL_TYPE9NX9N)
867
+ − 430 return ichar_field2 (c) + FIELD2_TO_OFFICIAL_LEADING_BYTE;
771
+ − 431 else if (c < MIN_CHAR_PRIVATE_TYPE9NX9N)
867
+ − 432 return ichar_field1 (c) + FIELD1_TO_OFFICIAL_LEADING_BYTE;
771
+ − 433 else if (c < MIN_CHAR_COMPOSITION)
867
+ − 434 return ichar_field1 (c) + FIELD1_TO_PRIVATE_LEADING_BYTE;
771
+ − 435 else
+ − 436 {
+ − 437 #ifdef ENABLE_COMPOSITE_CHARS
+ − 438 return LEADING_BYTE_COMPOSITE;
+ − 439 #else
2500
+ − 440 ABORT();
771
+ − 441 return 0;
+ − 442 #endif /* ENABLE_COMPOSITE_CHARS */
+ − 443 }
+ − 444 }
+ − 445
826
+ − 446 DECLARE_INLINE_HEADER (
+ − 447 Bytecount
867
+ − 448 ichar_len (Ichar c)
826
+ − 449 )
+ − 450 {
867
+ − 451 if (ichar_ascii_p (c))
826
+ − 452 return 1;
+ − 453 else if (c < MIN_CHAR_OFFICIAL_TYPE9NX9N)
+ − 454 return 2;
+ − 455 else if (c < MIN_CHAR_PRIVATE_TYPE9NX9N)
+ − 456 return 3; /* dimension-2 official or dimension-1 private */
+ − 457 else if (c < MIN_CHAR_COMPOSITION)
+ − 458 return 4;
+ − 459 else
+ − 460 {
+ − 461 #ifdef ENABLE_COMPOSITE_CHARS
+ − 462 #error Not yet implemented
+ − 463 #else
2500
+ − 464 ABORT();
826
+ − 465 return 0;
+ − 466 #endif /* ENABLE_COMPOSITE_CHARS */
+ − 467 }
+ − 468 }
+ − 469
+ − 470 DECLARE_INLINE_HEADER (
+ − 471 Bytecount
867
+ − 472 ichar_len_fmt (Ichar c, Internal_Format fmt)
826
+ − 473 )
+ − 474 {
+ − 475 switch (fmt)
+ − 476 {
+ − 477 case FORMAT_DEFAULT:
867
+ − 478 return ichar_len (c);
826
+ − 479 case FORMAT_16_BIT_FIXED:
+ − 480 return 2;
+ − 481 case FORMAT_32_BIT_FIXED:
+ − 482 return 4;
+ − 483 default:
+ − 484 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
+ − 485 return 1;
+ − 486 }
+ − 487 }
+ − 488
867
/* Return the charset object of character C, found by looking up C's
   leading byte in the leading-byte table. */
#define ichar_charset(c) charset_by_leading_byte (ichar_leading_byte (c))
771
+ − 490
+ − 491 /* Return a character whose charset is CHARSET and position-codes are C1
+ − 492 and C2. TYPE9N character ignores C2. (For typical charsets, i.e. not
+ − 493 ASCII, Control-1 or Composite, C1 and C2 will be in the range of 32 to
+ − 494 127 or 33 to 126. See `make-char'.)
+ − 495
+ − 496 NOTE: This takes advantage of the fact that
+ − 497 FIELD2_TO_OFFICIAL_LEADING_BYTE and
+ − 498 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
+ − 499 */
+ − 500
826
+ − 501 DECLARE_INLINE_HEADER (
867
+ − 502 Ichar
+ − 503 make_ichar (Lisp_Object charset, int c1, int c2)
826
+ − 504 )
771
+ − 505 {
867
+ − 506 Ichar retval;
771
+ − 507 if (EQ (charset, Vcharset_ascii))
826
+ − 508 retval = c1;
771
+ − 509 else if (EQ (charset, Vcharset_control_1))
826
+ − 510 retval = c1 | 0x80;
771
+ − 511 #ifdef ENABLE_COMPOSITE_CHARS
+ − 512 else if (EQ (charset, Vcharset_composite))
826
+ − 513 retval = (0x1F << 14) | ((c1) << 7) | (c2);
771
+ − 514 #endif
+ − 515 else if (XCHARSET_DIMENSION (charset) == 1)
826
+ − 516 retval = ((XCHARSET_LEADING_BYTE (charset) -
+ − 517 FIELD2_TO_OFFICIAL_LEADING_BYTE) << 7) | (c1);
771
+ − 518 else if (!XCHARSET_PRIVATE_P (charset))
826
+ − 519 retval = ((XCHARSET_LEADING_BYTE (charset) -
+ − 520 FIELD1_TO_OFFICIAL_LEADING_BYTE) << 14) | ((c1) << 7) | (c2);
771
+ − 521 else
826
+ − 522 retval = ((XCHARSET_LEADING_BYTE (charset) -
+ − 523 FIELD1_TO_PRIVATE_LEADING_BYTE) << 14) | ((c1) << 7) | (c2);
867
+ − 524 text_checking_assert (valid_ichar_p (retval));
826
+ − 525 return retval;
771
+ − 526 }
+ − 527
867
/* BREAKUP_ICHAR_1_UNSAFE assumes that the charset has already been
   calculated, and just computes c1 and c2: for a dimension-1 charset C1
   gets field 3 of C and C2 gets 0; otherwise C1 gets field 2 and C2 gets
   field 3.

   "Unsafe" because C appears twice in the expansion, so it must not have
   side effects.  The whole expansion is parenthesized so that it behaves
   as a single expression wherever it is used; its value is the value
   assigned to C2.

   BREAKUP_ICHAR also computes and stores the charset. */

#define BREAKUP_ICHAR_1_UNSAFE(c, charset, c1, c2)      \
  (XCHARSET_DIMENSION (charset) == 1                    \
   ? ((c1) = ichar_field3 (c), (c2) = 0)                \
   : ((c1) = ichar_field2 (c),                          \
      (c2) = ichar_field3 (c)))
771
+ − 538
826
+ − 539 DECLARE_INLINE_HEADER (
+ − 540 void
867
+ − 541 breakup_ichar_1 (Ichar c, Lisp_Object *charset, int *c1, int *c2)
826
+ − 542 )
771
+ − 543 {
867
+ − 544 text_checking_assert (valid_ichar_p (c));
+ − 545 *charset = ichar_charset (c);
+ − 546 BREAKUP_ICHAR_1_UNSAFE (c, *charset, *c1, *c2);
771
+ − 547 }
+ − 548
867
/* BREAKUP_ICHAR separates an Ichar into its components.  The charset of
   character C is stored in CHARSET, and the position-codes of C are
   stored in C1 and C2.  C2 of TYPE9N character is 0.

   CHARSET, C1 and C2 must be lvalues, since their addresses are passed
   to breakup_ichar_1 (). */

#define BREAKUP_ICHAR(c, charset, c1, c2) \
  breakup_ichar_1 (c, &(charset), &(c1), &(c2))
771
+ − 555
788
/* Defined outside this header.  NOTE(review): judging by the signature,
   get_charset_limits stores a lower and an upper bound for CHARSET in
   *LOW and *HIGH, and ichar_to_unicode returns the Unicode code point
   for CHR -- confirm against the definitions. */
void get_charset_limits (Lisp_Object charset, int *low, int *high);
int ichar_to_unicode (Ichar chr);

/* Lisp primitive taking one argument; only declared when MULE is
   defined. */
EXFUN (Fcharset_name, 1);
+ − 560
771
+ − 561 #endif /* MULE */
+ − 562
3439
/* ISO 10646 UTF-16, UCS-4, UTF-8, UTF-7, etc.  Selects the Unicode
   encoding form, e.g. for encode_unicode_char ().  The values are spelled
   out explicitly; they are the same consecutive values starting at 0. */

enum unicode_type
{
  UNICODE_UTF_16 = 0,
  UNICODE_UTF_8  = 1,
  UNICODE_UTF_7  = 2,
  UNICODE_UCS_4  = 3,
  UNICODE_UTF_32 = 4
};
+ − 573
/* Defined outside this header.  CHARSET and L are wrapped in
   USED_IF_MULE.  NOTE(review): presumably encodes one character into DST
   in the encoding selected by TYPE, with LITTLE_ENDIAN choosing the byte
   order -- confirm against the definition. */
void encode_unicode_char (Lisp_Object USED_IF_MULE (charset), int h,
                          int USED_IF_MULE (l), unsigned_char_dynarr *dst,
                          enum unicode_type type, unsigned int little_endian,
                          int write_error_characters_as_such);
+ − 578
/* NOTE(review): judging by the name, the start of a code range used to
   represent octets that failed to decode -- confirm against its users. */
#define UNICODE_ERROR_OCTET_RANGE_START 0x200000

/* UTF-16 surrogate tests.  Each one masks CH and compares, so only bits
   10-15 (11-15 for the last macro) of CH are examined:
     first surrogate   0xD800-0xDBFF
     last surrogate    0xDC00-0xDFFF
     any surrogate     0xD800-0xDFFF  */
#define valid_utf_16_first_surrogate(ch) (((ch) & 0xFC00) == 0xD800)
#define valid_utf_16_last_surrogate(ch) (((ch) & 0xFC00) == 0xDC00)
#define valid_utf_16_surrogate(ch) (((ch) & 0xF800) == 0xD800)
3439
+ − 584
3676
/* Defined outside this header.  NOTE(review): presumably stores
   REGISTRIES as the registries of CHARSET -- confirm against the
   definition. */
void set_charset_registries(Lisp_Object charset, Lisp_Object registries);

/* Lisp primitives taking two arguments and one argument respectively. */
EXFUN (Funicode_to_char, 2);
EXFUN (Fchar_to_unicode, 1);
+ − 589
771
+ − 590 #endif /* INCLUDED_charset_h_ */