Mercurial > hg > xemacs-beta
annotate src/charset.h @ 5533:11da5b828d10
shell-command and shell-command-on-region API compliant with FSF 23.3.1
| author | Mats Lidell <mats.lidell@cag.se> |
|---|---|
| date | Sun, 31 Jul 2011 01:29:09 +0200 |
| parents | 308d34e9f07d |
| children |
| rev | line source |
|---|---|
| 771 | 1 /* Header for charsets. |
| 2 Copyright (C) 1992, 1995 Free Software Foundation, Inc. | |
| 3 Copyright (C) 1995 Sun Microsystems, Inc. | |
|
5200
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
4 Copyright (C) 2001, 2002, 2010 Ben Wing. |
| 771 | 5 |
| 6 This file is part of XEmacs. | |
| 7 | |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5200
diff
changeset
|
8 XEmacs is free software: you can redistribute it and/or modify it |
| 771 | 9 under the terms of the GNU General Public License as published by the |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5200
diff
changeset
|
10 Free Software Foundation, either version 3 of the License, or (at your |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5200
diff
changeset
|
11 option) any later version. |
| 771 | 12 |
| 13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
| 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
| 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
| 16 for more details. | |
| 17 | |
| 18 You should have received a copy of the GNU General Public License | |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5200
diff
changeset
|
19 along with XEmacs. If not, see <http://www.gnu.org/licenses/>. */ |
| 771 | 20 |
| 21 /* Synched up with: Mule 2.3. Not in FSF. */ | |
| 22 | |
| 23 /* Rewritten by Ben Wing <ben@xemacs.org>. */ | |
| 24 | |
| 25 #ifndef INCLUDED_charset_h_ | |
| 26 #define INCLUDED_charset_h_ | |
| 27 | |
| 28 | |
| 29 | |
| 30 #ifndef MULE | |
| 31 | |
| 32 /************************************************************************/ | |
| 33 /* fake charset defs */ | |
| 34 /************************************************************************/ | |
| 35 | |
| 36 /* used when MULE is not defined, so that Charset-type stuff can still | |
| 37 be done */ | |
| 38 | |
| 39 #define Vcharset_ascii Qnil | |
| 40 | |
| 867 | 41 #define ichar_charset(ch) Vcharset_ascii |
| 42 #define ichar_leading_byte(ch) LEADING_BYTE_ASCII | |
| 43 #define ichar_len(ch) 1 | |
| 44 #define ichar_len_fmt(ch, fmt) 1 | |
| 771 | 45 #define LEADING_BYTE_ASCII 0x80 |
| 46 #define NUM_LEADING_BYTES 1 | |
| 47 #define MIN_LEADING_BYTE 0x80 | |
| 48 #define CHARSETP(cs) 1 | |
| 826 | 49 #define charset_by_leading_byte(lb) Vcharset_ascii |
| 771 | 50 #define XCHARSET_LEADING_BYTE(cs) LEADING_BYTE_ASCII |
| 51 #define XCHARSET_GRAPHIC(cs) -1 | |
| 52 #define XCHARSET_COLUMNS(cs) 1 | |
| 53 #define XCHARSET_DIMENSION(cs) 1 | |
| 867 | 54 #define BREAKUP_ICHAR(ch, charset, byte1, byte2) do { \ |
| 771 | 55 (charset) = Vcharset_ascii; \ |
| 56 (byte1) = (ch); \ | |
| 57 (byte2) = 0; \ | |
| 58 } while (0) | |
| 3659 | 59 #define XCHARSET_CCL_PROGRAM(cs) Qnil |
| 60 #define XCHARSET_NAME(cs) Qascii | |
| 5019 | 61 #define Fget_charset(cs) (cs) |
| 62 #define Fcharset_list() list1 (Vcharset_ascii) | |
| 771 | 63 |
| 64 #else /* MULE */ | |
| 65 | |
| 66 | |
| 67 /************************************************************************/ | |
| 68 /* Definition of leading bytes */ | |
| 69 /************************************************************************/ | |
| 70 | |
| 71 #define MIN_LEADING_BYTE 0x7F | |
| 72 | |
| 73 /** The following are for 1-byte characters in an official charset. **/ | |
| 74 enum LEADING_BYTE_OFFICIAL_1 | |
| 75 { | |
| 76 MIN_LEADING_BYTE_OFFICIAL_1 = 0x80, | |
| 77 /* LEADING_BYTE_LATIN_ISO8859_1 *MUST* be equal to | |
| 78 MIN_LEADING_BYTE_OFFICIAL_1. */ | |
| 79 LEADING_BYTE_LATIN_ISO8859_1 = /* 0x80 Right half of ISO 8859-1 */ | |
| 80 MIN_LEADING_BYTE_OFFICIAL_1, | |
| 81 LEADING_BYTE_LATIN_ISO8859_2, /* 0x81 Right half of ISO 8859-2 */ | |
| 82 LEADING_BYTE_LATIN_ISO8859_3, /* 0x82 Right half of ISO 8859-3 */ | |
| 83 LEADING_BYTE_LATIN_ISO8859_4, /* 0x83 Right half of ISO 8859-4 */ | |
| 84 LEADING_BYTE_THAI_TIS620, /* 0x84 TIS620-2533 */ | |
| 85 LEADING_BYTE_GREEK_ISO8859_7, /* 0x85 Right half of ISO 8859-7 */ | |
| 86 LEADING_BYTE_ARABIC_ISO8859_6, /* 0x86 Right half of ISO 8859-6 */ | |
| 87 LEADING_BYTE_HEBREW_ISO8859_8, /* 0x87 Right half of ISO 8859-8 */ | |
| 88 LEADING_BYTE_KATAKANA_JISX0201, /* 0x88 Right half of JIS X0201-1976 */ | |
| 89 LEADING_BYTE_LATIN_JISX0201, /* 0x89 Left half of JIS X0201-1976 */ | |
| 90 LEADING_BYTE_CYRILLIC_ISO8859_5,/* 0x8A Right half of ISO 8859-5 */ | |
| 91 LEADING_BYTE_LATIN_ISO8859_9, /* 0x8B Right half of ISO 8859-9 */ | |
| 92 LEADING_BYTE_LATIN_ISO8859_15, /* 0x8C Right half of ISO 8859-15 */ | |
| 93 #ifdef ENABLE_COMPOSITE_CHARS | |
| 94 LEADING_BYTE_COMPOSITE, /* 0x8D For a composite character */ | |
| 95 MAX_LEADING_BYTE_OFFICIAL_1 = | |
| 96 LEADING_BYTE_COMPOSITE - 1, | |
| 97 #else | |
| 98 /* Does not need to be the last entry, but simplifies things */ | |
| 99 LEADING_BYTE_COMPOSITE_REPLACEMENT, /* 0x8D Replaces ESC 0 - ESC 4 in a | |
| 100 buffer */ | |
| 101 MAX_LEADING_BYTE_OFFICIAL_1 = | |
| 102 LEADING_BYTE_COMPOSITE_REPLACEMENT, | |
| 103 #endif | |
| 104 /* 0x8E Unused */ | |
| 105 }; | |
| 106 | |
| 107 /* These next 2 + LEADING_BYTE_COMPOSITE need special treatment in a string | |
| 108 and/or character */ | |
| 109 | |
| 110 #define LEADING_BYTE_ASCII 0x7F /* Not used except in arrays | |
| 111 indexed by leading byte */ | |
| 112 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ | |
| 113 | |
| 114 /** The following are for 2-byte characters in an official charset. **/ | |
| 115 enum LEADING_BYTE_OFFICIAL_2 | |
| 116 { | |
| 117 MIN_LEADING_BYTE_OFFICIAL_2 = 0x90, | |
| 118 LEADING_BYTE_JAPANESE_JISX0208_1978 = | |
| 119 MIN_LEADING_BYTE_OFFICIAL_2, /* 0x90 Japanese JIS X0208-1978 */ | |
| 120 LEADING_BYTE_CHINESE_GB2312, /* 0x91 Chinese Hanzi GB2312-1980 */ | |
| 121 LEADING_BYTE_JAPANESE_JISX0208, /* 0x92 Japanese JIS X0208-1983 */ | |
| 122 LEADING_BYTE_KOREAN_KSC5601, /* 0x93 Hangul KS C5601-1987 */ | |
| 123 LEADING_BYTE_JAPANESE_JISX0212, /* 0x94 Japanese JIS X0212-1990 */ | |
| 124 LEADING_BYTE_CHINESE_CNS11643_1, /* 0x95 Chinese CNS11643 Set 1 */ | |
| 125 LEADING_BYTE_CHINESE_CNS11643_2, /* 0x96 Chinese CNS11643 Set 2 */ | |
| 126 LEADING_BYTE_CHINESE_BIG5_1, /* 0x97 Big5 Level 1 */ | |
| 127 LEADING_BYTE_CHINESE_BIG5_2, /* 0x98 Big5 Level 2 */ | |
| 128 MAX_LEADING_BYTE_OFFICIAL_2 = | |
| 129 LEADING_BYTE_CHINESE_BIG5_2, | |
| 130 | |
| 131 /* 0x99 unused */ | |
| 132 /* 0x9A unused */ | |
| 133 /* 0x9B unused */ | |
| 134 /* 0x9C unused */ | |
| 135 /* 0x9D unused */ | |
| 136 }; | |
| 137 | |
| 138 | |
| 139 /** The following are for 1- and 2-byte characters in a private charset. **/ | |
| 140 | |
| 141 #define PRE_LEADING_BYTE_PRIVATE_1 0x9E /* 1-byte char-set */ | |
| 142 #define PRE_LEADING_BYTE_PRIVATE_2 0x9F /* 2-byte char-set */ | |
| 143 | |
| 144 #define MIN_LEADING_BYTE_PRIVATE_1 0xA0 | |
| 3496 | 145 #define MAX_LEADING_BYTE_PRIVATE_1 0xC0 |
| 146 #define MIN_LEADING_BYTE_PRIVATE_2 0xC1 | |
| 771 | 147 #define MAX_LEADING_BYTE_PRIVATE_2 0xFF |
| 148 | |
| 149 #define NUM_LEADING_BYTES 129 | |
| 150 | |
| 151 | |
| 152 /************************************************************************/ | |
| 153 /* Operations on leading bytes */ | |
| 154 /************************************************************************/ | |
| 155 | |
| 156 /* Is this leading byte for a private charset? */ | |
| 157 | |
| 826 | 158 #define leading_byte_private_p(lb) ((lb) >= MIN_LEADING_BYTE_PRIVATE_1) |
| 771 | 159 |
| 160 /* Is this a prefix for a private leading byte? */ | |
| 161 | |
| 826 | 162 DECLARE_INLINE_HEADER ( |
| 163 int | |
| 867 | 164 leading_byte_prefix_p (Ibyte lb) |
| 826 | 165 ) |
| 771 | 166 { |
| 167 return (lb == PRE_LEADING_BYTE_PRIVATE_1 || | |
| 168 lb == PRE_LEADING_BYTE_PRIVATE_2); | |
| 169 } | |
| 170 | |
/* Map a private leading byte LB to the prefix byte that is stored in
   front of it in a string: leading bytes at or above
   MIN_LEADING_BYTE_PRIVATE_2 get PRE_LEADING_BYTE_PRIVATE_2, all lower
   ones get PRE_LEADING_BYTE_PRIVATE_1.  LB is compared as an unsigned
   int. */

#define private_leading_byte_prefix(lb)                 \
  (MIN_LEADING_BYTE_PRIVATE_2 <= (unsigned int) (lb)    \
   ? PRE_LEADING_BYTE_PRIVATE_2                         \
   : PRE_LEADING_BYTE_PRIVATE_1)
| 178 | |
| 179 | |
| 180 /************************************************************************/ | |
| 181 /* Information about a particular character set */ | |
| 182 /************************************************************************/ | |
| 183 | |
| 184 struct Lisp_Charset | |
| 185 { | |
|
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5125
diff
changeset
|
186 NORMAL_LISP_OBJECT_HEADER header; |
| 771 | 187 |
| 188 int id; | |
| 189 Lisp_Object name; | |
| 190 Lisp_Object doc_string; | |
| 3659 | 191 Lisp_Object registries; |
| 771 | 192 Lisp_Object short_name; |
| 193 Lisp_Object long_name; | |
| 194 | |
| 195 Lisp_Object reverse_direction_charset; | |
| 196 | |
| 197 Lisp_Object ccl_program; | |
| 198 | |
| 2367 | 199 /* Unicode translation tables. See unicode.c for the format of these |
| 200 tables, and discussion of how they are initialized. | |
| 201 */ | |
| 771 | 202 void *to_unicode_table; |
| 203 void *from_unicode_table; | |
| 204 int from_unicode_levels; | |
| 205 | |
| 2367 | 206 /* Final byte of this character set in ISO2022 designating escape |
| 207 sequence */ | |
| 867 | 208 Ibyte final; |
| 771 | 209 |
| 210 /* Number of bytes (1 - 4) required in the internal representation | |
| 211 for characters in this character set. This is *not* the | |
| 212 same as the dimension of the character set. */ | |
| 213 int rep_bytes; | |
| 214 | |
| 215 /* Number of columns a character in this charset takes up, on TTY | |
| 216 devices. Not used for X devices. */ | |
| 217 int columns; | |
| 218 | |
| 219 /* Direction of this character set */ | |
| 220 int direction; | |
| 221 | |
| 222 /* Type of this character set (94, 96, 94x94, 96x96) */ | |
| 223 int type; | |
| 224 | |
| 225 /* Number of bytes used in encoding of this character set (1 or 2) */ | |
| 226 int dimension; | |
| 227 | |
| 228 /* Number of chars in each dimension (usually 94 or 96) */ | |
| 229 int chars; | |
| 230 | |
| 231 /* Which half of font to be used to display this character set */ | |
| 232 int graphic; | |
| 233 | |
| 3439 | 234 /* If set, this charset should be written out in ISO-2022-based coding |
| 235 systems using the escape sequence for UTF-8, not using our internal | |
| 236 representation and the associated real ISO 2022 designation. */ | |
| 237 unsigned int encode_as_utf_8 :1; | |
| 238 | |
| 771 | 239 /* If set, this is a "temporary" charset created when we encounter |
| 240 an unknown final. This is so that we can successfully compile | |
| 241 and load such files. We allow a real charset to be created on top | |
| 242 of this temporary charset. */ | |
| 243 unsigned int temporary :1; | |
| 244 }; | |
| 245 typedef struct Lisp_Charset Lisp_Charset; | |
| 246 | |
|
5118
e0db3c197671
merge up to latest default branch, doesn't compile yet
Ben Wing <ben@xemacs.org>
parents:
4096
diff
changeset
|
247 DECLARE_LISP_OBJECT (charset, Lisp_Charset); |
| 771 | 248 #define XCHARSET(x) XRECORD (x, charset, Lisp_Charset) |
| 249 #define wrap_charset(p) wrap_record (p, charset) | |
| 250 #define CHARSETP(x) RECORDP (x, charset) | |
| 251 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) | |
| 252 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) | |
| 253 | |
| 254 #define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */ | |
| 255 #define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */ | |
| 256 #define CHARSET_TYPE_94X94 2 /* This charset includes 94x94 characters. */ | |
| 257 #define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */ | |
| 258 | |
| 259 #define CHARSET_LEFT_TO_RIGHT 0 | |
| 260 #define CHARSET_RIGHT_TO_LEFT 1 | |
| 261 | |
| 262 /* Leading byte and id have been regrouped. -- OG */ | |
| 263 #define CHARSET_ID(cs) ((cs)->id) | |
| 867 | 264 #define CHARSET_LEADING_BYTE(cs) ((Ibyte) CHARSET_ID (cs)) |
| 771 | 265 #define CHARSET_NAME(cs) ((cs)->name) |
| 266 #define CHARSET_SHORT_NAME(cs) ((cs)->short_name) | |
| 267 #define CHARSET_LONG_NAME(cs) ((cs)->long_name) | |
| 268 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes) | |
| 269 #define CHARSET_COLUMNS(cs) ((cs)->columns) | |
| 270 #define CHARSET_GRAPHIC(cs) ((cs)->graphic) | |
| 3439 | 271 #define CHARSET_ENCODE_AS_UTF_8(cs) ((cs)->encode_as_utf_8) |
| 771 | 272 #define CHARSET_TYPE(cs) ((cs)->type) |
| 273 #define CHARSET_DIRECTION(cs) ((cs)->direction) | |
| 274 #define CHARSET_FINAL(cs) ((cs)->final) | |
| 275 #define CHARSET_DOC_STRING(cs) ((cs)->doc_string) | |
| 3659 | 276 #define CHARSET_REGISTRIES(cs) ((cs)->registries) |
| 771 | 277 #define CHARSET_CCL_PROGRAM(cs) ((cs)->ccl_program) |
| 278 #define CHARSET_DIMENSION(cs) ((cs)->dimension) | |
| 279 #define CHARSET_CHARS(cs) ((cs)->chars) | |
| 280 #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset) | |
| 281 #define CHARSET_TO_UNICODE_TABLE(cs) ((cs)->to_unicode_table) | |
| 282 #define CHARSET_FROM_UNICODE_TABLE(cs) ((cs)->from_unicode_table) | |
| 283 #define CHARSET_FROM_UNICODE_LEVELS(cs) ((cs)->from_unicode_levels) | |
| 284 | |
| 826 | 285 #define CHARSET_PRIVATE_P(cs) leading_byte_private_p (CHARSET_LEADING_BYTE (cs)) |
| 771 | 286 |
| 287 #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs)) | |
| 288 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs)) | |
| 289 #define XCHARSET_SHORT_NAME(cs) CHARSET_SHORT_NAME (XCHARSET (cs)) | |
| 290 #define XCHARSET_LONG_NAME(cs) CHARSET_LONG_NAME (XCHARSET (cs)) | |
| 291 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs)) | |
| 292 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs)) | |
| 293 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs)) | |
| 3439 | 294 #define XCHARSET_ENCODE_AS_UTF_8(cs) CHARSET_ENCODE_AS_UTF_8 (XCHARSET (cs)) |
| 771 | 295 #define XCHARSET_TYPE(cs) CHARSET_TYPE (XCHARSET (cs)) |
| 296 #define XCHARSET_DIRECTION(cs) CHARSET_DIRECTION (XCHARSET (cs)) | |
| 297 #define XCHARSET_FINAL(cs) CHARSET_FINAL (XCHARSET (cs)) | |
| 298 #define XCHARSET_DOC_STRING(cs) CHARSET_DOC_STRING (XCHARSET (cs)) | |
| 3659 | 299 #define XCHARSET_REGISTRIES(cs) CHARSET_REGISTRIES (XCHARSET (cs)) |
| 771 | 300 #define XCHARSET_LEADING_BYTE(cs) CHARSET_LEADING_BYTE (XCHARSET (cs)) |
| 301 #define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs)) | |
| 302 #define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs)) | |
| 303 #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs)) | |
| 3659 | 304 |
| 771 | 305 #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs)) |
| 306 #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ | |
| 307 CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) | |
| 308 #define XCHARSET_TO_UNICODE_TABLE(cs) \ | |
| 309 CHARSET_TO_UNICODE_TABLE (XCHARSET (cs)) | |
| 310 #define XCHARSET_FROM_UNICODE_TABLE(cs) \ | |
| 311 CHARSET_FROM_UNICODE_TABLE (XCHARSET (cs)) | |
| 312 #define XCHARSET_FROM_UNICODE_LEVELS(cs) \ | |
| 313 CHARSET_FROM_UNICODE_LEVELS (XCHARSET (cs)) | |
| 314 | |
| 315 struct charset_lookup | |
| 316 { | |
| 317 /* Table of charsets indexed by leading byte. */ | |
| 318 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES]; | |
| 319 | |
| 320 /* Table of charsets indexed by type/final-byte/direction. */ | |
| 321 Lisp_Object charset_by_attributes[4][128][2]; | |
| 867 | 322 Ibyte next_allocated_1_byte_leading_byte; |
| 323 Ibyte next_allocated_2_byte_leading_byte; | |
| 771 | 324 }; |
| 325 | |
| 1111 | 326 extern struct charset_lookup *chlook; |
| 327 | |
| 826 | 328 DECLARE_INLINE_HEADER ( |
| 329 Lisp_Object | |
| 330 charset_by_leading_byte (int lb) | |
| 331 ) | |
| 771 | 332 { |
| 800 | 333 #ifdef ERROR_CHECK_TEXT |
| 771 | 334 /* When error-checking is on, x86 GCC 2.95.2 -O3 miscompiles the |
| 335 following unless we introduce `tem'. */ | |
| 336 int tem = lb; | |
| 800 | 337 text_checking_assert (tem >= MIN_LEADING_BYTE && tem <= 0xFF); |
| 771 | 338 #endif |
| 339 return chlook->charset_by_leading_byte[lb - MIN_LEADING_BYTE]; | |
| 340 } | |
| 341 | |
| 826 | 342 DECLARE_INLINE_HEADER ( |
| 343 Lisp_Object | |
| 344 charset_by_attributes (int type, int final, int dir) | |
| 345 ) | |
| 771 | 346 { |
| 347 type_checking_assert (type < countof (chlook->charset_by_attributes) && | |
| 348 final < countof (chlook->charset_by_attributes[0]) && | |
| 349 dir < countof (chlook->charset_by_attributes[0][0])); | |
| 350 return chlook->charset_by_attributes[type][final][dir]; | |
| 351 } | |
| 352 | |
| 353 | |
| 354 /************************************************************************/ | |
| 355 /* Dealing with characters */ | |
| 356 /************************************************************************/ | |
| 357 | |
| 358 /* The bit fields of character are divided into 3 parts: | |
| 3496 | 359 FIELD1(7bits):FIELD2(7bits):FIELD3(7bits) */ |
| 771 | 360 |
| 3496 | 361 #define ICHAR_FIELD1_MASK (0x7F << 14) |
| 867 | 362 #define ICHAR_FIELD2_MASK (0x7F << 7) |
| 363 #define ICHAR_FIELD3_MASK 0x7F | |
| 771 | 364 |
| 365 /* Macros to access each field of a character code of C. */ | |
| 366 | |
| 867 | 367 #define ichar_field1(c) (((c) & ICHAR_FIELD1_MASK) >> 14) |
| 368 #define ichar_field2(c) (((c) & ICHAR_FIELD2_MASK) >> 7) | |
| 369 #define ichar_field3(c) ((c) & ICHAR_FIELD3_MASK) | |
| 771 | 370 |
| 371 /* Field 1, if non-zero, usually holds a leading byte for a | |
| 372 dimension-2 charset. Field 2, if non-zero, usually holds a leading | |
| 373 byte for a dimension-1 charset. */ | |
| 374 | |
| 375 /* Converting between field values and leading bytes. */ | |
| 376 | |
| 377 #define FIELD2_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_1 - 1) | |
| 378 #define FIELD2_TO_PRIVATE_LEADING_BYTE 0x80 | |
| 379 | |
| 380 #define FIELD1_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_2 - 1) | |
| 3496 | 381 #define FIELD1_TO_PRIVATE_LEADING_BYTE 0x80 |
| 771 | 382 |
| 383 /* Minimum and maximum allowed values for the fields. */ | |
| 384 | |
| 867 | 385 #define MIN_ICHAR_FIELD2_OFFICIAL \ |
| 771 | 386 (MIN_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE) |
| 867 | 387 #define MAX_ICHAR_FIELD2_OFFICIAL \ |
| 771 | 388 (MAX_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE) |
| 389 | |
| 867 | 390 #define MIN_ICHAR_FIELD1_OFFICIAL \ |
| 771 | 391 (MIN_LEADING_BYTE_OFFICIAL_2 - FIELD1_TO_OFFICIAL_LEADING_BYTE) |
| 867 | 392 #define MAX_ICHAR_FIELD1_OFFICIAL \ |
| 771 | 393 (MAX_LEADING_BYTE_OFFICIAL_2 - FIELD1_TO_OFFICIAL_LEADING_BYTE) |
| 394 | |
| 867 | 395 #define MIN_ICHAR_FIELD2_PRIVATE \ |
| 771 | 396 (MIN_LEADING_BYTE_PRIVATE_1 - FIELD2_TO_PRIVATE_LEADING_BYTE) |
| 867 | 397 #define MAX_ICHAR_FIELD2_PRIVATE \ |
| 771 | 398 (MAX_LEADING_BYTE_PRIVATE_1 - FIELD2_TO_PRIVATE_LEADING_BYTE) |
| 399 | |
| 867 | 400 #define MIN_ICHAR_FIELD1_PRIVATE \ |
| 771 | 401 (MIN_LEADING_BYTE_PRIVATE_2 - FIELD1_TO_PRIVATE_LEADING_BYTE) |
| 867 | 402 #define MAX_ICHAR_FIELD1_PRIVATE \ |
| 771 | 403 (MAX_LEADING_BYTE_PRIVATE_2 - FIELD1_TO_PRIVATE_LEADING_BYTE) |
| 404 | |
| 405 /* Minimum character code of each <type> character. */ | |
| 406 | |
| 867 | 407 #define MIN_CHAR_OFFICIAL_TYPE9N (MIN_ICHAR_FIELD2_OFFICIAL << 7) |
| 408 #define MIN_CHAR_PRIVATE_TYPE9N (MIN_ICHAR_FIELD2_PRIVATE << 7) | |
| 409 #define MIN_CHAR_OFFICIAL_TYPE9NX9N (MIN_ICHAR_FIELD1_OFFICIAL << 14) | |
| 410 #define MIN_CHAR_PRIVATE_TYPE9NX9N (MIN_ICHAR_FIELD1_PRIVATE << 14) | |
| 3496 | 411 #define MIN_CHAR_COMPOSITION (0x7F << 14) |
| 771 | 412 |
| 413 /* Leading byte of a character. | |
| 414 | |
| 415 NOTE: This takes advantage of the fact that | |
| 416 FIELD2_TO_OFFICIAL_LEADING_BYTE and | |
| 417 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. | |
| 418 */ | |
| 419 | |
| 826 | 420 DECLARE_INLINE_HEADER ( |
| 867 | 421 Ibyte |
| 422 ichar_leading_byte (Ichar c) | |
| 826 | 423 ) |
| 771 | 424 { |
| 867 | 425 if (ichar_ascii_p (c)) |
| 771 | 426 return LEADING_BYTE_ASCII; |
| 427 else if (c < 0xA0) | |
| 428 return LEADING_BYTE_CONTROL_1; | |
| 429 else if (c < MIN_CHAR_OFFICIAL_TYPE9NX9N) | |
| 867 | 430 return ichar_field2 (c) + FIELD2_TO_OFFICIAL_LEADING_BYTE; |
| 771 | 431 else if (c < MIN_CHAR_PRIVATE_TYPE9NX9N) |
| 867 | 432 return ichar_field1 (c) + FIELD1_TO_OFFICIAL_LEADING_BYTE; |
| 771 | 433 else if (c < MIN_CHAR_COMPOSITION) |
| 867 | 434 return ichar_field1 (c) + FIELD1_TO_PRIVATE_LEADING_BYTE; |
| 771 | 435 else |
| 436 { | |
| 437 #ifdef ENABLE_COMPOSITE_CHARS | |
| 438 return LEADING_BYTE_COMPOSITE; | |
| 439 #else | |
| 2500 | 440 ABORT(); |
| 771 | 441 return 0; |
| 442 #endif /* ENABLE_COMPOSITE_CHARS */ | |
| 443 } | |
| 444 } | |
| 445 | |
| 826 | 446 DECLARE_INLINE_HEADER ( |
| 447 Bytecount | |
| 867 | 448 ichar_len (Ichar c) |
| 826 | 449 ) |
| 450 { | |
| 867 | 451 if (ichar_ascii_p (c)) |
| 826 | 452 return 1; |
| 453 else if (c < MIN_CHAR_OFFICIAL_TYPE9NX9N) | |
| 454 return 2; | |
| 455 else if (c < MIN_CHAR_PRIVATE_TYPE9NX9N) | |
| 456 return 3; /* dimension-2 official or dimension-1 private */ | |
| 457 else if (c < MIN_CHAR_COMPOSITION) | |
| 458 return 4; | |
| 459 else | |
| 460 { | |
| 461 #ifdef ENABLE_COMPOSITE_CHARS | |
| 462 #error Not yet implemented | |
| 463 #else | |
| 2500 | 464 ABORT(); |
| 826 | 465 return 0; |
| 466 #endif /* ENABLE_COMPOSITE_CHARS */ | |
| 467 } | |
| 468 } | |
| 469 | |
| 470 DECLARE_INLINE_HEADER ( | |
| 471 Bytecount | |
| 867 | 472 ichar_len_fmt (Ichar c, Internal_Format fmt) |
| 826 | 473 ) |
| 474 { | |
| 475 switch (fmt) | |
| 476 { | |
| 477 case FORMAT_DEFAULT: | |
| 867 | 478 return ichar_len (c); |
| 826 | 479 case FORMAT_16_BIT_FIXED: |
| 480 return 2; | |
| 481 case FORMAT_32_BIT_FIXED: | |
| 482 return 4; | |
| 483 default: | |
| 484 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
| 485 return 1; | |
| 486 } | |
| 487 } | |
| 488 | |
| 867 | 489 #define ichar_charset(c) charset_by_leading_byte (ichar_leading_byte (c)) |
| 771 | 490 |
| 491 /* Return a character whose charset is CHARSET and position-codes are C1 | |
| 492 and C2. TYPE9N character ignores C2. (For typical charsets, i.e. not | |
| 493 ASCII, Control-1 or Composite, C1 and C2 will be in the range of 32 to | |
| 494 127 or 33 to 126. See `make-char'.) | |
| 495 | |
| 496 NOTE: This takes advantage of the fact that | |
| 497 FIELD2_TO_OFFICIAL_LEADING_BYTE and | |
| 498 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. | |
| 499 */ | |
| 500 | |
| 826 | 501 DECLARE_INLINE_HEADER ( |
| 867 | 502 Ichar |
| 503 make_ichar (Lisp_Object charset, int c1, int c2) | |
| 826 | 504 ) |
| 771 | 505 { |
| 867 | 506 Ichar retval; |
| 771 | 507 if (EQ (charset, Vcharset_ascii)) |
| 826 | 508 retval = c1; |
| 771 | 509 else if (EQ (charset, Vcharset_control_1)) |
| 826 | 510 retval = c1 | 0x80; |
| 771 | 511 #ifdef ENABLE_COMPOSITE_CHARS |
| 512 else if (EQ (charset, Vcharset_composite)) | |
| 826 | 513 retval = (0x1F << 14) | ((c1) << 7) | (c2); |
| 771 | 514 #endif |
| 515 else if (XCHARSET_DIMENSION (charset) == 1) | |
| 826 | 516 retval = ((XCHARSET_LEADING_BYTE (charset) - |
| 517 FIELD2_TO_OFFICIAL_LEADING_BYTE) << 7) | (c1); | |
| 771 | 518 else if (!XCHARSET_PRIVATE_P (charset)) |
| 826 | 519 retval = ((XCHARSET_LEADING_BYTE (charset) - |
| 520 FIELD1_TO_OFFICIAL_LEADING_BYTE) << 14) | ((c1) << 7) | (c2); | |
| 771 | 521 else |
| 826 | 522 retval = ((XCHARSET_LEADING_BYTE (charset) - |
| 523 FIELD1_TO_PRIVATE_LEADING_BYTE) << 14) | ((c1) << 7) | (c2); | |
| 867 | 524 text_checking_assert (valid_ichar_p (retval)); |
| 826 | 525 return retval; |
| 771 | 526 } |
| 527 | |
| 867 | 528 /* BREAKUP_ICHAR_1_UNSAFE assumes that the charset has already been |
| 771 | 529 calculated, and just computes c1 and c2. |
| 530 | |
| 867 | 531 BREAKUP_ICHAR also computes and stores the charset. */ |
| 771 | 532 |
/* Store the position codes of character C into C1 and C2, given that
   CHARSET is already known to be C's charset.  For a dimension-1
   charset C1 receives field 3 and C2 is set to 0; otherwise C1 receives
   field 2 and C2 field 3.  "Unsafe" because C and CHARSET are not
   checked in any way.

   The whole expansion is parenthesized so that the conditional
   expression cannot be re-associated with operators surrounding the
   macro call. */
#define BREAKUP_ICHAR_1_UNSAFE(c, charset, c1, c2)      \
  (XCHARSET_DIMENSION (charset) == 1                    \
   ? ((c1) = ichar_field3 (c), (c2) = 0)                \
   : ((c1) = ichar_field2 (c),                          \
      (c2) = ichar_field3 (c)))
| 771 | 538 |
| 826 | 539 DECLARE_INLINE_HEADER ( |
| 540 void | |
| 867 | 541 breakup_ichar_1 (Ichar c, Lisp_Object *charset, int *c1, int *c2) |
| 826 | 542 ) |
| 771 | 543 { |
| 867 | 544 text_checking_assert (valid_ichar_p (c)); |
| 545 *charset = ichar_charset (c); | |
| 546 BREAKUP_ICHAR_1_UNSAFE (c, *charset, *c1, *c2); | |
| 771 | 547 } |
| 548 | |
| 867 | 549 /* BREAKUP_ICHAR separates an Ichar into its components. The charset of |
| 771 | 550 character C is set to CHARSET, and the position-codes of C are set to C1 |
| 551 and C2. C2 of TYPE9N character is 0. */ | |
| 552 | |
| 867 | 553 #define BREAKUP_ICHAR(c, charset, c1, c2) \ |
| 554 breakup_ichar_1 (c, &(charset), &(c1), &(c2)) | |
| 771 | 555 |
|
5200
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
556 /* Forward compatibility from ben-unicode-internal: Convert a charset |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
557 codepoint into a character in the internal string representation. |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
558 Return number of bytes written out. FAIL controls failure mode when |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
559 charset conversion to Unicode is not possible (unused as of yet). */ |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
560 DECLARE_INLINE_HEADER ( |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
561 Bytecount |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
562 charset_codepoint_to_itext (Lisp_Object charset, int c1, int c2, Ibyte *ptr, |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
563 enum converr UNUSED (fail)) |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
564 ) |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
565 { |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
566 Ichar ch; |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
567 |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
568 if (EQ (charset, Vcharset_ascii)) |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
569 { |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
570 ptr[0] = (Ibyte) c2; |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
571 return 1; |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
572 } |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
573 |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
574 ch = make_ichar (charset, c1, c2); |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
575 |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
576 /* We can't rely on the converted character being non-ASCII. For |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
577 example, JISX0208 codepoint (33, 64) == Unicode 0x5C (ASCII |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
578 backslash). */ |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
579 return set_itext_ichar (ptr, ch); |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
580 } |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
581 |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
582 /* Forward compatibility from ben-unicode-internal */ |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
583 |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
584 DECLARE_INLINE_HEADER ( |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
585 void |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
586 buffer_itext_to_charset_codepoint (const Ibyte *ptr, |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
587 struct buffer *UNUSED (buf), |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
588 Lisp_Object *charset, int *c1, int *c2, |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
589 enum converr UNUSED (fail)) |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
590 ) |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
591 { |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
592 Ichar ch = itext_ichar (ptr); |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
593 breakup_ichar_1 (ch, charset, c1, c2); |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
594 } |
|
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
595 |
| 788 | 596 void get_charset_limits (Lisp_Object charset, int *low, int *high); |
| 867 | 597 int ichar_to_unicode (Ichar chr); |
| 788 | 598 |
| 3439 | 599 EXFUN (Fcharset_name, 1); |
| 600 | |
| 771 | 601 #endif /* MULE */ |
| 602 | |
| 3439 | 603 /* ISO 10646 UTF-16, UCS-4, UTF-8, UTF-7, etc. */ |
| 604 | |
| 605 enum unicode_type | |
| 606 { | |
| 607 UNICODE_UTF_16, | |
| 608 UNICODE_UTF_8, | |
| 609 UNICODE_UTF_7, | |
| 4096 | 610 UNICODE_UCS_4, |
| 611 UNICODE_UTF_32 | |
| 3439 | 612 }; |
| 613 | |
| 614 void encode_unicode_char (Lisp_Object USED_IF_MULE (charset), int h, | |
| 615 int USED_IF_MULE (l), unsigned_char_dynarr *dst, | |
| 4096 | 616 enum unicode_type type, unsigned int little_endian, |
| 617 int write_error_characters_as_such); | |
| 618 | |
| 619 #define UNICODE_ERROR_OCTET_RANGE_START 0x200000 | |
| 620 | |
| 621 #define valid_utf_16_first_surrogate(ch) (((ch) & 0xFC00) == 0xD800) | |
| 622 #define valid_utf_16_last_surrogate(ch) (((ch) & 0xFC00) == 0xDC00) | |
| 623 #define valid_utf_16_surrogate(ch) (((ch) & 0xF800) == 0xD800) | |
| 3439 | 624 |
| 3676 | 625 void set_charset_registries(Lisp_Object charset, Lisp_Object registries); |
| 626 | |
| 3439 | 627 EXFUN (Funicode_to_char, 2); |
| 628 EXFUN (Fchar_to_unicode, 1); | |
| 629 | |
| 771 | 630 #endif /* INCLUDED_charset_h_ */ |
