Mercurial > hg > xemacs-beta
annotate src/charset.h @ 5790:dcf9067f26bb
Add font-lock-regexp-grouping-{backslash, construct} from GNU Emacs.
2014-01-27 Michael Sperber <mike@xemacs.org>
* font-lock.el (font-lock-regexp-grouping-backslash,
font-lock-regexp-grouping-construct): Add these, as in GNU Emacs.
author | Mike Sperber <sperber@deinprogramm.de> |
---|---|
date | Mon, 27 Jan 2014 17:52:33 +0100 |
parents | 308d34e9f07d |
children |
rev | line source |
---|---|
771 | 1 /* Header for charsets. |
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
5200
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
4 Copyright (C) 2001, 2002, 2010 Ben Wing. |
771 | 5 |
6 This file is part of XEmacs. | |
7 | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5200
diff
changeset
|
8 XEmacs is free software: you can redistribute it and/or modify it |
771 | 9 under the terms of the GNU General Public License as published by the |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5200
diff
changeset
|
10 Free Software Foundation, either version 3 of the License, or (at your |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5200
diff
changeset
|
11 option) any later version. |
771 | 12 |
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5200
diff
changeset
|
19 along with XEmacs. If not, see <http://www.gnu.org/licenses/>. */ |
771 | 20 |
21 /* Synched up with: Mule 2.3. Not in FSF. */ | |
22 | |
23 /* Rewritten by Ben Wing <ben@xemacs.org>. */ | |
24 | |
25 #ifndef INCLUDED_charset_h_ | |
26 #define INCLUDED_charset_h_ | |
27 | |
28 | |
29 | |
30 #ifndef MULE | |
31 | |
32 /************************************************************************/ | |
33 /* fake charset defs */ | |
34 /************************************************************************/ | |
35 | |
36 /* used when MULE is not defined, so that Charset-type stuff can still | |
37 be done */ | |
38 | |
/* ASCII is the only charset in a non-MULE build; Qnil stands for it. */
#define Vcharset_ascii Qnil

/* Exactly one leading byte exists, 0x80. */
#define LEADING_BYTE_ASCII 0x80
#define MIN_LEADING_BYTE 0x80
#define NUM_LEADING_BYTES 1

/* Per-character queries.  The arguments are ignored (and therefore never
   evaluated); every character is a one-byte ASCII character. */
#define ichar_charset(ch) Vcharset_ascii
#define ichar_leading_byte(ch) LEADING_BYTE_ASCII
#define ichar_len(ch) 1
#define ichar_len_fmt(ch, fmt) 1

/* Per-charset queries.  The arguments are likewise ignored.  The
   replacement list of XCHARSET_GRAPHIC is parenthesized so that the
   negative constant stays a single operand wherever it is expanded. */
#define CHARSETP(cs) 1
#define charset_by_leading_byte(lb) Vcharset_ascii
#define XCHARSET_LEADING_BYTE(cs) LEADING_BYTE_ASCII
#define XCHARSET_GRAPHIC(cs) (-1)
#define XCHARSET_COLUMNS(cs) 1
#define XCHARSET_DIMENSION(cs) 1
#define XCHARSET_CCL_PROGRAM(cs) Qnil
#define XCHARSET_NAME(cs) Qascii

/* Split CH into charset and position codes: the charset is always
   Vcharset_ascii, BYTE1 receives CH itself and BYTE2 is set to 0. */
#define BREAKUP_ICHAR(ch, charset, byte1, byte2) do {	\
  (charset) = Vcharset_ascii;				\
  (byte1) = (ch);					\
  (byte2) = 0;						\
} while (0)

/* Fget_charset yields its argument unchanged; Fcharset_list yields a
   one-element list holding Vcharset_ascii. */
#define Fget_charset(cs) (cs)
#define Fcharset_list() list1 (Vcharset_ascii)
771 | 63 |
64 #else /* MULE */ | |
65 | |
66 | |
67 /************************************************************************/ | |
68 /* Definition of leading bytes */ | |
69 /************************************************************************/ | |
70 | |
/* Smallest leading byte; tables indexed by leading byte are offset by it. */
#define MIN_LEADING_BYTE 0x7F

/** Leading bytes for 1-byte characters in an official charset. **/
enum LEADING_BYTE_OFFICIAL_1
{
  MIN_LEADING_BYTE_OFFICIAL_1 = 0x80,
  /* LEADING_BYTE_LATIN_ISO8859_1 *MUST* be equal to
     MIN_LEADING_BYTE_OFFICIAL_1. */
  LEADING_BYTE_LATIN_ISO8859_1 = MIN_LEADING_BYTE_OFFICIAL_1,
					  /* Right half of ISO 8859-1 */
  LEADING_BYTE_LATIN_ISO8859_2 = 0x81,	  /* Right half of ISO 8859-2 */
  LEADING_BYTE_LATIN_ISO8859_3 = 0x82,	  /* Right half of ISO 8859-3 */
  LEADING_BYTE_LATIN_ISO8859_4 = 0x83,	  /* Right half of ISO 8859-4 */
  LEADING_BYTE_THAI_TIS620 = 0x84,	  /* TIS620-2533 */
  LEADING_BYTE_GREEK_ISO8859_7 = 0x85,	  /* Right half of ISO 8859-7 */
  LEADING_BYTE_ARABIC_ISO8859_6 = 0x86,	  /* Right half of ISO 8859-6 */
  LEADING_BYTE_HEBREW_ISO8859_8 = 0x87,	  /* Right half of ISO 8859-8 */
  LEADING_BYTE_KATAKANA_JISX0201 = 0x88,  /* Right half of JIS X0201-1976 */
  LEADING_BYTE_LATIN_JISX0201 = 0x89,	  /* Left half of JIS X0201-1976 */
  LEADING_BYTE_CYRILLIC_ISO8859_5 = 0x8A, /* Right half of ISO 8859-5 */
  LEADING_BYTE_LATIN_ISO8859_9 = 0x8B,	  /* Right half of ISO 8859-9 */
  LEADING_BYTE_LATIN_ISO8859_15 = 0x8C,	  /* Right half of ISO 8859-15 */
#ifdef ENABLE_COMPOSITE_CHARS
  LEADING_BYTE_COMPOSITE = 0x8D,	  /* For a composite character */
  /* The composite leading byte is excluded from the official range. */
  MAX_LEADING_BYTE_OFFICIAL_1 = LEADING_BYTE_COMPOSITE - 1,
#else
  /* Does not need to be the last entry, but simplifies things */
  LEADING_BYTE_COMPOSITE_REPLACEMENT = 0x8D, /* Replaces ESC 0 - ESC 4 in a
						buffer */
  MAX_LEADING_BYTE_OFFICIAL_1 = LEADING_BYTE_COMPOSITE_REPLACEMENT,
#endif
  /* 0x8E is unused */
};
106 | |
/* The two leading bytes below, together with LEADING_BYTE_COMPOSITE, need
   special treatment in a string and/or character. */

/* Not used except in arrays indexed by leading byte. */
#define LEADING_BYTE_ASCII 0x7F
/* Represents the normal control characters 0x80 - 0x9F. */
#define LEADING_BYTE_CONTROL_1 0x8F
113 | |
114 /** The following are for 2-byte characters in an official charset. **/ | |
enum LEADING_BYTE_OFFICIAL_2
{
  MIN_LEADING_BYTE_OFFICIAL_2 = 0x90,
  LEADING_BYTE_JAPANESE_JISX0208_1978 = MIN_LEADING_BYTE_OFFICIAL_2,
					    /* Japanese JIS X0208-1978 */
  LEADING_BYTE_CHINESE_GB2312 = 0x91,	    /* Chinese Hanzi GB2312-1980 */
  LEADING_BYTE_JAPANESE_JISX0208 = 0x92,    /* Japanese JIS X0208-1983 */
  LEADING_BYTE_KOREAN_KSC5601 = 0x93,	    /* Hangul KS C5601-1987 */
  LEADING_BYTE_JAPANESE_JISX0212 = 0x94,    /* Japanese JIS X0212-1990 */
  LEADING_BYTE_CHINESE_CNS11643_1 = 0x95,   /* Chinese CNS11643 Set 1 */
  LEADING_BYTE_CHINESE_CNS11643_2 = 0x96,   /* Chinese CNS11643 Set 2 */
  LEADING_BYTE_CHINESE_BIG5_1 = 0x97,	    /* Big5 Level 1 */
  LEADING_BYTE_CHINESE_BIG5_2 = 0x98,	    /* Big5 Level 2 */
  MAX_LEADING_BYTE_OFFICIAL_2 = LEADING_BYTE_CHINESE_BIG5_2,

  /* 0x99 through 0x9D are unused */
};
137 | |
138 | |
139 /** The following are for 1- and 2-byte characters in a private charset. **/ | |
140 | |
/* Prefix bytes that introduce a private leading byte. */
#define PRE_LEADING_BYTE_PRIVATE_1 0x9E	/* 1-byte char-set */
#define PRE_LEADING_BYTE_PRIVATE_2 0x9F	/* 2-byte char-set */

/* Ranges of private leading bytes: 1-byte charsets, then 2-byte charsets. */
#define MIN_LEADING_BYTE_PRIVATE_1 0xA0
#define MAX_LEADING_BYTE_PRIVATE_1 0xC0
#define MIN_LEADING_BYTE_PRIVATE_2 0xC1
#define MAX_LEADING_BYTE_PRIVATE_2 0xFF

/* Number of leading-byte values from 0x7F through 0xFF inclusive. */
#define NUM_LEADING_BYTES 129
150 | |
151 | |
152 /************************************************************************/ | |
153 /* Operations on leading bytes */ | |
154 /************************************************************************/ | |
155 | |
156 /* Is this leading byte for a private charset? */ | |
157 | |
/* Non-zero when LB is at or above MIN_LEADING_BYTE_PRIVATE_1, i.e. in the
   private range.  LB is evaluated exactly once. */
#define leading_byte_private_p(lb) ((lb) >= MIN_LEADING_BYTE_PRIVATE_1)
771 | 159 |
160 /* Is this a prefix for a private leading byte? */ | |
161 | |
826 | 162 DECLARE_INLINE_HEADER ( |
163 int | |
867 | 164 leading_byte_prefix_p (Ibyte lb) |
826 | 165 ) |
771 | 166 { |
167 return (lb == PRE_LEADING_BYTE_PRIVATE_1 || | |
168 lb == PRE_LEADING_BYTE_PRIVATE_2); | |
169 } | |
170 | |
171 /* Given a private leading byte, return the leading byte prefix stored | |
172 in a string. */ | |
173 | |
/* LB is assumed to be a private leading byte (the macro does not check).
   Values at or above MIN_LEADING_BYTE_PRIVATE_2 map to the 2-byte prefix,
   everything below to the 1-byte prefix. */
#define private_leading_byte_prefix(lb)				\
  ((unsigned int) (lb) >= MIN_LEADING_BYTE_PRIVATE_2 ?		\
   PRE_LEADING_BYTE_PRIVATE_2 :					\
   PRE_LEADING_BYTE_PRIVATE_1)
178 | |
179 | |
180 /************************************************************************/ | |
181 /* Information about a particular character set */ | |
182 /************************************************************************/ | |
183 | |
184 struct Lisp_Charset | |
185 { | |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5125
diff
changeset
|
186 NORMAL_LISP_OBJECT_HEADER header; |
771 | 187 |
188 int id; | |
189 Lisp_Object name; | |
190 Lisp_Object doc_string; | |
3659 | 191 Lisp_Object registries; |
771 | 192 Lisp_Object short_name; |
193 Lisp_Object long_name; | |
194 | |
195 Lisp_Object reverse_direction_charset; | |
196 | |
197 Lisp_Object ccl_program; | |
198 | |
2367 | 199 /* Unicode translation tables. See unicode.c for the format of these |
200 tables, and discussion of how they are initialized. | |
201 */ | |
771 | 202 void *to_unicode_table; |
203 void *from_unicode_table; | |
204 int from_unicode_levels; | |
205 | |
2367 | 206 /* Final byte of this character set in ISO2022 designating escape |
207 sequence */ | |
867 | 208 Ibyte final; |
771 | 209 |
210 /* Number of bytes (1 - 4) required in the internal representation | |
211 for characters in this character set. This is *not* the | |
212 same as the dimension of the character set). */ | |
213 int rep_bytes; | |
214 | |
215 /* Number of columns a character in this charset takes up, on TTY | |
216 devices. Not used for X devices. */ | |
217 int columns; | |
218 | |
219 /* Direction of this character set */ | |
220 int direction; | |
221 | |
222 /* Type of this character set (94, 96, 94x94, 96x96) */ | |
223 int type; | |
224 | |
225 /* Number of bytes used in encoding of this character set (1 or 2) */ | |
226 int dimension; | |
227 | |
228 /* Number of chars in each dimension (usually 94 or 96) */ | |
229 int chars; | |
230 | |
231 /* Which half of font to be used to display this character set */ | |
232 int graphic; | |
233 | |
3439 | 234 /* If set, this charset should be written out in ISO-2022-based coding |
235 systems using the escape sequence for UTF-8, not using our internal | |
236 representation and the associated real ISO 2022 designation. */ | |
237 unsigned int encode_as_utf_8 :1; | |
238 | |
771 | 239 /* If set, this is a "temporary" charset created when we encounter |
240 an unknown final. This is so that we can successfully compile | |
241 and load such files. We allow a real charset to be created on top | |
242 of this temporary charset. */ | |
243 unsigned int temporary :1; | |
244 }; | |
245 typedef struct Lisp_Charset Lisp_Charset; | |
246 | |
/* Lisp-object boilerplate for charsets.  Each macro forwards to the
   generic record macro of the same role, supplying the `charset' type
   tag (and, where needed, the Lisp_Charset C type). */
DECLARE_LISP_OBJECT (charset, Lisp_Charset);
#define XCHARSET(x) XRECORD (x, charset, Lisp_Charset)
#define wrap_charset(p) wrap_record (p, charset)
#define CHARSETP(x) RECORDP (x, charset)
#define CHECK_CHARSET(x) CHECK_RECORD (x, charset)
#define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset)
253 | |
/* Values for the `type' field of a charset. */
#define CHARSET_TYPE_94    0	/* 94 characters */
#define CHARSET_TYPE_96    1	/* 96 characters */
#define CHARSET_TYPE_94X94 2	/* 94x94 characters */
#define CHARSET_TYPE_96X96 3	/* 96x96 characters */

/* Values for the `direction' field of a charset. */
#define CHARSET_LEFT_TO_RIGHT 0
#define CHARSET_RIGHT_TO_LEFT 1
261 | |
/* Field accessors taking a `Lisp_Charset *'.  Each expands to an lvalue
   naming the corresponding struct member, except CHARSET_LEADING_BYTE
   and CHARSET_PRIVATE_P, which compute a value from the id. */

/* Leading byte and id have been regrouped. -- OG */
#define CHARSET_ID(cs)		 ((cs)->id)
#define CHARSET_LEADING_BYTE(cs) ((Ibyte) CHARSET_ID (cs))
#define CHARSET_NAME(cs)	 ((cs)->name)
#define CHARSET_SHORT_NAME(cs)	 ((cs)->short_name)
#define CHARSET_LONG_NAME(cs)	 ((cs)->long_name)
#define CHARSET_REP_BYTES(cs)	 ((cs)->rep_bytes)
#define CHARSET_COLUMNS(cs)	 ((cs)->columns)
#define CHARSET_GRAPHIC(cs)	 ((cs)->graphic)
#define CHARSET_ENCODE_AS_UTF_8(cs) ((cs)->encode_as_utf_8)
#define CHARSET_TYPE(cs)	 ((cs)->type)
#define CHARSET_DIRECTION(cs)	 ((cs)->direction)
#define CHARSET_FINAL(cs)	 ((cs)->final)
#define CHARSET_DOC_STRING(cs)	 ((cs)->doc_string)
#define CHARSET_REGISTRIES(cs)	 ((cs)->registries)
#define CHARSET_CCL_PROGRAM(cs)	 ((cs)->ccl_program)
#define CHARSET_DIMENSION(cs)	 ((cs)->dimension)
#define CHARSET_CHARS(cs)	 ((cs)->chars)
#define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset)
#define CHARSET_TO_UNICODE_TABLE(cs) ((cs)->to_unicode_table)
#define CHARSET_FROM_UNICODE_TABLE(cs) ((cs)->from_unicode_table)
#define CHARSET_FROM_UNICODE_LEVELS(cs) ((cs)->from_unicode_levels)

/* Non-zero when the charset's leading byte lies in the private range. */
#define CHARSET_PRIVATE_P(cs) leading_byte_private_p (CHARSET_LEADING_BYTE (cs))
771 | 286 |
/* The same accessors for a charset held in a Lisp_Object: each one
   applies XCHARSET to its argument and then the matching CHARSET_*
   macro. */
#define XCHARSET_ID(cs)		  CHARSET_ID           (XCHARSET (cs))
#define XCHARSET_NAME(cs)	  CHARSET_NAME         (XCHARSET (cs))
#define XCHARSET_SHORT_NAME(cs)	  CHARSET_SHORT_NAME   (XCHARSET (cs))
#define XCHARSET_LONG_NAME(cs)	  CHARSET_LONG_NAME    (XCHARSET (cs))
#define XCHARSET_REP_BYTES(cs)	  CHARSET_REP_BYTES    (XCHARSET (cs))
#define XCHARSET_COLUMNS(cs)	  CHARSET_COLUMNS      (XCHARSET (cs))
#define XCHARSET_GRAPHIC(cs)      CHARSET_GRAPHIC      (XCHARSET (cs))
#define XCHARSET_ENCODE_AS_UTF_8(cs) CHARSET_ENCODE_AS_UTF_8 (XCHARSET (cs))
#define XCHARSET_TYPE(cs)	  CHARSET_TYPE         (XCHARSET (cs))
#define XCHARSET_DIRECTION(cs)	  CHARSET_DIRECTION    (XCHARSET (cs))
#define XCHARSET_FINAL(cs)	  CHARSET_FINAL        (XCHARSET (cs))
#define XCHARSET_DOC_STRING(cs)	  CHARSET_DOC_STRING   (XCHARSET (cs))
#define XCHARSET_REGISTRIES(cs)	  CHARSET_REGISTRIES   (XCHARSET (cs))
#define XCHARSET_LEADING_BYTE(cs) CHARSET_LEADING_BYTE (XCHARSET (cs))
#define XCHARSET_CCL_PROGRAM(cs)  CHARSET_CCL_PROGRAM  (XCHARSET (cs))
#define XCHARSET_DIMENSION(cs)	  CHARSET_DIMENSION    (XCHARSET (cs))
#define XCHARSET_CHARS(cs)	  CHARSET_CHARS        (XCHARSET (cs))

#define XCHARSET_PRIVATE_P(cs)	  CHARSET_PRIVATE_P    (XCHARSET (cs))
#define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \
  CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs))
#define XCHARSET_TO_UNICODE_TABLE(cs) \
  CHARSET_TO_UNICODE_TABLE (XCHARSET (cs))
#define XCHARSET_FROM_UNICODE_TABLE(cs) \
  CHARSET_FROM_UNICODE_TABLE (XCHARSET (cs))
#define XCHARSET_FROM_UNICODE_LEVELS(cs) \
  CHARSET_FROM_UNICODE_LEVELS (XCHARSET (cs))
314 | |
/* Lookup tables used to find a charset from its leading byte or from its
   ISO 2022 attributes. */
struct charset_lookup
{
  /* Table of charsets indexed by leading byte (offset by
     MIN_LEADING_BYTE; see charset_by_leading_byte()). */
  Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];

  /* Table of charsets indexed by type/final-byte/direction. */
  Lisp_Object charset_by_attributes[4][128][2];
  /* NOTE(review): presumably the next unassigned private leading byte
     for 1-byte and 2-byte charsets respectively -- confirm against the
     allocation code. */
  Ibyte next_allocated_1_byte_leading_byte;
  Ibyte next_allocated_2_byte_leading_byte;
};

/* The lookup tables in use; defined outside this header. */
extern struct charset_lookup *chlook;
327 | |
826 | 328 DECLARE_INLINE_HEADER ( |
329 Lisp_Object | |
330 charset_by_leading_byte (int lb) | |
331 ) | |
771 | 332 { |
800 | 333 #ifdef ERROR_CHECK_TEXT |
771 | 334 /* When error-checking is on, x86 GCC 2.95.2 -O3 miscompiles the |
335 following unless we introduce `tem'. */ | |
336 int tem = lb; | |
800 | 337 text_checking_assert (tem >= MIN_LEADING_BYTE && tem <= 0xFF); |
771 | 338 #endif |
339 return chlook->charset_by_leading_byte[lb - MIN_LEADING_BYTE]; | |
340 } | |
341 | |
/* Return the entry of chlook->charset_by_attributes selected by TYPE
   (first index), FINAL (second index) and DIR (third index).  The
   indices are checked against the table bounds only through
   type_checking_assert. */
DECLARE_INLINE_HEADER (
Lisp_Object
charset_by_attributes (int type, int final, int dir)
)
{
  type_checking_assert (type < countof (chlook->charset_by_attributes) &&
			final < countof (chlook->charset_by_attributes[0]) &&
			dir < countof (chlook->charset_by_attributes[0][0]));
  return chlook->charset_by_attributes[type][final][dir];
}
352 | |
353 | |
354 /************************************************************************/ | |
355 /* Dealing with characters */ | |
356 /************************************************************************/ | |
357 | |
/* A character code is split into three 7-bit fields:
   FIELD1 (bits 14-20) : FIELD2 (bits 7-13) : FIELD3 (bits 0-6) */

#define ICHAR_FIELD1_MASK (0x7F << 14)
#define ICHAR_FIELD2_MASK (0x7F << 7)
#define ICHAR_FIELD3_MASK 0x7F

/* Extract one field of character code C, shifted down to bit 0. */

#define ichar_field1(c) (((c) & ICHAR_FIELD1_MASK) >> 14)
#define ichar_field2(c) (((c) & ICHAR_FIELD2_MASK) >> 7)
#define ichar_field3(c)  ((c) & ICHAR_FIELD3_MASK)
771 | 370 |
/* Field 1, if non-zero, usually holds a leading byte for a
   dimension-2 charset.  Field 2, if non-zero, usually holds a leading
   byte for a dimension-1 charset. */

/* Offsets added to a field value to obtain a leading byte (and
   subtracted from a leading byte to obtain the field value). */

#define FIELD2_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_1 - 1)
#define FIELD2_TO_PRIVATE_LEADING_BYTE 0x80

#define FIELD1_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_2 - 1)
#define FIELD1_TO_PRIVATE_LEADING_BYTE 0x80

/* Smallest and largest field values for each class of charset, derived
   from the corresponding leading-byte range. */

#define MIN_ICHAR_FIELD2_OFFICIAL \
  (MIN_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE)
#define MAX_ICHAR_FIELD2_OFFICIAL \
  (MAX_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE)

#define MIN_ICHAR_FIELD1_OFFICIAL \
  (MIN_LEADING_BYTE_OFFICIAL_2 - FIELD1_TO_OFFICIAL_LEADING_BYTE)
#define MAX_ICHAR_FIELD1_OFFICIAL \
  (MAX_LEADING_BYTE_OFFICIAL_2 - FIELD1_TO_OFFICIAL_LEADING_BYTE)

#define MIN_ICHAR_FIELD2_PRIVATE \
  (MIN_LEADING_BYTE_PRIVATE_1 - FIELD2_TO_PRIVATE_LEADING_BYTE)
#define MAX_ICHAR_FIELD2_PRIVATE \
  (MAX_LEADING_BYTE_PRIVATE_1 - FIELD2_TO_PRIVATE_LEADING_BYTE)

#define MIN_ICHAR_FIELD1_PRIVATE \
  (MIN_LEADING_BYTE_PRIVATE_2 - FIELD1_TO_PRIVATE_LEADING_BYTE)
#define MAX_ICHAR_FIELD1_PRIVATE \
  (MAX_LEADING_BYTE_PRIVATE_2 - FIELD1_TO_PRIVATE_LEADING_BYTE)

/* Lower bound of the character codes of each <type>; the minimum field
   value is shifted into the position of its field. */

#define MIN_CHAR_OFFICIAL_TYPE9N    (MIN_ICHAR_FIELD2_OFFICIAL << 7)
#define MIN_CHAR_PRIVATE_TYPE9N     (MIN_ICHAR_FIELD2_PRIVATE << 7)
#define MIN_CHAR_OFFICIAL_TYPE9NX9N (MIN_ICHAR_FIELD1_OFFICIAL << 14)
#define MIN_CHAR_PRIVATE_TYPE9NX9N  (MIN_ICHAR_FIELD1_PRIVATE << 14)
#define MIN_CHAR_COMPOSITION        (0x7F << 14)
771 | 412 |
413 /* Leading byte of a character. | |
414 | |
415 NOTE: This takes advantage of the fact that | |
416 FIELD2_TO_OFFICIAL_LEADING_BYTE and | |
417 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. | |
418 */ | |
419 | |
826 | 420 DECLARE_INLINE_HEADER ( |
867 | 421 Ibyte |
422 ichar_leading_byte (Ichar c) | |
826 | 423 ) |
771 | 424 { |
867 | 425 if (ichar_ascii_p (c)) |
771 | 426 return LEADING_BYTE_ASCII; |
427 else if (c < 0xA0) | |
428 return LEADING_BYTE_CONTROL_1; | |
429 else if (c < MIN_CHAR_OFFICIAL_TYPE9NX9N) | |
867 | 430 return ichar_field2 (c) + FIELD2_TO_OFFICIAL_LEADING_BYTE; |
771 | 431 else if (c < MIN_CHAR_PRIVATE_TYPE9NX9N) |
867 | 432 return ichar_field1 (c) + FIELD1_TO_OFFICIAL_LEADING_BYTE; |
771 | 433 else if (c < MIN_CHAR_COMPOSITION) |
867 | 434 return ichar_field1 (c) + FIELD1_TO_PRIVATE_LEADING_BYTE; |
771 | 435 else |
436 { | |
437 #ifdef ENABLE_COMPOSITE_CHARS | |
438 return LEADING_BYTE_COMPOSITE; | |
439 #else | |
2500 | 440 ABORT(); |
771 | 441 return 0; |
442 #endif /* ENABLE_COMPOSITE_CHARS */ | |
443 } | |
444 } | |
445 | |
826 | 446 DECLARE_INLINE_HEADER ( |
447 Bytecount | |
867 | 448 ichar_len (Ichar c) |
826 | 449 ) |
450 { | |
867 | 451 if (ichar_ascii_p (c)) |
826 | 452 return 1; |
453 else if (c < MIN_CHAR_OFFICIAL_TYPE9NX9N) | |
454 return 2; | |
455 else if (c < MIN_CHAR_PRIVATE_TYPE9NX9N) | |
456 return 3; /* dimension-2 official or dimension-1 private */ | |
457 else if (c < MIN_CHAR_COMPOSITION) | |
458 return 4; | |
459 else | |
460 { | |
461 #ifdef ENABLE_COMPOSITE_CHARS | |
462 #error Not yet implemented | |
463 #else | |
2500 | 464 ABORT(); |
826 | 465 return 0; |
466 #endif /* ENABLE_COMPOSITE_CHARS */ | |
467 } | |
468 } | |
469 | |
470 DECLARE_INLINE_HEADER ( | |
471 Bytecount | |
867 | 472 ichar_len_fmt (Ichar c, Internal_Format fmt) |
826 | 473 ) |
474 { | |
475 switch (fmt) | |
476 { | |
477 case FORMAT_DEFAULT: | |
867 | 478 return ichar_len (c); |
826 | 479 case FORMAT_16_BIT_FIXED: |
480 return 2; | |
481 case FORMAT_32_BIT_FIXED: | |
482 return 4; | |
483 default: | |
484 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
485 return 1; | |
486 } | |
487 } | |
488 | |
/* Charset of character C: computes C's leading byte and looks it up in
   the leading-byte table. */
#define ichar_charset(c) charset_by_leading_byte (ichar_leading_byte (c))
771 | 490 |
491 /* Return a character whose charset is CHARSET and position-codes are C1 | |
492 and C2. TYPE9N character ignores C2. (For typical charsets, i.e. not | |
493 ASCII, Control-1 or Composite, C1 and C2 will be in the range of 32 to | |
494 127 or 33 to 126. See `make-char'.) | |
495 | |
496 NOTE: This takes advantage of the fact that | |
497 FIELD2_TO_OFFICIAL_LEADING_BYTE and | |
498 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. | |
499 */ | |
500 | |
826 | 501 DECLARE_INLINE_HEADER ( |
867 | 502 Ichar |
503 make_ichar (Lisp_Object charset, int c1, int c2) | |
826 | 504 ) |
771 | 505 { |
867 | 506 Ichar retval; |
771 | 507 if (EQ (charset, Vcharset_ascii)) |
826 | 508 retval = c1; |
771 | 509 else if (EQ (charset, Vcharset_control_1)) |
826 | 510 retval = c1 | 0x80; |
771 | 511 #ifdef ENABLE_COMPOSITE_CHARS |
512 else if (EQ (charset, Vcharset_composite)) | |
826 | 513 retval = (0x1F << 14) | ((c1) << 7) | (c2); |
771 | 514 #endif |
515 else if (XCHARSET_DIMENSION (charset) == 1) | |
826 | 516 retval = ((XCHARSET_LEADING_BYTE (charset) - |
517 FIELD2_TO_OFFICIAL_LEADING_BYTE) << 7) | (c1); | |
771 | 518 else if (!XCHARSET_PRIVATE_P (charset)) |
826 | 519 retval = ((XCHARSET_LEADING_BYTE (charset) - |
520 FIELD1_TO_OFFICIAL_LEADING_BYTE) << 14) | ((c1) << 7) | (c2); | |
771 | 521 else |
826 | 522 retval = ((XCHARSET_LEADING_BYTE (charset) - |
523 FIELD1_TO_PRIVATE_LEADING_BYTE) << 14) | ((c1) << 7) | (c2); | |
867 | 524 text_checking_assert (valid_ichar_p (retval)); |
826 | 525 return retval; |
771 | 526 } |
527 | |
/* BREAKUP_ICHAR_1_UNSAFE assumes that the charset has already been
   calculated, and just computes c1 and c2: for a dimension-1 charset C1
   receives field 3 of C and C2 is set to 0; otherwise C1 receives field 2
   and C2 field 3.  C appears more than once in the expansion, so it must
   not have side effects.  The whole expansion is parenthesized so that
   it remains a single expression in any context.

   BREAKUP_ICHAR also computes and stores the charset. */

#define BREAKUP_ICHAR_1_UNSAFE(c, charset, c1, c2)	\
  (XCHARSET_DIMENSION (charset) == 1			\
   ? ((c1) = ichar_field3 (c), (c2) = 0)		\
   : ((c1) = ichar_field2 (c),				\
      (c2) = ichar_field3 (c)))
771 | 538 |
826 | 539 DECLARE_INLINE_HEADER ( |
540 void | |
867 | 541 breakup_ichar_1 (Ichar c, Lisp_Object *charset, int *c1, int *c2) |
826 | 542 ) |
771 | 543 { |
867 | 544 text_checking_assert (valid_ichar_p (c)); |
545 *charset = ichar_charset (c); | |
546 BREAKUP_ICHAR_1_UNSAFE (c, *charset, *c1, *c2); | |
771 | 547 } |
548 | |
/* BREAKUP_ICHAR separates an Ichar into its components.  The charset of
   character C is stored in CHARSET, and the position-codes of C are
   stored in C1 and C2.  C2 of a TYPE9N character is 0.  CHARSET, C1 and
   C2 must be lvalues, since their addresses are passed on to
   breakup_ichar_1(). */

#define BREAKUP_ICHAR(c, charset, c1, c2) \
  breakup_ichar_1 (c, &(charset), &(c1), &(c2))
771 | 555 |
5200
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
556 /* Forward compatibility from ben-unicode-internal: Convert a charset |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
557 codepoint into a character in the internal string representation. |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
558 Return number of bytes written out. FAIL controls failure mode when |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
559 charset conversion to Unicode is not possible (unused as of yet). */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
560 DECLARE_INLINE_HEADER ( |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
561 Bytecount |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
562 charset_codepoint_to_itext (Lisp_Object charset, int c1, int c2, Ibyte *ptr, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
563 enum converr UNUSED (fail)) |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
564 ) |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
565 { |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
566 Ichar ch; |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
567 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
568 if (EQ (charset, Vcharset_ascii)) |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
569 { |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
570 ptr[0] = (Ibyte) c2; |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
571 return 1; |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
572 } |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
573 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
574 ch = make_ichar (charset, c1, c2); |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
575 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
576 /* We can't rely on the converted character being non-ASCII. For |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
577 example, JISX0208 codepoint (33, 64) == Unicode 0x5C (ASCII |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
578 backslash). */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
579 return set_itext_ichar (ptr, ch); |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
580 } |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
581 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
582 /* Forward compatibility from ben-unicode-internal */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
583 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
584 DECLARE_INLINE_HEADER ( |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
585 void |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
586 buffer_itext_to_charset_codepoint (const Ibyte *ptr, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
587 struct buffer *UNUSED (buf), |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
588 Lisp_Object *charset, int *c1, int *c2, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
589 enum converr UNUSED (fail)) |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
590 ) |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
591 { |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
592 Ichar ch = itext_ichar (ptr); |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
593 breakup_ichar_1 (ch, charset, c1, c2); |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
594 } |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5127
diff
changeset
|
595 |
/* Functions defined outside this header.
   NOTE(review): presumably get_charset_limits stores the lowest and
   highest valid position code of CHARSET in *LOW and *HIGH, and
   ichar_to_unicode returns the Unicode code point of CHR -- confirm
   against the definitions. */
void get_charset_limits (Lisp_Object charset, int *low, int *high);
int ichar_to_unicode (Ichar chr);

EXFUN (Fcharset_name, 1);
600 | |
771 | 601 #endif /* MULE */ |
602 | |
3439 | 603 /* ISO 10646 UTF-16, UCS-4, UTF-8, UTF-7, etc. */ |
604 | |
/* Unicode encoding forms that can be selected, e.g. for
   encode_unicode_char(). */
enum unicode_type
{
  UNICODE_UTF_16 = 0,
  UNICODE_UTF_8  = 1,
  UNICODE_UTF_7  = 2,
  UNICODE_UCS_4  = 3,
  UNICODE_UTF_32 = 4
};
613 | |
/* Defined outside this header.  CHARSET and L are only used in MULE
   builds (USED_IF_MULE); output goes to the dynamic array DST, in the
   encoding form TYPE.
   NOTE(review): presumably H and L are the position codes of the
   character within CHARSET, and LITTLE_ENDIAN selects the byte order for
   the multi-byte forms -- confirm against the definition. */
void encode_unicode_char (Lisp_Object USED_IF_MULE (charset), int h,
			  int USED_IF_MULE (l), unsigned_char_dynarr *dst,
			  enum unicode_type type, unsigned int little_endian,
			  int write_error_characters_as_such);
618 | |
#define UNICODE_ERROR_OCTET_RANGE_START 0x200000

/* UTF-16 surrogate tests.  Only bits 10-15 (bits 11-15 for the combined
   test) of CH are examined; any higher bits are ignored. */

/* CH is in 0xD800 - 0xDBFF, the first unit of a surrogate pair. */
#define valid_utf_16_first_surrogate(ch) (((ch) & 0xFC00) == 0xD800)
/* CH is in 0xDC00 - 0xDFFF, the second unit of a surrogate pair. */
#define valid_utf_16_last_surrogate(ch)  (((ch) & 0xFC00) == 0xDC00)
/* CH is in 0xD800 - 0xDFFF, i.e. either kind of surrogate. */
#define valid_utf_16_surrogate(ch)       (((ch) & 0xF800) == 0xD800)
3439 | 624 |
/* Defined outside this header.
   NOTE(review): presumably replaces the `registries' field of CHARSET
   with REGISTRIES -- confirm against the definition. */
void set_charset_registries(Lisp_Object charset, Lisp_Object registries);

EXFUN (Funicode_to_char, 2);
EXFUN (Fchar_to_unicode, 1);
629 | |
771 | 630 #endif /* INCLUDED_charset_h_ */ |