771
|
1 /* Header for charsets.
|
|
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
|
|
3 Copyright (C) 1995 Sun Microsystems, Inc.
|
|
4 Copyright (C) 2001, 2002 Ben Wing.
|
|
5
|
|
6 This file is part of XEmacs.
|
|
7
|
|
8 XEmacs is free software; you can redistribute it and/or modify it
|
|
9 under the terms of the GNU General Public License as published by the
|
|
10 Free Software Foundation; either version 2, or (at your option) any
|
|
11 later version.
|
|
12
|
|
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
|
|
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
16 for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with XEmacs; see the file COPYING. If not, write to
|
|
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
21 Boston, MA 02111-1307, USA. */
|
|
22
|
|
23 /* Synched up with: Mule 2.3. Not in FSF. */
|
|
24
|
|
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
|
|
26
|
|
27 #ifndef INCLUDED_charset_h_
|
|
28 #define INCLUDED_charset_h_
|
|
29
|
|
30
|
|
31
|
|
32 #ifndef MULE
|
|
33
|
|
34 /************************************************************************/
|
|
35 /* fake charset defs */
|
|
36 /************************************************************************/
|
|
37
|
|
38 /* used when MULE is not defined, so that Charset-type stuff can still
|
|
39 be done */
|
|
40
|
|
/* Stand-ins for the charset API in a non-MULE build: there is a single
   pseudo-charset (ASCII), every character occupies one byte, and every
   query yields a constant. */

/* The one and only "charset" object and its lookup entry points. */
#define Vcharset_ascii Qnil
#define CHARSETP(cs) 1
#define Fget_charset(cs) (cs)
#define Fcharset_list() list1 (Vcharset_ascii)
#define charset_by_leading_byte(lb) Vcharset_ascii

/* Leading-byte constants. */
#define LEADING_BYTE_ASCII 0x80
#define MIN_LEADING_BYTE 0x80
#define NUM_LEADING_BYTES 1

/* Per-character queries. */
#define ichar_charset(ch) Vcharset_ascii
#define ichar_leading_byte(ch) LEADING_BYTE_ASCII
#define ichar_len(ch) 1
#define ichar_len_fmt(ch, fmt) 1

/* Per-charset property accessors. */
#define XCHARSET_NAME(cs) Qascii
#define XCHARSET_LEADING_BYTE(cs) LEADING_BYTE_ASCII
#define XCHARSET_CCL_PROGRAM(cs) Qnil
#define XCHARSET_DIMENSION(cs) 1
#define XCHARSET_COLUMNS(cs) 1
#define XCHARSET_GRAPHIC(cs) -1

/* Split CH into charset and position codes: the charset is always the
   ASCII stand-in, BYTE1 receives CH itself and BYTE2 is zeroed. */
#define BREAKUP_ICHAR(ch, charset, byte1, byte2)	\
do {							\
  (charset) = Vcharset_ascii;				\
  (byte1) = (ch);					\
  (byte2) = 0;						\
} while (0)
|
771
|
65
|
|
66 #else /* MULE */
|
|
67
|
|
68
|
|
69 /************************************************************************/
|
|
70 /* Definition of leading bytes */
|
|
71 /************************************************************************/
|
|
72
|
|
73 #define MIN_LEADING_BYTE 0x7F
|
|
74
|
|
75 /** The following are for 1-byte characters in an official charset. **/
|
|
enum LEADING_BYTE_OFFICIAL_1
{
  MIN_LEADING_BYTE_OFFICIAL_1     = 0x80,

  /* LEADING_BYTE_LATIN_ISO8859_1 *MUST* be equal to
     MIN_LEADING_BYTE_OFFICIAL_1. */
  LEADING_BYTE_LATIN_ISO8859_1    = MIN_LEADING_BYTE_OFFICIAL_1,
                                          /* Right half of ISO 8859-1 */
  LEADING_BYTE_LATIN_ISO8859_2    = 0x81, /* Right half of ISO 8859-2 */
  LEADING_BYTE_LATIN_ISO8859_3    = 0x82, /* Right half of ISO 8859-3 */
  LEADING_BYTE_LATIN_ISO8859_4    = 0x83, /* Right half of ISO 8859-4 */
  LEADING_BYTE_THAI_TIS620        = 0x84, /* TIS620-2533 */
  LEADING_BYTE_GREEK_ISO8859_7    = 0x85, /* Right half of ISO 8859-7 */
  LEADING_BYTE_ARABIC_ISO8859_6   = 0x86, /* Right half of ISO 8859-6 */
  LEADING_BYTE_HEBREW_ISO8859_8   = 0x87, /* Right half of ISO 8859-8 */
  LEADING_BYTE_KATAKANA_JISX0201  = 0x88, /* Right half of JIS X0201-1976 */
  LEADING_BYTE_LATIN_JISX0201     = 0x89, /* Left half of JIS X0201-1976 */
  LEADING_BYTE_CYRILLIC_ISO8859_5 = 0x8A, /* Right half of ISO 8859-5 */
  LEADING_BYTE_LATIN_ISO8859_9    = 0x8B, /* Right half of ISO 8859-9 */
  LEADING_BYTE_LATIN_ISO8859_15   = 0x8C, /* Right half of ISO 8859-15 */
#ifdef ENABLE_COMPOSITE_CHARS
  LEADING_BYTE_COMPOSITE          = 0x8D, /* For a composite character */
  /* The composite leading byte is not counted among the official
     1-byte leading bytes. */
  MAX_LEADING_BYTE_OFFICIAL_1     = LEADING_BYTE_COMPOSITE - 1,
#else
  /* Does not need to be the last entry, but simplifies things */
  LEADING_BYTE_COMPOSITE_REPLACEMENT = 0x8D, /* Replaces ESC 0 - ESC 4 in
						 a buffer */
  MAX_LEADING_BYTE_OFFICIAL_1     = LEADING_BYTE_COMPOSITE_REPLACEMENT,
#endif
  /* 0x8E is unused */
};
|
|
108
|
|
109 /* These next 2 + LEADING_BYTE_COMPOSITE need special treatment in a string
|
|
110 and/or character */
|
|
111
|
|
112 #define LEADING_BYTE_ASCII 0x7F /* Not used except in arrays
|
|
113 indexed by leading byte */
|
|
114 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */
|
|
115
|
|
116 /** The following are for 2-byte characters in an official charset. **/
|
|
enum LEADING_BYTE_OFFICIAL_2
{
  MIN_LEADING_BYTE_OFFICIAL_2         = 0x90,

  LEADING_BYTE_JAPANESE_JISX0208_1978 = MIN_LEADING_BYTE_OFFICIAL_2,
                                              /* Japanese JIS X0208-1978 */
  LEADING_BYTE_CHINESE_GB2312         = 0x91, /* Chinese Hanzi GB2312-1980 */
  LEADING_BYTE_JAPANESE_JISX0208      = 0x92, /* Japanese JIS X0208-1983 */
  LEADING_BYTE_KOREAN_KSC5601         = 0x93, /* Hangul KS C5601-1987 */
  LEADING_BYTE_JAPANESE_JISX0212      = 0x94, /* Japanese JIS X0212-1990 */
  LEADING_BYTE_CHINESE_CNS11643_1     = 0x95, /* Chinese CNS11643 Set 1 */
  LEADING_BYTE_CHINESE_CNS11643_2     = 0x96, /* Chinese CNS11643 Set 2 */
  LEADING_BYTE_CHINESE_BIG5_1         = 0x97, /* Big5 Level 1 */
  LEADING_BYTE_CHINESE_BIG5_2         = 0x98, /* Big5 Level 2 */

  MAX_LEADING_BYTE_OFFICIAL_2         = LEADING_BYTE_CHINESE_BIG5_2,

  /* 0x99 through 0x9D are unused */
};
|
|
139
|
|
140
|
|
141 /** The following are for 1- and 2-byte characters in a private charset. **/
|
|
142
|
|
143 #define PRE_LEADING_BYTE_PRIVATE_1 0x9E /* 1-byte char-set */
|
|
144 #define PRE_LEADING_BYTE_PRIVATE_2 0x9F /* 2-byte char-set */
|
|
145
|
|
146 #define MIN_LEADING_BYTE_PRIVATE_1 0xA0
|
3496
|
147 #define MAX_LEADING_BYTE_PRIVATE_1 0xC0
|
|
148 #define MIN_LEADING_BYTE_PRIVATE_2 0xC1
|
771
|
149 #define MAX_LEADING_BYTE_PRIVATE_2 0xFF
|
|
150
|
|
151 #define NUM_LEADING_BYTES 129
|
|
152
|
|
153
|
|
154 /************************************************************************/
|
|
155 /* Operations on leading bytes */
|
|
156 /************************************************************************/
|
|
157
|
|
158 /* Is this leading byte for a private charset? */
|
|
159
|
826
|
160 #define leading_byte_private_p(lb) ((lb) >= MIN_LEADING_BYTE_PRIVATE_1)
|
771
|
161
|
|
162 /* Is this a prefix for a private leading byte? */
|
|
163
|
826
|
164 DECLARE_INLINE_HEADER (
|
|
165 int
|
867
|
166 leading_byte_prefix_p (Ibyte lb)
|
826
|
167 )
|
771
|
168 {
|
|
169 return (lb == PRE_LEADING_BYTE_PRIVATE_1 ||
|
|
170 lb == PRE_LEADING_BYTE_PRIVATE_2);
|
|
171 }
|
|
172
|
|
173 /* Given a private leading byte, return the leading byte prefix stored
|
|
174 in a string. */
|
|
175
|
826
|
/* Leading bytes at or above MIN_LEADING_BYTE_PRIVATE_2 get the 2-byte
   prefix; everything below it gets the 1-byte prefix.  LB is evaluated
   exactly once. */
#define private_leading_byte_prefix(lb)				\
  ((unsigned int) (lb) >= MIN_LEADING_BYTE_PRIVATE_2		\
   ? PRE_LEADING_BYTE_PRIVATE_2					\
   : PRE_LEADING_BYTE_PRIVATE_1)
|
|
180
|
|
181
|
|
182 /************************************************************************/
|
|
183 /* Information about a particular character set */
|
|
184 /************************************************************************/
|
|
185
|
|
186 struct Lisp_Charset
|
|
187 {
|
3017
|
188 struct LCRECORD_HEADER header;
|
771
|
189
|
|
190 int id;
|
|
191 Lisp_Object name;
|
|
192 Lisp_Object doc_string;
|
3659
|
193 Lisp_Object registries;
|
771
|
194 Lisp_Object short_name;
|
|
195 Lisp_Object long_name;
|
|
196
|
|
197 Lisp_Object reverse_direction_charset;
|
|
198
|
|
199 Lisp_Object ccl_program;
|
|
200
|
2367
|
201 /* Unicode translation tables. See unicode.c for the format of these
|
|
202 tables, and discussion of how they are initialized.
|
|
203 */
|
771
|
204 void *to_unicode_table;
|
|
205 void *from_unicode_table;
|
|
206 int from_unicode_levels;
|
|
207
|
2367
|
208 /* Final byte of this character set in ISO2022 designating escape
|
|
209 sequence */
|
867
|
210 Ibyte final;
|
771
|
211
|
|
212 /* Number of bytes (1 - 4) required in the internal representation
|
|
213 for characters in this character set. This is *not* the
|
|
214 same as the dimension of the character set). */
|
|
215 int rep_bytes;
|
|
216
|
|
217 /* Number of columns a character in this charset takes up, on TTY
|
|
218 devices. Not used for X devices. */
|
|
219 int columns;
|
|
220
|
|
221 /* Direction of this character set */
|
|
222 int direction;
|
|
223
|
|
224 /* Type of this character set (94, 96, 94x94, 96x96) */
|
|
225 int type;
|
|
226
|
|
227 /* Number of bytes used in encoding of this character set (1 or 2) */
|
|
228 int dimension;
|
|
229
|
|
230 /* Number of chars in each dimension (usually 94 or 96) */
|
|
231 int chars;
|
|
232
|
|
233 /* Which half of font to be used to display this character set */
|
|
234 int graphic;
|
|
235
|
3439
|
236 /* If set, this charset should be written out in ISO-2022-based coding
|
|
237 systems using the escape sequence for UTF-8, not using our internal
|
|
238 representation and the associated real ISO 2022 designation. */
|
|
239 unsigned int encode_as_utf_8 :1;
|
|
240
|
771
|
241 /* If set, this is a "temporary" charset created when we encounter
|
|
242 an unknown final. This is so that we can successfully compile
|
|
243 and load such files. We allow a real charset to be created on top
|
|
244 of this temporary charset. */
|
|
245 unsigned int temporary :1;
|
|
246 };
|
|
247 typedef struct Lisp_Charset Lisp_Charset;
|
|
248
|
|
249 DECLARE_LRECORD (charset, Lisp_Charset);
|
|
250 #define XCHARSET(x) XRECORD (x, charset, Lisp_Charset)
|
|
251 #define wrap_charset(p) wrap_record (p, charset)
|
|
252 #define CHARSETP(x) RECORDP (x, charset)
|
|
253 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset)
|
|
254 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset)
|
|
255
|
|
256 #define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */
|
|
257 #define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */
|
|
258 #define CHARSET_TYPE_94X94 2 /* This charset includes 94x94 characters. */
|
|
259 #define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */
|
|
260
|
|
261 #define CHARSET_LEFT_TO_RIGHT 0
|
|
262 #define CHARSET_RIGHT_TO_LEFT 1
|
|
263
|
|
264 /* Leading byte and id have been regrouped. -- OG */
|
|
265 #define CHARSET_ID(cs) ((cs)->id)
|
867
|
266 #define CHARSET_LEADING_BYTE(cs) ((Ibyte) CHARSET_ID (cs))
|
771
|
267 #define CHARSET_NAME(cs) ((cs)->name)
|
|
268 #define CHARSET_SHORT_NAME(cs) ((cs)->short_name)
|
|
269 #define CHARSET_LONG_NAME(cs) ((cs)->long_name)
|
|
270 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes)
|
|
271 #define CHARSET_COLUMNS(cs) ((cs)->columns)
|
|
272 #define CHARSET_GRAPHIC(cs) ((cs)->graphic)
|
3439
|
273 #define CHARSET_ENCODE_AS_UTF_8(cs) ((cs)->encode_as_utf_8)
|
771
|
274 #define CHARSET_TYPE(cs) ((cs)->type)
|
|
275 #define CHARSET_DIRECTION(cs) ((cs)->direction)
|
|
276 #define CHARSET_FINAL(cs) ((cs)->final)
|
|
277 #define CHARSET_DOC_STRING(cs) ((cs)->doc_string)
|
3659
|
278 #define CHARSET_REGISTRIES(cs) ((cs)->registries)
|
771
|
279 #define CHARSET_CCL_PROGRAM(cs) ((cs)->ccl_program)
|
|
280 #define CHARSET_DIMENSION(cs) ((cs)->dimension)
|
|
281 #define CHARSET_CHARS(cs) ((cs)->chars)
|
|
282 #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset)
|
|
283 #define CHARSET_TO_UNICODE_TABLE(cs) ((cs)->to_unicode_table)
|
|
284 #define CHARSET_FROM_UNICODE_TABLE(cs) ((cs)->from_unicode_table)
|
|
285 #define CHARSET_FROM_UNICODE_LEVELS(cs) ((cs)->from_unicode_levels)
|
|
286
|
826
|
287 #define CHARSET_PRIVATE_P(cs) leading_byte_private_p (CHARSET_LEADING_BYTE (cs))
|
771
|
288
|
|
289 #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs))
|
|
290 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs))
|
|
291 #define XCHARSET_SHORT_NAME(cs) CHARSET_SHORT_NAME (XCHARSET (cs))
|
|
292 #define XCHARSET_LONG_NAME(cs) CHARSET_LONG_NAME (XCHARSET (cs))
|
|
293 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs))
|
|
294 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs))
|
|
295 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs))
|
3439
|
296 #define XCHARSET_ENCODE_AS_UTF_8(cs) CHARSET_ENCODE_AS_UTF_8 (XCHARSET (cs))
|
771
|
297 #define XCHARSET_TYPE(cs) CHARSET_TYPE (XCHARSET (cs))
|
|
298 #define XCHARSET_DIRECTION(cs) CHARSET_DIRECTION (XCHARSET (cs))
|
|
299 #define XCHARSET_FINAL(cs) CHARSET_FINAL (XCHARSET (cs))
|
|
300 #define XCHARSET_DOC_STRING(cs) CHARSET_DOC_STRING (XCHARSET (cs))
|
3659
|
301 #define XCHARSET_REGISTRIES(cs) CHARSET_REGISTRIES (XCHARSET (cs))
|
771
|
302 #define XCHARSET_LEADING_BYTE(cs) CHARSET_LEADING_BYTE (XCHARSET (cs))
|
|
303 #define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs))
|
|
304 #define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs))
|
|
305 #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs))
|
3659
|
306
|
771
|
307 #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs))
|
|
308 #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \
|
|
309 CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs))
|
|
310 #define XCHARSET_TO_UNICODE_TABLE(cs) \
|
|
311 CHARSET_TO_UNICODE_TABLE (XCHARSET (cs))
|
|
312 #define XCHARSET_FROM_UNICODE_TABLE(cs) \
|
|
313 CHARSET_FROM_UNICODE_TABLE (XCHARSET (cs))
|
|
314 #define XCHARSET_FROM_UNICODE_LEVELS(cs) \
|
|
315 CHARSET_FROM_UNICODE_LEVELS (XCHARSET (cs))
|
|
316
|
|
317 struct charset_lookup
|
|
318 {
|
|
319 /* Table of charsets indexed by leading byte. */
|
|
320 Lisp_Object charset_by_leading_byte[NUM_LEADING_BYTES];
|
|
321
|
|
322 /* Table of charsets indexed by type/final-byte/direction. */
|
|
323 Lisp_Object charset_by_attributes[4][128][2];
|
867
|
324 Ibyte next_allocated_1_byte_leading_byte;
|
|
325 Ibyte next_allocated_2_byte_leading_byte;
|
771
|
326 };
|
|
327
|
1111
|
328 extern struct charset_lookup *chlook;
|
|
329
|
826
|
330 DECLARE_INLINE_HEADER (
|
|
331 Lisp_Object
|
|
332 charset_by_leading_byte (int lb)
|
|
333 )
|
771
|
334 {
|
800
|
335 #ifdef ERROR_CHECK_TEXT
|
771
|
336 /* When error-checking is on, x86 GCC 2.95.2 -O3 miscompiles the
|
|
337 following unless we introduce `tem'. */
|
|
338 int tem = lb;
|
800
|
339 text_checking_assert (tem >= MIN_LEADING_BYTE && tem <= 0xFF);
|
771
|
340 #endif
|
|
341 return chlook->charset_by_leading_byte[lb - MIN_LEADING_BYTE];
|
|
342 }
|
|
343
|
826
|
344 DECLARE_INLINE_HEADER (
|
|
345 Lisp_Object
|
|
346 charset_by_attributes (int type, int final, int dir)
|
|
347 )
|
771
|
348 {
|
|
349 type_checking_assert (type < countof (chlook->charset_by_attributes) &&
|
|
350 final < countof (chlook->charset_by_attributes[0]) &&
|
|
351 dir < countof (chlook->charset_by_attributes[0][0]));
|
|
352 return chlook->charset_by_attributes[type][final][dir];
|
|
353 }
|
|
354
|
|
355
|
|
356 /************************************************************************/
|
|
357 /* Dealing with characters */
|
|
358 /************************************************************************/
|
|
359
|
|
360 /* The bit fields of character are divided into 3 parts:
|
3496
|
361 FIELD1(7bits):FIELD2(7bits):FIELD3(7bits) */
|
771
|
362
|
3496
|
/* One 7-bit mask per field; FIELD3 is the least significant. */
#define ICHAR_FIELD1_MASK 0x1FC000	/* bits 14..20 */
#define ICHAR_FIELD2_MASK 0x003F80	/* bits  7..13 */
#define ICHAR_FIELD3_MASK 0x00007F	/* bits  0..6  */

/* Extract a single field from character code C.  The mask is applied
   before the shift, so bits above the field never leak into the
   result. */

#define ichar_field1(c) (((c) & ICHAR_FIELD1_MASK) >> 14)
#define ichar_field2(c) (((c) & ICHAR_FIELD2_MASK) >> 7)
#define ichar_field3(c) ((c) & ICHAR_FIELD3_MASK)
|
771
|
372
|
|
373 /* Field 1, if non-zero, usually holds a leading byte for a
|
|
374 dimension-2 charset. Field 2, if non-zero, usually holds a leading
|
|
375 byte for a dimension-1 charset. */
|
|
376
|
|
377 /* Converting between field values and leading bytes. */
|
|
378
|
|
379 #define FIELD2_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_1 - 1)
|
|
380 #define FIELD2_TO_PRIVATE_LEADING_BYTE 0x80
|
|
381
|
|
382 #define FIELD1_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_2 - 1)
|
3496
|
383 #define FIELD1_TO_PRIVATE_LEADING_BYTE 0x80
|
771
|
384
|
|
385 /* Minimum and maximum allowed values for the fields. */
|
|
386
|
867
|
387 #define MIN_ICHAR_FIELD2_OFFICIAL \
|
771
|
388 (MIN_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE)
|
867
|
389 #define MAX_ICHAR_FIELD2_OFFICIAL \
|
771
|
390 (MAX_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE)
|
|
391
|
867
|
392 #define MIN_ICHAR_FIELD1_OFFICIAL \
|
771
|
393 (MIN_LEADING_BYTE_OFFICIAL_2 - FIELD1_TO_OFFICIAL_LEADING_BYTE)
|
867
|
394 #define MAX_ICHAR_FIELD1_OFFICIAL \
|
771
|
395 (MAX_LEADING_BYTE_OFFICIAL_2 - FIELD1_TO_OFFICIAL_LEADING_BYTE)
|
|
396
|
867
|
397 #define MIN_ICHAR_FIELD2_PRIVATE \
|
771
|
398 (MIN_LEADING_BYTE_PRIVATE_1 - FIELD2_TO_PRIVATE_LEADING_BYTE)
|
867
|
399 #define MAX_ICHAR_FIELD2_PRIVATE \
|
771
|
400 (MAX_LEADING_BYTE_PRIVATE_1 - FIELD2_TO_PRIVATE_LEADING_BYTE)
|
|
401
|
867
|
402 #define MIN_ICHAR_FIELD1_PRIVATE \
|
771
|
403 (MIN_LEADING_BYTE_PRIVATE_2 - FIELD1_TO_PRIVATE_LEADING_BYTE)
|
867
|
404 #define MAX_ICHAR_FIELD1_PRIVATE \
|
771
|
405 (MAX_LEADING_BYTE_PRIVATE_2 - FIELD1_TO_PRIVATE_LEADING_BYTE)
|
|
406
|
|
407 /* Minimum character code of each <type> character. */
|
|
408
|
867
|
409 #define MIN_CHAR_OFFICIAL_TYPE9N (MIN_ICHAR_FIELD2_OFFICIAL << 7)
|
|
410 #define MIN_CHAR_PRIVATE_TYPE9N (MIN_ICHAR_FIELD2_PRIVATE << 7)
|
|
411 #define MIN_CHAR_OFFICIAL_TYPE9NX9N (MIN_ICHAR_FIELD1_OFFICIAL << 14)
|
|
412 #define MIN_CHAR_PRIVATE_TYPE9NX9N (MIN_ICHAR_FIELD1_PRIVATE << 14)
|
3496
|
413 #define MIN_CHAR_COMPOSITION (0x7F << 14)
|
771
|
414
|
|
415 /* Leading byte of a character.
|
|
416
|
|
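   NOTE: Dimension-1 characters, official and private alike, are decoded
   here with FIELD2_TO_OFFICIAL_LEADING_BYTE, which is also the offset
   make_ichar() applies when it builds them.  FIELD2_TO_PRIVATE_LEADING_BYTE
   is not used, and as currently defined (0x80) it is NOT equal to
   FIELD2_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_1 - 1,
   i.e. 0x7F).
*/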
|
|
421
|
826
|
422 DECLARE_INLINE_HEADER (
|
867
|
423 Ibyte
|
|
424 ichar_leading_byte (Ichar c)
|
826
|
425 )
|
771
|
426 {
|
867
|
427 if (ichar_ascii_p (c))
|
771
|
428 return LEADING_BYTE_ASCII;
|
|
429 else if (c < 0xA0)
|
|
430 return LEADING_BYTE_CONTROL_1;
|
|
431 else if (c < MIN_CHAR_OFFICIAL_TYPE9NX9N)
|
867
|
432 return ichar_field2 (c) + FIELD2_TO_OFFICIAL_LEADING_BYTE;
|
771
|
433 else if (c < MIN_CHAR_PRIVATE_TYPE9NX9N)
|
867
|
434 return ichar_field1 (c) + FIELD1_TO_OFFICIAL_LEADING_BYTE;
|
771
|
435 else if (c < MIN_CHAR_COMPOSITION)
|
867
|
436 return ichar_field1 (c) + FIELD1_TO_PRIVATE_LEADING_BYTE;
|
771
|
437 else
|
|
438 {
|
|
439 #ifdef ENABLE_COMPOSITE_CHARS
|
|
440 return LEADING_BYTE_COMPOSITE;
|
|
441 #else
|
2500
|
442 ABORT();
|
771
|
443 return 0;
|
|
444 #endif /* ENABLE_COMPOSITE_CHARS */
|
|
445 }
|
|
446 }
|
|
447
|
826
|
448 DECLARE_INLINE_HEADER (
|
|
449 Bytecount
|
867
|
450 ichar_len (Ichar c)
|
826
|
451 )
|
|
452 {
|
867
|
453 if (ichar_ascii_p (c))
|
826
|
454 return 1;
|
|
455 else if (c < MIN_CHAR_OFFICIAL_TYPE9NX9N)
|
|
456 return 2;
|
|
457 else if (c < MIN_CHAR_PRIVATE_TYPE9NX9N)
|
|
458 return 3; /* dimension-2 official or dimension-1 private */
|
|
459 else if (c < MIN_CHAR_COMPOSITION)
|
|
460 return 4;
|
|
461 else
|
|
462 {
|
|
463 #ifdef ENABLE_COMPOSITE_CHARS
|
|
464 #error Not yet implemented
|
|
465 #else
|
2500
|
466 ABORT();
|
826
|
467 return 0;
|
|
468 #endif /* ENABLE_COMPOSITE_CHARS */
|
|
469 }
|
|
470 }
|
|
471
|
|
472 DECLARE_INLINE_HEADER (
|
|
473 Bytecount
|
867
|
474 ichar_len_fmt (Ichar c, Internal_Format fmt)
|
826
|
475 )
|
|
476 {
|
|
477 switch (fmt)
|
|
478 {
|
|
479 case FORMAT_DEFAULT:
|
867
|
480 return ichar_len (c);
|
826
|
481 case FORMAT_16_BIT_FIXED:
|
|
482 return 2;
|
|
483 case FORMAT_32_BIT_FIXED:
|
|
484 return 4;
|
|
485 default:
|
|
486 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
|
487 return 1;
|
|
488 }
|
|
489 }
|
|
490
|
867
|
491 #define ichar_charset(c) charset_by_leading_byte (ichar_leading_byte (c))
|
771
|
492
|
|
493 /* Return a character whose charset is CHARSET and position-codes are C1
|
|
494 and C2. TYPE9N character ignores C2. (For typical charsets, i.e. not
|
|
495 ASCII, Control-1 or Composite, C1 and C2 will be in the range of 32 to
|
|
496 127 or 33 to 126. See `make-char'.)
|
|
497
|
|
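   NOTE: Dimension-1 characters, official and private alike, are built
   here with FIELD2_TO_OFFICIAL_LEADING_BYTE, which is also the offset
   ichar_leading_byte() applies when it decodes them.
   FIELD2_TO_PRIVATE_LEADING_BYTE is not used, and as currently defined
   (0x80) it is NOT equal to FIELD2_TO_OFFICIAL_LEADING_BYTE
   (MIN_LEADING_BYTE_OFFICIAL_1 - 1, i.e. 0x7F).
*/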
|
|
502
|
826
|
503 DECLARE_INLINE_HEADER (
|
867
|
504 Ichar
|
|
505 make_ichar (Lisp_Object charset, int c1, int c2)
|
826
|
506 )
|
771
|
507 {
|
867
|
508 Ichar retval;
|
771
|
509 if (EQ (charset, Vcharset_ascii))
|
826
|
510 retval = c1;
|
771
|
511 else if (EQ (charset, Vcharset_control_1))
|
826
|
512 retval = c1 | 0x80;
|
771
|
513 #ifdef ENABLE_COMPOSITE_CHARS
|
|
514 else if (EQ (charset, Vcharset_composite))
|
826
|
515 retval = (0x1F << 14) | ((c1) << 7) | (c2);
|
771
|
516 #endif
|
|
517 else if (XCHARSET_DIMENSION (charset) == 1)
|
826
|
518 retval = ((XCHARSET_LEADING_BYTE (charset) -
|
|
519 FIELD2_TO_OFFICIAL_LEADING_BYTE) << 7) | (c1);
|
771
|
520 else if (!XCHARSET_PRIVATE_P (charset))
|
826
|
521 retval = ((XCHARSET_LEADING_BYTE (charset) -
|
|
522 FIELD1_TO_OFFICIAL_LEADING_BYTE) << 14) | ((c1) << 7) | (c2);
|
771
|
523 else
|
826
|
524 retval = ((XCHARSET_LEADING_BYTE (charset) -
|
|
525 FIELD1_TO_PRIVATE_LEADING_BYTE) << 14) | ((c1) << 7) | (c2);
|
867
|
526 text_checking_assert (valid_ichar_p (retval));
|
826
|
527 return retval;
|
771
|
528 }
|
|
529
|
867
|
530 /* BREAKUP_ICHAR_1_UNSAFE assumes that the charset has already been
|
771
|
531 calculated, and just computes c1 and c2.
|
|
532
|
867
|
533 BREAKUP_ICHAR also computes and stores the charset. */
|
771
|
534
|
867
|
/* Store the position codes of C into C1 and C2, given its charset
   CHARSET: for a dimension-1 charset C1 gets field 3 and C2 gets 0,
   otherwise C1 gets field 2 and C2 gets field 3.

   The expansion is one fully parenthesized expression, so an operator
   next to the macro call cannot bind to the dimension test.  C is
   expanded twice in the dimension-2 arm, so it must be free of side
   effects. */
#define BREAKUP_ICHAR_1_UNSAFE(c, charset, c1, c2)	\
  (XCHARSET_DIMENSION (charset) == 1			\
   ? ((c1) = ichar_field3 (c), (c2) = 0)		\
   : ((c1) = ichar_field2 (c),				\
      (c2) = ichar_field3 (c)))
|
771
|
540
|
826
|
541 DECLARE_INLINE_HEADER (
|
|
542 void
|
867
|
543 breakup_ichar_1 (Ichar c, Lisp_Object *charset, int *c1, int *c2)
|
826
|
544 )
|
771
|
545 {
|
867
|
546 text_checking_assert (valid_ichar_p (c));
|
|
547 *charset = ichar_charset (c);
|
|
548 BREAKUP_ICHAR_1_UNSAFE (c, *charset, *c1, *c2);
|
771
|
549 }
|
|
550
|
867
|
551 /* BREAKUP_ICHAR separates an Ichar into its components. The charset of
|
771
|
552 character C is set to CHARSET, and the position-codes of C are set to C1
|
|
553 and C2. C2 of TYPE9N character is 0. */
|
|
554
|
867
|
555 #define BREAKUP_ICHAR(c, charset, c1, c2) \
|
|
556 breakup_ichar_1 (c, &(charset), &(c1), &(c2))
|
771
|
557
|
788
|
558 void get_charset_limits (Lisp_Object charset, int *low, int *high);
|
867
|
559 int ichar_to_unicode (Ichar chr);
|
788
|
560
|
3439
|
561 EXFUN (Fcharset_name, 1);
|
|
562
|
771
|
563 #endif /* MULE */
|
|
564
|
3439
|
565 /* ISO 10646 UTF-16, UCS-4, UTF-8, UTF-7, etc. */
|
|
566
|
|
/* The Unicode encoding forms that can be selected; passed as the TYPE
   argument of encode_unicode_char(). */
enum unicode_type
{
  UNICODE_UTF_16 = 0,
  UNICODE_UTF_8  = 1,
  UNICODE_UTF_7  = 2,
  UNICODE_UCS_4  = 3,
  UNICODE_UTF_32 = 4
};
|
|
575
|
|
576 void encode_unicode_char (Lisp_Object USED_IF_MULE (charset), int h,
|
|
577 int USED_IF_MULE (l), unsigned_char_dynarr *dst,
|
4096
|
578 enum unicode_type type, unsigned int little_endian,
|
|
579 int write_error_characters_as_such);
|
|
580
|
|
581 #define UNICODE_ERROR_OCTET_RANGE_START 0x200000
|
|
582
|
|
583 #define valid_utf_16_first_surrogate(ch) (((ch) & 0xFC00) == 0xD800)
|
|
584 #define valid_utf_16_last_surrogate(ch) (((ch) & 0xFC00) == 0xDC00)
|
|
585 #define valid_utf_16_surrogate(ch) (((ch) & 0xF800) == 0xD800)
|
3439
|
586
|
3676
|
587 void set_charset_registries(Lisp_Object charset, Lisp_Object registries);
|
|
588
|
3439
|
589 EXFUN (Funicode_to_char, 2);
|
|
590 EXFUN (Fchar_to_unicode, 1);
|
|
591
|
771
|
592 #endif /* INCLUDED_charset_h_ */
|