comparison src/charset.h @ 5118:e0db3c197671 ben-lisp-object

merge up to latest default branch, doesn't compile yet
author Ben Wing <ben@xemacs.org>
date Sat, 26 Dec 2009 21:18:49 -0600
parents 1abf84db2c7f
children d1247f3cc363
comparison
equal deleted inserted replaced
5117:3742ea8250b5 5118:e0db3c197671
56 #define BREAKUP_ICHAR(ch, charset, byte1, byte2) do { \ 56 #define BREAKUP_ICHAR(ch, charset, byte1, byte2) do { \
57 (charset) = Vcharset_ascii; \ 57 (charset) = Vcharset_ascii; \
58 (byte1) = (ch); \ 58 (byte1) = (ch); \
59 (byte2) = 0; \ 59 (byte2) = 0; \
60 } while (0) 60 } while (0)
61 #define XCHARSET_CCL_PROGRAM(cs) Qnil
62 #define XCHARSET_NAME(cs) Qascii
61 63
62 #else /* MULE */ 64 #else /* MULE */
63 65
64 66
65 /************************************************************************/ 67 /************************************************************************/
138 140
139 #define PRE_LEADING_BYTE_PRIVATE_1 0x9E /* 1-byte char-set */ 141 #define PRE_LEADING_BYTE_PRIVATE_1 0x9E /* 1-byte char-set */
140 #define PRE_LEADING_BYTE_PRIVATE_2 0x9F /* 2-byte char-set */ 142 #define PRE_LEADING_BYTE_PRIVATE_2 0x9F /* 2-byte char-set */
141 143
142 #define MIN_LEADING_BYTE_PRIVATE_1 0xA0 144 #define MIN_LEADING_BYTE_PRIVATE_1 0xA0
143 #define MAX_LEADING_BYTE_PRIVATE_1 0xEF 145 #define MAX_LEADING_BYTE_PRIVATE_1 0xC0
144 #define MIN_LEADING_BYTE_PRIVATE_2 0xF0 146 #define MIN_LEADING_BYTE_PRIVATE_2 0xC1
145 #define MAX_LEADING_BYTE_PRIVATE_2 0xFF 147 #define MAX_LEADING_BYTE_PRIVATE_2 0xFF
146 148
147 #define NUM_LEADING_BYTES 129 149 #define NUM_LEADING_BYTES 129
148 150
149 151
184 struct LCRECORD_HEADER header; 186 struct LCRECORD_HEADER header;
185 187
186 int id; 188 int id;
187 Lisp_Object name; 189 Lisp_Object name;
188 Lisp_Object doc_string; 190 Lisp_Object doc_string;
189 Lisp_Object registry; 191 Lisp_Object registries;
190 Lisp_Object short_name; 192 Lisp_Object short_name;
191 Lisp_Object long_name; 193 Lisp_Object long_name;
192 194
193 Lisp_Object reverse_direction_charset; 195 Lisp_Object reverse_direction_charset;
194 196
226 /* Number of chars in each dimension (usually 94 or 96) */ 228 /* Number of chars in each dimension (usually 94 or 96) */
227 int chars; 229 int chars;
228 230
229 /* Which half of font to be used to display this character set */ 231 /* Which half of font to be used to display this character set */
230 int graphic; 232 int graphic;
233
234 /* If set, this charset should be written out in ISO-2022-based coding
235 systems using the escape sequence for UTF-8, not using our internal
236 representation and the associated real ISO 2022 designation. */
237 unsigned int encode_as_utf_8 :1;
231 238
232 /* If set, this is a "temporary" charset created when we encounter 239 /* If set, this is a "temporary" charset created when we encounter
233 an unknown final. This is so that we can successfully compile 240 an unknown final. This is so that we can successfully compile
234 and load such files. We allow a real charset to be created on top 241 and load such files. We allow a real charset to be created on top
235 of this temporary charset. */ 242 of this temporary charset. */
236 unsigned int temporary :1; 243 unsigned int temporary :1;
237 }; 244 };
238 typedef struct Lisp_Charset Lisp_Charset; 245 typedef struct Lisp_Charset Lisp_Charset;
239 246
240 DECLARE_LRECORD (charset, Lisp_Charset); 247 DECLARE_LISP_OBJECT (charset, Lisp_Charset);
241 #define XCHARSET(x) XRECORD (x, charset, Lisp_Charset) 248 #define XCHARSET(x) XRECORD (x, charset, Lisp_Charset)
242 #define wrap_charset(p) wrap_record (p, charset) 249 #define wrap_charset(p) wrap_record (p, charset)
243 #define CHARSETP(x) RECORDP (x, charset) 250 #define CHARSETP(x) RECORDP (x, charset)
244 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) 251 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset)
245 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) 252 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset)
259 #define CHARSET_SHORT_NAME(cs) ((cs)->short_name) 266 #define CHARSET_SHORT_NAME(cs) ((cs)->short_name)
260 #define CHARSET_LONG_NAME(cs) ((cs)->long_name) 267 #define CHARSET_LONG_NAME(cs) ((cs)->long_name)
261 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes) 268 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes)
262 #define CHARSET_COLUMNS(cs) ((cs)->columns) 269 #define CHARSET_COLUMNS(cs) ((cs)->columns)
263 #define CHARSET_GRAPHIC(cs) ((cs)->graphic) 270 #define CHARSET_GRAPHIC(cs) ((cs)->graphic)
271 #define CHARSET_ENCODE_AS_UTF_8(cs) ((cs)->encode_as_utf_8)
264 #define CHARSET_TYPE(cs) ((cs)->type) 272 #define CHARSET_TYPE(cs) ((cs)->type)
265 #define CHARSET_DIRECTION(cs) ((cs)->direction) 273 #define CHARSET_DIRECTION(cs) ((cs)->direction)
266 #define CHARSET_FINAL(cs) ((cs)->final) 274 #define CHARSET_FINAL(cs) ((cs)->final)
267 #define CHARSET_DOC_STRING(cs) ((cs)->doc_string) 275 #define CHARSET_DOC_STRING(cs) ((cs)->doc_string)
268 #define CHARSET_REGISTRY(cs) ((cs)->registry) 276 #define CHARSET_REGISTRIES(cs) ((cs)->registries)
269 #define CHARSET_CCL_PROGRAM(cs) ((cs)->ccl_program) 277 #define CHARSET_CCL_PROGRAM(cs) ((cs)->ccl_program)
270 #define CHARSET_DIMENSION(cs) ((cs)->dimension) 278 #define CHARSET_DIMENSION(cs) ((cs)->dimension)
271 #define CHARSET_CHARS(cs) ((cs)->chars) 279 #define CHARSET_CHARS(cs) ((cs)->chars)
272 #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset) 280 #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset)
273 #define CHARSET_TO_UNICODE_TABLE(cs) ((cs)->to_unicode_table) 281 #define CHARSET_TO_UNICODE_TABLE(cs) ((cs)->to_unicode_table)
274 #define CHARSET_FROM_UNICODE_TABLE(cs) ((cs)->from_unicode_table) 282 #define CHARSET_FROM_UNICODE_TABLE(cs) ((cs)->from_unicode_table)
275 #define CHARSET_FROM_UNICODE_LEVELS(cs) ((cs)->from_unicode_levels) 283 #define CHARSET_FROM_UNICODE_LEVELS(cs) ((cs)->from_unicode_levels)
276 284
277
278 #define CHARSET_PRIVATE_P(cs) leading_byte_private_p (CHARSET_LEADING_BYTE (cs)) 285 #define CHARSET_PRIVATE_P(cs) leading_byte_private_p (CHARSET_LEADING_BYTE (cs))
279 286
280 #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs)) 287 #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs))
281 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs)) 288 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs))
282 #define XCHARSET_SHORT_NAME(cs) CHARSET_SHORT_NAME (XCHARSET (cs)) 289 #define XCHARSET_SHORT_NAME(cs) CHARSET_SHORT_NAME (XCHARSET (cs))
283 #define XCHARSET_LONG_NAME(cs) CHARSET_LONG_NAME (XCHARSET (cs)) 290 #define XCHARSET_LONG_NAME(cs) CHARSET_LONG_NAME (XCHARSET (cs))
284 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs)) 291 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs))
285 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs)) 292 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs))
286 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs)) 293 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs))
294 #define XCHARSET_ENCODE_AS_UTF_8(cs) CHARSET_ENCODE_AS_UTF_8 (XCHARSET (cs))
287 #define XCHARSET_TYPE(cs) CHARSET_TYPE (XCHARSET (cs)) 295 #define XCHARSET_TYPE(cs) CHARSET_TYPE (XCHARSET (cs))
288 #define XCHARSET_DIRECTION(cs) CHARSET_DIRECTION (XCHARSET (cs)) 296 #define XCHARSET_DIRECTION(cs) CHARSET_DIRECTION (XCHARSET (cs))
289 #define XCHARSET_FINAL(cs) CHARSET_FINAL (XCHARSET (cs)) 297 #define XCHARSET_FINAL(cs) CHARSET_FINAL (XCHARSET (cs))
290 #define XCHARSET_DOC_STRING(cs) CHARSET_DOC_STRING (XCHARSET (cs)) 298 #define XCHARSET_DOC_STRING(cs) CHARSET_DOC_STRING (XCHARSET (cs))
291 #define XCHARSET_REGISTRY(cs) CHARSET_REGISTRY (XCHARSET (cs)) 299 #define XCHARSET_REGISTRIES(cs) CHARSET_REGISTRIES (XCHARSET (cs))
292 #define XCHARSET_LEADING_BYTE(cs) CHARSET_LEADING_BYTE (XCHARSET (cs)) 300 #define XCHARSET_LEADING_BYTE(cs) CHARSET_LEADING_BYTE (XCHARSET (cs))
293 #define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs)) 301 #define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs))
294 #define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs)) 302 #define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs))
295 #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs)) 303 #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs))
304
296 #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs)) 305 #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs))
297 #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ 306 #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \
298 CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) 307 CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs))
299 #define XCHARSET_TO_UNICODE_TABLE(cs) \ 308 #define XCHARSET_TO_UNICODE_TABLE(cs) \
300 CHARSET_TO_UNICODE_TABLE (XCHARSET (cs)) 309 CHARSET_TO_UNICODE_TABLE (XCHARSET (cs))
345 /************************************************************************/ 354 /************************************************************************/
346 /* Dealing with characters */ 355 /* Dealing with characters */
347 /************************************************************************/ 356 /************************************************************************/
348 357
349 /* The bit fields of character are divided into 3 parts: 358 /* The bit fields of character are divided into 3 parts:
350 FIELD1(5bits):FIELD2(7bits):FIELD3(7bits) */ 359 FIELD1(7bits):FIELD2(7bits):FIELD3(7bits) */
351 360
352 #define ICHAR_FIELD1_MASK (0x1F << 14) 361 #define ICHAR_FIELD1_MASK (0x7F << 14)
353 #define ICHAR_FIELD2_MASK (0x7F << 7) 362 #define ICHAR_FIELD2_MASK (0x7F << 7)
354 #define ICHAR_FIELD3_MASK 0x7F 363 #define ICHAR_FIELD3_MASK 0x7F
355 364
356 /* Macros to access each field of a character code of C. */ 365 /* Macros to access each field of a character code of C. */
357 366
367 376
368 #define FIELD2_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_1 - 1) 377 #define FIELD2_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_1 - 1)
369 #define FIELD2_TO_PRIVATE_LEADING_BYTE 0x80 378 #define FIELD2_TO_PRIVATE_LEADING_BYTE 0x80
370 379
371 #define FIELD1_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_2 - 1) 380 #define FIELD1_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_2 - 1)
372 #define FIELD1_TO_PRIVATE_LEADING_BYTE 0xE1 381 #define FIELD1_TO_PRIVATE_LEADING_BYTE 0x80
373 382
374 /* Minimum and maximum allowed values for the fields. */ 383 /* Minimum and maximum allowed values for the fields. */
375 384
376 #define MIN_ICHAR_FIELD2_OFFICIAL \ 385 #define MIN_ICHAR_FIELD2_OFFICIAL \
377 (MIN_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE) 386 (MIN_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE)
397 406
398 #define MIN_CHAR_OFFICIAL_TYPE9N (MIN_ICHAR_FIELD2_OFFICIAL << 7) 407 #define MIN_CHAR_OFFICIAL_TYPE9N (MIN_ICHAR_FIELD2_OFFICIAL << 7)
399 #define MIN_CHAR_PRIVATE_TYPE9N (MIN_ICHAR_FIELD2_PRIVATE << 7) 408 #define MIN_CHAR_PRIVATE_TYPE9N (MIN_ICHAR_FIELD2_PRIVATE << 7)
400 #define MIN_CHAR_OFFICIAL_TYPE9NX9N (MIN_ICHAR_FIELD1_OFFICIAL << 14) 409 #define MIN_CHAR_OFFICIAL_TYPE9NX9N (MIN_ICHAR_FIELD1_OFFICIAL << 14)
401 #define MIN_CHAR_PRIVATE_TYPE9NX9N (MIN_ICHAR_FIELD1_PRIVATE << 14) 410 #define MIN_CHAR_PRIVATE_TYPE9NX9N (MIN_ICHAR_FIELD1_PRIVATE << 14)
402 #define MIN_CHAR_COMPOSITION (0x1F << 14) 411 #define MIN_CHAR_COMPOSITION (0x7F << 14)
403 412
404 /* Leading byte of a character. 413 /* Leading byte of a character.
405 414
406 NOTE: This takes advantage of the fact that 415 NOTE: This takes advantage of the fact that
407 FIELD2_TO_OFFICIAL_LEADING_BYTE and 416 FIELD2_TO_OFFICIAL_LEADING_BYTE and
545 breakup_ichar_1 (c, &(charset), &(c1), &(c2)) 554 breakup_ichar_1 (c, &(charset), &(c1), &(c2))
546 555
547 void get_charset_limits (Lisp_Object charset, int *low, int *high); 556 void get_charset_limits (Lisp_Object charset, int *low, int *high);
548 int ichar_to_unicode (Ichar chr); 557 int ichar_to_unicode (Ichar chr);
549 558
559 EXFUN (Fcharset_name, 1);
560
550 #endif /* MULE */ 561 #endif /* MULE */
551 562
563 /* ISO 10646 UTF-16, UCS-4, UTF-8, UTF-7, etc. */
564
565 enum unicode_type
566 {
567 UNICODE_UTF_16,
568 UNICODE_UTF_8,
569 UNICODE_UTF_7,
570 UNICODE_UCS_4,
571 UNICODE_UTF_32
572 };
573
574 void encode_unicode_char (Lisp_Object USED_IF_MULE (charset), int h,
575 int USED_IF_MULE (l), unsigned_char_dynarr *dst,
576 enum unicode_type type, unsigned int little_endian,
577 int write_error_characters_as_such);
578
579 #define UNICODE_ERROR_OCTET_RANGE_START 0x200000
580
581 #define valid_utf_16_first_surrogate(ch) (((ch) & 0xFC00) == 0xD800)
582 #define valid_utf_16_last_surrogate(ch) (((ch) & 0xFC00) == 0xDC00)
583 #define valid_utf_16_surrogate(ch) (((ch) & 0xF800) == 0xD800)
584
585 void set_charset_registries(Lisp_Object charset, Lisp_Object registries);
586
587 EXFUN (Funicode_to_char, 2);
588 EXFUN (Fchar_to_unicode, 1);
589
552 #endif /* INCLUDED_charset_h_ */ 590 #endif /* INCLUDED_charset_h_ */