Mercurial > hg > xemacs-beta
comparison src/charset.h @ 5118:e0db3c197671 ben-lisp-object
merge up to latest default branch, doesn't compile yet
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Sat, 26 Dec 2009 21:18:49 -0600 |
parents | 1abf84db2c7f |
children | d1247f3cc363 |
comparison (legend: equal | deleted | inserted | replaced)
5117:3742ea8250b5 | 5118:e0db3c197671 |
---|---|
56 #define BREAKUP_ICHAR(ch, charset, byte1, byte2) do { \ | 56 #define BREAKUP_ICHAR(ch, charset, byte1, byte2) do { \ |
57 (charset) = Vcharset_ascii; \ | 57 (charset) = Vcharset_ascii; \ |
58 (byte1) = (ch); \ | 58 (byte1) = (ch); \ |
59 (byte2) = 0; \ | 59 (byte2) = 0; \ |
60 } while (0) | 60 } while (0) |
61 #define XCHARSET_CCL_PROGRAM(cs) Qnil | |
62 #define XCHARSET_NAME(cs) Qascii | |
61 | 63 |
62 #else /* MULE */ | 64 #else /* MULE */ |
63 | 65 |
64 | 66 |
65 /************************************************************************/ | 67 /************************************************************************/ |
138 | 140 |
139 #define PRE_LEADING_BYTE_PRIVATE_1 0x9E /* 1-byte char-set */ | 141 #define PRE_LEADING_BYTE_PRIVATE_1 0x9E /* 1-byte char-set */ |
140 #define PRE_LEADING_BYTE_PRIVATE_2 0x9F /* 2-byte char-set */ | 142 #define PRE_LEADING_BYTE_PRIVATE_2 0x9F /* 2-byte char-set */ |
141 | 143 |
142 #define MIN_LEADING_BYTE_PRIVATE_1 0xA0 | 144 #define MIN_LEADING_BYTE_PRIVATE_1 0xA0 |
143 #define MAX_LEADING_BYTE_PRIVATE_1 0xEF | 145 #define MAX_LEADING_BYTE_PRIVATE_1 0xC0 |
144 #define MIN_LEADING_BYTE_PRIVATE_2 0xF0 | 146 #define MIN_LEADING_BYTE_PRIVATE_2 0xC1 |
145 #define MAX_LEADING_BYTE_PRIVATE_2 0xFF | 147 #define MAX_LEADING_BYTE_PRIVATE_2 0xFF |
146 | 148 |
147 #define NUM_LEADING_BYTES 129 | 149 #define NUM_LEADING_BYTES 129 |
148 | 150 |
149 | 151 |
184 struct LCRECORD_HEADER header; | 186 struct LCRECORD_HEADER header; |
185 | 187 |
186 int id; | 188 int id; |
187 Lisp_Object name; | 189 Lisp_Object name; |
188 Lisp_Object doc_string; | 190 Lisp_Object doc_string; |
189 Lisp_Object registry; | 191 Lisp_Object registries; |
190 Lisp_Object short_name; | 192 Lisp_Object short_name; |
191 Lisp_Object long_name; | 193 Lisp_Object long_name; |
192 | 194 |
193 Lisp_Object reverse_direction_charset; | 195 Lisp_Object reverse_direction_charset; |
194 | 196 |
226 /* Number of chars in each dimension (usually 94 or 96) */ | 228 /* Number of chars in each dimension (usually 94 or 96) */ |
227 int chars; | 229 int chars; |
228 | 230 |
229 /* Which half of font to be used to display this character set */ | 231 /* Which half of font to be used to display this character set */ |
230 int graphic; | 232 int graphic; |
233 | |
234 /* If set, this charset should be written out in ISO-2022-based coding | |
235 systems using the escape sequence for UTF-8, not using our internal | |
236 representation and the associated real ISO 2022 designation. */ | |
237 unsigned int encode_as_utf_8 :1; | |
231 | 238 |
232 /* If set, this is a "temporary" charset created when we encounter | 239 /* If set, this is a "temporary" charset created when we encounter |
233 an unknown final. This is so that we can successfully compile | 240 an unknown final. This is so that we can successfully compile |
234 and load such files. We allow a real charset to be created on top | 241 and load such files. We allow a real charset to be created on top |
235 of this temporary charset. */ | 242 of this temporary charset. */ |
236 unsigned int temporary :1; | 243 unsigned int temporary :1; |
237 }; | 244 }; |
238 typedef struct Lisp_Charset Lisp_Charset; | 245 typedef struct Lisp_Charset Lisp_Charset; |
239 | 246 |
240 DECLARE_LRECORD (charset, Lisp_Charset); | 247 DECLARE_LISP_OBJECT (charset, Lisp_Charset); |
241 #define XCHARSET(x) XRECORD (x, charset, Lisp_Charset) | 248 #define XCHARSET(x) XRECORD (x, charset, Lisp_Charset) |
242 #define wrap_charset(p) wrap_record (p, charset) | 249 #define wrap_charset(p) wrap_record (p, charset) |
243 #define CHARSETP(x) RECORDP (x, charset) | 250 #define CHARSETP(x) RECORDP (x, charset) |
244 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) | 251 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) |
245 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) | 252 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) |
259 #define CHARSET_SHORT_NAME(cs) ((cs)->short_name) | 266 #define CHARSET_SHORT_NAME(cs) ((cs)->short_name) |
260 #define CHARSET_LONG_NAME(cs) ((cs)->long_name) | 267 #define CHARSET_LONG_NAME(cs) ((cs)->long_name) |
261 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes) | 268 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes) |
262 #define CHARSET_COLUMNS(cs) ((cs)->columns) | 269 #define CHARSET_COLUMNS(cs) ((cs)->columns) |
263 #define CHARSET_GRAPHIC(cs) ((cs)->graphic) | 270 #define CHARSET_GRAPHIC(cs) ((cs)->graphic) |
271 #define CHARSET_ENCODE_AS_UTF_8(cs) ((cs)->encode_as_utf_8) | |
264 #define CHARSET_TYPE(cs) ((cs)->type) | 272 #define CHARSET_TYPE(cs) ((cs)->type) |
265 #define CHARSET_DIRECTION(cs) ((cs)->direction) | 273 #define CHARSET_DIRECTION(cs) ((cs)->direction) |
266 #define CHARSET_FINAL(cs) ((cs)->final) | 274 #define CHARSET_FINAL(cs) ((cs)->final) |
267 #define CHARSET_DOC_STRING(cs) ((cs)->doc_string) | 275 #define CHARSET_DOC_STRING(cs) ((cs)->doc_string) |
268 #define CHARSET_REGISTRY(cs) ((cs)->registry) | 276 #define CHARSET_REGISTRIES(cs) ((cs)->registries) |
269 #define CHARSET_CCL_PROGRAM(cs) ((cs)->ccl_program) | 277 #define CHARSET_CCL_PROGRAM(cs) ((cs)->ccl_program) |
270 #define CHARSET_DIMENSION(cs) ((cs)->dimension) | 278 #define CHARSET_DIMENSION(cs) ((cs)->dimension) |
271 #define CHARSET_CHARS(cs) ((cs)->chars) | 279 #define CHARSET_CHARS(cs) ((cs)->chars) |
272 #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset) | 280 #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset) |
273 #define CHARSET_TO_UNICODE_TABLE(cs) ((cs)->to_unicode_table) | 281 #define CHARSET_TO_UNICODE_TABLE(cs) ((cs)->to_unicode_table) |
274 #define CHARSET_FROM_UNICODE_TABLE(cs) ((cs)->from_unicode_table) | 282 #define CHARSET_FROM_UNICODE_TABLE(cs) ((cs)->from_unicode_table) |
275 #define CHARSET_FROM_UNICODE_LEVELS(cs) ((cs)->from_unicode_levels) | 283 #define CHARSET_FROM_UNICODE_LEVELS(cs) ((cs)->from_unicode_levels) |
276 | 284 |
277 | |
278 #define CHARSET_PRIVATE_P(cs) leading_byte_private_p (CHARSET_LEADING_BYTE (cs)) | 285 #define CHARSET_PRIVATE_P(cs) leading_byte_private_p (CHARSET_LEADING_BYTE (cs)) |
279 | 286 |
280 #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs)) | 287 #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs)) |
281 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs)) | 288 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs)) |
282 #define XCHARSET_SHORT_NAME(cs) CHARSET_SHORT_NAME (XCHARSET (cs)) | 289 #define XCHARSET_SHORT_NAME(cs) CHARSET_SHORT_NAME (XCHARSET (cs)) |
283 #define XCHARSET_LONG_NAME(cs) CHARSET_LONG_NAME (XCHARSET (cs)) | 290 #define XCHARSET_LONG_NAME(cs) CHARSET_LONG_NAME (XCHARSET (cs)) |
284 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs)) | 291 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs)) |
285 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs)) | 292 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs)) |
286 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs)) | 293 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs)) |
294 #define XCHARSET_ENCODE_AS_UTF_8(cs) CHARSET_ENCODE_AS_UTF_8 (XCHARSET (cs)) | |
287 #define XCHARSET_TYPE(cs) CHARSET_TYPE (XCHARSET (cs)) | 295 #define XCHARSET_TYPE(cs) CHARSET_TYPE (XCHARSET (cs)) |
288 #define XCHARSET_DIRECTION(cs) CHARSET_DIRECTION (XCHARSET (cs)) | 296 #define XCHARSET_DIRECTION(cs) CHARSET_DIRECTION (XCHARSET (cs)) |
289 #define XCHARSET_FINAL(cs) CHARSET_FINAL (XCHARSET (cs)) | 297 #define XCHARSET_FINAL(cs) CHARSET_FINAL (XCHARSET (cs)) |
290 #define XCHARSET_DOC_STRING(cs) CHARSET_DOC_STRING (XCHARSET (cs)) | 298 #define XCHARSET_DOC_STRING(cs) CHARSET_DOC_STRING (XCHARSET (cs)) |
291 #define XCHARSET_REGISTRY(cs) CHARSET_REGISTRY (XCHARSET (cs)) | 299 #define XCHARSET_REGISTRIES(cs) CHARSET_REGISTRIES (XCHARSET (cs)) |
292 #define XCHARSET_LEADING_BYTE(cs) CHARSET_LEADING_BYTE (XCHARSET (cs)) | 300 #define XCHARSET_LEADING_BYTE(cs) CHARSET_LEADING_BYTE (XCHARSET (cs)) |
293 #define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs)) | 301 #define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs)) |
294 #define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs)) | 302 #define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs)) |
295 #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs)) | 303 #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs)) |
304 | |
296 #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs)) | 305 #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs)) |
297 #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ | 306 #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ |
298 CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) | 307 CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) |
299 #define XCHARSET_TO_UNICODE_TABLE(cs) \ | 308 #define XCHARSET_TO_UNICODE_TABLE(cs) \ |
300 CHARSET_TO_UNICODE_TABLE (XCHARSET (cs)) | 309 CHARSET_TO_UNICODE_TABLE (XCHARSET (cs)) |
345 /************************************************************************/ | 354 /************************************************************************/ |
346 /* Dealing with characters */ | 355 /* Dealing with characters */ |
347 /************************************************************************/ | 356 /************************************************************************/ |
348 | 357 |
349 /* The bit fields of character are divided into 3 parts: | 358 /* The bit fields of character are divided into 3 parts: |
350 FIELD1(5bits):FIELD2(7bits):FIELD3(7bits) */ | 359 FIELD1(7bits):FIELD2(7bits):FIELD3(7bits) */ |
351 | 360 |
352 #define ICHAR_FIELD1_MASK (0x1F << 14) | 361 #define ICHAR_FIELD1_MASK (0x7F << 14) |
353 #define ICHAR_FIELD2_MASK (0x7F << 7) | 362 #define ICHAR_FIELD2_MASK (0x7F << 7) |
354 #define ICHAR_FIELD3_MASK 0x7F | 363 #define ICHAR_FIELD3_MASK 0x7F |
355 | 364 |
356 /* Macros to access each field of a character code of C. */ | 365 /* Macros to access each field of a character code of C. */ |
357 | 366 |
367 | 376 |
368 #define FIELD2_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_1 - 1) | 377 #define FIELD2_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_1 - 1) |
369 #define FIELD2_TO_PRIVATE_LEADING_BYTE 0x80 | 378 #define FIELD2_TO_PRIVATE_LEADING_BYTE 0x80 |
370 | 379 |
371 #define FIELD1_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_2 - 1) | 380 #define FIELD1_TO_OFFICIAL_LEADING_BYTE (MIN_LEADING_BYTE_OFFICIAL_2 - 1) |
372 #define FIELD1_TO_PRIVATE_LEADING_BYTE 0xE1 | 381 #define FIELD1_TO_PRIVATE_LEADING_BYTE 0x80 |
373 | 382 |
374 /* Minimum and maximum allowed values for the fields. */ | 383 /* Minimum and maximum allowed values for the fields. */ |
375 | 384 |
376 #define MIN_ICHAR_FIELD2_OFFICIAL \ | 385 #define MIN_ICHAR_FIELD2_OFFICIAL \ |
377 (MIN_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE) | 386 (MIN_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE) |
397 | 406 |
398 #define MIN_CHAR_OFFICIAL_TYPE9N (MIN_ICHAR_FIELD2_OFFICIAL << 7) | 407 #define MIN_CHAR_OFFICIAL_TYPE9N (MIN_ICHAR_FIELD2_OFFICIAL << 7) |
399 #define MIN_CHAR_PRIVATE_TYPE9N (MIN_ICHAR_FIELD2_PRIVATE << 7) | 408 #define MIN_CHAR_PRIVATE_TYPE9N (MIN_ICHAR_FIELD2_PRIVATE << 7) |
400 #define MIN_CHAR_OFFICIAL_TYPE9NX9N (MIN_ICHAR_FIELD1_OFFICIAL << 14) | 409 #define MIN_CHAR_OFFICIAL_TYPE9NX9N (MIN_ICHAR_FIELD1_OFFICIAL << 14) |
401 #define MIN_CHAR_PRIVATE_TYPE9NX9N (MIN_ICHAR_FIELD1_PRIVATE << 14) | 410 #define MIN_CHAR_PRIVATE_TYPE9NX9N (MIN_ICHAR_FIELD1_PRIVATE << 14) |
402 #define MIN_CHAR_COMPOSITION (0x1F << 14) | 411 #define MIN_CHAR_COMPOSITION (0x7F << 14) |
403 | 412 |
404 /* Leading byte of a character. | 413 /* Leading byte of a character. |
405 | 414 |
406 NOTE: This takes advantage of the fact that | 415 NOTE: This takes advantage of the fact that |
407 FIELD2_TO_OFFICIAL_LEADING_BYTE and | 416 FIELD2_TO_OFFICIAL_LEADING_BYTE and |
545 breakup_ichar_1 (c, &(charset), &(c1), &(c2)) | 554 breakup_ichar_1 (c, &(charset), &(c1), &(c2)) |
546 | 555 |
547 void get_charset_limits (Lisp_Object charset, int *low, int *high); | 556 void get_charset_limits (Lisp_Object charset, int *low, int *high); |
548 int ichar_to_unicode (Ichar chr); | 557 int ichar_to_unicode (Ichar chr); |
549 | 558 |
559 EXFUN (Fcharset_name, 1); | |
560 | |
550 #endif /* MULE */ | 561 #endif /* MULE */ |
551 | 562 |
563 /* ISO 10646 UTF-16, UCS-4, UTF-8, UTF-7, etc. */ | |
564 | |
565 enum unicode_type | |
566 { | |
567 UNICODE_UTF_16, | |
568 UNICODE_UTF_8, | |
569 UNICODE_UTF_7, | |
570 UNICODE_UCS_4, | |
571 UNICODE_UTF_32 | |
572 }; | |
573 | |
574 void encode_unicode_char (Lisp_Object USED_IF_MULE (charset), int h, | |
575 int USED_IF_MULE (l), unsigned_char_dynarr *dst, | |
576 enum unicode_type type, unsigned int little_endian, | |
577 int write_error_characters_as_such); | |
578 | |
579 #define UNICODE_ERROR_OCTET_RANGE_START 0x200000 | |
580 | |
581 #define valid_utf_16_first_surrogate(ch) (((ch) & 0xFC00) == 0xD800) | |
582 #define valid_utf_16_last_surrogate(ch) (((ch) & 0xFC00) == 0xDC00) | |
583 #define valid_utf_16_surrogate(ch) (((ch) & 0xF800) == 0xD800) | |
584 | |
585 void set_charset_registries(Lisp_Object charset, Lisp_Object registries); | |
586 | |
587 EXFUN (Funicode_to_char, 2); | |
588 EXFUN (Fchar_to_unicode, 1); | |
589 | |
552 #endif /* INCLUDED_charset_h_ */ | 590 #endif /* INCLUDED_charset_h_ */ |