Mercurial > hg > xemacs-beta
comparison src/mule-charset.h @ 398:74fd4e045ea6 r21-2-29
Import from CVS: tag r21-2-29
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:13:30 +0200 |
parents | 6719134a07c2 |
children | 2f8bb876ab1d |
comparison
equal
deleted
inserted
replaced
397:f4aeb21a5bad | 398:74fd4e045ea6 |
---|---|
21 | 21 |
22 /* Synched up with: Mule 2.3. Not in FSF. */ | 22 /* Synched up with: Mule 2.3. Not in FSF. */ |
23 | 23 |
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */ | 24 /* Rewritten by Ben Wing <ben@xemacs.org>. */ |
25 | 25 |
26 #ifndef _XEMACS_MULE_CHARSET_H | 26 #ifndef INCLUDED_mule_charset_h_ |
27 #define _XEMACS_MULE_CHARSET_H | 27 #define INCLUDED_mule_charset_h_ |
28 | 28 |
29 /* | 29 /* |
30 1. Character Sets | 30 1. Character Sets |
31 ================= | 31 ================= |
32 | 32 |
237 a "leading byte prefix", which is either 0x9E or 0x9F. (No | 237 a "leading byte prefix", which is either 0x9E or 0x9F. (No |
238 character sets are ever assigned these leading bytes.) Specifically: | 238 character sets are ever assigned these leading bytes.) Specifically: |
239 | 239 |
240 Character set Encoding (PC == position-code) | 240 Character set Encoding (PC == position-code) |
241 ------------- -------- (LB == leading-byte) | 241 ------------- -------- (LB == leading-byte) |
242 ASCII PC1 | | 242 ASCII PC1 | |
243 Control-1 LB | PC1 + 0xA0 | 243 Control-1 LB | PC1 + 0xA0 |
244 Dimension-1 official LB | PC1 + 0x80 | 244 Dimension-1 official LB | PC1 + 0x80 |
245 Dimension-1 private 0x9E | LB | PC1 + 0x80 | 245 Dimension-1 private 0x9E | LB | PC1 + 0x80 |
246 Dimension-2 official LB | PC1 + 0x80 | PC2 + 0x80 | 246 Dimension-2 official LB | PC1 + 0x80 | PC2 + 0x80 |
247 Dimension-2 private 0x9F | LB | PC1 + 0x80 | PC2 + 0x80 | 247 Dimension-2 private 0x9F | LB | PC1 + 0x80 | PC2 + 0x80 |
330 #ifdef ENABLE_COMPOSITE_CHARS | 330 #ifdef ENABLE_COMPOSITE_CHARS |
331 #endif | 331 #endif |
332 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */ | 332 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */ |
333 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ | 333 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ |
334 | 334 |
335 /* Note that a gap within an official charset range can cause a core dump, | |
336 as the first and last values are used to determine whether a | |
337 charset is defined or not in non_ascii_valid_char_p. */ | |
338 | |
335 /** The following are for 1-byte characters in an official charset. **/ | 339 /** The following are for 1-byte characters in an official charset. **/ |
336 | 340 enum LEADING_BYTE_OFFICIAL_1 |
337 #define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */ | 341 { |
338 #define LEADING_BYTE_LATIN_ISO8859_2 0x82 /* Right half of ISO 8859-2 */ | 342 LEADING_BYTE_LATIN_ISO8859_1 = 0x81, /* Right half of ISO 8859-1 */ |
339 #define LEADING_BYTE_LATIN_ISO8859_3 0x83 /* Right half of ISO 8859-3 */ | 343 LEADING_BYTE_LATIN_ISO8859_2, /* 0x82 Right half of ISO 8859-2 */ |
340 #define LEADING_BYTE_LATIN_ISO8859_4 0x84 /* Right half of ISO 8859-4 */ | 344 LEADING_BYTE_LATIN_ISO8859_3, /* 0x83 Right half of ISO 8859-3 */ |
341 #define LEADING_BYTE_THAI_TIS620 0x85 /* TIS620-2533 */ | 345 LEADING_BYTE_LATIN_ISO8859_4, /* 0x84 Right half of ISO 8859-4 */ |
342 #define LEADING_BYTE_GREEK_ISO8859_7 0x86 /* Right half of ISO 8859-7 */ | 346 LEADING_BYTE_THAI_TIS620, /* 0x85 TIS620-2533 */ |
343 #define LEADING_BYTE_ARABIC_ISO8859_6 0x87 /* Right half of ISO 8859-6 */ | 347 LEADING_BYTE_GREEK_ISO8859_7, /* 0x86 Right half of ISO 8859-7 */ |
344 #define LEADING_BYTE_HEBREW_ISO8859_8 0x88 /* Right half of ISO 8859-8 */ | 348 LEADING_BYTE_ARABIC_ISO8859_6, /* 0x87 Right half of ISO 8859-6 */ |
345 #define LEADING_BYTE_KATAKANA_JISX0201 0x89 /* Right half of JIS X0201-1976 */ | 349 LEADING_BYTE_HEBREW_ISO8859_8, /* 0x88 Right half of ISO 8859-8 */ |
346 #define LEADING_BYTE_LATIN_JISX0201 0x8A /* Left half of JIS X0201-1976 */ | 350 LEADING_BYTE_KATAKANA_JISX0201, /* 0x89 Right half of JIS X0201-1976 */ |
347 #define LEADING_BYTE_CYRILLIC_ISO8859_5 0x8C /* Right half of ISO 8859-5 */ | 351 LEADING_BYTE_LATIN_JISX0201, /* 0x8A Left half of JIS X0201-1976 */ |
348 #define LEADING_BYTE_LATIN_ISO8859_9 0x8D /* Right half of ISO 8859-9 */ | 352 LEADING_BYTE_CYRILLIC_ISO8859_5,/* 0x8B Right half of ISO 8859-5 */ |
353 LEADING_BYTE_LATIN_ISO8859_9 /* 0x8C Right half of ISO 8859-9 */ | |
354 /* 0x8D unused */ | |
355 }; | |
349 | 356 |
350 #define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_1 | 357 #define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_1 |
351 #define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_9 | 358 #define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_9 |
352 | 359 |
353 /** The following are for 2-byte characters in an official charset. **/ | 360 /** The following are for 2-byte characters in an official charset. **/ |
354 | 361 enum LEADING_BYTE_OFFICIAL_2 |
355 #define LEADING_BYTE_JAPANESE_JISX0208_1978 0x90/* Japanese JIS X0208-1978 */ | 362 { |
356 #define LEADING_BYTE_CHINESE_GB2312 0x91 /* Chinese Hanzi GB2312-1980 */ | 363 LEADING_BYTE_JAPANESE_JISX0208_1978 = 0x90, /* Japanese JIS X0208-1978 */ |
357 #define LEADING_BYTE_JAPANESE_JISX0208 0x92 /* Japanese JIS X0208-1983 */ | 364 LEADING_BYTE_CHINESE_GB2312, /* 0x91 Chinese Hanzi GB2312-1980 */ |
358 #define LEADING_BYTE_KOREAN_KSC5601 0x93 /* Hangul KS C5601-1987 */ | 365 LEADING_BYTE_JAPANESE_JISX0208, /* 0x92 Japanese JIS X0208-1983 */ |
359 #define LEADING_BYTE_JAPANESE_JISX0212 0x94 /* Japanese JIS X0212-1990 */ | 366 LEADING_BYTE_KOREAN_KSC5601, /* 0x93 Hangul KS C5601-1987 */ |
360 #define LEADING_BYTE_CHINESE_CNS11643_1 0x95 /* Chinese CNS11643 Set 1 */ | 367 LEADING_BYTE_JAPANESE_JISX0212, /* 0x94 Japanese JIS X0212-1990 */ |
361 #define LEADING_BYTE_CHINESE_CNS11643_2 0x96 /* Chinese CNS11643 Set 2 */ | 368 LEADING_BYTE_CHINESE_CNS11643_1, /* 0x95 Chinese CNS11643 Set 1 */ |
362 #define LEADING_BYTE_CHINESE_BIG5_1 0x97 /* Big5 Level 1 */ | 369 LEADING_BYTE_CHINESE_CNS11643_2, /* 0x96 Chinese CNS11643 Set 2 */ |
363 #define LEADING_BYTE_CHINESE_BIG5_2 0x98 /* Big5 Level 2 */ | 370 LEADING_BYTE_CHINESE_BIG5_1, /* 0x97 Big5 Level 1 */ |
364 /* 0x99 unused */ | 371 LEADING_BYTE_CHINESE_BIG5_2 /* 0x98 Big5 Level 2 */ |
365 /* 0x9A unused */ | 372 /* 0x99 unused */ |
366 /* 0x9B unused */ | 373 /* 0x9A unused */ |
367 /* 0x9C unused */ | 374 /* 0x9B unused */ |
368 /* 0x9D unused */ | 375 /* 0x9C unused */ |
376 }; | |
369 | 377 |
370 #define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978 | 378 #define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978 |
371 #define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_CHINESE_BIG5_2 | 379 #define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_CHINESE_BIG5_2 |
372 | 380 |
373 /** The following are for 1- and 2-byte characters in a private charset. **/ | 381 /** The following are for 1- and 2-byte characters in a private charset. **/ |
446 { | 454 { |
447 struct lcrecord_header header; | 455 struct lcrecord_header header; |
448 | 456 |
449 int id; | 457 int id; |
450 Lisp_Object name; | 458 Lisp_Object name; |
451 Lisp_Object doc_string, registry; | 459 Lisp_Object doc_string; |
460 Lisp_Object registry; | |
461 Lisp_Object short_name; | |
462 Lisp_Object long_name; | |
452 | 463 |
453 Lisp_Object reverse_direction_charset; | 464 Lisp_Object reverse_direction_charset; |
454 | 465 |
455 Lisp_Object ccl_program; | 466 Lisp_Object ccl_program; |
456 | |
457 Bufbyte leading_byte; | |
458 | 467 |
459 /* Final byte of this character set in ISO2022 designating escape sequence */ | 468 /* Final byte of this character set in ISO2022 designating escape sequence */ |
460 Bufbyte final; | 469 Bufbyte final; |
461 | 470 |
462 /* Number of bytes (1 - 4) required in the internal representation | 471 /* Number of bytes (1 - 4) required in the internal representation |
481 unsigned int chars; | 490 unsigned int chars; |
482 | 491 |
483 /* Which half of font to be used to display this character set */ | 492 /* Which half of font to be used to display this character set */ |
484 unsigned int graphic; | 493 unsigned int graphic; |
485 }; | 494 }; |
486 | 495 typedef struct Lisp_Charset Lisp_Charset; |
487 DECLARE_LRECORD (charset, struct Lisp_Charset); | 496 |
488 #define XCHARSET(x) XRECORD (x, charset, struct Lisp_Charset) | 497 DECLARE_LRECORD (charset, Lisp_Charset); |
498 #define XCHARSET(x) XRECORD (x, charset, Lisp_Charset) | |
489 #define XSETCHARSET(x, p) XSETRECORD (x, p, charset) | 499 #define XSETCHARSET(x, p) XSETRECORD (x, p, charset) |
490 #define CHARSETP(x) RECORDP (x, charset) | 500 #define CHARSETP(x) RECORDP (x, charset) |
491 #define GC_CHARSETP(x) GC_RECORDP (x, charset) | |
492 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) | 501 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) |
493 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) | 502 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) |
494 | 503 |
495 #define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */ | 504 #define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */ |
496 #define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */ | 505 #define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */ |
498 #define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */ | 507 #define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */ |
499 | 508 |
500 #define CHARSET_LEFT_TO_RIGHT 0 | 509 #define CHARSET_LEFT_TO_RIGHT 0 |
501 #define CHARSET_RIGHT_TO_LEFT 1 | 510 #define CHARSET_RIGHT_TO_LEFT 1 |
502 | 511 |
512 /* Leading byte and id have been regrouped. -- OG */ | |
503 #define CHARSET_ID(cs) ((cs)->id) | 513 #define CHARSET_ID(cs) ((cs)->id) |
514 #define CHARSET_LEADING_BYTE(cs) ((Bufbyte) CHARSET_ID(cs)) | |
504 #define CHARSET_NAME(cs) ((cs)->name) | 515 #define CHARSET_NAME(cs) ((cs)->name) |
505 #define CHARSET_LEADING_BYTE(cs) ((cs)->leading_byte) | 516 #define CHARSET_SHORT_NAME(cs) ((cs)->short_name) |
517 #define CHARSET_LONG_NAME(cs) ((cs)->long_name) | |
506 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes) | 518 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes) |
507 #define CHARSET_COLUMNS(cs) ((cs)->columns) | 519 #define CHARSET_COLUMNS(cs) ((cs)->columns) |
508 #define CHARSET_GRAPHIC(cs) ((cs)->graphic) | 520 #define CHARSET_GRAPHIC(cs) ((cs)->graphic) |
509 #define CHARSET_TYPE(cs) ((cs)->type) | 521 #define CHARSET_TYPE(cs) ((cs)->type) |
510 #define CHARSET_DIRECTION(cs) ((cs)->direction) | 522 #define CHARSET_DIRECTION(cs) ((cs)->direction) |
519 | 531 |
520 #define CHARSET_PRIVATE_P(cs) LEADING_BYTE_PRIVATE_P (CHARSET_LEADING_BYTE (cs)) | 532 #define CHARSET_PRIVATE_P(cs) LEADING_BYTE_PRIVATE_P (CHARSET_LEADING_BYTE (cs)) |
521 | 533 |
522 #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs)) | 534 #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs)) |
523 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs)) | 535 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs)) |
536 #define XCHARSET_SHORT_NAME(cs) CHARSET_SHORT_NAME (XCHARSET (cs)) | |
537 #define XCHARSET_LONG_NAME(cs) CHARSET_LONG_NAME (XCHARSET (cs)) | |
524 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs)) | 538 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs)) |
525 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs)) | 539 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs)) |
526 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs)) | 540 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs)) |
527 #define XCHARSET_TYPE(cs) CHARSET_TYPE (XCHARSET (cs)) | 541 #define XCHARSET_TYPE(cs) CHARSET_TYPE (XCHARSET (cs)) |
528 #define XCHARSET_DIRECTION(cs) CHARSET_DIRECTION (XCHARSET (cs)) | 542 #define XCHARSET_DIRECTION(cs) CHARSET_DIRECTION (XCHARSET (cs)) |
535 #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs)) | 549 #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs)) |
536 #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs)) | 550 #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs)) |
537 #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ | 551 #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ |
538 CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) | 552 CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) |
539 | 553 |
540 /* Table of charsets indexed by (leading byte - 128). */ | 554 struct charset_lookup { |
541 extern Lisp_Object charset_by_leading_byte[128]; | 555 /* Table of charsets indexed by (leading byte - 128). */
542 | 556 Lisp_Object charset_by_leading_byte[128]; |
543 /* Table of charsets indexed by type/final-byte/direction. */ | 557 |
544 extern Lisp_Object charset_by_attributes[4][128][2]; | 558 /* Table of charsets indexed by type/final-byte/direction. */ |
545 | 559 Lisp_Object charset_by_attributes[4][128][2]; |
546 /* Table of number of bytes in the string representation of a character | 560 }; |
547 indexed by the first byte of that representation. | 561 |
548 | 562 extern struct charset_lookup *chlook; |
549 This value can be derived other ways -- e.g. something like | |
550 | |
551 (BYTE_ASCII_P (first_byte) ? 1 : | |
552 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte))) | |
553 | |
554 but it's faster this way. */ | |
555 extern Bytecount rep_bytes_by_first_byte[0xA0]; | |
556 | 563 |
557 #ifdef ERROR_CHECK_TYPECHECK | 564 #ifdef ERROR_CHECK_TYPECHECK |
558 /* int not Bufbyte even though that is the actual type of a leading byte. | 565 /* int not Bufbyte even though that is the actual type of a leading byte. |
559 This way, out-of-range values will get caught rather than automatically | 566 This way, out-of-range values will get caught rather than automatically |
560 truncated. */ | 567 truncated. */ |
561 INLINE Lisp_Object CHARSET_BY_LEADING_BYTE (int lb); | 568 INLINE Lisp_Object CHARSET_BY_LEADING_BYTE (int lb); |
562 INLINE Lisp_Object | 569 INLINE Lisp_Object |
563 CHARSET_BY_LEADING_BYTE (int lb) | 570 CHARSET_BY_LEADING_BYTE (int lb) |
564 { | 571 { |
565 assert (lb >= 0x80 && lb <= 0xFF); | 572 assert (lb >= 0x80 && lb <= 0xFF); |
566 return charset_by_leading_byte[lb - 128]; | 573 return chlook->charset_by_leading_byte[lb - 128]; |
567 } | 574 } |
568 | 575 |
569 #else | 576 #else |
570 | 577 |
571 #define CHARSET_BY_LEADING_BYTE(lb) (charset_by_leading_byte[(lb) - 128]) | 578 #define CHARSET_BY_LEADING_BYTE(lb) (chlook->charset_by_leading_byte[(lb) - 128]) |
572 | 579 |
573 #endif | 580 #endif |
574 | 581 |
575 #define CHARSET_BY_ATTRIBUTES(type, final, dir) \ | 582 #define CHARSET_BY_ATTRIBUTES(type, final, dir) \ |
576 (charset_by_attributes[type][final][dir]) | 583 (chlook->charset_by_attributes[type][final][dir]) |
577 | 584 |
578 #ifdef ERROR_CHECK_TYPECHECK | 585 |
579 | 586 /* Table of number of bytes in the string representation of a character |
580 /* Number of bytes in the string representation of a character */ | 587 indexed by the first byte of that representation. |
588 | |
589 This value can be derived in other ways -- e.g. something like | |
590 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte)) | |
591 but it's faster this way. */ | |
592 extern const Bytecount rep_bytes_by_first_byte[0xA0]; | |
593 | |
594 /* Number of bytes in the string representation of a character. */ | |
581 INLINE int REP_BYTES_BY_FIRST_BYTE (int fb); | 595 INLINE int REP_BYTES_BY_FIRST_BYTE (int fb); |
582 INLINE int | 596 INLINE int |
583 REP_BYTES_BY_FIRST_BYTE (int fb) | 597 REP_BYTES_BY_FIRST_BYTE (int fb) |
584 { | 598 { |
585 assert (fb >= 0 && fb < 0xA0); | 599 #ifdef ERROR_CHECK_TYPECHECK |
600 assert (0 <= fb && fb < 0xA0); | |
601 #endif | |
586 return rep_bytes_by_first_byte[fb]; | 602 return rep_bytes_by_first_byte[fb]; |
587 } | 603 } |
588 | |
589 #else | |
590 #define REP_BYTES_BY_FIRST_BYTE(fb) (rep_bytes_by_first_byte[fb]) | |
591 #endif | |
592 | 604 |
593 | 605 |
594 /************************************************************************/ | 606 /************************************************************************/ |
595 /* Dealing with characters */ | 607 /* Dealing with characters */ |
596 /************************************************************************/ | 608 /************************************************************************/ |
772 | 784 |
773 Emchar Lstream_get_emchar_1 (Lstream *stream, int first_char); | 785 Emchar Lstream_get_emchar_1 (Lstream *stream, int first_char); |
774 int Lstream_fput_emchar (Lstream *stream, Emchar ch); | 786 int Lstream_fput_emchar (Lstream *stream, Emchar ch); |
775 void Lstream_funget_emchar (Lstream *stream, Emchar ch); | 787 void Lstream_funget_emchar (Lstream *stream, Emchar ch); |
776 | 788 |
777 int copy_internal_to_external (CONST Bufbyte *internal, Bytecount len, | 789 int copy_internal_to_external (const Bufbyte *internal, Bytecount len, |
778 unsigned char *external); | 790 unsigned char *external); |
779 Bytecount copy_external_to_internal (CONST unsigned char *external, | 791 Bytecount copy_external_to_internal (const unsigned char *external, |
780 int len, Bufbyte *internal); | 792 int len, Bufbyte *internal); |
781 | 793 |
782 #endif /* _XEMACS_MULE_CHARSET_H */ | 794 #endif /* INCLUDED_mule_charset_h_ */ |