comparison src/mule-charset.h @ 412:697ef44129c6 r21-2-14

Import from CVS: tag r21-2-14
author cvs
date Mon, 13 Aug 2007 11:20:41 +0200
parents b8cc9ab3f761
children 11054d720c21
comparison
equal deleted inserted replaced
411:12e008d41344 412:697ef44129c6
21 21
22 /* Synched up with: Mule 2.3. Not in FSF. */ 22 /* Synched up with: Mule 2.3. Not in FSF. */
23 23
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */ 24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 25
26 #ifndef INCLUDED_mule_charset_h_ 26 #ifndef _XEMACS_MULE_CHARSET_H
27 #define INCLUDED_mule_charset_h_ 27 #define _XEMACS_MULE_CHARSET_H
28 28
29 /* 29 /*
30 1. Character Sets 30 1. Character Sets
31 ================= 31 =================
32 32
237 a "leading byte prefix", which is either 0x9E or 0x9F. (No 237 a "leading byte prefix", which is either 0x9E or 0x9F. (No
238 character sets are ever assigned these leading bytes.) Specifically: 238 character sets are ever assigned these leading bytes.) Specifically:
239 239
240 Character set Encoding (PC == position-code) 240 Character set Encoding (PC == position-code)
241 ------------- -------- (LB == leading-byte) 241 ------------- -------- (LB == leading-byte)
242 ASCII PC1 | 242 ASCII PC1 |
243 Control-1 LB | PC1 + 0xA0 243 Control-1 LB | PC1 + 0xA0
244 Dimension-1 official LB | PC1 + 0x80 244 Dimension-1 official LB | PC1 + 0x80
245 Dimension-1 private 0x9E | LB | PC1 + 0x80 245 Dimension-1 private 0x9E | LB | PC1 + 0x80
246 Dimension-2 official LB | PC1 | PC2 + 0x80 246 Dimension-2 official LB | PC1 | PC2 + 0x80
247 Dimension-2 private 0x9F | LB | PC1 + 0x80 | PC2 + 0x80 247 Dimension-2 private 0x9F | LB | PC1 + 0x80 | PC2 + 0x80
330 #ifdef ENABLE_COMPOSITE_CHARS 330 #ifdef ENABLE_COMPOSITE_CHARS
331 #endif 331 #endif
332 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */ 332 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */
333 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ 333 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */
334 334
335 /* Note that a gap within either range of official leading bytes can
336 cause a core dump, because the first and last values are used to
337 determine whether a charset is defined in non_ascii_valid_char_p */
338
339 /** The following are for 1-byte characters in an official charset. **/ 335 /** The following are for 1-byte characters in an official charset. **/
340 enum LEADING_BYTE_OFFICIAL_1 336
341 { 337 #define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */
342 LEADING_BYTE_LATIN_ISO8859_1 = 0x81, /* Right half of ISO 8859-1 */ 338 #define LEADING_BYTE_LATIN_ISO8859_2 0x82 /* Right half of ISO 8859-2 */
343 LEADING_BYTE_LATIN_ISO8859_2, /* 0x82 Right half of ISO 8859-2 */ 339 #define LEADING_BYTE_LATIN_ISO8859_3 0x83 /* Right half of ISO 8859-3 */
344 LEADING_BYTE_LATIN_ISO8859_3, /* 0x83 Right half of ISO 8859-3 */ 340 #define LEADING_BYTE_LATIN_ISO8859_4 0x84 /* Right half of ISO 8859-4 */
345 LEADING_BYTE_LATIN_ISO8859_4, /* 0x84 Right half of ISO 8859-4 */ 341 #define LEADING_BYTE_THAI_TIS620 0x85 /* TIS620-2533 */
346 LEADING_BYTE_THAI_TIS620, /* 0x85 TIS620-2533 */ 342 #define LEADING_BYTE_GREEK_ISO8859_7 0x86 /* Right half of ISO 8859-7 */
347 LEADING_BYTE_GREEK_ISO8859_7, /* 0x86 Right half of ISO 8859-7 */ 343 #define LEADING_BYTE_ARABIC_ISO8859_6 0x87 /* Right half of ISO 8859-6 */
348 LEADING_BYTE_ARABIC_ISO8859_6, /* 0x87 Right half of ISO 8859-6 */ 344 #define LEADING_BYTE_HEBREW_ISO8859_8 0x88 /* Right half of ISO 8859-8 */
349 LEADING_BYTE_HEBREW_ISO8859_8, /* 0x88 Right half of ISO 8859-8 */ 345 #define LEADING_BYTE_KATAKANA_JISX0201 0x89 /* Right half of JIS X0201-1976 */
350 LEADING_BYTE_KATAKANA_JISX0201, /* 0x89 Right half of JIS X0201-1976 */ 346 #define LEADING_BYTE_LATIN_JISX0201 0x8A /* Left half of JIS X0201-1976 */
351 LEADING_BYTE_LATIN_JISX0201, /* 0x8A Left half of JIS X0201-1976 */ 347 #define LEADING_BYTE_CYRILLIC_ISO8859_5 0x8C /* Right half of ISO 8859-5 */
352 LEADING_BYTE_CYRILLIC_ISO8859_5,/* 0x8B Right half of ISO 8859-5 */ 348 #define LEADING_BYTE_LATIN_ISO8859_9 0x8D /* Right half of ISO 8859-9 */
353 LEADING_BYTE_LATIN_ISO8859_9 /* 0x8C Right half of ISO 8859-9 */
354 /* 0x8D unused */
355 };
356 349
357 #define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_1 350 #define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_1
358 #define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_9 351 #define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_9
359 352
360 /** The following are for 2-byte characters in an official charset. **/ 353 /** The following are for 2-byte characters in an official charset. **/
361 enum LEADING_BYTE_OFFICIAL_2 354
362 { 355 #define LEADING_BYTE_JAPANESE_JISX0208_1978 0x90/* Japanese JIS X0208-1978 */
363 LEADING_BYTE_JAPANESE_JISX0208_1978 = 0x90, /* Japanese JIS X0208-1978 */ 356 #define LEADING_BYTE_CHINESE_GB2312 0x91 /* Chinese Hanzi GB2312-1980 */
364 LEADING_BYTE_CHINESE_GB2312, /* 0x91 Chinese Hanzi GB2312-1980 */ 357 #define LEADING_BYTE_JAPANESE_JISX0208 0x92 /* Japanese JIS X0208-1983 */
365 LEADING_BYTE_JAPANESE_JISX0208, /* 0x92 Japanese JIS X0208-1983 */ 358 #define LEADING_BYTE_KOREAN_KSC5601 0x93 /* Hangul KS C5601-1987 */
366 LEADING_BYTE_KOREAN_KSC5601, /* 0x93 Hangul KS C5601-1987 */ 359 #define LEADING_BYTE_JAPANESE_JISX0212 0x94 /* Japanese JIS X0212-1990 */
367 LEADING_BYTE_JAPANESE_JISX0212, /* 0x94 Japanese JIS X0212-1990 */ 360 #define LEADING_BYTE_CHINESE_CNS11643_1 0x95 /* Chinese CNS11643 Set 1 */
368 LEADING_BYTE_CHINESE_CNS11643_1, /* 0x95 Chinese CNS11643 Set 1 */ 361 #define LEADING_BYTE_CHINESE_CNS11643_2 0x96 /* Chinese CNS11643 Set 2 */
369 LEADING_BYTE_CHINESE_CNS11643_2, /* 0x96 Chinese CNS11643 Set 2 */ 362 #define LEADING_BYTE_CHINESE_BIG5_1 0x97 /* Big5 Level 1 */
370 LEADING_BYTE_CHINESE_BIG5_1, /* 0x97 Big5 Level 1 */ 363 #define LEADING_BYTE_CHINESE_BIG5_2 0x98 /* Big5 Level 2 */
371 LEADING_BYTE_CHINESE_BIG5_2 /* 0x98 Big5 Level 2 */ 364 /* 0x99 unused */
372 /* 0x99 unused */ 365 /* 0x9A unused */
373 /* 0x9A unused */ 366 /* 0x9B unused */
374 /* 0x9B unused */ 367 /* 0x9C unused */
375 /* 0x9C unused */ 368 /* 0x9D unused */
376 };
377 369
378 #define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978 370 #define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978
379 #define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_CHINESE_BIG5_2 371 #define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_CHINESE_BIG5_2
380 372
381 /** The following are for 1- and 2-byte characters in a private charset. **/ 373 /** The following are for 1- and 2-byte characters in a private charset. **/
399 391
400 #define LEADING_BYTE_PRIVATE_P(lb) ((lb) >= MIN_LEADING_BYTE_PRIVATE_1) 392 #define LEADING_BYTE_PRIVATE_P(lb) ((lb) >= MIN_LEADING_BYTE_PRIVATE_1)
401 393
402 /* Is this a prefix for a private leading byte? */ 394 /* Is this a prefix for a private leading byte? */
403 395
404 INLINE_HEADER int LEADING_BYTE_PREFIX_P (unsigned char lb); 396 INLINE int LEADING_BYTE_PREFIX_P (unsigned char lb);
405 INLINE_HEADER int 397 INLINE int
406 LEADING_BYTE_PREFIX_P (unsigned char lb) 398 LEADING_BYTE_PREFIX_P (unsigned char lb)
407 { 399 {
408 return (lb == PRE_LEADING_BYTE_PRIVATE_1 || 400 return (lb == PRE_LEADING_BYTE_PRIVATE_1 ||
409 lb == PRE_LEADING_BYTE_PRIVATE_2); 401 lb == PRE_LEADING_BYTE_PRIVATE_2);
410 } 402 }
454 { 446 {
455 struct lcrecord_header header; 447 struct lcrecord_header header;
456 448
457 int id; 449 int id;
458 Lisp_Object name; 450 Lisp_Object name;
459 Lisp_Object doc_string; 451 Lisp_Object doc_string, registry, short_name, long_name;
460 Lisp_Object registry;
461 Lisp_Object short_name;
462 Lisp_Object long_name;
463 452
464 Lisp_Object reverse_direction_charset; 453 Lisp_Object reverse_direction_charset;
465 454
466 Lisp_Object ccl_program; 455 Lisp_Object ccl_program;
467 456
490 unsigned int chars; 479 unsigned int chars;
491 480
492 /* Which half of font to be used to display this character set */ 481 /* Which half of font to be used to display this character set */
493 unsigned int graphic; 482 unsigned int graphic;
494 }; 483 };
495 typedef struct Lisp_Charset Lisp_Charset; 484
496 485 DECLARE_LRECORD (charset, struct Lisp_Charset);
497 DECLARE_LRECORD (charset, Lisp_Charset); 486 #define XCHARSET(x) XRECORD (x, charset, struct Lisp_Charset)
498 #define XCHARSET(x) XRECORD (x, charset, Lisp_Charset)
499 #define XSETCHARSET(x, p) XSETRECORD (x, p, charset) 487 #define XSETCHARSET(x, p) XSETRECORD (x, p, charset)
500 #define CHARSETP(x) RECORDP (x, charset) 488 #define CHARSETP(x) RECORDP (x, charset)
489 #define GC_CHARSETP(x) GC_RECORDP (x, charset)
501 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) 490 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset)
502 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) 491 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset)
503 492
504 #define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */ 493 #define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */
505 #define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */ 494 #define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */
509 #define CHARSET_LEFT_TO_RIGHT 0 498 #define CHARSET_LEFT_TO_RIGHT 0
510 #define CHARSET_RIGHT_TO_LEFT 1 499 #define CHARSET_RIGHT_TO_LEFT 1
511 500
512 /* Leading byte and id have been regrouped. -- OG */ 501 /* Leading byte and id have been regrouped. -- OG */
513 #define CHARSET_ID(cs) ((cs)->id) 502 #define CHARSET_ID(cs) ((cs)->id)
514 #define CHARSET_LEADING_BYTE(cs) ((Bufbyte) CHARSET_ID(cs)) 503 #define CHARSET_LEADING_BYTE(cs) ((Bufbyte)(CHARSET_ID(cs)))
515 #define CHARSET_NAME(cs) ((cs)->name) 504 #define CHARSET_NAME(cs) ((cs)->name)
516 #define CHARSET_SHORT_NAME(cs) ((cs)->short_name) 505 #define CHARSET_SHORT_NAME(cs) ((cs)->short_name)
517 #define CHARSET_LONG_NAME(cs) ((cs)->long_name) 506 #define CHARSET_LONG_NAME(cs) ((cs)->long_name)
518 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes) 507 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes)
519 #define CHARSET_COLUMNS(cs) ((cs)->columns) 508 #define CHARSET_COLUMNS(cs) ((cs)->columns)
549 #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs)) 538 #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs))
550 #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs)) 539 #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs))
551 #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ 540 #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \
552 CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) 541 CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs))
553 542
554 struct charset_lookup { 543 /* Table of charsets indexed by (leading byte - 128). */
555 /* Table of charsets indexed by leading byte. */ 544 extern Lisp_Object charset_by_leading_byte[128];
556 Lisp_Object charset_by_leading_byte[128]; 545
557 546 /* Table of charsets indexed by type/final-byte/direction. */
558 /* Table of charsets indexed by type/final-byte/direction. */ 547 extern Lisp_Object charset_by_attributes[4][128][2];
559 Lisp_Object charset_by_attributes[4][128][2]; 548
560 Bufbyte next_allocated_1_byte_leading_byte; 549 /* Table of number of bytes in the string representation of a character
561 Bufbyte next_allocated_2_byte_leading_byte; 550 indexed by the first byte of that representation.
562 }; 551
563 552 This value can be derived other ways -- e.g. something like
564 extern struct charset_lookup *chlook; 553
554 (BYTE_ASCII_P (first_byte) ? 1 :
555 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte)))
556
557 but it's faster this way. */
558 extern Bytecount rep_bytes_by_first_byte[0xA0];
565 559
566 #ifdef ERROR_CHECK_TYPECHECK 560 #ifdef ERROR_CHECK_TYPECHECK
567 /* int not Bufbyte even though that is the actual type of a leading byte. 561 /* int not Bufbyte even though that is the actual type of a leading byte.
568 This way, out-of-range values will get caught rather than automatically 562 This way, out-of-range values will get caught rather than automatically
569 truncated. */ 563 truncated. */
570 INLINE_HEADER Lisp_Object CHARSET_BY_LEADING_BYTE (int lb); 564 INLINE Lisp_Object CHARSET_BY_LEADING_BYTE (int lb);
571 INLINE_HEADER Lisp_Object 565 INLINE Lisp_Object
572 CHARSET_BY_LEADING_BYTE (int lb) 566 CHARSET_BY_LEADING_BYTE (int lb)
573 { 567 {
574 assert (lb >= 0x80 && lb <= 0xFF); 568 assert (lb >= 0x80 && lb <= 0xFF);
575 return chlook->charset_by_leading_byte[lb - 128]; 569 return charset_by_leading_byte[lb - 128];
576 } 570 }
577 571
578 #else 572 #else
579 573
580 #define CHARSET_BY_LEADING_BYTE(lb) (chlook->charset_by_leading_byte[(lb) - 128]) 574 #define CHARSET_BY_LEADING_BYTE(lb) (charset_by_leading_byte[(lb) - 128])
581 575
582 #endif 576 #endif
583 577
584 #define CHARSET_BY_ATTRIBUTES(type, final, dir) \ 578 #define CHARSET_BY_ATTRIBUTES(type, final, dir) \
585 (chlook->charset_by_attributes[type][final][dir]) 579 (charset_by_attributes[type][final][dir])
586 580
587 581 #ifdef ERROR_CHECK_TYPECHECK
588 /* Table of number of bytes in the string representation of a character 582
589 indexed by the first byte of that representation. 583 /* Number of bytes in the string representation of a character */
590 584 INLINE int REP_BYTES_BY_FIRST_BYTE (int fb);
591 This value can be derived in other ways -- e.g. something like 585 INLINE int
592 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte))
593 but it's faster this way. */
594 extern const Bytecount rep_bytes_by_first_byte[0xA0];
595
596 /* Number of bytes in the string representation of a character. */
597 INLINE_HEADER int REP_BYTES_BY_FIRST_BYTE (int fb);
598 INLINE_HEADER int
599 REP_BYTES_BY_FIRST_BYTE (int fb) 586 REP_BYTES_BY_FIRST_BYTE (int fb)
600 { 587 {
601 #ifdef ERROR_CHECK_TYPECHECK 588 assert (fb >= 0 && fb < 0xA0);
602 assert (0 <= fb && fb < 0xA0);
603 #endif
604 return rep_bytes_by_first_byte[fb]; 589 return rep_bytes_by_first_byte[fb];
605 } 590 }
591
592 #else
593 #define REP_BYTES_BY_FIRST_BYTE(fb) (rep_bytes_by_first_byte[fb])
594 #endif
606 595
607 596
608 /************************************************************************/ 597 /************************************************************************/
609 /* Dealing with characters */ 598 /* Dealing with characters */
610 /************************************************************************/ 599 /************************************************************************/
675 NOTE: This takes advantage of the fact that 664 NOTE: This takes advantage of the fact that
676 FIELD2_TO_OFFICIAL_LEADING_BYTE and 665 FIELD2_TO_OFFICIAL_LEADING_BYTE and
677 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. 666 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
678 */ 667 */
679 668
680 INLINE_HEADER Bufbyte CHAR_LEADING_BYTE (Emchar c); 669 INLINE Bufbyte CHAR_LEADING_BYTE (Emchar c);
681 INLINE_HEADER Bufbyte 670 INLINE Bufbyte
682 CHAR_LEADING_BYTE (Emchar c) 671 CHAR_LEADING_BYTE (Emchar c)
683 { 672 {
684 if (CHAR_ASCII_P (c)) 673 if (CHAR_ASCII_P (c))
685 return LEADING_BYTE_ASCII; 674 return LEADING_BYTE_ASCII;
686 else if (c < 0xA0) 675 else if (c < 0xA0)
710 NOTE: This takes advantage of the fact that 699 NOTE: This takes advantage of the fact that
711 FIELD2_TO_OFFICIAL_LEADING_BYTE and 700 FIELD2_TO_OFFICIAL_LEADING_BYTE and
712 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. 701 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
713 */ 702 */
714 703
715 INLINE_HEADER Emchar MAKE_CHAR (Lisp_Object charset, int c1, int c2); 704 INLINE Emchar MAKE_CHAR (Lisp_Object charset, int c1, int c2);
716 INLINE_HEADER Emchar 705 INLINE Emchar
717 MAKE_CHAR (Lisp_Object charset, int c1, int c2) 706 MAKE_CHAR (Lisp_Object charset, int c1, int c2)
718 { 707 {
719 if (EQ (charset, Vcharset_ascii)) 708 if (EQ (charset, Vcharset_ascii))
720 return c1; 709 return c1;
721 else if (EQ (charset, Vcharset_control_1)) 710 else if (EQ (charset, Vcharset_control_1))
748 XCHARSET_DIMENSION (charset) == 1 \ 737 XCHARSET_DIMENSION (charset) == 1 \
749 ? ((c1) = CHAR_FIELD3 (c), (c2) = 0) \ 738 ? ((c1) = CHAR_FIELD3 (c), (c2) = 0) \
750 : ((c1) = CHAR_FIELD2 (c), \ 739 : ((c1) = CHAR_FIELD2 (c), \
751 (c2) = CHAR_FIELD3 (c)) 740 (c2) = CHAR_FIELD3 (c))
752 741
753 INLINE_HEADER void breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2); 742 INLINE void breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2);
754 INLINE_HEADER void 743 INLINE void
755 breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2) 744 breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2)
756 { 745 {
757 *charset = CHAR_CHARSET (c); 746 *charset = CHAR_CHARSET (c);
758 BREAKUP_CHAR_1_UNSAFE (c, *charset, *c1, *c2); 747 BREAKUP_CHAR_1_UNSAFE (c, *charset, *c1, *c2);
759 } 748 }
786 775
787 Emchar Lstream_get_emchar_1 (Lstream *stream, int first_char); 776 Emchar Lstream_get_emchar_1 (Lstream *stream, int first_char);
788 int Lstream_fput_emchar (Lstream *stream, Emchar ch); 777 int Lstream_fput_emchar (Lstream *stream, Emchar ch);
789 void Lstream_funget_emchar (Lstream *stream, Emchar ch); 778 void Lstream_funget_emchar (Lstream *stream, Emchar ch);
790 779
791 int copy_internal_to_external (const Bufbyte *internal, Bytecount len, 780 int copy_internal_to_external (CONST Bufbyte *internal, Bytecount len,
792 unsigned char *external); 781 unsigned char *external);
793 Bytecount copy_external_to_internal (const unsigned char *external, 782 Bytecount copy_external_to_internal (CONST unsigned char *external,
794 int len, Bufbyte *internal); 783 int len, Bufbyte *internal);
795 784
796 #endif /* INCLUDED_mule_charset_h_ */ 785 #endif /* _XEMACS_MULE_CHARSET_H */