comparison src/mule-charset.h @ 398:74fd4e045ea6 r21-2-29

Import from CVS: tag r21-2-29
author cvs
date Mon, 13 Aug 2007 11:13:30 +0200
parents 6719134a07c2
children 2f8bb876ab1d
comparison
equal deleted inserted replaced
397:f4aeb21a5bad 398:74fd4e045ea6
21 21
22 /* Synched up with: Mule 2.3. Not in FSF. */ 22 /* Synched up with: Mule 2.3. Not in FSF. */
23 23
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */ 24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 25
26 #ifndef _XEMACS_MULE_CHARSET_H 26 #ifndef INCLUDED_mule_charset_h_
27 #define _XEMACS_MULE_CHARSET_H 27 #define INCLUDED_mule_charset_h_
28 28
29 /* 29 /*
30 1. Character Sets 30 1. Character Sets
31 ================= 31 =================
32 32
237 a "leading byte prefix", which is either 0x9E or 0x9F. (No 237 a "leading byte prefix", which is either 0x9E or 0x9F. (No
238 character sets are ever assigned these leading bytes.) Specifically: 238 character sets are ever assigned these leading bytes.) Specifically:
239 239
240 Character set Encoding (PC == position-code) 240 Character set Encoding (PC == position-code)
241 ------------- -------- (LB == leading-byte) 241 ------------- -------- (LB == leading-byte)
242 ASCII PC1 | 242 ASCII PC1 |
243 Control-1 LB | PC1 + 0xA0 243 Control-1 LB | PC1 + 0xA0
244 Dimension-1 official LB | PC1 + 0x80 244 Dimension-1 official LB | PC1 + 0x80
245 Dimension-1 private 0x9E | LB | PC1 + 0x80 245 Dimension-1 private 0x9E | LB | PC1 + 0x80
246 Dimension-2 official LB | PC1 + 0x80 | PC2 + 0x80 246 Dimension-2 official LB | PC1 + 0x80 | PC2 + 0x80
247 Dimension-2 private 0x9F | LB | PC1 + 0x80 | PC2 + 0x80 247 Dimension-2 private 0x9F | LB | PC1 + 0x80 | PC2 + 0x80
330 #ifdef ENABLE_COMPOSITE_CHARS 330 #ifdef ENABLE_COMPOSITE_CHARS
331 #endif 331 #endif
332 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */ 332 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */
333 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ 333 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */
334 334
335 /* Note that a gap within an official charset range can cause a core dump,
336 as the first and last values are used to determine whether
337 a charset is defined or not in non_ascii_valid_char_p. */
338
335 /** The following are for 1-byte characters in an official charset. **/ 339 /** The following are for 1-byte characters in an official charset. **/
336 340 enum LEADING_BYTE_OFFICIAL_1
337 #define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */ 341 {
338 #define LEADING_BYTE_LATIN_ISO8859_2 0x82 /* Right half of ISO 8859-2 */ 342 LEADING_BYTE_LATIN_ISO8859_1 = 0x81, /* Right half of ISO 8859-1 */
339 #define LEADING_BYTE_LATIN_ISO8859_3 0x83 /* Right half of ISO 8859-3 */ 343 LEADING_BYTE_LATIN_ISO8859_2, /* 0x82 Right half of ISO 8859-2 */
340 #define LEADING_BYTE_LATIN_ISO8859_4 0x84 /* Right half of ISO 8859-4 */ 344 LEADING_BYTE_LATIN_ISO8859_3, /* 0x83 Right half of ISO 8859-3 */
341 #define LEADING_BYTE_THAI_TIS620 0x85 /* TIS620-2533 */ 345 LEADING_BYTE_LATIN_ISO8859_4, /* 0x84 Right half of ISO 8859-4 */
342 #define LEADING_BYTE_GREEK_ISO8859_7 0x86 /* Right half of ISO 8859-7 */ 346 LEADING_BYTE_THAI_TIS620, /* 0x85 TIS620-2533 */
343 #define LEADING_BYTE_ARABIC_ISO8859_6 0x87 /* Right half of ISO 8859-6 */ 347 LEADING_BYTE_GREEK_ISO8859_7, /* 0x86 Right half of ISO 8859-7 */
344 #define LEADING_BYTE_HEBREW_ISO8859_8 0x88 /* Right half of ISO 8859-8 */ 348 LEADING_BYTE_ARABIC_ISO8859_6, /* 0x87 Right half of ISO 8859-6 */
345 #define LEADING_BYTE_KATAKANA_JISX0201 0x89 /* Right half of JIS X0201-1976 */ 349 LEADING_BYTE_HEBREW_ISO8859_8, /* 0x88 Right half of ISO 8859-8 */
346 #define LEADING_BYTE_LATIN_JISX0201 0x8A /* Left half of JIS X0201-1976 */ 350 LEADING_BYTE_KATAKANA_JISX0201, /* 0x89 Right half of JIS X0201-1976 */
347 #define LEADING_BYTE_CYRILLIC_ISO8859_5 0x8C /* Right half of ISO 8859-5 */ 351 LEADING_BYTE_LATIN_JISX0201, /* 0x8A Left half of JIS X0201-1976 */
348 #define LEADING_BYTE_LATIN_ISO8859_9 0x8D /* Right half of ISO 8859-9 */ 352 LEADING_BYTE_CYRILLIC_ISO8859_5,/* 0x8B Right half of ISO 8859-5 */
353 LEADING_BYTE_LATIN_ISO8859_9 /* 0x8C Right half of ISO 8859-9 */
354 /* 0x8D unused */
355 };
349 356
350 #define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_1 357 #define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_1
351 #define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_9 358 #define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_9
352 359
353 /** The following are for 2-byte characters in an official charset. **/ 360 /** The following are for 2-byte characters in an official charset. **/
354 361 enum LEADING_BYTE_OFFICIAL_2
355 #define LEADING_BYTE_JAPANESE_JISX0208_1978 0x90/* Japanese JIS X0208-1978 */ 362 {
356 #define LEADING_BYTE_CHINESE_GB2312 0x91 /* Chinese Hanzi GB2312-1980 */ 363 LEADING_BYTE_JAPANESE_JISX0208_1978 = 0x90, /* Japanese JIS X0208-1978 */
357 #define LEADING_BYTE_JAPANESE_JISX0208 0x92 /* Japanese JIS X0208-1983 */ 364 LEADING_BYTE_CHINESE_GB2312, /* 0x91 Chinese Hanzi GB2312-1980 */
358 #define LEADING_BYTE_KOREAN_KSC5601 0x93 /* Hangul KS C5601-1987 */ 365 LEADING_BYTE_JAPANESE_JISX0208, /* 0x92 Japanese JIS X0208-1983 */
359 #define LEADING_BYTE_JAPANESE_JISX0212 0x94 /* Japanese JIS X0212-1990 */ 366 LEADING_BYTE_KOREAN_KSC5601, /* 0x93 Hangul KS C5601-1987 */
360 #define LEADING_BYTE_CHINESE_CNS11643_1 0x95 /* Chinese CNS11643 Set 1 */ 367 LEADING_BYTE_JAPANESE_JISX0212, /* 0x94 Japanese JIS X0212-1990 */
361 #define LEADING_BYTE_CHINESE_CNS11643_2 0x96 /* Chinese CNS11643 Set 2 */ 368 LEADING_BYTE_CHINESE_CNS11643_1, /* 0x95 Chinese CNS11643 Set 1 */
362 #define LEADING_BYTE_CHINESE_BIG5_1 0x97 /* Big5 Level 1 */ 369 LEADING_BYTE_CHINESE_CNS11643_2, /* 0x96 Chinese CNS11643 Set 2 */
363 #define LEADING_BYTE_CHINESE_BIG5_2 0x98 /* Big5 Level 2 */ 370 LEADING_BYTE_CHINESE_BIG5_1, /* 0x97 Big5 Level 1 */
364 /* 0x99 unused */ 371 LEADING_BYTE_CHINESE_BIG5_2 /* 0x98 Big5 Level 2 */
365 /* 0x9A unused */ 372 /* 0x99 unused */
366 /* 0x9B unused */ 373 /* 0x9A unused */
367 /* 0x9C unused */ 374 /* 0x9B unused */
368 /* 0x9D unused */ 375 /* 0x9C unused */
376 };
369 377
370 #define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978 378 #define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978
371 #define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_CHINESE_BIG5_2 379 #define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_CHINESE_BIG5_2
372 380
373 /** The following are for 1- and 2-byte characters in a private charset. **/ 381 /** The following are for 1- and 2-byte characters in a private charset. **/
446 { 454 {
447 struct lcrecord_header header; 455 struct lcrecord_header header;
448 456
449 int id; 457 int id;
450 Lisp_Object name; 458 Lisp_Object name;
451 Lisp_Object doc_string, registry; 459 Lisp_Object doc_string;
460 Lisp_Object registry;
461 Lisp_Object short_name;
462 Lisp_Object long_name;
452 463
453 Lisp_Object reverse_direction_charset; 464 Lisp_Object reverse_direction_charset;
454 465
455 Lisp_Object ccl_program; 466 Lisp_Object ccl_program;
456
457 Bufbyte leading_byte;
458 467
459 /* Final byte of this character set in ISO2022 designating escape sequence */ 468 /* Final byte of this character set in ISO2022 designating escape sequence */
460 Bufbyte final; 469 Bufbyte final;
461 470
462 /* Number of bytes (1 - 4) required in the internal representation 471 /* Number of bytes (1 - 4) required in the internal representation
481 unsigned int chars; 490 unsigned int chars;
482 491
483 /* Which half of font to be used to display this character set */ 492 /* Which half of font to be used to display this character set */
484 unsigned int graphic; 493 unsigned int graphic;
485 }; 494 };
486 495 typedef struct Lisp_Charset Lisp_Charset;
487 DECLARE_LRECORD (charset, struct Lisp_Charset); 496
488 #define XCHARSET(x) XRECORD (x, charset, struct Lisp_Charset) 497 DECLARE_LRECORD (charset, Lisp_Charset);
498 #define XCHARSET(x) XRECORD (x, charset, Lisp_Charset)
489 #define XSETCHARSET(x, p) XSETRECORD (x, p, charset) 499 #define XSETCHARSET(x, p) XSETRECORD (x, p, charset)
490 #define CHARSETP(x) RECORDP (x, charset) 500 #define CHARSETP(x) RECORDP (x, charset)
491 #define GC_CHARSETP(x) GC_RECORDP (x, charset)
492 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) 501 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset)
493 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) 502 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset)
494 503
495 #define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */ 504 #define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */
496 #define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */ 505 #define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */
498 #define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */ 507 #define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */
499 508
500 #define CHARSET_LEFT_TO_RIGHT 0 509 #define CHARSET_LEFT_TO_RIGHT 0
501 #define CHARSET_RIGHT_TO_LEFT 1 510 #define CHARSET_RIGHT_TO_LEFT 1
502 511
512 /* Leading byte and id have been regrouped. -- OG */
503 #define CHARSET_ID(cs) ((cs)->id) 513 #define CHARSET_ID(cs) ((cs)->id)
514 #define CHARSET_LEADING_BYTE(cs) ((Bufbyte) CHARSET_ID(cs))
504 #define CHARSET_NAME(cs) ((cs)->name) 515 #define CHARSET_NAME(cs) ((cs)->name)
505 #define CHARSET_LEADING_BYTE(cs) ((cs)->leading_byte) 516 #define CHARSET_SHORT_NAME(cs) ((cs)->short_name)
517 #define CHARSET_LONG_NAME(cs) ((cs)->long_name)
506 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes) 518 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes)
507 #define CHARSET_COLUMNS(cs) ((cs)->columns) 519 #define CHARSET_COLUMNS(cs) ((cs)->columns)
508 #define CHARSET_GRAPHIC(cs) ((cs)->graphic) 520 #define CHARSET_GRAPHIC(cs) ((cs)->graphic)
509 #define CHARSET_TYPE(cs) ((cs)->type) 521 #define CHARSET_TYPE(cs) ((cs)->type)
510 #define CHARSET_DIRECTION(cs) ((cs)->direction) 522 #define CHARSET_DIRECTION(cs) ((cs)->direction)
519 531
520 #define CHARSET_PRIVATE_P(cs) LEADING_BYTE_PRIVATE_P (CHARSET_LEADING_BYTE (cs)) 532 #define CHARSET_PRIVATE_P(cs) LEADING_BYTE_PRIVATE_P (CHARSET_LEADING_BYTE (cs))
521 533
522 #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs)) 534 #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs))
523 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs)) 535 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs))
536 #define XCHARSET_SHORT_NAME(cs) CHARSET_SHORT_NAME (XCHARSET (cs))
537 #define XCHARSET_LONG_NAME(cs) CHARSET_LONG_NAME (XCHARSET (cs))
524 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs)) 538 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs))
525 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs)) 539 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs))
526 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs)) 540 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs))
527 #define XCHARSET_TYPE(cs) CHARSET_TYPE (XCHARSET (cs)) 541 #define XCHARSET_TYPE(cs) CHARSET_TYPE (XCHARSET (cs))
528 #define XCHARSET_DIRECTION(cs) CHARSET_DIRECTION (XCHARSET (cs)) 542 #define XCHARSET_DIRECTION(cs) CHARSET_DIRECTION (XCHARSET (cs))
535 #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs)) 549 #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs))
536 #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs)) 550 #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs))
537 #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \ 551 #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \
538 CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs)) 552 CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs))
539 553
540 /* Table of charsets indexed by (leading byte - 128). */ 554 struct charset_lookup {
541 extern Lisp_Object charset_by_leading_byte[128]; 555 /* Table of charsets indexed by (leading byte - 128). */
542 556 Lisp_Object charset_by_leading_byte[128];
543 /* Table of charsets indexed by type/final-byte/direction. */ 557
544 extern Lisp_Object charset_by_attributes[4][128][2]; 558 /* Table of charsets indexed by type/final-byte/direction. */
545 559 Lisp_Object charset_by_attributes[4][128][2];
546 /* Table of number of bytes in the string representation of a character 560 };
547 indexed by the first byte of that representation. 561
548 562 extern struct charset_lookup *chlook;
549 This value can be derived other ways -- e.g. something like
550
551 (BYTE_ASCII_P (first_byte) ? 1 :
552 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte)))
553
554 but it's faster this way. */
555 extern Bytecount rep_bytes_by_first_byte[0xA0];
556 563
557 #ifdef ERROR_CHECK_TYPECHECK 564 #ifdef ERROR_CHECK_TYPECHECK
558 /* int not Bufbyte even though that is the actual type of a leading byte. 565 /* int not Bufbyte even though that is the actual type of a leading byte.
559 This way, out-of-range values will get caught rather than automatically 566 This way, out-of-range values will get caught rather than automatically
560 truncated. */ 567 truncated. */
561 INLINE Lisp_Object CHARSET_BY_LEADING_BYTE (int lb); 568 INLINE Lisp_Object CHARSET_BY_LEADING_BYTE (int lb);
562 INLINE Lisp_Object 569 INLINE Lisp_Object
563 CHARSET_BY_LEADING_BYTE (int lb) 570 CHARSET_BY_LEADING_BYTE (int lb)
564 { 571 {
565 assert (lb >= 0x80 && lb <= 0xFF); 572 assert (lb >= 0x80 && lb <= 0xFF);
566 return charset_by_leading_byte[lb - 128]; 573 return chlook->charset_by_leading_byte[lb - 128];
567 } 574 }
568 575
569 #else 576 #else
570 577
571 #define CHARSET_BY_LEADING_BYTE(lb) (charset_by_leading_byte[(lb) - 128]) 578 #define CHARSET_BY_LEADING_BYTE(lb) (chlook->charset_by_leading_byte[(lb) - 128])
572 579
573 #endif 580 #endif
574 581
575 #define CHARSET_BY_ATTRIBUTES(type, final, dir) \ 582 #define CHARSET_BY_ATTRIBUTES(type, final, dir) \
576 (charset_by_attributes[type][final][dir]) 583 (chlook->charset_by_attributes[type][final][dir])
577 584
578 #ifdef ERROR_CHECK_TYPECHECK 585
579 586 /* Table of number of bytes in the string representation of a character
580 /* Number of bytes in the string representation of a character */ 587 indexed by the first byte of that representation.
588
589 This value can be derived in other ways -- e.g. something like
590 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte))
591 but it's faster this way. */
592 extern const Bytecount rep_bytes_by_first_byte[0xA0];
593
594 /* Number of bytes in the string representation of a character. */
581 INLINE int REP_BYTES_BY_FIRST_BYTE (int fb); 595 INLINE int REP_BYTES_BY_FIRST_BYTE (int fb);
582 INLINE int 596 INLINE int
583 REP_BYTES_BY_FIRST_BYTE (int fb) 597 REP_BYTES_BY_FIRST_BYTE (int fb)
584 { 598 {
585 assert (fb >= 0 && fb < 0xA0); 599 #ifdef ERROR_CHECK_TYPECHECK
600 assert (0 <= fb && fb < 0xA0);
601 #endif
586 return rep_bytes_by_first_byte[fb]; 602 return rep_bytes_by_first_byte[fb];
587 } 603 }
588
589 #else
590 #define REP_BYTES_BY_FIRST_BYTE(fb) (rep_bytes_by_first_byte[fb])
591 #endif
592 604
593 605
594 /************************************************************************/ 606 /************************************************************************/
595 /* Dealing with characters */ 607 /* Dealing with characters */
596 /************************************************************************/ 608 /************************************************************************/
772 784
773 Emchar Lstream_get_emchar_1 (Lstream *stream, int first_char); 785 Emchar Lstream_get_emchar_1 (Lstream *stream, int first_char);
774 int Lstream_fput_emchar (Lstream *stream, Emchar ch); 786 int Lstream_fput_emchar (Lstream *stream, Emchar ch);
775 void Lstream_funget_emchar (Lstream *stream, Emchar ch); 787 void Lstream_funget_emchar (Lstream *stream, Emchar ch);
776 788
777 int copy_internal_to_external (CONST Bufbyte *internal, Bytecount len, 789 int copy_internal_to_external (const Bufbyte *internal, Bytecount len,
778 unsigned char *external); 790 unsigned char *external);
779 Bytecount copy_external_to_internal (CONST unsigned char *external, 791 Bytecount copy_external_to_internal (const unsigned char *external,
780 int len, Bufbyte *internal); 792 int len, Bufbyte *internal);
781 793
782 #endif /* _XEMACS_MULE_CHARSET_H */ 794 #endif /* INCLUDED_mule_charset_h_ */