Mercurial > hg > xemacs-beta
comparison src/mule-charset.h @ 74:54cc21c15cbb r20-0b32
Import from CVS: tag r20-0b32
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:04:33 +0200 |
parents | 131b0175ea99 |
children | c7528f8e288d |
comparison
Legend: equal, deleted, inserted, replaced
73:e2d7a37b7c8d | 74:54cc21c15cbb |
---|---|
135 | 135 |
136 A. Japanese EUC (Extended Unix Code) | 136 A. Japanese EUC (Extended Unix Code) |
137 | 137 |
138 This encompasses the character sets: | 138 This encompasses the character sets: |
139 - Printing-ASCII, | 139 - Printing-ASCII, |
140 - Japanese-JISX0201-Kana (half-width katakana, the right half of JISX0201). | 140 - Katakana-JISX0201 (half-width katakana, the right half of JISX0201). |
141 - Japanese-JISX0208 | 141 - Japanese-JISX0208 |
142 - Japanese-JISX0212 | 142 - Japanese-JISX0212 |
143 It uses 8-bit bytes. | 143 It uses 8-bit bytes. |
144 | 144 |
145 Note that Printing-ASCII and Japanese-JISX0201-Kana are 94-character | 145 Note that Printing-ASCII and Katakana-JISX0201 are 94-character |
146 charsets, while Japanese-JISX0208 is a 94x94-character charset. | 146 charsets, while Japanese-JISX0208 is a 94x94-character charset. |
147 | 147 |
148 The encoding is as follows: | 148 The encoding is as follows: |
149 | 149 |
150 Character set Representation (PC == position-code) | 150 Character set Representation (PC == position-code) |
151 ------------- -------------- | 151 ------------- -------------- |
152 Printing-ASCII PC-1 | 152 Printing-ASCII PC1 |
153 Japanese-JISX0208 PC-1 + 0x80 / PC-2 + 0x80 | 153 Japanese-JISX0208 PC1 + 0x80 \| PC2 + 0x80 |
154 Japanese-JISX0201-Kana 0x8E / PC-1 + 0x80 | 154 Katakana-JISX0201 0x8E \| PC1 + 0x80 |
155 | 155 |
156 | 156 |
157 B. JIS7 | 157 B. JIS7 |
158 | 158 |
159 This encompasses the character sets: | 159 This encompasses the character sets: |
160 - Printing-ASCII | 160 - Printing-ASCII |
161 - Japanese-JISX0201-Roman (the left half of JISX0201; this | 161 - Latin-JISX0201 (the left half of JISX0201; this character set is |
162 character set is very similar to Printing-ASCII and is a | 162 very similar to Printing-ASCII and is a 94-character charset) |
163 94-character charset) | |
164 - Japanese-JISX0208 | 163 - Japanese-JISX0208 |
165 - Japanese-JISX0201-Kana. | 164 - Katakana-JISX0201 |
166 It uses 7-bit bytes. | 165 It uses 7-bit bytes. |
167 | 166 |
168 Unlike Japanese EUC, this is a "modal" encoding, which | 167 Unlike Japanese EUC, this is a "modal" encoding, which |
169 means that there are multiple states that the encoding can | 168 means that there are multiple states that the encoding can |
170 be in, which affect how the bytes are to be interpreted. | 169 be in, which affect how the bytes are to be interpreted. |
171 Special sequences of bytes (called "escape sequences") | 170 Special sequences of bytes (called "escape sequences") |
172 are used to change states. | 171 are used to change states. |
173 | 172 |
174 The encoding is as follows: | 173 The encoding is as follows: |
175 | 174 |
176 Character set Representation | 175 Character set Representation |
177 ------------- -------------- | 176 ------------- -------------- |
178 Printing-ASCII PC-1 | 177 Printing-ASCII PC1 |
179 Japanese-JISX0201-Roman PC-1 | 178 Latin-JISX0201 PC1 |
180 Japanese-JISX0201-Kana PC-1 | 179 Katakana-JISX0201 PC1 |
181 Japanese-JISX0208 PC-1 / PC-2 | 180 Japanese-JISX0208 PC1 \| PC2 |
182 | 181 |
183 Escape sequence ASCII equivalent Meaning | 182 Escape sequence ASCII equivalent Meaning |
184 --------------- ---------------- ------- | 183 --------------- ---------------- ------- |
185 0x1B 0x28 0x42 ESC ( B invoke Printing-ASCII | 184 0x1B 0x28 0x42 ESC ( B invoke Printing-ASCII |
186 0x1B 0x28 0x4A ESC ( J invoke Japanese-JISX0201-Roman | 185 0x1B 0x28 0x4A ESC ( J invoke Latin-JISX0201 |
187 0x1B 0x28 0x49 ESC ( I invoke Japanese-JISX0201-Kana | 186 0x1B 0x28 0x49 ESC ( I invoke Katakana-JISX0201 |
188 0x1B 0x24 0x42 ESC $ B invoke Japanese-JISX0208 | 187 0x1B 0x24 0x42 ESC $ B invoke Japanese-JISX0208 |
189 | 188 |
190 Initially, Printing-ASCII is invoked. | 189 Initially, Printing-ASCII is invoked. |
191 | 190 |
192 3. Internal Mule Encodings | 191 3. Internal Mule Encodings |
331 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */ | 330 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */ |
332 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ | 331 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ |
333 | 332 |
334 /** The following are for 1-byte characters in an official charset. **/ | 333 /** The following are for 1-byte characters in an official charset. **/ |
335 | 334 |
336 #define LEADING_BYTE_LATIN_1 0x81 /* Right half of ISO 8859-1 */ | 335 #define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */ |
337 #define LEADING_BYTE_LATIN_2 0x82 /* Right half of ISO 8859-2 */ | 336 #define LEADING_BYTE_LATIN_ISO8859_2 0x82 /* Right half of ISO 8859-2 */ |
338 #define LEADING_BYTE_LATIN_3 0x83 /* Right half of ISO 8859-3 */ | 337 #define LEADING_BYTE_LATIN_ISO8859_3 0x83 /* Right half of ISO 8859-3 */ |
339 #define LEADING_BYTE_LATIN_4 0x84 /* Right half of ISO 8859-4 */ | 338 #define LEADING_BYTE_LATIN_ISO8859_4 0x84 /* Right half of ISO 8859-4 */ |
340 #define LEADING_BYTE_THAI 0x85 /* TIS620-2533 */ | 339 #define LEADING_BYTE_THAI_TIS620 0x85 /* TIS620-2533 */ |
341 #define LEADING_BYTE_GREEK 0x86 /* Right half of ISO 8859-7 */ | 340 #define LEADING_BYTE_GREEK_ISO8859_7 0x86 /* Right half of ISO 8859-7 */ |
342 #define LEADING_BYTE_ARABIC 0x87 /* Right half of ISO 8859-6 */ | 341 #define LEADING_BYTE_ARABIC_ISO8859_6 0x87 /* Right half of ISO 8859-6 */ |
343 #define LEADING_BYTE_HEBREW 0x88 /* Right half of ISO 8859-8 */ | 342 #define LEADING_BYTE_HEBREW_ISO8859_8 0x88 /* Right half of ISO 8859-8 */ |
344 #define LEADING_BYTE_JAPANESE_JISX0201_KANA 0x89 /* Right half of JIS X0201-1976 */ | 343 #define LEADING_BYTE_KATAKANA_JISX0201 0x89 /* Right half of JIS X0201-1976 */ |
345 #define LEADING_BYTE_JAPANESE_JISX0201_ROMAN 0x8A /* Left half of JIS X0201-1976 */ | 344 #define LEADING_BYTE_LATIN_JISX0201 0x8A /* Left half of JIS X0201-1976 */ |
346 #define LEADING_BYTE_CYRILLIC 0x8C /* Right half of ISO 8859-5 */ | 345 #define LEADING_BYTE_CYRILLIC_ISO8859_5 0x8C /* Right half of ISO 8859-5 */ |
347 #define LEADING_BYTE_LATIN_5 0x8D /* Right half of ISO 8859-9 */ | 346 #define LEADING_BYTE_LATIN_ISO8859_9 0x8D /* Right half of ISO 8859-9 */ |
348 | 347 |
349 #define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_1 | 348 #define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_1 |
350 #define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_5 | 349 #define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_9 |
351 | 350 |
352 /** The following are for 2-byte characters in an official charset. **/ | 351 /** The following are for 2-byte characters in an official charset. **/ |
353 | 352 |
354 #define LEADING_BYTE_JAPANESE_JISX0208_1978 0x90/* Japanese JIS X0208-1978 */ | 353 #define LEADING_BYTE_JAPANESE_JISX0208_1978 0x90/* Japanese JIS X0208-1978 */ |
355 #define LEADING_BYTE_CHINESE_GB 0x91 /* Chinese Hanzi GB2312-1980 */ | 354 #define LEADING_BYTE_CHINESE_GB2312 0x91 /* Chinese Hanzi GB2312-1980 */ |
356 #define LEADING_BYTE_JAPANESE_JISX0208 0x92 /* Japanese JIS X0208-1983 */ | 355 #define LEADING_BYTE_JAPANESE_JISX0208 0x92 /* Japanese JIS X0208-1983 */ |
357 #define LEADING_BYTE_KOREAN_KSC5601 0x93 /* Hangul KS C5601-1987 */ | 356 #define LEADING_BYTE_KOREAN_KSC5601 0x93 /* Hangul KS C5601-1987 */ |
358 #define LEADING_BYTE_JAPANESE_JISX0212 0x94 /* Japanese JIS X0212-1990 */ | 357 #define LEADING_BYTE_JAPANESE_JISX0212 0x94 /* Japanese JIS X0212-1990 */ |
359 #define LEADING_BYTE_CHINESE_CNS11643_1 0x95 /* Chinese CNS11643 Set 1 */ | 358 #define LEADING_BYTE_CHINESE_CNS11643_1 0x95 /* Chinese CNS11643 Set 1 */ |
360 #define LEADING_BYTE_CHINESE_CNS11643_2 0x96 /* Chinese CNS11643 Set 2 */ | 359 #define LEADING_BYTE_CHINESE_CNS11643_2 0x96 /* Chinese CNS11643 Set 2 */ |
401 } | 400 } |
402 | 401 |
403 /* Given a private leading byte, return the leading byte prefix stored | 402 /* Given a private leading byte, return the leading byte prefix stored |
404 in a string */ | 403 in a string */ |
405 | 404 |
406 #define PRIVATE_LEADING_BYTE_PREFIX(lb) \ | 405 #define PRIVATE_LEADING_BYTE_PREFIX(lb) \ |
407 ((lb) < MIN_LEADING_BYTE_PRIVATE_2 ? PRE_LEADING_BYTE_PRIVATE_1 \ | 406 ((lb) < MIN_LEADING_BYTE_PRIVATE_2 ? \ |
408 : PRE_LEADING_BYTE_PRIVATE_2) | 407 PRE_LEADING_BYTE_PRIVATE_1 : \ |
409 | 408 PRE_LEADING_BYTE_PRIVATE_2) |
410 | |
411 | 409 |
412 | 410 |
413 /************************************************************************/ | 411 /************************************************************************/ |
414 /* Operations on individual bytes */ | 412 /* Operations on individual bytes */ |
415 /* of any format */ | 413 /* of any format */ |
484 #define CHARSETP(x) RECORDP (x, charset) | 482 #define CHARSETP(x) RECORDP (x, charset) |
485 #define GC_CHARSETP(x) GC_RECORDP (x, charset) | 483 #define GC_CHARSETP(x) GC_RECORDP (x, charset) |
486 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) | 484 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) |
487 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) | 485 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) |
488 | 486 |
489 #define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */ | 487 #define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */ |
490 #define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */ | 488 #define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */ |
491 #define CHARSET_TYPE_94X94 2 /* This charset includes 94x94 characters. */ | 489 #define CHARSET_TYPE_94X94 2 /* This charset includes 94x94 characters. */ |
492 #define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */ | 490 #define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */ |
493 | 491 |
494 #define CHARSET_LEFT_TO_RIGHT 0 | 492 #define CHARSET_LEFT_TO_RIGHT 0 |
495 #define CHARSET_RIGHT_TO_LEFT 1 | 493 #define CHARSET_RIGHT_TO_LEFT 1 |
496 | 494 |
497 #define CHARSET_NAME(cs) ((cs)->name) | 495 #define CHARSET_NAME(cs) ((cs)->name) |
498 #define CHARSET_LEADING_BYTE(cs) ((cs)->leading_byte) | 496 #define CHARSET_LEADING_BYTE(cs) ((cs)->leading_byte) |
499 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes) | 497 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes) |
500 #define CHARSET_COLUMNS(cs) ((cs)->columns) | 498 #define CHARSET_COLUMNS(cs) ((cs)->columns) |
505 #define CHARSET_DOC_STRING(cs) ((cs)->doc_string) | 503 #define CHARSET_DOC_STRING(cs) ((cs)->doc_string) |
506 #define CHARSET_REGISTRY(cs) ((cs)->registry) | 504 #define CHARSET_REGISTRY(cs) ((cs)->registry) |
507 #define CHARSET_CCL_PROGRAM(cs) ((cs)->ccl_program) | 505 #define CHARSET_CCL_PROGRAM(cs) ((cs)->ccl_program) |
508 #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset) | 506 #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset) |
509 | 507 |
510 INLINE int CHARSET_DIMENSION (struct Lisp_Charset *cs); | |
511 INLINE int | 508 INLINE int |
512 CHARSET_DIMENSION (struct Lisp_Charset *cs) | 509 CHARSET_DIMENSION (struct Lisp_Charset *cs) |
513 { | 510 { |
514 return (CHARSET_TYPE (cs) == CHARSET_TYPE_94 \|\| | 511 /* Optimized using inside knowledge of CHARSET_TYPE values */ |
515 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2; | 512 return (CHARSET_TYPE (cs) <= CHARSET_TYPE_96) ? 1 : 2; |
516 } | 513 } |
517 | 514 |
518 INLINE int CHARSET_CHARS (struct Lisp_Charset *cs); | |
519 INLINE int | 515 INLINE int |
520 CHARSET_CHARS (struct Lisp_Charset *cs) | 516 CHARSET_CHARS (struct Lisp_Charset *cs) |
521 { | 517 { |
522 return (CHARSET_TYPE (cs) == CHARSET_TYPE_94 \|\| | 518 /* Optimized using inside knowledge of CHARSET_TYPE values */ |
523 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96; | 519 return (CHARSET_TYPE (cs) & 0x1) ? 96 : 94; |
524 } | 520 } |
525 | 521 |
526 #define CHARSET_PRIVATE_P(cs) \ | 522 #define CHARSET_PRIVATE_P(cs) LEADING_BYTE_PRIVATE_P (CHARSET_LEADING_BYTE (cs)) |
527 LEADING_BYTE_PRIVATE_P (CHARSET_LEADING_BYTE (cs)) | |
528 | 523 |
529 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs)) | 524 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs)) |
530 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs)) | 525 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs)) |
531 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs)) | 526 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs)) |
532 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs)) | 527 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs)) |
596 #define REP_BYTES_BY_FIRST_BYTE(fb) (rep_bytes_by_first_byte[fb]) | 591 #define REP_BYTES_BY_FIRST_BYTE(fb) (rep_bytes_by_first_byte[fb]) |
597 #endif | 592 #endif |
598 | 593 |
599 extern Lisp_Object Vcharset_ascii; | 594 extern Lisp_Object Vcharset_ascii; |
600 extern Lisp_Object Vcharset_control_1; | 595 extern Lisp_Object Vcharset_control_1; |
601 extern Lisp_Object Vcharset_latin_1; | 596 extern Lisp_Object Vcharset_latin_iso8859_1; |
602 extern Lisp_Object Vcharset_latin_2; | 597 extern Lisp_Object Vcharset_latin_iso8859_2; |
603 extern Lisp_Object Vcharset_latin_3; | 598 extern Lisp_Object Vcharset_latin_iso8859_3; |
604 extern Lisp_Object Vcharset_latin_4; | 599 extern Lisp_Object Vcharset_latin_iso8859_4; |
605 extern Lisp_Object Vcharset_latin_5; | 600 extern Lisp_Object Vcharset_cyrillic_iso8859_5; |
606 extern Lisp_Object Vcharset_greek; | 601 extern Lisp_Object Vcharset_arabic_iso8859_6; |
607 extern Lisp_Object Vcharset_thai; | 602 extern Lisp_Object Vcharset_greek_iso8859_7; |
608 extern Lisp_Object Vcharset_arabic; | 603 extern Lisp_Object Vcharset_hebrew_iso8859_8; |
609 extern Lisp_Object Vcharset_hebrew; | 604 extern Lisp_Object Vcharset_latin_iso8859_9; |
610 extern Lisp_Object Vcharset_cyrillic; | 605 extern Lisp_Object Vcharset_thai_tis620; |
611 extern Lisp_Object Vcharset_japanese_jisx0201_kana; | 606 extern Lisp_Object Vcharset_katakana_jisx0201; |
612 extern Lisp_Object Vcharset_japanese_jisx0201_roman; | 607 extern Lisp_Object Vcharset_latin_jisx0201; |
613 extern Lisp_Object Vcharset_japanese_jisx0208_1978; | 608 extern Lisp_Object Vcharset_japanese_jisx0208_1978; |
614 extern Lisp_Object Vcharset_japanese_jisx0208; | 609 extern Lisp_Object Vcharset_japanese_jisx0208; |
615 extern Lisp_Object Vcharset_japanese_jisx0212; | 610 extern Lisp_Object Vcharset_japanese_jisx0212; |
616 extern Lisp_Object Vcharset_korean_ksc5601; | 611 extern Lisp_Object Vcharset_korean_ksc5601; |
617 extern Lisp_Object Vcharset_chinese_gb; | 612 extern Lisp_Object Vcharset_chinese_gb2312; |
618 extern Lisp_Object Vcharset_chinese_big5_1; | 613 extern Lisp_Object Vcharset_chinese_big5_1; |
619 extern Lisp_Object Vcharset_chinese_big5_2; | 614 extern Lisp_Object Vcharset_chinese_big5_2; |
620 extern Lisp_Object Vcharset_chinese_cns11643_1; | 615 extern Lisp_Object Vcharset_chinese_cns11643_1; |
621 extern Lisp_Object Vcharset_chinese_cns11643_2; | 616 extern Lisp_Object Vcharset_chinese_cns11643_2; |
622 extern Lisp_Object Vcharset_composite; | 617 extern Lisp_Object Vcharset_composite; |