comparison src/mule-charset.h @ 74:54cc21c15cbb r20-0b32

Import from CVS: tag r20-0b32
author cvs
date Mon, 13 Aug 2007 09:04:33 +0200
parents 131b0175ea99
children c7528f8e288d
comparison
equal deleted inserted replaced
73:e2d7a37b7c8d 74:54cc21c15cbb
135 135
136 A. Japanese EUC (Extended Unix Code) 136 A. Japanese EUC (Extended Unix Code)
137 137
138 This encompasses the character sets: 138 This encompasses the character sets:
139 - Printing-ASCII, 139 - Printing-ASCII,
140 - Japanese-JISX0201-Kana (half-width katakana, the right half of JISX0201). 140 - Katakana-JISX0201 (half-width katakana, the right half of JISX0201).
141 - Japanese-JISX0208 141 - Japanese-JISX0208
142 - Japanese-JISX0212 142 - Japanese-JISX0212
143 It uses 8-bit bytes. 143 It uses 8-bit bytes.
144 144
145 Note that Printing-ASCII and Japanese-JISX0201-Kana are 94-character 145 Note that Printing-ASCII and Katakana-JISX0201 are 94-character
146 charsets, while Japanese-JISX0208 is a 94x94-character charset. 146 charsets, while Japanese-JISX0208 is a 94x94-character charset.
147 147
148 The encoding is as follows: 148 The encoding is as follows:
149 149
150 Character set Representation (PC == position-code) 150 Character set Representation (PC == position-code)
151 ------------- -------------- 151 ------------- --------------
152 Printing-ASCII PC-1 152 Printing-ASCII PC1
153 Japanese-JISX0208 PC-1 + 0x80 / PC-2 + 0x80 153 Japanese-JISX0208 PC1 + 0x80 | PC2 + 0x80
154 Japanese-JISX0201-Kana 0x8E / PC-1 + 0x80 154 Katakana-JISX0201 0x8E | PC1 + 0x80
155 155
156 156
157 B. JIS7 157 B. JIS7
158 158
159 This encompasses the character sets: 159 This encompasses the character sets:
160 - Printing-ASCII 160 - Printing-ASCII
161 - Japanese-JISX0201-Roman (the left half of JISX0201; this 161 - Latin-JISX0201 (the left half of JISX0201; this character set is
162 character set is very similar to Printing-ASCII and is a 162 very similar to Printing-ASCII and is a 94-character charset)
163 94-character charset)
164 - Japanese-JISX0208 163 - Japanese-JISX0208
165 - Japanese-JISX0201-Kana. 164 - Katakana-JISX0201
166 It uses 7-bit bytes. 165 It uses 7-bit bytes.
167 166
168 Unlike Japanese EUC, this is a "modal" encoding, which 167 Unlike Japanese EUC, this is a "modal" encoding, which
169 means that there are multiple states that the encoding can 168 means that there are multiple states that the encoding can
170 be in, which affect how the bytes are to be interpreted. 169 be in, which affect how the bytes are to be interpreted.
171 Special sequences of bytes (called "escape sequences") 170 Special sequences of bytes (called "escape sequences")
172 are used to change states. 171 are used to change states.
173 172
174 The encoding is as follows: 173 The encoding is as follows:
175 174
176 Character set Representation 175 Character set Representation
177 ------------- -------------- 176 ------------- --------------
178 Printing-ASCII PC-1 177 Printing-ASCII PC1
179 Japanese-JISX0201-Roman PC-1 178 Latin-JISX0201 PC1
180 Japanese-JISX0201-Kana PC-1 179 Katakana-JISX0201 PC1
181 Japanese-JISX0208 PC-1 / PC-2 180 Japanese-JISX0208 PC1 | PC2
182 181
183 Escape sequence ASCII equivalent Meaning 182 Escape sequence ASCII equivalent Meaning
184 --------------- ---------------- ------- 183 --------------- ---------------- -------
185 0x1B 0x28 0x42 ESC ( B invoke Printing-ASCII 184 0x1B 0x28 0x42 ESC ( B invoke Printing-ASCII
186 0x1B 0x28 0x4A ESC ( J invoke Japanese-JISX0201-Roman 185 0x1B 0x28 0x4A ESC ( J invoke Latin-JISX0201
187 0x1B 0x28 0x49 ESC ( I invoke Japanese-JISX0201-Kana 186 0x1B 0x28 0x49 ESC ( I invoke Katakana-JISX0201
188 0x1B 0x24 0x42 ESC $ B invoke Japanese-JISX0208 187 0x1B 0x24 0x42 ESC $ B invoke Japanese-JISX0208
189 188
190 Initially, Printing-ASCII is invoked. 189 Initially, Printing-ASCII is invoked.
191 190
192 3. Internal Mule Encodings 191 3. Internal Mule Encodings
331 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */ 330 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */
332 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ 331 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */
333 332
334 /** The following are for 1-byte characters in an official charset. **/ 333 /** The following are for 1-byte characters in an official charset. **/
335 334
336 #define LEADING_BYTE_LATIN_1 0x81 /* Right half of ISO 8859-1 */ 335 #define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */
337 #define LEADING_BYTE_LATIN_2 0x82 /* Right half of ISO 8859-2 */ 336 #define LEADING_BYTE_LATIN_ISO8859_2 0x82 /* Right half of ISO 8859-2 */
338 #define LEADING_BYTE_LATIN_3 0x83 /* Right half of ISO 8859-3 */ 337 #define LEADING_BYTE_LATIN_ISO8859_3 0x83 /* Right half of ISO 8859-3 */
339 #define LEADING_BYTE_LATIN_4 0x84 /* Right half of ISO 8859-4 */ 338 #define LEADING_BYTE_LATIN_ISO8859_4 0x84 /* Right half of ISO 8859-4 */
340 #define LEADING_BYTE_THAI 0x85 /* TIS620-2533 */ 339 #define LEADING_BYTE_THAI_TIS620 0x85 /* TIS620-2533 */
341 #define LEADING_BYTE_GREEK 0x86 /* Right half of ISO 8859-7 */ 340 #define LEADING_BYTE_GREEK_ISO8859_7 0x86 /* Right half of ISO 8859-7 */
342 #define LEADING_BYTE_ARABIC 0x87 /* Right half of ISO 8859-6 */ 341 #define LEADING_BYTE_ARABIC_ISO8859_6 0x87 /* Right half of ISO 8859-6 */
343 #define LEADING_BYTE_HEBREW 0x88 /* Right half of ISO 8859-8 */ 342 #define LEADING_BYTE_HEBREW_ISO8859_8 0x88 /* Right half of ISO 8859-8 */
344 #define LEADING_BYTE_JAPANESE_JISX0201_KANA 0x89 /* Right half of JIS X0201-1976 */ 343 #define LEADING_BYTE_KATAKANA_JISX0201 0x89 /* Right half of JIS X0201-1976 */
345 #define LEADING_BYTE_JAPANESE_JISX0201_ROMAN 0x8A /* Left half of JIS X0201-1976 */ 344 #define LEADING_BYTE_LATIN_JISX0201 0x8A /* Left half of JIS X0201-1976 */
346 #define LEADING_BYTE_CYRILLIC 0x8C /* Right half of ISO 8859-5 */ 345 #define LEADING_BYTE_CYRILLIC_ISO8859_5 0x8C /* Right half of ISO 8859-5 */
347 #define LEADING_BYTE_LATIN_5 0x8D /* Right half of ISO 8859-9 */ 346 #define LEADING_BYTE_LATIN_ISO8859_9 0x8D /* Right half of ISO 8859-9 */
348 347
349 #define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_1 348 #define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_1
350 #define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_5 349 #define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_9
351 350
352 /** The following are for 2-byte characters in an official charset. **/ 351 /** The following are for 2-byte characters in an official charset. **/
353 352
354 #define LEADING_BYTE_JAPANESE_JISX0208_1978 0x90/* Japanese JIS X0208-1978 */ 353 #define LEADING_BYTE_JAPANESE_JISX0208_1978 0x90/* Japanese JIS X0208-1978 */
355 #define LEADING_BYTE_CHINESE_GB 0x91 /* Chinese Hanzi GB2312-1980 */ 354 #define LEADING_BYTE_CHINESE_GB2312 0x91 /* Chinese Hanzi GB2312-1980 */
356 #define LEADING_BYTE_JAPANESE_JISX0208 0x92 /* Japanese JIS X0208-1983 */ 355 #define LEADING_BYTE_JAPANESE_JISX0208 0x92 /* Japanese JIS X0208-1983 */
357 #define LEADING_BYTE_KOREAN_KSC5601 0x93 /* Hangul KS C5601-1987 */ 356 #define LEADING_BYTE_KOREAN_KSC5601 0x93 /* Hangul KS C5601-1987 */
358 #define LEADING_BYTE_JAPANESE_JISX0212 0x94 /* Japanese JIS X0212-1990 */ 357 #define LEADING_BYTE_JAPANESE_JISX0212 0x94 /* Japanese JIS X0212-1990 */
359 #define LEADING_BYTE_CHINESE_CNS11643_1 0x95 /* Chinese CNS11643 Set 1 */ 358 #define LEADING_BYTE_CHINESE_CNS11643_1 0x95 /* Chinese CNS11643 Set 1 */
360 #define LEADING_BYTE_CHINESE_CNS11643_2 0x96 /* Chinese CNS11643 Set 2 */ 359 #define LEADING_BYTE_CHINESE_CNS11643_2 0x96 /* Chinese CNS11643 Set 2 */
401 } 400 }
402 401
403 /* Given a private leading byte, return the leading byte prefix stored 402 /* Given a private leading byte, return the leading byte prefix stored
404 in a string */ 403 in a string */
405 404
406 #define PRIVATE_LEADING_BYTE_PREFIX(lb) \ 405 #define PRIVATE_LEADING_BYTE_PREFIX(lb) \
407 ((lb) < MIN_LEADING_BYTE_PRIVATE_2 ? PRE_LEADING_BYTE_PRIVATE_1 \ 406 ((lb) < MIN_LEADING_BYTE_PRIVATE_2 ? \
408 : PRE_LEADING_BYTE_PRIVATE_2) 407 PRE_LEADING_BYTE_PRIVATE_1 : \
409 408 PRE_LEADING_BYTE_PRIVATE_2)
410
411 409
412 410
413 /************************************************************************/ 411 /************************************************************************/
414 /* Operations on individual bytes */ 412 /* Operations on individual bytes */
415 /* of any format */ 413 /* of any format */
484 #define CHARSETP(x) RECORDP (x, charset) 482 #define CHARSETP(x) RECORDP (x, charset)
485 #define GC_CHARSETP(x) GC_RECORDP (x, charset) 483 #define GC_CHARSETP(x) GC_RECORDP (x, charset)
486 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset) 484 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset)
487 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset) 485 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset)
488 486
489 #define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */ 487 #define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */
490 #define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */ 488 #define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */
491 #define CHARSET_TYPE_94X94 2 /* This charset includes 94x94 characters. */ 489 #define CHARSET_TYPE_94X94 2 /* This charset includes 94x94 characters. */
492 #define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */ 490 #define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */
493 491
494 #define CHARSET_LEFT_TO_RIGHT 0 492 #define CHARSET_LEFT_TO_RIGHT 0
495 #define CHARSET_RIGHT_TO_LEFT 1 493 #define CHARSET_RIGHT_TO_LEFT 1
496 494
497 #define CHARSET_NAME(cs) ((cs)->name) 495 #define CHARSET_NAME(cs) ((cs)->name)
498 #define CHARSET_LEADING_BYTE(cs) ((cs)->leading_byte) 496 #define CHARSET_LEADING_BYTE(cs) ((cs)->leading_byte)
499 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes) 497 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes)
500 #define CHARSET_COLUMNS(cs) ((cs)->columns) 498 #define CHARSET_COLUMNS(cs) ((cs)->columns)
505 #define CHARSET_DOC_STRING(cs) ((cs)->doc_string) 503 #define CHARSET_DOC_STRING(cs) ((cs)->doc_string)
506 #define CHARSET_REGISTRY(cs) ((cs)->registry) 504 #define CHARSET_REGISTRY(cs) ((cs)->registry)
507 #define CHARSET_CCL_PROGRAM(cs) ((cs)->ccl_program) 505 #define CHARSET_CCL_PROGRAM(cs) ((cs)->ccl_program)
508 #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset) 506 #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset)
509 507
510 INLINE int CHARSET_DIMENSION (struct Lisp_Charset *cs);
511 INLINE int 508 INLINE int
512 CHARSET_DIMENSION (struct Lisp_Charset *cs) 509 CHARSET_DIMENSION (struct Lisp_Charset *cs)
513 { 510 {
514 return (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || 511 /* Optimized using inside knowledge of CHARSET_TYPE values */
515 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2; 512 return (CHARSET_TYPE (cs) <= CHARSET_TYPE_96) ? 1 : 2;
516 } 513 }
517 514
518 INLINE int CHARSET_CHARS (struct Lisp_Charset *cs);
519 INLINE int 515 INLINE int
520 CHARSET_CHARS (struct Lisp_Charset *cs) 516 CHARSET_CHARS (struct Lisp_Charset *cs)
521 { 517 {
522 return (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || 518 /* Optimized using inside knowledge of CHARSET_TYPE values */
523 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96; 519 return (CHARSET_TYPE (cs) & 0x1) ? 96 : 94;
524 } 520 }
525 521
526 #define CHARSET_PRIVATE_P(cs) \ 522 #define CHARSET_PRIVATE_P(cs) LEADING_BYTE_PRIVATE_P (CHARSET_LEADING_BYTE (cs))
527 LEADING_BYTE_PRIVATE_P (CHARSET_LEADING_BYTE (cs))
528 523
529 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs)) 524 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs))
530 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs)) 525 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs))
531 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs)) 526 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs))
532 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs)) 527 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs))
596 #define REP_BYTES_BY_FIRST_BYTE(fb) (rep_bytes_by_first_byte[fb]) 591 #define REP_BYTES_BY_FIRST_BYTE(fb) (rep_bytes_by_first_byte[fb])
597 #endif 592 #endif
598 593
599 extern Lisp_Object Vcharset_ascii; 594 extern Lisp_Object Vcharset_ascii;
600 extern Lisp_Object Vcharset_control_1; 595 extern Lisp_Object Vcharset_control_1;
601 extern Lisp_Object Vcharset_latin_1; 596 extern Lisp_Object Vcharset_latin_iso8859_1;
602 extern Lisp_Object Vcharset_latin_2; 597 extern Lisp_Object Vcharset_latin_iso8859_2;
603 extern Lisp_Object Vcharset_latin_3; 598 extern Lisp_Object Vcharset_latin_iso8859_3;
604 extern Lisp_Object Vcharset_latin_4; 599 extern Lisp_Object Vcharset_latin_iso8859_4;
605 extern Lisp_Object Vcharset_latin_5; 600 extern Lisp_Object Vcharset_cyrillic_iso8859_5;
606 extern Lisp_Object Vcharset_greek; 601 extern Lisp_Object Vcharset_arabic_iso8859_6;
607 extern Lisp_Object Vcharset_thai; 602 extern Lisp_Object Vcharset_greek_iso8859_7;
608 extern Lisp_Object Vcharset_arabic; 603 extern Lisp_Object Vcharset_hebrew_iso8859_8;
609 extern Lisp_Object Vcharset_hebrew; 604 extern Lisp_Object Vcharset_latin_iso8859_9;
610 extern Lisp_Object Vcharset_cyrillic; 605 extern Lisp_Object Vcharset_thai_tis620;
611 extern Lisp_Object Vcharset_japanese_jisx0201_kana; 606 extern Lisp_Object Vcharset_katakana_jisx0201;
612 extern Lisp_Object Vcharset_japanese_jisx0201_roman; 607 extern Lisp_Object Vcharset_latin_jisx0201;
613 extern Lisp_Object Vcharset_japanese_jisx0208_1978; 608 extern Lisp_Object Vcharset_japanese_jisx0208_1978;
614 extern Lisp_Object Vcharset_japanese_jisx0208; 609 extern Lisp_Object Vcharset_japanese_jisx0208;
615 extern Lisp_Object Vcharset_japanese_jisx0212; 610 extern Lisp_Object Vcharset_japanese_jisx0212;
616 extern Lisp_Object Vcharset_korean_ksc5601; 611 extern Lisp_Object Vcharset_korean_ksc5601;
617 extern Lisp_Object Vcharset_chinese_gb; 612 extern Lisp_Object Vcharset_chinese_gb2312;
618 extern Lisp_Object Vcharset_chinese_big5_1; 613 extern Lisp_Object Vcharset_chinese_big5_1;
619 extern Lisp_Object Vcharset_chinese_big5_2; 614 extern Lisp_Object Vcharset_chinese_big5_2;
620 extern Lisp_Object Vcharset_chinese_cns11643_1; 615 extern Lisp_Object Vcharset_chinese_cns11643_1;
621 extern Lisp_Object Vcharset_chinese_cns11643_2; 616 extern Lisp_Object Vcharset_chinese_cns11643_2;
622 extern Lisp_Object Vcharset_composite; 617 extern Lisp_Object Vcharset_composite;