Mercurial > hg > xemacs-beta
comparison src/mule-charset.c @ 3439:d1754e7f0cea
[xemacs-hg @ 2006-06-03 17:50:39 by aidan]
Just-in-time Unicode code point support.
author | aidan |
---|---|
date | Sat, 03 Jun 2006 17:51:06 +0000 |
parents | facf3239ba30 |
children | 98af8a976fc3 |
comparison
equal
deleted
inserted
replaced
3438:14fbcab7c67b | 3439:d1754e7f0cea |
---|---|
188 static Lisp_Object | 188 static Lisp_Object |
189 make_charset (int id, Lisp_Object name, int rep_bytes, | 189 make_charset (int id, Lisp_Object name, int rep_bytes, |
190 int type, int columns, int graphic, | 190 int type, int columns, int graphic, |
191 Ibyte final, int direction, Lisp_Object short_name, | 191 Ibyte final, int direction, Lisp_Object short_name, |
192 Lisp_Object long_name, Lisp_Object doc, | 192 Lisp_Object long_name, Lisp_Object doc, |
193 Lisp_Object reg, int overwrite) | 193 Lisp_Object reg, int overwrite, int encode_as_utf_8) |
194 { | 194 { |
195 Lisp_Object obj; | 195 Lisp_Object obj; |
196 Lisp_Charset *cs; | 196 Lisp_Charset *cs; |
197 | 197 |
198 if (!overwrite) | 198 if (!overwrite) |
238 CHARSET_COLUMNS (cs) = columns; | 238 CHARSET_COLUMNS (cs) = columns; |
239 CHARSET_GRAPHIC (cs) = graphic; | 239 CHARSET_GRAPHIC (cs) = graphic; |
240 CHARSET_FINAL (cs) = final; | 240 CHARSET_FINAL (cs) = final; |
241 CHARSET_DOC_STRING (cs) = doc; | 241 CHARSET_DOC_STRING (cs) = doc; |
242 CHARSET_REGISTRY (cs) = reg; | 242 CHARSET_REGISTRY (cs) = reg; |
| | 243 CHARSET_ENCODE_AS_UTF_8 (cs) = encode_as_utf_8 ? 1 : 0; |
243 CHARSET_CCL_PROGRAM (cs) = Qnil; | 244 CHARSET_CCL_PROGRAM (cs) = Qnil; |
244 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; | 245 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; |
245 | 246 |
246 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || | 247 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || |
247 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2; | 248 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2; |
452 this charset into an index into the font. This is in | 453 this charset into an index into the font. This is in |
453 addition to the `graphic' property. The CCL program | 454 addition to the `graphic' property. The CCL program |
454 is passed the octets of the character, with the high | 455 is passed the octets of the character, with the high |
455 bit cleared and set depending upon whether the value | 456 bit cleared and set depending upon whether the value |
456 of the `graphic' property is 0 or 1. | 457 of the `graphic' property is 0 or 1. |
| | 458 `encode-as-utf-8' |
| | 459 If non-nil, the charset will be written out using the UTF-8 |
| | 460 escape syntax in ISO 2022-oriented coding systems. Used for |
| | 461 supporting characters we know are part of Unicode but not of |
| | 462 any other known character set in escape-quoted and compound |
| | 463 text. |
457 */ | 464 */ |
458 (name, doc_string, props)) | 465 (name, doc_string, props)) |
459 { | 466 { |
460 int id, dimension = 1, chars = 94, graphic = 0, columns = -1; | 467 int id, dimension = 1, chars = 94, graphic = 0, columns = -1; |
461 Ibyte final = 0; | 468 Ibyte final = 0; |
463 int type; | 470 int type; |
464 Lisp_Object registry = Qnil; | 471 Lisp_Object registry = Qnil; |
465 Lisp_Object charset = Qnil; | 472 Lisp_Object charset = Qnil; |
466 Lisp_Object ccl_program = Qnil; | 473 Lisp_Object ccl_program = Qnil; |
467 Lisp_Object short_name = Qnil, long_name = Qnil; | 474 Lisp_Object short_name = Qnil, long_name = Qnil; |
| | 475 int encode_as_utf_8 = 0; |
468 Lisp_Object existing_charset; | 476 Lisp_Object existing_charset; |
469 int temporary = UNBOUNDP (name); | 477 int temporary = UNBOUNDP (name); |
470 | 478 |
471 /* NOTE: name == Qunbound is a directive from the iso2022 code to | 479 /* NOTE: name == Qunbound is a directive from the iso2022 code to |
472 create a temporary charset for an unknown final. We allow the final | 480 create a temporary charset for an unknown final. We allow the final |
544 direction = CHARSET_RIGHT_TO_LEFT; | 552 direction = CHARSET_RIGHT_TO_LEFT; |
545 else | 553 else |
546 invalid_constant ("Invalid value for `direction'", value); | 554 invalid_constant ("Invalid value for `direction'", value); |
547 } | 555 } |
548 | 556 |
| | 557 else if (EQ (keyword, Qencode_as_utf_8)) |
| | 558 { |
| | 559 encode_as_utf_8 = NILP (value) ? 0 : 1; |
| | 560 } |
| | 561 |
549 else if (EQ (keyword, Qfinal)) | 562 else if (EQ (keyword, Qfinal)) |
550 { | 563 { |
551 CHECK_CHAR_COERCE_INT (value); | 564 CHECK_CHAR_COERCE_INT (value); |
552 final = XCHAR (value); | 565 final = XCHAR (value); |
553 if (final < '0' || final > '~') | 566 if (final < '0' || final > '~') |
554 invalid_constant ("Invalid value for `final'", value); | 567 invalid_constant ("Invalid value for `final'", value); |
555 } | 568 } |
556 | |
557 else if (EQ (keyword, Qccl_program)) | 569 else if (EQ (keyword, Qccl_program)) |
558 { | 570 { |
559 struct ccl_program test_ccl; | 571 struct ccl_program test_ccl; |
560 | 572 |
561 if (setup_ccl_program (&test_ccl, value) < 0) | 573 if (setup_ccl_program (&test_ccl, value) < 0) |
610 if (columns == -1) | 622 if (columns == -1) |
611 columns = dimension; | 623 columns = dimension; |
612 | 624 |
613 charset = make_charset (id, name, dimension + 2, type, columns, graphic, | 625 charset = make_charset (id, name, dimension + 2, type, columns, graphic, |
614 final, direction, short_name, long_name, | 626 final, direction, short_name, long_name, |
615 doc_string, registry, !NILP (existing_charset)); | 627 doc_string, registry, !NILP (existing_charset), |
| | 628 encode_as_utf_8); |
616 | 629 |
617 XCHARSET (charset)->temporary = temporary; | 630 XCHARSET (charset)->temporary = temporary; |
618 if (!NILP (ccl_program)) | 631 if (!NILP (ccl_program)) |
619 XCHARSET_CCL_PROGRAM (charset) = ccl_program; | 632 XCHARSET_CCL_PROGRAM (charset) = ccl_program; |
620 | 633 |
639 NEW-NAME is the name of the new charset. Return the new charset. | 652 NEW-NAME is the name of the new charset. Return the new charset. |
640 */ | 653 */ |
641 (charset, new_name)) | 654 (charset, new_name)) |
642 { | 655 { |
643 Lisp_Object new_charset = Qnil; | 656 Lisp_Object new_charset = Qnil; |
644 int id, dimension, columns, graphic; | 657 int id, dimension, columns, graphic, encode_as_utf_8; |
645 Ibyte final; | 658 Ibyte final; |
646 int direction, type; | 659 int direction, type; |
647 Lisp_Object registry, doc_string, short_name, long_name; | 660 Lisp_Object registry, doc_string, short_name, long_name; |
648 Lisp_Charset *cs; | 661 Lisp_Charset *cs; |
649 | 662 |
670 direction = CHARSET_LEFT_TO_RIGHT; | 683 direction = CHARSET_LEFT_TO_RIGHT; |
671 doc_string = CHARSET_DOC_STRING (cs); | 684 doc_string = CHARSET_DOC_STRING (cs); |
672 short_name = CHARSET_SHORT_NAME (cs); | 685 short_name = CHARSET_SHORT_NAME (cs); |
673 long_name = CHARSET_LONG_NAME (cs); | 686 long_name = CHARSET_LONG_NAME (cs); |
674 registry = CHARSET_REGISTRY (cs); | 687 registry = CHARSET_REGISTRY (cs); |
| | 688 encode_as_utf_8 = CHARSET_ENCODE_AS_UTF_8 (cs); |
675 | 689 |
676 new_charset = make_charset (id, new_name, dimension + 2, type, columns, | 690 new_charset = make_charset (id, new_name, dimension + 2, type, columns, |
677 graphic, final, direction, short_name, long_name, | 691 graphic, final, direction, short_name, long_name, |
678 doc_string, registry, 0); | 692 doc_string, registry, 0, encode_as_utf_8); |
679 | 693 |
680 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset; | 694 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset; |
681 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset; | 695 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset; |
682 | 696 |
683 return new_charset; | 697 return new_charset; |
805 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs)); | 819 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs)); |
806 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs)); | 820 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs)); |
807 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs)); | 821 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs)); |
808 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs)); | 822 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs)); |
809 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs); | 823 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs); |
| | 824 if (EQ (prop, Qencode_as_utf_8)) |
| | 825 return CHARSET_ENCODE_AS_UTF_8 (cs) ? Qt : Qnil; |
810 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs); | 826 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs); |
811 if (EQ (prop, Qdirection)) | 827 if (EQ (prop, Qdirection)) |
812 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l; | 828 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l; |
813 if (EQ (prop, Qreverse_direction_charset)) | 829 if (EQ (prop, Qreverse_direction_charset)) |
814 { | 830 { |
1038 CHARSET_TYPE_94, 1, 0, 'B', | 1054 CHARSET_TYPE_94, 1, 0, 'B', |
1039 CHARSET_LEFT_TO_RIGHT, | 1055 CHARSET_LEFT_TO_RIGHT, |
1040 build_string ("ASCII"), | 1056 build_string ("ASCII"), |
1041 build_msg_string ("ASCII"), | 1057 build_msg_string ("ASCII"), |
1042 build_msg_string ("ASCII (ISO646 IRV)"), | 1058 build_msg_string ("ASCII (ISO646 IRV)"), |
1043 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"), 0); | 1059 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"), 0, 0); |
1044 staticpro (&Vcharset_control_1); | 1060 staticpro (&Vcharset_control_1); |
1045 Vcharset_control_1 = | 1061 Vcharset_control_1 = |
1046 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2, | 1062 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2, |
1047 CHARSET_TYPE_94, 1, 1, 0, | 1063 CHARSET_TYPE_94, 1, 1, 0, |
1048 CHARSET_LEFT_TO_RIGHT, | 1064 CHARSET_LEFT_TO_RIGHT, |
1049 build_string ("C1"), | 1065 build_string ("C1"), |
1050 build_msg_string ("Control characters"), | 1066 build_msg_string ("Control characters"), |
1051 build_msg_string ("Control characters 128-191"), | 1067 build_msg_string ("Control characters 128-191"), |
1052 build_string (""), 0); | 1068 build_string (""), 0, 0); |
1053 staticpro (&Vcharset_latin_iso8859_1); | 1069 staticpro (&Vcharset_latin_iso8859_1); |
1054 Vcharset_latin_iso8859_1 = | 1070 Vcharset_latin_iso8859_1 = |
1055 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2, | 1071 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2, |
1056 CHARSET_TYPE_96, 1, 1, 'A', | 1072 CHARSET_TYPE_96, 1, 1, 'A', |
1057 CHARSET_LEFT_TO_RIGHT, | 1073 CHARSET_LEFT_TO_RIGHT, |
1058 build_string ("Latin-1"), | 1074 build_string ("Latin-1"), |
1059 build_msg_string ("ISO8859-1 (Latin-1)"), | 1075 build_msg_string ("ISO8859-1 (Latin-1)"), |
1060 build_msg_string ("ISO8859-1 (Latin-1)"), | 1076 build_msg_string ("ISO8859-1 (Latin-1)"), |
1061 build_string ("iso8859-1"), 0); | 1077 build_string ("iso8859-1"), 0, 0); |
1062 staticpro (&Vcharset_latin_iso8859_2); | 1078 staticpro (&Vcharset_latin_iso8859_2); |
1063 Vcharset_latin_iso8859_2 = | 1079 Vcharset_latin_iso8859_2 = |
1064 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2, | 1080 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2, |
1065 CHARSET_TYPE_96, 1, 1, 'B', | 1081 CHARSET_TYPE_96, 1, 1, 'B', |
1066 CHARSET_LEFT_TO_RIGHT, | 1082 CHARSET_LEFT_TO_RIGHT, |
1067 build_string ("Latin-2"), | 1083 build_string ("Latin-2"), |
1068 build_msg_string ("ISO8859-2 (Latin-2)"), | 1084 build_msg_string ("ISO8859-2 (Latin-2)"), |
1069 build_msg_string ("ISO8859-2 (Latin-2)"), | 1085 build_msg_string ("ISO8859-2 (Latin-2)"), |
1070 build_string ("iso8859-2"), 0); | 1086 build_string ("iso8859-2"), 0, 0); |
1071 staticpro (&Vcharset_latin_iso8859_3); | 1087 staticpro (&Vcharset_latin_iso8859_3); |
1072 Vcharset_latin_iso8859_3 = | 1088 Vcharset_latin_iso8859_3 = |
1073 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2, | 1089 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2, |
1074 CHARSET_TYPE_96, 1, 1, 'C', | 1090 CHARSET_TYPE_96, 1, 1, 'C', |
1075 CHARSET_LEFT_TO_RIGHT, | 1091 CHARSET_LEFT_TO_RIGHT, |
1076 build_string ("Latin-3"), | 1092 build_string ("Latin-3"), |
1077 build_msg_string ("ISO8859-3 (Latin-3)"), | 1093 build_msg_string ("ISO8859-3 (Latin-3)"), |
1078 build_msg_string ("ISO8859-3 (Latin-3)"), | 1094 build_msg_string ("ISO8859-3 (Latin-3)"), |
1079 build_string ("iso8859-3"), 0); | 1095 build_string ("iso8859-3"), 0, 0); |
1080 staticpro (&Vcharset_latin_iso8859_4); | 1096 staticpro (&Vcharset_latin_iso8859_4); |
1081 Vcharset_latin_iso8859_4 = | 1097 Vcharset_latin_iso8859_4 = |
1082 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2, | 1098 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2, |
1083 CHARSET_TYPE_96, 1, 1, 'D', | 1099 CHARSET_TYPE_96, 1, 1, 'D', |
1084 CHARSET_LEFT_TO_RIGHT, | 1100 CHARSET_LEFT_TO_RIGHT, |
1085 build_string ("Latin-4"), | 1101 build_string ("Latin-4"), |
1086 build_msg_string ("ISO8859-4 (Latin-4)"), | 1102 build_msg_string ("ISO8859-4 (Latin-4)"), |
1087 build_msg_string ("ISO8859-4 (Latin-4)"), | 1103 build_msg_string ("ISO8859-4 (Latin-4)"), |
1088 build_string ("iso8859-4"), 0); | 1104 build_string ("iso8859-4"), 0, 0); |
1089 staticpro (&Vcharset_thai_tis620); | 1105 staticpro (&Vcharset_thai_tis620); |
1090 Vcharset_thai_tis620 = | 1106 Vcharset_thai_tis620 = |
1091 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2, | 1107 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2, |
1092 CHARSET_TYPE_96, 1, 1, 'T', | 1108 CHARSET_TYPE_96, 1, 1, 'T', |
1093 CHARSET_LEFT_TO_RIGHT, | 1109 CHARSET_LEFT_TO_RIGHT, |
1094 build_string ("TIS620"), | 1110 build_string ("TIS620"), |
1095 build_msg_string ("TIS620 (Thai)"), | 1111 build_msg_string ("TIS620 (Thai)"), |
1096 build_msg_string ("TIS620.2529 (Thai)"), | 1112 build_msg_string ("TIS620.2529 (Thai)"), |
1097 build_string ("tis620"),0); | 1113 build_string ("tis620"), 0, 0); |
1098 staticpro (&Vcharset_greek_iso8859_7); | 1114 staticpro (&Vcharset_greek_iso8859_7); |
1099 Vcharset_greek_iso8859_7 = | 1115 Vcharset_greek_iso8859_7 = |
1100 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2, | 1116 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2, |
1101 CHARSET_TYPE_96, 1, 1, 'F', | 1117 CHARSET_TYPE_96, 1, 1, 'F', |
1102 CHARSET_LEFT_TO_RIGHT, | 1118 CHARSET_LEFT_TO_RIGHT, |
1103 build_string ("ISO8859-7"), | 1119 build_string ("ISO8859-7"), |
1104 build_msg_string ("ISO8859-7 (Greek)"), | 1120 build_msg_string ("ISO8859-7 (Greek)"), |
1105 build_msg_string ("ISO8859-7 (Greek)"), | 1121 build_msg_string ("ISO8859-7 (Greek)"), |
1106 build_string ("iso8859-7"), 0); | 1122 build_string ("iso8859-7"), 0, 0); |
1107 staticpro (&Vcharset_arabic_iso8859_6); | 1123 staticpro (&Vcharset_arabic_iso8859_6); |
1108 Vcharset_arabic_iso8859_6 = | 1124 Vcharset_arabic_iso8859_6 = |
1109 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2, | 1125 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2, |
1110 CHARSET_TYPE_96, 1, 1, 'G', | 1126 CHARSET_TYPE_96, 1, 1, 'G', |
1111 CHARSET_RIGHT_TO_LEFT, | 1127 CHARSET_RIGHT_TO_LEFT, |
1112 build_string ("ISO8859-6"), | 1128 build_string ("ISO8859-6"), |
1113 build_msg_string ("ISO8859-6 (Arabic)"), | 1129 build_msg_string ("ISO8859-6 (Arabic)"), |
1114 build_msg_string ("ISO8859-6 (Arabic)"), | 1130 build_msg_string ("ISO8859-6 (Arabic)"), |
1115 build_string ("iso8859-6"), 0); | 1131 build_string ("iso8859-6"), 0, 0); |
1116 staticpro (&Vcharset_hebrew_iso8859_8); | 1132 staticpro (&Vcharset_hebrew_iso8859_8); |
1117 Vcharset_hebrew_iso8859_8 = | 1133 Vcharset_hebrew_iso8859_8 = |
1118 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2, | 1134 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2, |
1119 CHARSET_TYPE_96, 1, 1, 'H', | 1135 CHARSET_TYPE_96, 1, 1, 'H', |
1120 CHARSET_RIGHT_TO_LEFT, | 1136 CHARSET_RIGHT_TO_LEFT, |
1121 build_string ("ISO8859-8"), | 1137 build_string ("ISO8859-8"), |
1122 build_msg_string ("ISO8859-8 (Hebrew)"), | 1138 build_msg_string ("ISO8859-8 (Hebrew)"), |
1123 build_msg_string ("ISO8859-8 (Hebrew)"), | 1139 build_msg_string ("ISO8859-8 (Hebrew)"), |
1124 build_string ("iso8859-8"), 0); | 1140 build_string ("iso8859-8"), 0, 0); |
1125 staticpro (&Vcharset_katakana_jisx0201); | 1141 staticpro (&Vcharset_katakana_jisx0201); |
1126 Vcharset_katakana_jisx0201 = | 1142 Vcharset_katakana_jisx0201 = |
1127 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2, | 1143 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2, |
1128 CHARSET_TYPE_94, 1, 1, 'I', | 1144 CHARSET_TYPE_94, 1, 1, 'I', |
1129 CHARSET_LEFT_TO_RIGHT, | 1145 CHARSET_LEFT_TO_RIGHT, |
1130 build_string ("JISX0201 Kana"), | 1146 build_string ("JISX0201 Kana"), |
1131 build_msg_string ("JISX0201.1976 (Japanese Kana)"), | 1147 build_msg_string ("JISX0201.1976 (Japanese Kana)"), |
1132 build_msg_string ("JISX0201.1976 Japanese Kana"), | 1148 build_msg_string ("JISX0201.1976 Japanese Kana"), |
1133 build_string ("jisx0201.1976"), 0); | 1149 build_string ("jisx0201.1976"), 0, 0); |
1134 staticpro (&Vcharset_latin_jisx0201); | 1150 staticpro (&Vcharset_latin_jisx0201); |
1135 Vcharset_latin_jisx0201 = | 1151 Vcharset_latin_jisx0201 = |
1136 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2, | 1152 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2, |
1137 CHARSET_TYPE_94, 1, 0, 'J', | 1153 CHARSET_TYPE_94, 1, 0, 'J', |
1138 CHARSET_LEFT_TO_RIGHT, | 1154 CHARSET_LEFT_TO_RIGHT, |
1139 build_string ("JISX0201 Roman"), | 1155 build_string ("JISX0201 Roman"), |
1140 build_msg_string ("JISX0201.1976 (Japanese Roman)"), | 1156 build_msg_string ("JISX0201.1976 (Japanese Roman)"), |
1141 build_msg_string ("JISX0201.1976 Japanese Roman"), | 1157 build_msg_string ("JISX0201.1976 Japanese Roman"), |
1142 build_string ("jisx0201.1976"), 0); | 1158 build_string ("jisx0201.1976"), 0, 0); |
1143 staticpro (&Vcharset_cyrillic_iso8859_5); | 1159 staticpro (&Vcharset_cyrillic_iso8859_5); |
1144 Vcharset_cyrillic_iso8859_5 = | 1160 Vcharset_cyrillic_iso8859_5 = |
1145 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2, | 1161 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2, |
1146 CHARSET_TYPE_96, 1, 1, 'L', | 1162 CHARSET_TYPE_96, 1, 1, 'L', |
1147 CHARSET_LEFT_TO_RIGHT, | 1163 CHARSET_LEFT_TO_RIGHT, |
1148 build_string ("ISO8859-5"), | 1164 build_string ("ISO8859-5"), |
1149 build_msg_string ("ISO8859-5 (Cyrillic)"), | 1165 build_msg_string ("ISO8859-5 (Cyrillic)"), |
1150 build_msg_string ("ISO8859-5 (Cyrillic)"), | 1166 build_msg_string ("ISO8859-5 (Cyrillic)"), |
1151 build_string ("iso8859-5"), 0); | 1167 build_string ("iso8859-5"), 0, 0); |
1152 staticpro (&Vcharset_latin_iso8859_9); | 1168 staticpro (&Vcharset_latin_iso8859_9); |
1153 Vcharset_latin_iso8859_9 = | 1169 Vcharset_latin_iso8859_9 = |
1154 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2, | 1170 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2, |
1155 CHARSET_TYPE_96, 1, 1, 'M', | 1171 CHARSET_TYPE_96, 1, 1, 'M', |
1156 CHARSET_LEFT_TO_RIGHT, | 1172 CHARSET_LEFT_TO_RIGHT, |
1157 build_string ("Latin-5"), | 1173 build_string ("Latin-5"), |
1158 build_msg_string ("ISO8859-9 (Latin-5)"), | 1174 build_msg_string ("ISO8859-9 (Latin-5)"), |
1159 build_msg_string ("ISO8859-9 (Latin-5)"), | 1175 build_msg_string ("ISO8859-9 (Latin-5)"), |
1160 build_string ("iso8859-9"), 0); | 1176 build_string ("iso8859-9"), 0, 0); |
1161 staticpro (&Vcharset_latin_iso8859_15); | 1177 staticpro (&Vcharset_latin_iso8859_15); |
1162 Vcharset_latin_iso8859_15 = | 1178 Vcharset_latin_iso8859_15 = |
1163 make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2, | 1179 make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2, |
1164 CHARSET_TYPE_96, 1, 1, 'b', | 1180 CHARSET_TYPE_96, 1, 1, 'b', |
1165 CHARSET_LEFT_TO_RIGHT, | 1181 CHARSET_LEFT_TO_RIGHT, |
1166 build_string ("Latin-9"), | 1182 build_string ("Latin-9"), |
1167 build_msg_string ("ISO8859-15 (Latin-9)"), | 1183 build_msg_string ("ISO8859-15 (Latin-9)"), |
1168 build_msg_string ("ISO8859-15 (Latin-9)"), | 1184 build_msg_string ("ISO8859-15 (Latin-9)"), |
1169 build_string ("iso8859-15"), 0); | 1185 build_string ("iso8859-15"), 0, 0); |
1170 staticpro (&Vcharset_japanese_jisx0208_1978); | 1186 staticpro (&Vcharset_japanese_jisx0208_1978); |
1171 Vcharset_japanese_jisx0208_1978 = | 1187 Vcharset_japanese_jisx0208_1978 = |
1172 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3, | 1188 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3, |
1173 CHARSET_TYPE_94X94, 2, 0, '@', | 1189 CHARSET_TYPE_94X94, 2, 0, '@', |
1174 CHARSET_LEFT_TO_RIGHT, | 1190 CHARSET_LEFT_TO_RIGHT, |
1175 build_string ("JISX0208.1978"), | 1191 build_string ("JISX0208.1978"), |
1176 build_msg_string ("JISX0208.1978 (Japanese)"), | 1192 build_msg_string ("JISX0208.1978 (Japanese)"), |
1177 build_msg_string | 1193 build_msg_string |
1178 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"), | 1194 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"), |
1179 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), 0); | 1195 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), 0, 0); |
1180 staticpro (&Vcharset_chinese_gb2312); | 1196 staticpro (&Vcharset_chinese_gb2312); |
1181 Vcharset_chinese_gb2312 = | 1197 Vcharset_chinese_gb2312 = |
1182 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3, | 1198 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3, |
1183 CHARSET_TYPE_94X94, 2, 0, 'A', | 1199 CHARSET_TYPE_94X94, 2, 0, 'A', |
1184 CHARSET_LEFT_TO_RIGHT, | 1200 CHARSET_LEFT_TO_RIGHT, |
1185 build_string ("GB2312"), | 1201 build_string ("GB2312"), |
1186 build_msg_string ("GB2312)"), | 1202 build_msg_string ("GB2312)"), |
1187 build_msg_string ("GB2312 Chinese simplified"), | 1203 build_msg_string ("GB2312 Chinese simplified"), |
1188 build_string ("gb2312"), 0); | 1204 build_string ("gb2312"), 0, 0); |
1189 staticpro (&Vcharset_japanese_jisx0208); | 1205 staticpro (&Vcharset_japanese_jisx0208); |
1190 Vcharset_japanese_jisx0208 = | 1206 Vcharset_japanese_jisx0208 = |
1191 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3, | 1207 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3, |
1192 CHARSET_TYPE_94X94, 2, 0, 'B', | 1208 CHARSET_TYPE_94X94, 2, 0, 'B', |
1193 CHARSET_LEFT_TO_RIGHT, | 1209 CHARSET_LEFT_TO_RIGHT, |
1194 build_string ("JISX0208"), | 1210 build_string ("JISX0208"), |
1195 build_msg_string ("JISX0208.1983/1990 (Japanese)"), | 1211 build_msg_string ("JISX0208.1983/1990 (Japanese)"), |
1196 build_msg_string ("JISX0208.1983/1990 Japanese Kanji"), | 1212 build_msg_string ("JISX0208.1983/1990 Japanese Kanji"), |
1197 build_string ("jisx0208.19\\(83\\|90\\)"), 0); | 1213 build_string ("jisx0208.19\\(83\\|90\\)"), 0, 0); |
1198 staticpro (&Vcharset_korean_ksc5601); | 1214 staticpro (&Vcharset_korean_ksc5601); |
1199 Vcharset_korean_ksc5601 = | 1215 Vcharset_korean_ksc5601 = |
1200 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3, | 1216 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3, |
1201 CHARSET_TYPE_94X94, 2, 0, 'C', | 1217 CHARSET_TYPE_94X94, 2, 0, 'C', |
1202 CHARSET_LEFT_TO_RIGHT, | 1218 CHARSET_LEFT_TO_RIGHT, |
1203 build_string ("KSC5601"), | 1219 build_string ("KSC5601"), |
1204 build_msg_string ("KSC5601 (Korean"), | 1220 build_msg_string ("KSC5601 (Korean"), |
1205 build_msg_string ("KSC5601 Korean Hangul and Hanja"), | 1221 build_msg_string ("KSC5601 Korean Hangul and Hanja"), |
1206 build_string ("ksc5601"), 0); | 1222 build_string ("ksc5601"), 0, 0); |
1207 staticpro (&Vcharset_japanese_jisx0212); | 1223 staticpro (&Vcharset_japanese_jisx0212); |
1208 Vcharset_japanese_jisx0212 = | 1224 Vcharset_japanese_jisx0212 = |
1209 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3, | 1225 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3, |
1210 CHARSET_TYPE_94X94, 2, 0, 'D', | 1226 CHARSET_TYPE_94X94, 2, 0, 'D', |
1211 CHARSET_LEFT_TO_RIGHT, | 1227 CHARSET_LEFT_TO_RIGHT, |
1212 build_string ("JISX0212"), | 1228 build_string ("JISX0212"), |
1213 build_msg_string ("JISX0212 (Japanese)"), | 1229 build_msg_string ("JISX0212 (Japanese)"), |
1214 build_msg_string ("JISX0212 Japanese Supplement"), | 1230 build_msg_string ("JISX0212 Japanese Supplement"), |
1215 build_string ("jisx0212"), 0); | 1231 build_string ("jisx0212"), 0, 0); |
1216 | 1232 |
1217 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$" | 1233 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$" |
1218 staticpro (&Vcharset_chinese_cns11643_1); | 1234 staticpro (&Vcharset_chinese_cns11643_1); |
1219 Vcharset_chinese_cns11643_1 = | 1235 Vcharset_chinese_cns11643_1 = |
1220 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3, | 1236 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3, |
1222 CHARSET_LEFT_TO_RIGHT, | 1238 CHARSET_LEFT_TO_RIGHT, |
1223 build_string ("CNS11643-1"), | 1239 build_string ("CNS11643-1"), |
1224 build_msg_string ("CNS11643-1 (Chinese traditional)"), | 1240 build_msg_string ("CNS11643-1 (Chinese traditional)"), |
1225 build_msg_string | 1241 build_msg_string |
1226 ("CNS 11643 Plane 1 Chinese traditional"), | 1242 ("CNS 11643 Plane 1 Chinese traditional"), |
1227 build_string (CHINESE_CNS_PLANE_RE("1")), 0); | 1243 build_string (CHINESE_CNS_PLANE_RE("1")), 0, 0); |
1228 staticpro (&Vcharset_chinese_cns11643_2); | 1244 staticpro (&Vcharset_chinese_cns11643_2); |
1229 Vcharset_chinese_cns11643_2 = | 1245 Vcharset_chinese_cns11643_2 = |
1230 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3, | 1246 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3, |
1231 CHARSET_TYPE_94X94, 2, 0, 'H', | 1247 CHARSET_TYPE_94X94, 2, 0, 'H', |
1232 CHARSET_LEFT_TO_RIGHT, | 1248 CHARSET_LEFT_TO_RIGHT, |
1233 build_string ("CNS11643-2"), | 1249 build_string ("CNS11643-2"), |
1234 build_msg_string ("CNS11643-2 (Chinese traditional)"), | 1250 build_msg_string ("CNS11643-2 (Chinese traditional)"), |
1235 build_msg_string | 1251 build_msg_string |
1236 ("CNS 11643 Plane 2 Chinese traditional"), | 1252 ("CNS 11643 Plane 2 Chinese traditional"), |
1237 build_string (CHINESE_CNS_PLANE_RE("2")), 0); | 1253 build_string (CHINESE_CNS_PLANE_RE("2")), 0, 0); |
1238 staticpro (&Vcharset_chinese_big5_1); | 1254 staticpro (&Vcharset_chinese_big5_1); |
1239 Vcharset_chinese_big5_1 = | 1255 Vcharset_chinese_big5_1 = |
1240 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3, | 1256 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3, |
1241 CHARSET_TYPE_94X94, 2, 0, '0', | 1257 CHARSET_TYPE_94X94, 2, 0, '0', |
1242 CHARSET_LEFT_TO_RIGHT, | 1258 CHARSET_LEFT_TO_RIGHT, |
1243 build_string ("Big5"), | 1259 build_string ("Big5"), |
1244 build_msg_string ("Big5 (Level-1)"), | 1260 build_msg_string ("Big5 (Level-1)"), |
1245 build_msg_string | 1261 build_msg_string |
1246 ("Big5 Level-1 Chinese traditional"), | 1262 ("Big5 Level-1 Chinese traditional"), |
1247 build_string ("big5"), 0); | 1263 build_string ("big5"), 0, 0); |
1248 staticpro (&Vcharset_chinese_big5_2); | 1264 staticpro (&Vcharset_chinese_big5_2); |
1249 Vcharset_chinese_big5_2 = | 1265 Vcharset_chinese_big5_2 = |
1250 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3, | 1266 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3, |
1251 CHARSET_TYPE_94X94, 2, 0, '1', | 1267 CHARSET_TYPE_94X94, 2, 0, '1', |
1252 CHARSET_LEFT_TO_RIGHT, | 1268 CHARSET_LEFT_TO_RIGHT, |
1253 build_string ("Big5"), | 1269 build_string ("Big5"), |
1254 build_msg_string ("Big5 (Level-2)"), | 1270 build_msg_string ("Big5 (Level-2)"), |
1255 build_msg_string | 1271 build_msg_string |
1256 ("Big5 Level-2 Chinese traditional"), | 1272 ("Big5 Level-2 Chinese traditional"), |
1257 build_string ("big5"), 0); | 1273 build_string ("big5"), 0, 0); |
1258 | 1274 |
1259 | 1275 |
1260 #ifdef ENABLE_COMPOSITE_CHARS | 1276 #ifdef ENABLE_COMPOSITE_CHARS |
1261 /* #### For simplicity, we put composite chars into a 96x96 charset. | 1277 /* #### For simplicity, we put composite chars into a 96x96 charset. |
1262 This is going to lead to problems because you can run out of | 1278 This is going to lead to problems because you can run out of |
1267 CHARSET_TYPE_96X96, 2, 0, 0, | 1283 CHARSET_TYPE_96X96, 2, 0, 0, |
1268 CHARSET_LEFT_TO_RIGHT, | 1284 CHARSET_LEFT_TO_RIGHT, |
1269 build_string ("Composite"), | 1285 build_string ("Composite"), |
1270 build_msg_string ("Composite characters"), | 1286 build_msg_string ("Composite characters"), |
1271 build_msg_string ("Composite characters"), | 1287 build_msg_string ("Composite characters"), |
1272 build_string (""), 0); | 1288 build_string (""), 0, 0); |
1273 #else | 1289 #else |
1274 /* We create a hack so that we have a way of storing ESC 0 and ESC 1 | 1290 /* We create a hack so that we have a way of storing ESC 0 and ESC 1 |
1275 sequences as "characters", so that they will be output correctly. */ | 1291 sequences as "characters", so that they will be output correctly. */ |
1276 staticpro (&Vcharset_composite); | 1292 staticpro (&Vcharset_composite); |
1277 Vcharset_composite = | 1293 Vcharset_composite = |
1279 CHARSET_TYPE_96, 1, 1, '|', | 1295 CHARSET_TYPE_96, 1, 1, '|', |
1280 CHARSET_LEFT_TO_RIGHT, | 1296 CHARSET_LEFT_TO_RIGHT, |
1281 build_string ("Composite hack"), | 1297 build_string ("Composite hack"), |
1282 build_msg_string ("Composite characters hack"), | 1298 build_msg_string ("Composite characters hack"), |
1283 build_msg_string ("Composite characters hack"), | 1299 build_msg_string ("Composite characters hack"), |
1284 build_string (""), 0); | 1300 build_string (""), 0, 0); |
1285 #endif /* ENABLE_COMPOSITE_CHARS */ | 1301 #endif /* ENABLE_COMPOSITE_CHARS */ |
1286 } | 1302 } |