Mercurial > hg > xemacs-beta
comparison src/mule-charset.c @ 183:e121b013d1f0 r20-3b18
Import from CVS: tag r20-3b18
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:54:23 +0200 |
parents | 85ec50267440 |
children | 3d6bfa290dbd |
comparison
legend: equal, deleted, inserted, replaced
182:f07455f06202 | 183:e121b013d1f0 |
---|---|
76 static int composite_char_col_next; | 76 static int composite_char_col_next; |
77 | 77 |
78 /* Table of number of bytes in the string representation of a character | 78 /* Table of number of bytes in the string representation of a character |
79 indexed by the first byte of that representation. | 79 indexed by the first byte of that representation. |
80 | 80 |
81 This value can be derived other ways -- e.g. something like | 81 rep_bytes_by_first_byte(c) is more efficient than the equivalent |
82 | 82 canonical computation: |
83 (BYTE_ASCII_P (first_byte) ? 1 : | 83 |
84 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte))) | 84 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */ |
85 | |
86 but it's faster this way. */ | |
87 | 85 |
88 Bytecount rep_bytes_by_first_byte[0xA0] = | 86 Bytecount rep_bytes_by_first_byte[0xA0] = |
89 { /* 16 x 8 ones for ASCII */ | 87 { /* 0x00 - 0x7f are for straight ASCII */ |
90 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 88 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
91 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 89 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 90 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 91 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | 95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
98 /* 1 x 8 for Dimension-1 official Mule charsets */ | 96 /* 0x80 - 0x8f are for Dimension-1 official charsets */ |
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | 97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
100 /* 0x90 - 0x9d are Dimension-2 official */ | 98 /* 0x90 - 0x9d are for Dimension-2 official charsets */ |
101 /* 0x9e is Dimension-1 private */ | 99 /* 0x9e is for Dimension-1 private charsets */ |
102 /* 0x9f is Dimension-2 private */ | 100 /* 0x9f is for Dimension-2 private charsets */ |
103 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4 | 101 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4 |
104 }; | 102 }; |
105 | 103 |
106 Lisp_Object Qcharsetp; | 104 Lisp_Object Qcharsetp; |
107 | 105 |
110 Lisp_Object Qdirection; | 108 Lisp_Object Qdirection; |
111 Lisp_Object Qreverse_direction_charset; | 109 Lisp_Object Qreverse_direction_charset; |
112 Lisp_Object Qccl_program; | 110 Lisp_Object Qccl_program; |
113 | 111 |
114 Lisp_Object Qascii, Qcontrol_1, | 112 Lisp_Object Qascii, Qcontrol_1, |
115 | 113 |
116 Qlatin_iso8859_1, | 114 Qlatin_iso8859_1, |
117 Qlatin_iso8859_2, | 115 Qlatin_iso8859_2, |
118 Qlatin_iso8859_3, | 116 Qlatin_iso8859_3, |
119 Qlatin_iso8859_4, | 117 Qlatin_iso8859_4, |
120 Qcyrillic_iso8859_5, | 118 Qcyrillic_iso8859_5, |
121 Qarabic_iso8859_6, | 119 Qarabic_iso8859_6, |
122 Qgreek_iso8859_7, | 120 Qgreek_iso8859_7, |
123 Qhebrew_iso8859_8, | 121 Qhebrew_iso8859_8, |
124 Qlatin_iso8859_9, | 122 Qlatin_iso8859_9, |
125 | 123 |
126 Qthai_tis620, | 124 Qthai_tis620, |
127 | 125 |
128 Qkatakana_jisx0201, Qlatin_jisx0201, | 126 Qkatakana_jisx0201, Qlatin_jisx0201, |
129 Qjapanese_jisx0208_1978, | 127 Qjapanese_jisx0208_1978, |
130 Qjapanese_jisx0208, | 128 Qjapanese_jisx0208, |
131 Qjapanese_jisx0212, | 129 Qjapanese_jisx0212, |
132 | 130 |
133 Qchinese_gb2312, | 131 Qchinese_gb2312, |
134 Qchinese_big5_1, Qchinese_big5_2, | 132 Qchinese_big5_1, Qchinese_big5_2, |
135 Qchinese_cns11643_1, Qchinese_cns11643_2, | 133 Qchinese_cns11643_1, Qchinese_cns11643_2, |
136 | 134 |
137 Qkorean_ksc5601, Qcomposite; | 135 Qkorean_ksc5601, Qcomposite; |
138 | 136 |
139 Lisp_Object Ql2r, Qr2l; | 137 Lisp_Object Ql2r, Qr2l; |
140 | 138 |
141 Lisp_Object Vcharset_hashtable; | 139 Lisp_Object Vcharset_hashtable; |
408 static void | 406 static void |
409 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) | 407 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) |
410 { | 408 { |
411 struct Lisp_Charset *cs = XCHARSET (obj); | 409 struct Lisp_Charset *cs = XCHARSET (obj); |
412 char buf[200]; | 410 char buf[200]; |
413 | 411 |
414 if (print_readably) | 412 if (print_readably) |
415 error ("printing unreadable object #<charset %s 0x%x>", | 413 error ("printing unreadable object #<charset %s 0x%x>", |
416 string_data (XSYMBOL (CHARSET_NAME (cs))->name), | 414 string_data (XSYMBOL (CHARSET_NAME (cs))->name), |
417 cs->header.uid); | 415 cs->header.uid); |
418 | 416 |
475 assert (NILP (charset_by_leading_byte[leading_byte - 128])); | 473 assert (NILP (charset_by_leading_byte[leading_byte - 128])); |
476 charset_by_leading_byte[leading_byte - 128] = obj; | 474 charset_by_leading_byte[leading_byte - 128] = obj; |
477 if (leading_byte < 0xA0) | 475 if (leading_byte < 0xA0) |
478 /* official leading byte */ | 476 /* official leading byte */ |
479 rep_bytes_by_first_byte[leading_byte] = rep_bytes; | 477 rep_bytes_by_first_byte[leading_byte] = rep_bytes; |
480 | 478 |
481 /* Some charsets are "faux" and don't have names or really exist at | 479 /* Some charsets are "faux" and don't have names or really exist at |
482 all except in the leading-byte table. */ | 480 all except in the leading-byte table. */ |
483 if (!NILP (name)) | 481 if (!NILP (name)) |
484 Fputhash (name, obj, Vcharset_hashtable); | 482 Fputhash (name, obj, Vcharset_hashtable); |
485 return obj; | 483 return obj; |
790 CHECK_SYMBOL (new_name); | 788 CHECK_SYMBOL (new_name); |
791 if (!NILP (Ffind_charset (new_name))) | 789 if (!NILP (Ffind_charset (new_name))) |
792 signal_simple_error ("Cannot redefine existing charset", new_name); | 790 signal_simple_error ("Cannot redefine existing charset", new_name); |
793 | 791 |
794 cs = XCHARSET (charset); | 792 cs = XCHARSET (charset); |
795 | 793 |
796 type = CHARSET_TYPE (cs); | 794 type = CHARSET_TYPE (cs); |
797 columns = CHARSET_COLUMNS (cs); | 795 columns = CHARSET_COLUMNS (cs); |
798 dimension = CHARSET_DIMENSION (cs); | 796 dimension = CHARSET_DIMENSION (cs); |
799 lb = get_unallocated_leading_byte (dimension); | 797 lb = get_unallocated_leading_byte (dimension); |
800 | 798 |
979 XCHARSET_REGISTRY (charset) = registry; | 977 XCHARSET_REGISTRY (charset) = registry; |
980 invalidate_charset_font_caches (charset); | 978 invalidate_charset_font_caches (charset); |
981 face_property_was_changed (Vdefault_face, Qfont, Qglobal); | 979 face_property_was_changed (Vdefault_face, Qfont, Qglobal); |
982 return Qnil; | 980 return Qnil; |
983 } | 981 } |
984 | 982 |
985 | 983 |
986 /************************************************************************/ | 984 /************************************************************************/ |
987 /* Lisp primitives for working with characters */ | 985 /* Lisp primitives for working with characters */ |
988 /************************************************************************/ | 986 /************************************************************************/ |
989 | 987 |
1041 N defaults to 0 if omitted. | 1039 N defaults to 0 if omitted. |
1042 */ | 1040 */ |
1043 (ch, n)) | 1041 (ch, n)) |
1044 { | 1042 { |
1045 Lisp_Object charset; | 1043 Lisp_Object charset; |
1046 int c1, c2; | 1044 int c1, c2, int_n; |
1047 | 1045 |
1048 CHECK_CHAR_COERCE_INT (ch); | 1046 CHECK_CHAR_COERCE_INT (ch); |
1049 if (NILP (n)) | 1047 if (NILP (n)) |
1050 n = Qzero; | 1048 int_n = 0; |
1051 else | 1049 else |
1052 { | 1050 { |
1053 CHECK_INT (n); | 1051 CHECK_INT (n); |
1054 if (XINT (n) != 0 && XINT (n) != 1) | 1052 int_n = XINT (n); |
1053 if (int_n != 0 && int_n != 1) | |
1055 signal_simple_error ("Octet number must be 0 or 1", n); | 1054 signal_simple_error ("Octet number must be 0 or 1", n); |
1056 } | 1055 } |
1057 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); | 1056 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); |
1058 if (XINT (n) == 0) | 1057 return make_int (int_n == 0 ? c1 : c2); |
1059 return make_int (c1); | |
1060 else | |
1061 return make_int (c2); | |
1062 } | 1058 } |
1063 | 1059 |
1064 | 1060 |
1065 /************************************************************************/ | 1061 /************************************************************************/ |
1066 /* composite character functions */ | 1062 /* composite character functions */ |
1186 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6"); | 1182 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6"); |
1187 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7"); | 1183 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7"); |
1188 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8"); | 1184 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8"); |
1189 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9"); | 1185 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9"); |
1190 defsymbol (&Qthai_tis620, "thai-tis620"); | 1186 defsymbol (&Qthai_tis620, "thai-tis620"); |
1191 | 1187 |
1192 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201"); | 1188 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201"); |
1193 defsymbol (&Qlatin_jisx0201, "latin-jisx0201"); | 1189 defsymbol (&Qlatin_jisx0201, "latin-jisx0201"); |
1194 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978"); | 1190 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978"); |
1195 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208"); | 1191 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208"); |
1196 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212"); | 1192 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212"); |
1197 | 1193 |
1198 defsymbol (&Qchinese_gb2312, "chinese-gb2312"); | 1194 defsymbol (&Qchinese_gb2312, "chinese-gb2312"); |
1199 defsymbol (&Qchinese_big5_1, "chinese-big5-1"); | 1195 defsymbol (&Qchinese_big5_1, "chinese-big5-1"); |
1200 defsymbol (&Qchinese_big5_2, "chinese-big5-2"); | 1196 defsymbol (&Qchinese_big5_2, "chinese-big5-2"); |
1201 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1"); | 1197 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1"); |
1202 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2"); | 1198 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2"); |
1203 | 1199 |
1204 defsymbol (&Qkorean_ksc5601, "korean-ksc5601"); | 1200 defsymbol (&Qkorean_ksc5601, "korean-ksc5601"); |
1205 defsymbol (&Qcomposite, "composite"); | 1201 defsymbol (&Qcomposite, "composite"); |
1206 } | 1202 } |
1207 | 1203 |
1208 void | 1204 void |
1209 vars_of_mule_charset (void) | 1205 vars_of_mule_charset (void) |
1210 { | 1206 { |
1211 int i, j, k; | 1207 int i, j, k; |
1212 | 1208 |
1213 for (i = 0; i < 128; i++) | 1209 /* Table of charsets indexed by leading byte. */ |
1210 for (i = 0; i < countof (charset_by_leading_byte); i++) | |
1214 charset_by_leading_byte[i] = Qnil; | 1211 charset_by_leading_byte[i] = Qnil; |
1215 | 1212 |
1216 for (i = 0; i < 4; i++) | 1213 /* Table of charsets indexed by type/final-byte/direction. */ |
1217 for (j = 0; j < 128; j++) | 1214 for (i = 0; i < countof (charset_by_attributes); i++) |
1218 for (k = 0; k < 2; k ++) | 1215 for (j = 0; j < countof (charset_by_attributes[0]); j++) |
1216 for (k = 0; k < countof (charset_by_attributes[0][0]); k++) | |
1219 charset_by_attributes[i][j][k] = Qnil; | 1217 charset_by_attributes[i][j][k] = Qnil; |
1220 | |
1221 /* Now done at compile time | |
1222 for (i = 0; i < 128; i++) | |
1223 rep_bytes_by_first_byte[i] = 1; | |
1224 */ | |
1225 | 1218 |
1226 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; | 1219 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; |
1227 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; | 1220 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; |
1228 } | 1221 } |
1229 | 1222 |
1231 complex_vars_of_mule_charset (void) | 1224 complex_vars_of_mule_charset (void) |
1232 { | 1225 { |
1233 staticpro (&Vcharset_hashtable); | 1226 staticpro (&Vcharset_hashtable); |
1234 Vcharset_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK, | 1227 Vcharset_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK, |
1235 HASHTABLE_EQ); | 1228 HASHTABLE_EQ); |
1236 | 1229 |
1237 /* Predefined character sets. We store them into variables for | 1230 /* Predefined character sets. We store them into variables for |
1238 ease of access. */ | 1231 ease of access. */ |
1239 | 1232 |
1240 Vcharset_ascii = | 1233 Vcharset_ascii = |
1241 make_charset (Qascii, LEADING_BYTE_ASCII, 1, | 1234 make_charset (Qascii, LEADING_BYTE_ASCII, 1, |
1405 | 1398 |
1406 composite_char_row_next = 32; | 1399 composite_char_row_next = 32; |
1407 composite_char_col_next = 32; | 1400 composite_char_col_next = 32; |
1408 | 1401 |
1409 Vcomposite_char_string2char_hashtable = | 1402 Vcomposite_char_string2char_hashtable = |
1410 make_lisp_hashtable (500, HASHTABLE_NONWEAK, | 1403 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQUAL); |
1411 HASHTABLE_EQUAL); | |
1412 Vcomposite_char_char2string_hashtable = | 1404 Vcomposite_char_char2string_hashtable = |
1413 make_lisp_hashtable (500, HASHTABLE_NONWEAK, | 1405 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQ); |
1414 HASHTABLE_EQ); | |
1415 staticpro (&Vcomposite_char_string2char_hashtable); | 1406 staticpro (&Vcomposite_char_string2char_hashtable); |
1416 staticpro (&Vcomposite_char_char2string_hashtable); | 1407 staticpro (&Vcomposite_char_char2string_hashtable); |
1417 | 1408 |
1418 /* Faux charsets used only for convenience in retrieving the | 1409 } |
1419 number of rep bytes associated with a leading byte. */ | |
1420 | |
1421 make_charset (Qnil, PRE_LEADING_BYTE_PRIVATE_1, 3, 0, 0, 0, 0, | |
1422 0, Qnil, Qnil); | |
1423 make_charset (Qnil, PRE_LEADING_BYTE_PRIVATE_2, 4, 0, 0, 0, 0, | |
1424 0, Qnil, Qnil); | |
1425 } |