Mercurial > hg > xemacs-beta
comparison src/mule-charset.c @ 371:cc15677e0335 r21-2b1
Import from CVS: tag r21-2b1
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:03:08 +0200 |
parents | a4f53d9b3154 |
children | d883f39b8495 |
comparison
equal
deleted
inserted
replaced
370:bd866891f083 | 371:cc15677e0335 |
---|---|
56 Lisp_Object Vcharset_chinese_big5_1; | 56 Lisp_Object Vcharset_chinese_big5_1; |
57 Lisp_Object Vcharset_chinese_big5_2; | 57 Lisp_Object Vcharset_chinese_big5_2; |
58 Lisp_Object Vcharset_chinese_cns11643_1; | 58 Lisp_Object Vcharset_chinese_cns11643_1; |
59 Lisp_Object Vcharset_chinese_cns11643_2; | 59 Lisp_Object Vcharset_chinese_cns11643_2; |
60 Lisp_Object Vcharset_korean_ksc5601; | 60 Lisp_Object Vcharset_korean_ksc5601; |
61 | |
62 #ifdef ENABLE_COMPOSITE_CHARS | |
63 Lisp_Object Vcharset_composite; | 61 Lisp_Object Vcharset_composite; |
64 | 62 |
65 /* Hashtables for composite chars. One maps string representing | 63 /* Hashtables for composite chars. One maps string representing |
66 composed chars to their equivalent chars; one goes the | 64 composed chars to their equivalent chars; one goes the |
67 other way. */ | 65 other way. */ |
68 Lisp_Object Vcomposite_char_char2string_hashtable; | 66 Lisp_Object Vcomposite_char_char2string_hashtable; |
69 Lisp_Object Vcomposite_char_string2char_hashtable; | 67 Lisp_Object Vcomposite_char_string2char_hashtable; |
70 | 68 |
69 /* Table of charsets indexed by leading byte. */ | |
70 Lisp_Object charset_by_leading_byte[128]; | |
71 | |
72 /* Table of charsets indexed by type/final-byte/direction. */ | |
73 Lisp_Object charset_by_attributes[4][128][2]; | |
74 | |
71 static int composite_char_row_next; | 75 static int composite_char_row_next; |
72 static int composite_char_col_next; | 76 static int composite_char_col_next; |
73 | |
74 #endif /* ENABLE_COMPOSITE_CHARS */ | |
75 | |
76 /* Table of charsets indexed by leading byte. */ | |
77 Lisp_Object charset_by_leading_byte[128]; | |
78 | |
79 /* Table of charsets indexed by type/final-byte/direction. */ | |
80 Lisp_Object charset_by_attributes[4][128][2]; | |
81 | 77 |
82 /* Table of number of bytes in the string representation of a character | 78 /* Table of number of bytes in the string representation of a character |
83 indexed by the first byte of that representation. | 79 indexed by the first byte of that representation. |
84 | 80 |
85 rep_bytes_by_first_byte(c) is more efficient than the equivalent | 81 rep_bytes_by_first_byte(c) is more efficient than the equivalent |
249 f3 = CHAR_FIELD3 (ch); | 245 f3 = CHAR_FIELD3 (ch); |
250 | 246 |
251 if (f1 == 0) | 247 if (f1 == 0) |
252 { | 248 { |
253 Lisp_Object charset; | 249 Lisp_Object charset; |
254 /* 0x8B checking is here to avoid the gap between | 250 |
255 LEADING_BYTE_LATIN_JISX0201 and | |
256 LEADING_BYTE_CYRILLIC_ISO8859_5. See mule-charset.h */ | |
257 if (f2 < MIN_CHAR_FIELD2_OFFICIAL || | 251 if (f2 < MIN_CHAR_FIELD2_OFFICIAL || |
258 f2 == (0x8B - FIELD2_TO_OFFICIAL_LEADING_BYTE) || | |
259 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) || | 252 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) || |
260 f2 > MAX_CHAR_FIELD2_PRIVATE) | 253 f2 > MAX_CHAR_FIELD2_PRIVATE) |
261 return 0; | 254 return 0; |
262 if (f3 < 0x20) | 255 if (f3 < 0x20) |
263 return 0; | 256 return 0; |
264 | 257 |
265 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE && | 258 if (f3 != 0x20 && f3 != 0x7F) |
266 f2 <= MAX_CHAR_FIELD2_PRIVATE)) | |
267 return 1; | 259 return 1; |
268 | 260 |
269 /* | 261 /* |
270 NOTE: This takes advantage of the fact that | 262 NOTE: This takes advantage of the fact that |
271 FIELD2_TO_OFFICIAL_LEADING_BYTE and | 263 FIELD2_TO_OFFICIAL_LEADING_BYTE and |
272 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. | 264 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. |
273 */ | 265 */ |
274 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE); | 266 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE); |
275 if (EQ (charset, Qnil)) | |
276 return 0; | |
277 return (XCHARSET_CHARS (charset) == 96); | 267 return (XCHARSET_CHARS (charset) == 96); |
278 } | 268 } |
279 else | 269 else |
280 { | 270 { |
281 Lisp_Object charset; | 271 Lisp_Object charset; |
285 f1 > MAX_CHAR_FIELD1_PRIVATE) | 275 f1 > MAX_CHAR_FIELD1_PRIVATE) |
286 return 0; | 276 return 0; |
287 if (f2 < 0x20 || f3 < 0x20) | 277 if (f2 < 0x20 || f3 < 0x20) |
288 return 0; | 278 return 0; |
289 | 279 |
290 | |
291 #ifdef ENABLE_COMPOSITE_CHARS | |
292 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE) | 280 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE) |
293 { | 281 { |
294 if (UNBOUNDP (Fgethash (make_int (ch), | 282 if (UNBOUNDP (Fgethash (make_int (ch), |
295 Vcomposite_char_char2string_hashtable, | 283 Vcomposite_char_char2string_hashtable, |
296 Qunbound))) | 284 Qunbound))) |
297 return 0; | 285 return 0; |
298 return 1; | 286 return 1; |
299 } | 287 } |
300 #endif /* ENABLE_COMPOSITE_CHARS */ | 288 |
301 | 289 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F) |
302 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F | |
303 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE)) | |
304 return 1; | 290 return 1; |
305 | 291 |
306 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL) | 292 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL) |
307 charset = | 293 charset = |
308 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE); | 294 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE); |
309 else | 295 else |
310 charset = | 296 charset = |
311 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE); | 297 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE); |
312 | 298 |
313 if (EQ (charset, Qnil)) | |
314 return 0; | |
315 return (XCHARSET_CHARS (charset) == 96); | 299 return (XCHARSET_CHARS (charset) == 96); |
316 } | 300 } |
317 } | 301 } |
318 | 302 |
319 | 303 |
475 | 459 |
476 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || | 460 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || |
477 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2; | 461 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2; |
478 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || | 462 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || |
479 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96; | 463 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96; |
480 | 464 |
481 if (final) | 465 if (final) |
482 { | 466 { |
483 /* some charsets do not have final characters. This includes | 467 /* some charsets do not have final characters. This includes |
484 ASCII, Control-1, Composite, and the two faux private | 468 ASCII, Control-1, Composite, and the two faux private |
485 charsets. */ | 469 charsets. */ |
945 if (EQ (prop, Qdirection)) | 929 if (EQ (prop, Qdirection)) |
946 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l; | 930 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l; |
947 if (EQ (prop, Qreverse_direction_charset)) | 931 if (EQ (prop, Qreverse_direction_charset)) |
948 { | 932 { |
949 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs); | 933 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs); |
950 /* #### Is this translation OK? If so, error checking sufficient? */ | 934 if (NILP (obj)) |
951 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj; | 935 return Qnil; |
936 else | |
937 return XCHARSET_NAME (obj); | |
952 } | 938 } |
953 signal_simple_error ("Unrecognized charset property name", prop); | 939 signal_simple_error ("Unrecognized charset property name", prop); |
954 return Qnil; /* not reached */ | 940 return Qnil; /* not reached */ |
955 } | 941 } |
956 | 942 |
1025 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31; | 1011 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31; |
1026 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126; | 1012 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126; |
1027 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127; | 1013 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127; |
1028 | 1014 |
1029 CHECK_INT (arg1); | 1015 CHECK_INT (arg1); |
1030 /* It is useful (and safe, according to Olivier Galibert) to strip | 1016 a1 = XINT (arg1); |
1031 the 8th bit off ARG1 and ARG2 because it allows programmers to | |
1032 write (make-char 'latin-iso8859-2 CODE) where code is the actual | |
1033 Latin 2 code of the character. */ | |
1034 a1 = XINT (arg1) & 0x7f; | |
1035 if (a1 < lowlim || a1 > highlim) | 1017 if (a1 < lowlim || a1 > highlim) |
1036 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim)); | 1018 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim)); |
1037 | 1019 |
1038 if (CHARSET_DIMENSION (cs) == 1) | 1020 if (CHARSET_DIMENSION (cs) == 1) |
1039 { | 1021 { |
1042 ("Charset is of dimension one; second octet must be nil", arg2); | 1024 ("Charset is of dimension one; second octet must be nil", arg2); |
1043 return make_char (MAKE_CHAR (charset, a1, 0)); | 1025 return make_char (MAKE_CHAR (charset, a1, 0)); |
1044 } | 1026 } |
1045 | 1027 |
1046 CHECK_INT (arg2); | 1028 CHECK_INT (arg2); |
1047 a2 = XINT (arg2) & 0x7f; | 1029 a2 = XINT (arg2); |
1048 if (a2 < lowlim || a2 > highlim) | 1030 if (a2 < lowlim || a2 > highlim) |
1049 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim)); | 1031 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim)); |
1050 | 1032 |
1051 return make_char (MAKE_CHAR (charset, a1, a2)); | 1033 return make_char (MAKE_CHAR (charset, a1, a2)); |
1052 } | 1034 } |
1084 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); | 1066 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); |
1085 return make_int (int_n == 0 ? c1 : c2); | 1067 return make_int (int_n == 0 ? c1 : c2); |
1086 } | 1068 } |
1087 | 1069 |
1088 | 1070 |
1089 #ifdef ENABLE_COMPOSITE_CHARS | |
1090 /************************************************************************/ | 1071 /************************************************************************/ |
1091 /* composite character functions */ | 1072 /* composite character functions */ |
1092 /************************************************************************/ | 1073 /************************************************************************/ |
1093 | 1074 |
1094 Emchar | 1075 Emchar |
1130 Qunbound); | 1111 Qunbound); |
1131 assert (!UNBOUNDP (str)); | 1112 assert (!UNBOUNDP (str)); |
1132 return str; | 1113 return str; |
1133 } | 1114 } |
1134 | 1115 |
1135 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /* | 1116 DEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /* |
1136 Convert a string into a single composite character. | 1117 Convert a string into a single composite character. |
1137 The character is the result of overstriking all the characters in | 1118 The character is the result of overstriking all the characters in |
1138 the string. | 1119 the string. |
1139 */ | 1120 */ |
1140 (string)) | 1121 (string)) |
1142 CHECK_STRING (string); | 1123 CHECK_STRING (string); |
1143 return make_char (lookup_composite_char (XSTRING_DATA (string), | 1124 return make_char (lookup_composite_char (XSTRING_DATA (string), |
1144 XSTRING_LENGTH (string))); | 1125 XSTRING_LENGTH (string))); |
1145 } | 1126 } |
1146 | 1127 |
1147 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /* | 1128 DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /* |
1148 Return a string of the characters comprising a composite character. | 1129 Return a string of the characters comprising a composite character. |
1149 */ | 1130 */ |
1150 (ch)) | 1131 (ch)) |
1151 { | 1132 { |
1152 Emchar emch; | 1133 Emchar emch; |
1155 emch = XCHAR (ch); | 1136 emch = XCHAR (ch); |
1156 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE) | 1137 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE) |
1157 signal_simple_error ("Must be composite char", ch); | 1138 signal_simple_error ("Must be composite char", ch); |
1158 return composite_char_string (emch); | 1139 return composite_char_string (emch); |
1159 } | 1140 } |
1160 #endif /* ENABLE_COMPOSITE_CHARS */ | |
1161 | 1141 |
1162 | 1142 |
1163 /************************************************************************/ | 1143 /************************************************************************/ |
1164 /* initialization */ | 1144 /* initialization */ |
1165 /************************************************************************/ | 1145 /************************************************************************/ |
1185 | 1165 |
1186 DEFSUBR (Fmake_char); | 1166 DEFSUBR (Fmake_char); |
1187 DEFSUBR (Fchar_charset); | 1167 DEFSUBR (Fchar_charset); |
1188 DEFSUBR (Fchar_octet); | 1168 DEFSUBR (Fchar_octet); |
1189 | 1169 |
1190 #ifdef ENABLE_COMPOSITE_CHARS | |
1191 DEFSUBR (Fmake_composite_char); | 1170 DEFSUBR (Fmake_composite_char); |
1192 DEFSUBR (Fcomposite_char_string); | 1171 DEFSUBR (Fcomposite_char_string); |
1193 #endif | |
1194 | 1172 |
1195 defsymbol (&Qcharsetp, "charsetp"); | 1173 defsymbol (&Qcharsetp, "charsetp"); |
1196 defsymbol (&Qregistry, "registry"); | 1174 defsymbol (&Qregistry, "registry"); |
1197 defsymbol (&Qfinal, "final"); | 1175 defsymbol (&Qfinal, "final"); |
1198 defsymbol (&Qgraphic, "graphic"); | 1176 defsymbol (&Qgraphic, "graphic"); |
1269 CHARSET_LEFT_TO_RIGHT, | 1247 CHARSET_LEFT_TO_RIGHT, |
1270 build_string ("ASCII (ISO 646 IRV)"), | 1248 build_string ("ASCII (ISO 646 IRV)"), |
1271 build_string ("iso8859-1")); | 1249 build_string ("iso8859-1")); |
1272 Vcharset_control_1 = | 1250 Vcharset_control_1 = |
1273 make_charset (-1, Qcontrol_1, LEADING_BYTE_CONTROL_1, 2, | 1251 make_charset (-1, Qcontrol_1, LEADING_BYTE_CONTROL_1, 2, |
1274 CHARSET_TYPE_94, 1, 1, 0, | 1252 CHARSET_TYPE_94, 1, 0, 0, |
1275 CHARSET_LEFT_TO_RIGHT, | 1253 CHARSET_LEFT_TO_RIGHT, |
1276 build_string ("Control characters"), | 1254 build_string ("Control characters"), |
1277 build_string ("")); | 1255 build_string ("")); |
1278 Vcharset_latin_iso8859_1 = | 1256 Vcharset_latin_iso8859_1 = |
1279 make_charset (129, Qlatin_iso8859_1, LEADING_BYTE_LATIN_ISO8859_1, 2, | 1257 make_charset (129, Qlatin_iso8859_1, LEADING_BYTE_LATIN_ISO8859_1, 2, |
1356 LEADING_BYTE_JAPANESE_JISX0208_1978, 3, | 1334 LEADING_BYTE_JAPANESE_JISX0208_1978, 3, |
1357 CHARSET_TYPE_94X94, 2, 0, '@', | 1335 CHARSET_TYPE_94X94, 2, 0, '@', |
1358 CHARSET_LEFT_TO_RIGHT, | 1336 CHARSET_LEFT_TO_RIGHT, |
1359 build_string | 1337 build_string |
1360 ("JIS X0208-1978 (Japanese Kanji; Old Version)"), | 1338 ("JIS X0208-1978 (Japanese Kanji; Old Version)"), |
1361 build_string ("\\(jisx0208\\|jisc6226\\).1978")); | 1339 build_string ("\\(jisx0208\\|jisc6226\\).19")); |
1362 Vcharset_japanese_jisx0208 = | 1340 Vcharset_japanese_jisx0208 = |
1363 make_charset (146, Qjapanese_jisx0208, | 1341 make_charset (146, Qjapanese_jisx0208, |
1364 LEADING_BYTE_JAPANESE_JISX0208, 3, | 1342 LEADING_BYTE_JAPANESE_JISX0208, 3, |
1365 CHARSET_TYPE_94X94, 2, 0, 'B', | 1343 CHARSET_TYPE_94X94, 2, 0, 'B', |
1366 CHARSET_LEFT_TO_RIGHT, | 1344 CHARSET_LEFT_TO_RIGHT, |
1417 make_charset (147, Qkorean_ksc5601, LEADING_BYTE_KOREAN_KSC5601, 3, | 1395 make_charset (147, Qkorean_ksc5601, LEADING_BYTE_KOREAN_KSC5601, 3, |
1418 CHARSET_TYPE_94X94, 2, 0, 'C', | 1396 CHARSET_TYPE_94X94, 2, 0, 'C', |
1419 CHARSET_LEFT_TO_RIGHT, | 1397 CHARSET_LEFT_TO_RIGHT, |
1420 build_string ("KS C5601 (Hangul and Korean Hanja)"), | 1398 build_string ("KS C5601 (Hangul and Korean Hanja)"), |
1421 build_string ("ksc5601")); | 1399 build_string ("ksc5601")); |
1422 | |
1423 #ifdef ENABLE_COMPOSITE_CHARS | |
1424 /* #### For simplicity, we put composite chars into a 96x96 charset. | 1400 /* #### For simplicity, we put composite chars into a 96x96 charset. |
1425 This is going to lead to problems because you can run out of | 1401 This is going to lead to problems because you can run out of |
1426 room, esp. as we don't yet recycle numbers. */ | 1402 room, esp. as we don't yet recycle numbers. */ |
1427 Vcharset_composite = | 1403 Vcharset_composite = |
1428 make_charset (-1, Qcomposite, LEADING_BYTE_COMPOSITE, 3, | 1404 make_charset (-1, Qcomposite, LEADING_BYTE_COMPOSITE, 3, |
1438 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQUAL); | 1414 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQUAL); |
1439 Vcomposite_char_char2string_hashtable = | 1415 Vcomposite_char_char2string_hashtable = |
1440 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQ); | 1416 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQ); |
1441 staticpro (&Vcomposite_char_string2char_hashtable); | 1417 staticpro (&Vcomposite_char_string2char_hashtable); |
1442 staticpro (&Vcomposite_char_char2string_hashtable); | 1418 staticpro (&Vcomposite_char_char2string_hashtable); |
1443 #endif /* ENABLE_COMPOSITE_CHARS */ | 1419 |
1444 | 1420 } |
1445 } |