comparison src/mule-charset.c @ 371:cc15677e0335 r21-2b1

Import from CVS: tag r21-2b1
author cvs
date Mon, 13 Aug 2007 11:03:08 +0200
parents a4f53d9b3154
children d883f39b8495
comparison
equal deleted inserted replaced
370:bd866891f083 371:cc15677e0335
56 Lisp_Object Vcharset_chinese_big5_1; 56 Lisp_Object Vcharset_chinese_big5_1;
57 Lisp_Object Vcharset_chinese_big5_2; 57 Lisp_Object Vcharset_chinese_big5_2;
58 Lisp_Object Vcharset_chinese_cns11643_1; 58 Lisp_Object Vcharset_chinese_cns11643_1;
59 Lisp_Object Vcharset_chinese_cns11643_2; 59 Lisp_Object Vcharset_chinese_cns11643_2;
60 Lisp_Object Vcharset_korean_ksc5601; 60 Lisp_Object Vcharset_korean_ksc5601;
61
62 #ifdef ENABLE_COMPOSITE_CHARS
63 Lisp_Object Vcharset_composite; 61 Lisp_Object Vcharset_composite;
64 62
65 /* Hashtables for composite chars. One maps string representing 63 /* Hashtables for composite chars. One maps string representing
66 composed chars to their equivalent chars; one goes the 64 composed chars to their equivalent chars; one goes the
67 other way. */ 65 other way. */
68 Lisp_Object Vcomposite_char_char2string_hashtable; 66 Lisp_Object Vcomposite_char_char2string_hashtable;
69 Lisp_Object Vcomposite_char_string2char_hashtable; 67 Lisp_Object Vcomposite_char_string2char_hashtable;
70 68
69 /* Table of charsets indexed by leading byte. */
70 Lisp_Object charset_by_leading_byte[128];
71
72 /* Table of charsets indexed by type/final-byte/direction. */
73 Lisp_Object charset_by_attributes[4][128][2];
74
71 static int composite_char_row_next; 75 static int composite_char_row_next;
72 static int composite_char_col_next; 76 static int composite_char_col_next;
73
74 #endif /* ENABLE_COMPOSITE_CHARS */
75
76 /* Table of charsets indexed by leading byte. */
77 Lisp_Object charset_by_leading_byte[128];
78
79 /* Table of charsets indexed by type/final-byte/direction. */
80 Lisp_Object charset_by_attributes[4][128][2];
81 77
82 /* Table of number of bytes in the string representation of a character 78 /* Table of number of bytes in the string representation of a character
83 indexed by the first byte of that representation. 79 indexed by the first byte of that representation.
84 80
85 rep_bytes_by_first_byte(c) is more efficient than the equivalent 81 rep_bytes_by_first_byte(c) is more efficient than the equivalent
249 f3 = CHAR_FIELD3 (ch); 245 f3 = CHAR_FIELD3 (ch);
250 246
251 if (f1 == 0) 247 if (f1 == 0)
252 { 248 {
253 Lisp_Object charset; 249 Lisp_Object charset;
254 /* 0x8B checking is here to avoid the gap between 250
255 LEADING_BYTE_LATIN_JISX0201 and
256 LEADING_BYTE_CYRILLIC_ISO8859_5. See mule-charset.h */
257 if (f2 < MIN_CHAR_FIELD2_OFFICIAL || 251 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
258 f2 == (0x8B - FIELD2_TO_OFFICIAL_LEADING_BYTE) ||
259 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) || 252 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
260 f2 > MAX_CHAR_FIELD2_PRIVATE) 253 f2 > MAX_CHAR_FIELD2_PRIVATE)
261 return 0; 254 return 0;
262 if (f3 < 0x20) 255 if (f3 < 0x20)
263 return 0; 256 return 0;
264 257
265 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE && 258 if (f3 != 0x20 && f3 != 0x7F)
266 f2 <= MAX_CHAR_FIELD2_PRIVATE))
267 return 1; 259 return 1;
268 260
269 /* 261 /*
270 NOTE: This takes advantage of the fact that 262 NOTE: This takes advantage of the fact that
271 FIELD2_TO_OFFICIAL_LEADING_BYTE and 263 FIELD2_TO_OFFICIAL_LEADING_BYTE and
272 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. 264 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
273 */ 265 */
274 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE); 266 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
275 if (EQ (charset, Qnil))
276 return 0;
277 return (XCHARSET_CHARS (charset) == 96); 267 return (XCHARSET_CHARS (charset) == 96);
278 } 268 }
279 else 269 else
280 { 270 {
281 Lisp_Object charset; 271 Lisp_Object charset;
285 f1 > MAX_CHAR_FIELD1_PRIVATE) 275 f1 > MAX_CHAR_FIELD1_PRIVATE)
286 return 0; 276 return 0;
287 if (f2 < 0x20 || f3 < 0x20) 277 if (f2 < 0x20 || f3 < 0x20)
288 return 0; 278 return 0;
289 279
290
291 #ifdef ENABLE_COMPOSITE_CHARS
292 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE) 280 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
293 { 281 {
294 if (UNBOUNDP (Fgethash (make_int (ch), 282 if (UNBOUNDP (Fgethash (make_int (ch),
295 Vcomposite_char_char2string_hashtable, 283 Vcomposite_char_char2string_hashtable,
296 Qunbound))) 284 Qunbound)))
297 return 0; 285 return 0;
298 return 1; 286 return 1;
299 } 287 }
300 #endif /* ENABLE_COMPOSITE_CHARS */ 288
301 289 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
302 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
303 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
304 return 1; 290 return 1;
305 291
306 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL) 292 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
307 charset = 293 charset =
308 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE); 294 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
309 else 295 else
310 charset = 296 charset =
311 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE); 297 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
312 298
313 if (EQ (charset, Qnil))
314 return 0;
315 return (XCHARSET_CHARS (charset) == 96); 299 return (XCHARSET_CHARS (charset) == 96);
316 } 300 }
317 } 301 }
318 302
319 303
475 459
476 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || 460 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
477 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2; 461 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
478 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 || 462 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
479 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96; 463 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
480 464
481 if (final) 465 if (final)
482 { 466 {
483 /* some charsets do not have final characters. This includes 467 /* some charsets do not have final characters. This includes
484 ASCII, Control-1, Composite, and the two faux private 468 ASCII, Control-1, Composite, and the two faux private
485 charsets. */ 469 charsets. */
945 if (EQ (prop, Qdirection)) 929 if (EQ (prop, Qdirection))
946 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l; 930 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
947 if (EQ (prop, Qreverse_direction_charset)) 931 if (EQ (prop, Qreverse_direction_charset))
948 { 932 {
949 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs); 933 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
950 /* #### Is this translation OK? If so, error checking sufficient? */ 934 if (NILP (obj))
951 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj; 935 return Qnil;
936 else
937 return XCHARSET_NAME (obj);
952 } 938 }
953 signal_simple_error ("Unrecognized charset property name", prop); 939 signal_simple_error ("Unrecognized charset property name", prop);
954 return Qnil; /* not reached */ 940 return Qnil; /* not reached */
955 } 941 }
956 942
1025 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31; 1011 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
1026 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126; 1012 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
1027 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127; 1013 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
1028 1014
1029 CHECK_INT (arg1); 1015 CHECK_INT (arg1);
1030 /* It is useful (and safe, according to Olivier Galibert) to strip 1016 a1 = XINT (arg1);
1031 the 8th bit off ARG1 and ARG2 because it allows programmers to
1032 write (make-char 'latin-iso8859-2 CODE) where code is the actual
1033 Latin 2 code of the character. */
1034 a1 = XINT (arg1) & 0x7f;
1035 if (a1 < lowlim || a1 > highlim) 1017 if (a1 < lowlim || a1 > highlim)
1036 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim)); 1018 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
1037 1019
1038 if (CHARSET_DIMENSION (cs) == 1) 1020 if (CHARSET_DIMENSION (cs) == 1)
1039 { 1021 {
1042 ("Charset is of dimension one; second octet must be nil", arg2); 1024 ("Charset is of dimension one; second octet must be nil", arg2);
1043 return make_char (MAKE_CHAR (charset, a1, 0)); 1025 return make_char (MAKE_CHAR (charset, a1, 0));
1044 } 1026 }
1045 1027
1046 CHECK_INT (arg2); 1028 CHECK_INT (arg2);
1047 a2 = XINT (arg2) & 0x7f; 1029 a2 = XINT (arg2);
1048 if (a2 < lowlim || a2 > highlim) 1030 if (a2 < lowlim || a2 > highlim)
1049 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim)); 1031 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
1050 1032
1051 return make_char (MAKE_CHAR (charset, a1, a2)); 1033 return make_char (MAKE_CHAR (charset, a1, a2));
1052 } 1034 }
1084 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); 1066 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
1085 return make_int (int_n == 0 ? c1 : c2); 1067 return make_int (int_n == 0 ? c1 : c2);
1086 } 1068 }
1087 1069
1088 1070
1089 #ifdef ENABLE_COMPOSITE_CHARS
1090 /************************************************************************/ 1071 /************************************************************************/
1091 /* composite character functions */ 1072 /* composite character functions */
1092 /************************************************************************/ 1073 /************************************************************************/
1093 1074
1094 Emchar 1075 Emchar
1130 Qunbound); 1111 Qunbound);
1131 assert (!UNBOUNDP (str)); 1112 assert (!UNBOUNDP (str));
1132 return str; 1113 return str;
1133 } 1114 }
1134 1115
1135 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /* 1116 DEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
1136 Convert a string into a single composite character. 1117 Convert a string into a single composite character.
1137 The character is the result of overstriking all the characters in 1118 The character is the result of overstriking all the characters in
1138 the string. 1119 the string.
1139 */ 1120 */
1140 (string)) 1121 (string))
1142 CHECK_STRING (string); 1123 CHECK_STRING (string);
1143 return make_char (lookup_composite_char (XSTRING_DATA (string), 1124 return make_char (lookup_composite_char (XSTRING_DATA (string),
1144 XSTRING_LENGTH (string))); 1125 XSTRING_LENGTH (string)));
1145 } 1126 }
1146 1127
1147 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /* 1128 DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
1148 Return a string of the characters comprising a composite character. 1129 Return a string of the characters comprising a composite character.
1149 */ 1130 */
1150 (ch)) 1131 (ch))
1151 { 1132 {
1152 Emchar emch; 1133 Emchar emch;
1155 emch = XCHAR (ch); 1136 emch = XCHAR (ch);
1156 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE) 1137 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
1157 signal_simple_error ("Must be composite char", ch); 1138 signal_simple_error ("Must be composite char", ch);
1158 return composite_char_string (emch); 1139 return composite_char_string (emch);
1159 } 1140 }
1160 #endif /* ENABLE_COMPOSITE_CHARS */
1161 1141
1162 1142
1163 /************************************************************************/ 1143 /************************************************************************/
1164 /* initialization */ 1144 /* initialization */
1165 /************************************************************************/ 1145 /************************************************************************/
1185 1165
1186 DEFSUBR (Fmake_char); 1166 DEFSUBR (Fmake_char);
1187 DEFSUBR (Fchar_charset); 1167 DEFSUBR (Fchar_charset);
1188 DEFSUBR (Fchar_octet); 1168 DEFSUBR (Fchar_octet);
1189 1169
1190 #ifdef ENABLE_COMPOSITE_CHARS
1191 DEFSUBR (Fmake_composite_char); 1170 DEFSUBR (Fmake_composite_char);
1192 DEFSUBR (Fcomposite_char_string); 1171 DEFSUBR (Fcomposite_char_string);
1193 #endif
1194 1172
1195 defsymbol (&Qcharsetp, "charsetp"); 1173 defsymbol (&Qcharsetp, "charsetp");
1196 defsymbol (&Qregistry, "registry"); 1174 defsymbol (&Qregistry, "registry");
1197 defsymbol (&Qfinal, "final"); 1175 defsymbol (&Qfinal, "final");
1198 defsymbol (&Qgraphic, "graphic"); 1176 defsymbol (&Qgraphic, "graphic");
1269 CHARSET_LEFT_TO_RIGHT, 1247 CHARSET_LEFT_TO_RIGHT,
1270 build_string ("ASCII (ISO 646 IRV)"), 1248 build_string ("ASCII (ISO 646 IRV)"),
1271 build_string ("iso8859-1")); 1249 build_string ("iso8859-1"));
1272 Vcharset_control_1 = 1250 Vcharset_control_1 =
1273 make_charset (-1, Qcontrol_1, LEADING_BYTE_CONTROL_1, 2, 1251 make_charset (-1, Qcontrol_1, LEADING_BYTE_CONTROL_1, 2,
1274 CHARSET_TYPE_94, 1, 1, 0, 1252 CHARSET_TYPE_94, 1, 0, 0,
1275 CHARSET_LEFT_TO_RIGHT, 1253 CHARSET_LEFT_TO_RIGHT,
1276 build_string ("Control characters"), 1254 build_string ("Control characters"),
1277 build_string ("")); 1255 build_string (""));
1278 Vcharset_latin_iso8859_1 = 1256 Vcharset_latin_iso8859_1 =
1279 make_charset (129, Qlatin_iso8859_1, LEADING_BYTE_LATIN_ISO8859_1, 2, 1257 make_charset (129, Qlatin_iso8859_1, LEADING_BYTE_LATIN_ISO8859_1, 2,
1356 LEADING_BYTE_JAPANESE_JISX0208_1978, 3, 1334 LEADING_BYTE_JAPANESE_JISX0208_1978, 3,
1357 CHARSET_TYPE_94X94, 2, 0, '@', 1335 CHARSET_TYPE_94X94, 2, 0, '@',
1358 CHARSET_LEFT_TO_RIGHT, 1336 CHARSET_LEFT_TO_RIGHT,
1359 build_string 1337 build_string
1360 ("JIS X0208-1978 (Japanese Kanji; Old Version)"), 1338 ("JIS X0208-1978 (Japanese Kanji; Old Version)"),
1361 build_string ("\\(jisx0208\\|jisc6226\\).1978")); 1339 build_string ("\\(jisx0208\\|jisc6226\\).19"));
1362 Vcharset_japanese_jisx0208 = 1340 Vcharset_japanese_jisx0208 =
1363 make_charset (146, Qjapanese_jisx0208, 1341 make_charset (146, Qjapanese_jisx0208,
1364 LEADING_BYTE_JAPANESE_JISX0208, 3, 1342 LEADING_BYTE_JAPANESE_JISX0208, 3,
1365 CHARSET_TYPE_94X94, 2, 0, 'B', 1343 CHARSET_TYPE_94X94, 2, 0, 'B',
1366 CHARSET_LEFT_TO_RIGHT, 1344 CHARSET_LEFT_TO_RIGHT,
1417 make_charset (147, Qkorean_ksc5601, LEADING_BYTE_KOREAN_KSC5601, 3, 1395 make_charset (147, Qkorean_ksc5601, LEADING_BYTE_KOREAN_KSC5601, 3,
1418 CHARSET_TYPE_94X94, 2, 0, 'C', 1396 CHARSET_TYPE_94X94, 2, 0, 'C',
1419 CHARSET_LEFT_TO_RIGHT, 1397 CHARSET_LEFT_TO_RIGHT,
1420 build_string ("KS C5601 (Hangul and Korean Hanja)"), 1398 build_string ("KS C5601 (Hangul and Korean Hanja)"),
1421 build_string ("ksc5601")); 1399 build_string ("ksc5601"));
1422
1423 #ifdef ENABLE_COMPOSITE_CHARS
1424 /* #### For simplicity, we put composite chars into a 96x96 charset. 1400 /* #### For simplicity, we put composite chars into a 96x96 charset.
1425 This is going to lead to problems because you can run out of 1401 This is going to lead to problems because you can run out of
1426 room, esp. as we don't yet recycle numbers. */ 1402 room, esp. as we don't yet recycle numbers. */
1427 Vcharset_composite = 1403 Vcharset_composite =
1428 make_charset (-1, Qcomposite, LEADING_BYTE_COMPOSITE, 3, 1404 make_charset (-1, Qcomposite, LEADING_BYTE_COMPOSITE, 3,
1438 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQUAL); 1414 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQUAL);
1439 Vcomposite_char_char2string_hashtable = 1415 Vcomposite_char_char2string_hashtable =
1440 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQ); 1416 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQ);
1441 staticpro (&Vcomposite_char_string2char_hashtable); 1417 staticpro (&Vcomposite_char_string2char_hashtable);
1442 staticpro (&Vcomposite_char_char2string_hashtable); 1418 staticpro (&Vcomposite_char_char2string_hashtable);
1443 #endif /* ENABLE_COMPOSITE_CHARS */ 1419
1444 1420 }
1445 }