comparison src/mule-charset.c @ 183:e121b013d1f0 r20-3b18

Import from CVS: tag r20-3b18
author cvs
date Mon, 13 Aug 2007 09:54:23 +0200
parents 85ec50267440
children 3d6bfa290dbd
comparison
equal deleted inserted replaced
182:f07455f06202 183:e121b013d1f0
76 static int composite_char_col_next; 76 static int composite_char_col_next;
77 77
78 /* Table of number of bytes in the string representation of a character 78 /* Table of number of bytes in the string representation of a character
79 indexed by the first byte of that representation. 79 indexed by the first byte of that representation.
80 80
81 This value can be derived other ways -- e.g. something like 81 rep_bytes_by_first_byte(c) is more efficient than the equivalent
82 82 canonical computation:
83 (BYTE_ASCII_P (first_byte) ? 1 : 83
84 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte))) 84 (BYTE_ASCII_P (c) ? 1 : XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c))) */
85
86 but it's faster this way. */
87 85
88 Bytecount rep_bytes_by_first_byte[0xA0] = 86 Bytecount rep_bytes_by_first_byte[0xA0] =
89 { /* 16 x 8 ones for ASCII */ 87 { /* 0x00 - 0x7f are for straight ASCII */
90 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 88 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
91 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 89 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 90 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 91 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
98 /* 1 x 8 for Dimension-1 official Mule charsets */ 96 /* 0x80 - 0x8f are for Dimension-1 official charsets */
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100 /* 0x90 - 0x9d are Dimension-2 official */ 98 /* 0x90 - 0x9d are for Dimension-2 official charsets */
101 /* 0x9e is Dimension-1 private */ 99 /* 0x9e is for Dimension-1 private charsets */
102 /* 0x9f is Dimension-2 private */ 100 /* 0x9f is for Dimension-2 private charsets */
103 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4 101 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
104 }; 102 };
105 103
106 Lisp_Object Qcharsetp; 104 Lisp_Object Qcharsetp;
107 105
110 Lisp_Object Qdirection; 108 Lisp_Object Qdirection;
111 Lisp_Object Qreverse_direction_charset; 109 Lisp_Object Qreverse_direction_charset;
112 Lisp_Object Qccl_program; 110 Lisp_Object Qccl_program;
113 111
114 Lisp_Object Qascii, Qcontrol_1, 112 Lisp_Object Qascii, Qcontrol_1,
115 113
116 Qlatin_iso8859_1, 114 Qlatin_iso8859_1,
117 Qlatin_iso8859_2, 115 Qlatin_iso8859_2,
118 Qlatin_iso8859_3, 116 Qlatin_iso8859_3,
119 Qlatin_iso8859_4, 117 Qlatin_iso8859_4,
120 Qcyrillic_iso8859_5, 118 Qcyrillic_iso8859_5,
121 Qarabic_iso8859_6, 119 Qarabic_iso8859_6,
122 Qgreek_iso8859_7, 120 Qgreek_iso8859_7,
123 Qhebrew_iso8859_8, 121 Qhebrew_iso8859_8,
124 Qlatin_iso8859_9, 122 Qlatin_iso8859_9,
125 123
126 Qthai_tis620, 124 Qthai_tis620,
127 125
128 Qkatakana_jisx0201, Qlatin_jisx0201, 126 Qkatakana_jisx0201, Qlatin_jisx0201,
129 Qjapanese_jisx0208_1978, 127 Qjapanese_jisx0208_1978,
130 Qjapanese_jisx0208, 128 Qjapanese_jisx0208,
131 Qjapanese_jisx0212, 129 Qjapanese_jisx0212,
132 130
133 Qchinese_gb2312, 131 Qchinese_gb2312,
134 Qchinese_big5_1, Qchinese_big5_2, 132 Qchinese_big5_1, Qchinese_big5_2,
135 Qchinese_cns11643_1, Qchinese_cns11643_2, 133 Qchinese_cns11643_1, Qchinese_cns11643_2,
136 134
137 Qkorean_ksc5601, Qcomposite; 135 Qkorean_ksc5601, Qcomposite;
138 136
139 Lisp_Object Ql2r, Qr2l; 137 Lisp_Object Ql2r, Qr2l;
140 138
141 Lisp_Object Vcharset_hashtable; 139 Lisp_Object Vcharset_hashtable;
408 static void 406 static void
409 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) 407 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
410 { 408 {
411 struct Lisp_Charset *cs = XCHARSET (obj); 409 struct Lisp_Charset *cs = XCHARSET (obj);
412 char buf[200]; 410 char buf[200];
413 411
414 if (print_readably) 412 if (print_readably)
415 error ("printing unreadable object #<charset %s 0x%x>", 413 error ("printing unreadable object #<charset %s 0x%x>",
416 string_data (XSYMBOL (CHARSET_NAME (cs))->name), 414 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
417 cs->header.uid); 415 cs->header.uid);
418 416
475 assert (NILP (charset_by_leading_byte[leading_byte - 128])); 473 assert (NILP (charset_by_leading_byte[leading_byte - 128]));
476 charset_by_leading_byte[leading_byte - 128] = obj; 474 charset_by_leading_byte[leading_byte - 128] = obj;
477 if (leading_byte < 0xA0) 475 if (leading_byte < 0xA0)
478 /* official leading byte */ 476 /* official leading byte */
479 rep_bytes_by_first_byte[leading_byte] = rep_bytes; 477 rep_bytes_by_first_byte[leading_byte] = rep_bytes;
480 478
481 /* Some charsets are "faux" and don't have names or really exist at 479 /* Some charsets are "faux" and don't have names or really exist at
482 all except in the leading-byte table. */ 480 all except in the leading-byte table. */
483 if (!NILP (name)) 481 if (!NILP (name))
484 Fputhash (name, obj, Vcharset_hashtable); 482 Fputhash (name, obj, Vcharset_hashtable);
485 return obj; 483 return obj;
790 CHECK_SYMBOL (new_name); 788 CHECK_SYMBOL (new_name);
791 if (!NILP (Ffind_charset (new_name))) 789 if (!NILP (Ffind_charset (new_name)))
792 signal_simple_error ("Cannot redefine existing charset", new_name); 790 signal_simple_error ("Cannot redefine existing charset", new_name);
793 791
794 cs = XCHARSET (charset); 792 cs = XCHARSET (charset);
795 793
796 type = CHARSET_TYPE (cs); 794 type = CHARSET_TYPE (cs);
797 columns = CHARSET_COLUMNS (cs); 795 columns = CHARSET_COLUMNS (cs);
798 dimension = CHARSET_DIMENSION (cs); 796 dimension = CHARSET_DIMENSION (cs);
799 lb = get_unallocated_leading_byte (dimension); 797 lb = get_unallocated_leading_byte (dimension);
800 798
979 XCHARSET_REGISTRY (charset) = registry; 977 XCHARSET_REGISTRY (charset) = registry;
980 invalidate_charset_font_caches (charset); 978 invalidate_charset_font_caches (charset);
981 face_property_was_changed (Vdefault_face, Qfont, Qglobal); 979 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
982 return Qnil; 980 return Qnil;
983 } 981 }
984 982
985 983
986 /************************************************************************/ 984 /************************************************************************/
987 /* Lisp primitives for working with characters */ 985 /* Lisp primitives for working with characters */
988 /************************************************************************/ 986 /************************************************************************/
989 987
1041 N defaults to 0 if omitted. 1039 N defaults to 0 if omitted.
1042 */ 1040 */
1043 (ch, n)) 1041 (ch, n))
1044 { 1042 {
1045 Lisp_Object charset; 1043 Lisp_Object charset;
1046 int c1, c2; 1044 int c1, c2, int_n;
1047 1045
1048 CHECK_CHAR_COERCE_INT (ch); 1046 CHECK_CHAR_COERCE_INT (ch);
1049 if (NILP (n)) 1047 if (NILP (n))
1050 n = Qzero; 1048 int_n = 0;
1051 else 1049 else
1052 { 1050 {
1053 CHECK_INT (n); 1051 CHECK_INT (n);
1054 if (XINT (n) != 0 && XINT (n) != 1) 1052 int_n = XINT (n);
1053 if (int_n != 0 && int_n != 1)
1055 signal_simple_error ("Octet number must be 0 or 1", n); 1054 signal_simple_error ("Octet number must be 0 or 1", n);
1056 } 1055 }
1057 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); 1056 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
1058 if (XINT (n) == 0) 1057 return make_int (int_n == 0 ? c1 : c2);
1059 return make_int (c1);
1060 else
1061 return make_int (c2);
1062 } 1058 }
1063 1059
1064 1060
1065 /************************************************************************/ 1061 /************************************************************************/
1066 /* composite character functions */ 1062 /* composite character functions */
1186 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6"); 1182 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
1187 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7"); 1183 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
1188 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8"); 1184 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
1189 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9"); 1185 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
1190 defsymbol (&Qthai_tis620, "thai-tis620"); 1186 defsymbol (&Qthai_tis620, "thai-tis620");
1191 1187
1192 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201"); 1188 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
1193 defsymbol (&Qlatin_jisx0201, "latin-jisx0201"); 1189 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
1194 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978"); 1190 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
1195 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208"); 1191 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
1196 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212"); 1192 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
1197 1193
1198 defsymbol (&Qchinese_gb2312, "chinese-gb2312"); 1194 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
1199 defsymbol (&Qchinese_big5_1, "chinese-big5-1"); 1195 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
1200 defsymbol (&Qchinese_big5_2, "chinese-big5-2"); 1196 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
1201 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1"); 1197 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
1202 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2"); 1198 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
1203 1199
1204 defsymbol (&Qkorean_ksc5601, "korean-ksc5601"); 1200 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
1205 defsymbol (&Qcomposite, "composite"); 1201 defsymbol (&Qcomposite, "composite");
1206 } 1202 }
1207 1203
1208 void 1204 void
1209 vars_of_mule_charset (void) 1205 vars_of_mule_charset (void)
1210 { 1206 {
1211 int i, j, k; 1207 int i, j, k;
1212 1208
1213 for (i = 0; i < 128; i++) 1209 /* Table of charsets indexed by leading byte. */
1210 for (i = 0; i < countof (charset_by_leading_byte); i++)
1214 charset_by_leading_byte[i] = Qnil; 1211 charset_by_leading_byte[i] = Qnil;
1215 1212
1216 for (i = 0; i < 4; i++) 1213 /* Table of charsets indexed by type/final-byte/direction. */
1217 for (j = 0; j < 128; j++) 1214 for (i = 0; i < countof (charset_by_attributes); i++)
1218 for (k = 0; k < 2; k ++) 1215 for (j = 0; j < countof (charset_by_attributes[0]); j++)
1216 for (k = 0; k < countof (charset_by_attributes[0][0]); k++)
1219 charset_by_attributes[i][j][k] = Qnil; 1217 charset_by_attributes[i][j][k] = Qnil;
1220
1221 /* Now done at compile time
1222 for (i = 0; i < 128; i++)
1223 rep_bytes_by_first_byte[i] = 1;
1224 */
1225 1218
1226 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; 1219 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
1227 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; 1220 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
1228 } 1221 }
1229 1222
1231 complex_vars_of_mule_charset (void) 1224 complex_vars_of_mule_charset (void)
1232 { 1225 {
1233 staticpro (&Vcharset_hashtable); 1226 staticpro (&Vcharset_hashtable);
1234 Vcharset_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK, 1227 Vcharset_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK,
1235 HASHTABLE_EQ); 1228 HASHTABLE_EQ);
1236 1229
1237 /* Predefined character sets. We store them into variables for 1230 /* Predefined character sets. We store them into variables for
1238 ease of access. */ 1231 ease of access. */
1239 1232
1240 Vcharset_ascii = 1233 Vcharset_ascii =
1241 make_charset (Qascii, LEADING_BYTE_ASCII, 1, 1234 make_charset (Qascii, LEADING_BYTE_ASCII, 1,
1405 1398
1406 composite_char_row_next = 32; 1399 composite_char_row_next = 32;
1407 composite_char_col_next = 32; 1400 composite_char_col_next = 32;
1408 1401
1409 Vcomposite_char_string2char_hashtable = 1402 Vcomposite_char_string2char_hashtable =
1410 make_lisp_hashtable (500, HASHTABLE_NONWEAK, 1403 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQUAL);
1411 HASHTABLE_EQUAL);
1412 Vcomposite_char_char2string_hashtable = 1404 Vcomposite_char_char2string_hashtable =
1413 make_lisp_hashtable (500, HASHTABLE_NONWEAK, 1405 make_lisp_hashtable (500, HASHTABLE_NONWEAK, HASHTABLE_EQ);
1414 HASHTABLE_EQ);
1415 staticpro (&Vcomposite_char_string2char_hashtable); 1406 staticpro (&Vcomposite_char_string2char_hashtable);
1416 staticpro (&Vcomposite_char_char2string_hashtable); 1407 staticpro (&Vcomposite_char_char2string_hashtable);
1417 1408
1418 /* Faux charsets used only for convenience in retrieving the 1409 }
1419 number of rep bytes associated with a leading byte. */
1420
1421 make_charset (Qnil, PRE_LEADING_BYTE_PRIVATE_1, 3, 0, 0, 0, 0,
1422 0, Qnil, Qnil);
1423 make_charset (Qnil, PRE_LEADING_BYTE_PRIVATE_2, 4, 0, 0, 0, 0,
1424 0, Qnil, Qnil);
1425 }