Mercurial > hg > xemacs-beta
diff src/mule-charset.c @ 3439:d1754e7f0cea
[xemacs-hg @ 2006-06-03 17:50:39 by aidan]
Just-in-time Unicode code point support.
author | aidan |
---|---|
date | Sat, 03 Jun 2006 17:51:06 +0000 |
parents | facf3239ba30 |
children | 98af8a976fc3 |
line wrap: on
line diff
--- a/src/mule-charset.c Fri Jun 02 22:18:08 2006 +0000 +++ b/src/mule-charset.c Sat Jun 03 17:51:06 2006 +0000 @@ -190,7 +190,7 @@ int type, int columns, int graphic, Ibyte final, int direction, Lisp_Object short_name, Lisp_Object long_name, Lisp_Object doc, - Lisp_Object reg, int overwrite) + Lisp_Object reg, int overwrite, int encode_as_utf_8) { Lisp_Object obj; Lisp_Charset *cs; @@ -240,6 +240,7 @@ CHARSET_FINAL (cs) = final; CHARSET_DOC_STRING (cs) = doc; CHARSET_REGISTRY (cs) = reg; + CHARSET_ENCODE_AS_UTF_8 (cs) = encode_as_utf_8 ? 1 : 0; CHARSET_CCL_PROGRAM (cs) = Qnil; CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; @@ -454,6 +455,12 @@ is passed the octets of the character, with the high bit cleared and set depending upon whether the value of the `graphic' property is 0 or 1. +`encode-as-utf-8' + If non-nil, the charset will be written out using the UTF-8 + escape syntax in ISO 2022-oriented coding systems. Used for + supporting characters we know are part of Unicode but not of + any other known character set in escape-quoted and compound + text. */ (name, doc_string, props)) { @@ -465,6 +472,7 @@ Lisp_Object charset = Qnil; Lisp_Object ccl_program = Qnil; Lisp_Object short_name = Qnil, long_name = Qnil; + int encode_as_utf_8 = 0; Lisp_Object existing_charset; int temporary = UNBOUNDP (name); @@ -546,6 +554,11 @@ invalid_constant ("Invalid value for `direction'", value); } + else if (EQ (keyword, Qencode_as_utf_8)) + { + encode_as_utf_8 = NILP (value) ? 0 : 1; + } + else if (EQ (keyword, Qfinal)) { CHECK_CHAR_COERCE_INT (value); @@ -553,7 +566,6 @@ if (final < '0' || final > '~') invalid_constant ("Invalid value for `final'", value); } - else if (EQ (keyword, Qccl_program)) { struct ccl_program test_ccl; @@ -612,7 +624,8 @@ charset = make_charset (id, name, dimension + 2, type, columns, graphic, final, direction, short_name, long_name, - doc_string, registry, !NILP (existing_charset)); + doc_string, registry, !NILP (existing_charset), + encode_as_utf_8); XCHARSET (charset)->temporary = temporary; if (!NILP (ccl_program)) @@ -641,7 +654,7 @@ (charset, new_name)) { Lisp_Object new_charset = Qnil; - int id, dimension, columns, graphic; + int id, dimension, columns, graphic, encode_as_utf_8; Ibyte final; int direction, type; Lisp_Object registry, doc_string, short_name, long_name; @@ -672,10 +685,11 @@ short_name = CHARSET_SHORT_NAME (cs); long_name = CHARSET_LONG_NAME (cs); registry = CHARSET_REGISTRY (cs); + encode_as_utf_8 = CHARSET_ENCODE_AS_UTF_8 (cs); new_charset = make_charset (id, new_name, dimension + 2, type, columns, graphic, final, direction, short_name, long_name, - doc_string, registry, 0); + doc_string, registry, 0, encode_as_utf_8); CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset; XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset; @@ -807,6 +821,8 @@ if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs)); if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs)); if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs); + if (EQ (prop, Qencode_as_utf_8)) + return CHARSET_ENCODE_AS_UTF_8 (cs) ? Qt : Qnil; if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs); if (EQ (prop, Qdirection)) return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l; @@ -1040,7 +1056,7 @@ build_string ("ASCII"), build_msg_string ("ASCII"), build_msg_string ("ASCII (ISO646 IRV)"), - build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"), 0); + build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"), 0, 0); staticpro (&Vcharset_control_1); Vcharset_control_1 = make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2, @@ -1049,7 +1065,7 @@ build_string ("C1"), build_msg_string ("Control characters"), build_msg_string ("Control characters 128-191"), - build_string (""), 0); + build_string (""), 0, 0); staticpro (&Vcharset_latin_iso8859_1); Vcharset_latin_iso8859_1 = make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2, @@ -1058,7 +1074,7 @@ build_string ("Latin-1"), build_msg_string ("ISO8859-1 (Latin-1)"), build_msg_string ("ISO8859-1 (Latin-1)"), - build_string ("iso8859-1"), 0); + build_string ("iso8859-1"), 0, 0); staticpro (&Vcharset_latin_iso8859_2); Vcharset_latin_iso8859_2 = make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2, @@ -1067,7 +1083,7 @@ build_string ("Latin-2"), build_msg_string ("ISO8859-2 (Latin-2)"), build_msg_string ("ISO8859-2 (Latin-2)"), - build_string ("iso8859-2"), 0); + build_string ("iso8859-2"), 0, 0); staticpro (&Vcharset_latin_iso8859_3); Vcharset_latin_iso8859_3 = make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2, @@ -1076,7 +1092,7 @@ build_string ("Latin-3"), build_msg_string ("ISO8859-3 (Latin-3)"), build_msg_string ("ISO8859-3 (Latin-3)"), - build_string ("iso8859-3"), 0); + build_string ("iso8859-3"), 0, 0); staticpro (&Vcharset_latin_iso8859_4); Vcharset_latin_iso8859_4 = make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2, @@ -1085,7 +1101,7 @@ build_string ("Latin-4"), build_msg_string ("ISO8859-4 (Latin-4)"), build_msg_string ("ISO8859-4 (Latin-4)"), - build_string ("iso8859-4"), 0); + build_string ("iso8859-4"), 0, 0); staticpro (&Vcharset_thai_tis620); Vcharset_thai_tis620 = make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2, @@ -1094,7 +1110,7 @@ build_string ("TIS620"), build_msg_string ("TIS620 (Thai)"), build_msg_string ("TIS620.2529 (Thai)"), - build_string ("tis620"),0); + build_string ("tis620"), 0, 0); staticpro (&Vcharset_greek_iso8859_7); Vcharset_greek_iso8859_7 = make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2, @@ -1103,7 +1119,7 @@ build_string ("ISO8859-7"), build_msg_string ("ISO8859-7 (Greek)"), build_msg_string ("ISO8859-7 (Greek)"), - build_string ("iso8859-7"), 0); + build_string ("iso8859-7"), 0, 0); staticpro (&Vcharset_arabic_iso8859_6); Vcharset_arabic_iso8859_6 = make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2, @@ -1112,7 +1128,7 @@ build_string ("ISO8859-6"), build_msg_string ("ISO8859-6 (Arabic)"), build_msg_string ("ISO8859-6 (Arabic)"), - build_string ("iso8859-6"), 0); + build_string ("iso8859-6"), 0, 0); staticpro (&Vcharset_hebrew_iso8859_8); Vcharset_hebrew_iso8859_8 = make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2, @@ -1121,7 +1137,7 @@ build_string ("ISO8859-8"), build_msg_string ("ISO8859-8 (Hebrew)"), build_msg_string ("ISO8859-8 (Hebrew)"), - build_string ("iso8859-8"), 0); + build_string ("iso8859-8"), 0, 0); staticpro (&Vcharset_katakana_jisx0201); Vcharset_katakana_jisx0201 = make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2, @@ -1130,7 +1146,7 @@ build_string ("JISX0201 Kana"), build_msg_string ("JISX0201.1976 (Japanese Kana)"), build_msg_string ("JISX0201.1976 Japanese Kana"), - build_string ("jisx0201.1976"), 0); + build_string ("jisx0201.1976"), 0, 0); staticpro (&Vcharset_latin_jisx0201); Vcharset_latin_jisx0201 = make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2, @@ -1139,7 +1155,7 @@ build_string ("JISX0201 Roman"), build_msg_string ("JISX0201.1976 (Japanese Roman)"), build_msg_string ("JISX0201.1976 Japanese Roman"), - build_string ("jisx0201.1976"), 0); + build_string ("jisx0201.1976"), 0, 0); staticpro (&Vcharset_cyrillic_iso8859_5); Vcharset_cyrillic_iso8859_5 = make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2, @@ -1148,7 +1164,7 @@ build_string ("ISO8859-5"), build_msg_string ("ISO8859-5 (Cyrillic)"), build_msg_string ("ISO8859-5 (Cyrillic)"), - build_string ("iso8859-5"), 0); + build_string ("iso8859-5"), 0, 0); staticpro (&Vcharset_latin_iso8859_9); Vcharset_latin_iso8859_9 = make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2, @@ -1157,7 +1173,7 @@ build_string ("Latin-5"), build_msg_string ("ISO8859-9 (Latin-5)"), build_msg_string ("ISO8859-9 (Latin-5)"), - build_string ("iso8859-9"), 0); + build_string ("iso8859-9"), 0, 0); staticpro (&Vcharset_latin_iso8859_15); Vcharset_latin_iso8859_15 = make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2, @@ -1166,7 +1182,7 @@ build_string ("Latin-9"), build_msg_string ("ISO8859-15 (Latin-9)"), build_msg_string ("ISO8859-15 (Latin-9)"), - build_string ("iso8859-15"), 0); + build_string ("iso8859-15"), 0, 0); staticpro (&Vcharset_japanese_jisx0208_1978); Vcharset_japanese_jisx0208_1978 = make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3, @@ -1176,7 +1192,7 @@ build_msg_string ("JISX0208.1978 (Japanese)"), build_msg_string ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"), - build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), 0); + build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), 0, 0); staticpro (&Vcharset_chinese_gb2312); Vcharset_chinese_gb2312 = make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3, @@ -1185,7 +1201,7 @@ build_string ("GB2312"), build_msg_string ("GB2312)"), build_msg_string ("GB2312 Chinese simplified"), - build_string ("gb2312"), 0); + build_string ("gb2312"), 0, 0); staticpro (&Vcharset_japanese_jisx0208); Vcharset_japanese_jisx0208 = make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3, @@ -1194,7 +1210,7 @@ build_string ("JISX0208"), build_msg_string ("JISX0208.1983/1990 (Japanese)"), build_msg_string ("JISX0208.1983/1990 Japanese Kanji"), - build_string ("jisx0208.19\\(83\\|90\\)"), 0); + build_string ("jisx0208.19\\(83\\|90\\)"), 0, 0); staticpro (&Vcharset_korean_ksc5601); Vcharset_korean_ksc5601 = make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3, @@ -1203,7 +1219,7 @@ build_string ("KSC5601"), build_msg_string ("KSC5601 (Korean"), build_msg_string ("KSC5601 Korean Hangul and Hanja"), - build_string ("ksc5601"), 0); + build_string ("ksc5601"), 0, 0); staticpro (&Vcharset_japanese_jisx0212); Vcharset_japanese_jisx0212 = make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3, @@ -1212,7 +1228,7 @@ build_string ("JISX0212"), build_msg_string ("JISX0212 (Japanese)"), build_msg_string ("JISX0212 Japanese Supplement"), - build_string ("jisx0212"), 0); + build_string ("jisx0212"), 0, 0); #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$" staticpro (&Vcharset_chinese_cns11643_1); @@ -1224,7 +1240,7 @@ build_msg_string ("CNS11643-1 (Chinese traditional)"), build_msg_string ("CNS 11643 Plane 1 Chinese traditional"), - build_string (CHINESE_CNS_PLANE_RE("1")), 0); + build_string (CHINESE_CNS_PLANE_RE("1")), 0, 0); staticpro (&Vcharset_chinese_cns11643_2); Vcharset_chinese_cns11643_2 = make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3, @@ -1234,7 +1250,7 @@ build_msg_string ("CNS11643-2 (Chinese traditional)"), build_msg_string ("CNS 11643 Plane 2 Chinese traditional"), - build_string (CHINESE_CNS_PLANE_RE("2")), 0); + build_string (CHINESE_CNS_PLANE_RE("2")), 0, 0); staticpro (&Vcharset_chinese_big5_1); Vcharset_chinese_big5_1 = make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3, @@ -1244,7 +1260,7 @@ build_msg_string ("Big5 (Level-1)"), build_msg_string ("Big5 Level-1 Chinese traditional"), - build_string ("big5"), 0); + build_string ("big5"), 0, 0); staticpro (&Vcharset_chinese_big5_2); Vcharset_chinese_big5_2 = make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3, @@ -1254,7 +1270,7 @@ build_msg_string ("Big5 (Level-2)"), build_msg_string ("Big5 Level-2 Chinese traditional"), - build_string ("big5"), 0); + build_string ("big5"), 0, 0); #ifdef ENABLE_COMPOSITE_CHARS @@ -1269,7 +1285,7 @@ build_string ("Composite"), build_msg_string ("Composite characters"), build_msg_string ("Composite characters"), - build_string (""), 0); + build_string (""), 0, 0); #else /* We create a hack so that we have a way of storing ESC 0 and ESC 1 sequences as "characters", so that they will be output correctly. */ @@ -1281,6 +1297,6 @@ build_string ("Composite hack"), build_msg_string ("Composite characters hack"), build_msg_string ("Composite characters hack"), - build_string (""), 0); + build_string (""), 0, 0); #endif /* ENABLE_COMPOSITE_CHARS */ }