Mercurial > hg > xemacs-beta
diff src/mule-charset.c @ 3659:98af8a976fc3
[xemacs-hg @ 2006-11-05 22:31:31 by aidan]
Support specifying fonts for particular character sets in Mule; support
translation to ISO 10646-1 for Mule character sets without an otherwise
matching font; move to a vector of X11-charset-X11-registry instead of a
regex for the charset-registry property.
author | aidan |
---|---|
date | Sun, 05 Nov 2006 22:31:46 +0000 |
parents | d1754e7f0cea |
children | b880e45ea63b |
line wrap: on
line diff
--- a/src/mule-charset.c Sat Nov 04 22:51:03 2006 +0000 +++ b/src/mule-charset.c Sun Nov 05 22:31:46 2006 +0000 @@ -35,6 +35,7 @@ #include "lstream.h" #include "mule-ccl.h" #include "objects.h" +#include "specifier.h" /* The various pre-defined charsets. */ @@ -79,7 +80,7 @@ Lisp_Object Qcharsetp; /* Qdoc_string, Qdimension, Qchars defined in general.c */ -Lisp_Object Qregistry, Qfinal, Qgraphic; +Lisp_Object Qregistries, Qfinal, Qgraphic, Qregistry; Lisp_Object Qdirection; Lisp_Object Qreverse_direction_charset; Lisp_Object Qshort_name, Qlong_name; @@ -128,7 +129,7 @@ mark_object (cs->short_name); mark_object (cs->long_name); mark_object (cs->doc_string); - mark_object (cs->registry); + mark_object (cs->registries); mark_object (cs->ccl_program); return cs->name; } @@ -158,7 +159,7 @@ CHARSET_COLUMNS (cs), CHARSET_GRAPHIC (cs), CHARSET_FINAL (cs)); - print_internal (CHARSET_REGISTRY (cs), printcharfun, 0); + print_internal (CHARSET_REGISTRIES (cs), printcharfun, 0); write_fmt_string (printcharfun, " 0x%x>", cs->header.uid); } @@ -167,7 +168,7 @@ { XD_INT, offsetof (Lisp_Charset, from_unicode_levels) }, { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) }, { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) }, - { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) }, + { XD_LISP_OBJECT, offsetof (Lisp_Charset, registries) }, { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) }, { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) }, { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) }, @@ -239,7 +240,8 @@ CHARSET_GRAPHIC (cs) = graphic; CHARSET_FINAL (cs) = final; CHARSET_DOC_STRING (cs) = doc; - CHARSET_REGISTRY (cs) = reg; + CHECK_VECTOR(reg); + CHARSET_REGISTRIES (cs) = reg; CHARSET_ENCODE_AS_UTF_8 (cs) = encode_as_utf_8 ? 1 : 0; CHARSET_CCL_PROGRAM (cs) = Qnil; CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; @@ -271,6 +273,8 @@ } recalculate_unicode_precedence (); + setup_charset_initial_specifier_tags (obj); + return obj; } @@ -419,8 +423,8 @@ `short-name' Short version of the charset name (ex: Latin-1) `long-name' Long version of the charset name (ex: ISO8859-1 (Latin-1)) -`registry' A regular expression matching the font registry field for - this character set. +`registries' A vector of possible XLFD REGISTRY-ENCODING combinations for + this character set. Note that this is not a regular expression. `dimension' Number of octets used to index a character in this charset. Either 1 or 2. Defaults to 1. `columns' Number of columns used to display a character in this charset. @@ -468,7 +472,7 @@ Ibyte final = 0; int direction = CHARSET_LEFT_TO_RIGHT; int type; - Lisp_Object registry = Qnil; + Lisp_Object registries = Qnil; Lisp_Object charset = Qnil; Lisp_Object ccl_program = Qnil; Lisp_Object short_name = Qnil, long_name = Qnil; @@ -538,10 +542,27 @@ invalid_constant ("Invalid value for `graphic'", value); } + else if (EQ (keyword, Qregistries)) + { + CHECK_VECTOR (value); + registries = value; + } + else if (EQ (keyword, Qregistry)) { + Lisp_Object quoted_registry; + CHECK_STRING (value); - registry = value; + quoted_registry = Fregexp_quote(value); + if (strcmp(XSTRING_DATA(quoted_registry), + XSTRING_DATA(value))) + { + warn_when_safe + (Qregistry, Qwarning, + "Regexps no longer allowed for charset-registry. " + "Treating %s as string", XSTRING_DATA(value)); + } + registries = vector1(value); } else if (EQ (keyword, Qdirection)) @@ -613,8 +634,8 @@ } if (NILP (doc_string)) doc_string = build_string (""); - if (NILP (registry)) - registry = build_string (""); + if (NILP (registries)) + registries = make_vector(0, Qnil); if (NILP (short_name)) short_name = XSYMBOL (name)->name; if (NILP (long_name)) @@ -624,7 +645,7 @@ charset = make_charset (id, name, dimension + 2, type, columns, graphic, final, direction, short_name, long_name, - doc_string, registry, !NILP (existing_charset), + doc_string, registries, !NILP (existing_charset), encode_as_utf_8); XCHARSET (charset)->temporary = temporary; @@ -657,7 +678,7 @@ int id, dimension, columns, graphic, encode_as_utf_8; Ibyte final; int direction, type; - Lisp_Object registry, doc_string, short_name, long_name; + Lisp_Object registries, doc_string, short_name, long_name; Lisp_Charset *cs; charset = Fget_charset (charset); @@ -684,12 +705,12 @@ doc_string = CHARSET_DOC_STRING (cs); short_name = CHARSET_SHORT_NAME (cs); long_name = CHARSET_LONG_NAME (cs); - registry = CHARSET_REGISTRY (cs); + registries = CHARSET_REGISTRIES (cs); encode_as_utf_8 = CHARSET_ENCODE_AS_UTF_8 (cs); new_charset = make_charset (id, new_name, dimension + 2, type, columns, graphic, final, direction, short_name, long_name, - doc_string, registry, 0, encode_as_utf_8); + doc_string, registries, 0, encode_as_utf_8); CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset; XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset; @@ -820,7 +841,7 @@ if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs)); if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs)); if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs)); - if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs); + if (EQ (prop, Qregistries)) return CHARSET_REGISTRIES (cs); if (EQ (prop, Qencode_as_utf_8)) return CHARSET_ENCODE_AS_UTF_8 (cs) ? Qt : Qnil; if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs); @@ -862,15 +883,39 @@ return Qnil; } -/* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */ -DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /* -Set the `registry' property of CHARSET to REGISTRY. +DEFUN ("set-charset-registries", Fset_charset_registries, 2, 2, 0, /* +Set the `registries' property of CHARSET to REGISTRIES. + +REGISTRIES is an ordered vector of strings that describe the X11 +CHARSET_REGISTRY and the CHARSET_ENCODINGs appropriate for this charset. +Separate each registry from the corresponding encoding with a dash. The +strings are not regular expressions, in contrast to the old behavior of +the `charset-registry' property. + +One reason to call this function might be if you're in Japan and you'd +prefer the backslash to display as a Yen sign; the corresponding syntax +would be: + +(set-charset-registries 'ascii ["jisx0201.1976-0"]) + */ - (charset, registry)) + (charset, registries)) { + int i; charset = Fget_charset (charset); - CHECK_STRING (registry); - XCHARSET_REGISTRY (charset) = registry; + CHECK_VECTOR (registries); + + for (i = 0; i < XVECTOR_LENGTH(registries); ++i) + { + CHECK_STRING (XVECTOR_DATA(registries)[i]); + if (NULL == qxestrchr(XSTRING_DATA(XVECTOR_DATA(registries)[i]), '-')) + { + invalid_argument("Not an X11 REGISTRY-ENCODING combination", + XVECTOR_DATA(registries)[i]); + } + } + + XCHARSET_REGISTRIES (charset) = registries; invalidate_charset_font_caches (charset); face_property_was_changed (Vdefault_face, Qfont, Qglobal); return Qnil; @@ -967,16 +1012,17 @@ DEFSUBR (Fcharset_property); DEFSUBR (Fcharset_id); DEFSUBR (Fset_charset_ccl_program); - DEFSUBR (Fset_charset_registry); + DEFSUBR (Fset_charset_registries); #ifdef MEMORY_USAGE_STATS DEFSUBR (Fcharset_memory_usage); #endif DEFSYMBOL (Qcharsetp); - DEFSYMBOL (Qregistry); + DEFSYMBOL (Qregistries); DEFSYMBOL (Qfinal); DEFSYMBOL (Qgraphic); + DEFSYMBOL (Qregistry); DEFSYMBOL (Qdirection); DEFSYMBOL (Qreverse_direction_charset); DEFSYMBOL (Qshort_name); @@ -1056,7 +1102,7 @@ build_string ("ASCII"), build_msg_string ("ASCII"), build_msg_string ("ASCII (ISO646 IRV)"), - build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"), 0, 0); + vector1(build_string("iso8859-1")), 0, 0); staticpro (&Vcharset_control_1); Vcharset_control_1 = make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2, @@ -1065,7 +1111,7 @@ build_string ("C1"), build_msg_string ("Control characters"), build_msg_string ("Control characters 128-191"), - build_string (""), 0, 0); + vector1(build_string("iso8859-1")), 0, 0); staticpro (&Vcharset_latin_iso8859_1); Vcharset_latin_iso8859_1 = make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2, @@ -1074,7 +1120,7 @@ build_string ("Latin-1"), build_msg_string ("ISO8859-1 (Latin-1)"), build_msg_string ("ISO8859-1 (Latin-1)"), - build_string ("iso8859-1"), 0, 0); + vector1(build_string("iso8859-1")), 0, 0); staticpro (&Vcharset_latin_iso8859_2); Vcharset_latin_iso8859_2 = make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2, @@ -1083,7 +1129,7 @@ build_string ("Latin-2"), build_msg_string ("ISO8859-2 (Latin-2)"), build_msg_string ("ISO8859-2 (Latin-2)"), - build_string ("iso8859-2"), 0, 0); + vector1(build_string("iso8859-2")), 0, 0); staticpro (&Vcharset_latin_iso8859_3); Vcharset_latin_iso8859_3 = make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2, @@ -1092,7 +1138,7 @@ build_string ("Latin-3"), build_msg_string ("ISO8859-3 (Latin-3)"), build_msg_string ("ISO8859-3 (Latin-3)"), - build_string ("iso8859-3"), 0, 0); + vector1(build_string("iso8859-3")), 0, 0); staticpro (&Vcharset_latin_iso8859_4); Vcharset_latin_iso8859_4 = make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2, @@ -1101,7 +1147,7 @@ build_string ("Latin-4"), build_msg_string ("ISO8859-4 (Latin-4)"), build_msg_string ("ISO8859-4 (Latin-4)"), - build_string ("iso8859-4"), 0, 0); + vector1(build_string("iso8859-2")), 0, 0); staticpro (&Vcharset_thai_tis620); Vcharset_thai_tis620 = make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2, @@ -1110,7 +1156,7 @@ build_string ("TIS620"), build_msg_string ("TIS620 (Thai)"), build_msg_string ("TIS620.2529 (Thai)"), - build_string ("tis620"), 0, 0); + vector1(build_string("tis620.2529-1")), 0, 0); staticpro (&Vcharset_greek_iso8859_7); Vcharset_greek_iso8859_7 = make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2, @@ -1119,7 +1165,7 @@ build_string ("ISO8859-7"), build_msg_string ("ISO8859-7 (Greek)"), build_msg_string ("ISO8859-7 (Greek)"), - build_string ("iso8859-7"), 0, 0); + vector1(build_string("iso8859-7")), 0, 0); staticpro (&Vcharset_arabic_iso8859_6); Vcharset_arabic_iso8859_6 = make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2, @@ -1128,7 +1174,7 @@ build_string ("ISO8859-6"), build_msg_string ("ISO8859-6 (Arabic)"), build_msg_string ("ISO8859-6 (Arabic)"), - build_string ("iso8859-6"), 0, 0); + vector1(build_string ("iso8859-6")), 0, 0); staticpro (&Vcharset_hebrew_iso8859_8); Vcharset_hebrew_iso8859_8 = make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2, @@ -1137,7 +1183,7 @@ build_string ("ISO8859-8"), build_msg_string ("ISO8859-8 (Hebrew)"), build_msg_string ("ISO8859-8 (Hebrew)"), - build_string ("iso8859-8"), 0, 0); + vector1(build_string ("iso8859-8")), 0, 0); staticpro (&Vcharset_katakana_jisx0201); Vcharset_katakana_jisx0201 = make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2, @@ -1146,7 +1192,7 @@ build_string ("JISX0201 Kana"), build_msg_string ("JISX0201.1976 (Japanese Kana)"), build_msg_string ("JISX0201.1976 Japanese Kana"), - build_string ("jisx0201.1976"), 0, 0); + vector1(build_string ("jisx0201.1976-0")), 0, 0); staticpro (&Vcharset_latin_jisx0201); Vcharset_latin_jisx0201 = make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2, @@ -1155,7 +1201,7 @@ build_string ("JISX0201 Roman"), build_msg_string ("JISX0201.1976 (Japanese Roman)"), build_msg_string ("JISX0201.1976 Japanese Roman"), - build_string ("jisx0201.1976"), 0, 0); + vector1(build_string ("jisx0201.1976-0")), 0, 0); staticpro (&Vcharset_cyrillic_iso8859_5); Vcharset_cyrillic_iso8859_5 = make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2, @@ -1164,7 +1210,7 @@ build_string ("ISO8859-5"), build_msg_string ("ISO8859-5 (Cyrillic)"), build_msg_string ("ISO8859-5 (Cyrillic)"), - build_string ("iso8859-5"), 0, 0); + vector1(build_string ("iso8859-5")), 0, 0); staticpro (&Vcharset_latin_iso8859_9); Vcharset_latin_iso8859_9 = make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2, @@ -1173,7 +1219,7 @@ build_string ("Latin-5"), build_msg_string ("ISO8859-9 (Latin-5)"), build_msg_string ("ISO8859-9 (Latin-5)"), - build_string ("iso8859-9"), 0, 0); + vector1(build_string ("iso8859-9")), 0, 0); staticpro (&Vcharset_latin_iso8859_15); Vcharset_latin_iso8859_15 = make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2, @@ -1182,7 +1228,7 @@ build_string ("Latin-9"), build_msg_string ("ISO8859-15 (Latin-9)"), build_msg_string ("ISO8859-15 (Latin-9)"), - build_string ("iso8859-15"), 0, 0); + vector1(build_string ("iso8859-15")), 0, 0); staticpro (&Vcharset_japanese_jisx0208_1978); Vcharset_japanese_jisx0208_1978 = make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3, @@ -1192,7 +1238,8 @@ build_msg_string ("JISX0208.1978 (Japanese)"), build_msg_string ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"), - build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), 0, 0); + vector2(build_string("jisx0208.1978-0"), + build_string("jisc6226.1978-0")), 0, 0); staticpro (&Vcharset_chinese_gb2312); Vcharset_chinese_gb2312 = make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3, @@ -1201,7 +1248,8 @@ build_string ("GB2312"), build_msg_string ("GB2312)"), build_msg_string ("GB2312 Chinese simplified"), - build_string ("gb2312"), 0, 0); + vector2(build_string("gb2312.1980-0"), + build_string("gb2312.80&gb8565.88-0")), 0, 0); staticpro (&Vcharset_japanese_jisx0208); Vcharset_japanese_jisx0208 = make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3, @@ -1210,7 +1258,8 @@ build_string ("JISX0208"), build_msg_string ("JISX0208.1983/1990 (Japanese)"), build_msg_string ("JISX0208.1983/1990 Japanese Kanji"), - build_string ("jisx0208.19\\(83\\|90\\)"), 0, 0); + vector2(build_string("jisx0208.1983-0"), + build_string("jisx0208.1990-0")), 0, 0); staticpro (&Vcharset_korean_ksc5601); Vcharset_korean_ksc5601 = make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3, @@ -1219,7 +1268,7 @@ build_string ("KSC5601"), build_msg_string ("KSC5601 (Korean"), build_msg_string ("KSC5601 Korean Hangul and Hanja"), - build_string ("ksc5601"), 0, 0); + vector1(build_string("ksc5601.1987-0")), 0, 0); staticpro (&Vcharset_japanese_jisx0212); Vcharset_japanese_jisx0212 = make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3, @@ -1228,9 +1277,9 @@ build_string ("JISX0212"), build_msg_string ("JISX0212 (Japanese)"), build_msg_string ("JISX0212 Japanese Supplement"), - build_string ("jisx0212"), 0, 0); + vector1(build_string("jisx0212.1990-0")), 0, 0); -#define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$" +#define CHINESE_CNS_PLANE(n) "cns11643.1992-" n staticpro (&Vcharset_chinese_cns11643_1); Vcharset_chinese_cns11643_1 = make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3, @@ -1240,7 +1289,7 @@ build_msg_string ("CNS11643-1 (Chinese traditional)"), build_msg_string ("CNS 11643 Plane 1 Chinese traditional"), - build_string (CHINESE_CNS_PLANE_RE("1")), 0, 0); + vector1(build_string (CHINESE_CNS_PLANE("1"))), 0, 0); staticpro (&Vcharset_chinese_cns11643_2); Vcharset_chinese_cns11643_2 = make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3, @@ -1250,7 +1299,7 @@ build_msg_string ("CNS11643-2 (Chinese traditional)"), build_msg_string ("CNS 11643 Plane 2 Chinese traditional"), - build_string (CHINESE_CNS_PLANE_RE("2")), 0, 0); + vector1(build_string (CHINESE_CNS_PLANE("2"))), 0, 0); staticpro (&Vcharset_chinese_big5_1); Vcharset_chinese_big5_1 = make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3, @@ -1260,7 +1309,7 @@ build_msg_string ("Big5 (Level-1)"), build_msg_string ("Big5 Level-1 Chinese traditional"), - build_string ("big5"), 0, 0); + vector1(build_string ("big5.eten-0")), 0, 0); staticpro (&Vcharset_chinese_big5_2); Vcharset_chinese_big5_2 = make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3, @@ -1270,7 +1319,7 @@ build_msg_string ("Big5 (Level-2)"), build_msg_string ("Big5 Level-2 Chinese traditional"), - build_string ("big5"), 0, 0); + vector1(build_string ("big5.eten-0")), 0, 0); #ifdef ENABLE_COMPOSITE_CHARS @@ -1285,7 +1334,7 @@ build_string ("Composite"), build_msg_string ("Composite characters"), build_msg_string ("Composite characters"), - build_string (""), 0, 0); + vector1(build_string ("")), 0, 0); #else /* We create a hack so that we have a way of storing ESC 0 and ESC 1 sequences as "characters", so that they will be output correctly. */ @@ -1297,6 +1346,6 @@ build_string ("Composite hack"), build_msg_string ("Composite characters hack"), build_msg_string ("Composite characters hack"), - build_string (""), 0, 0); + vector1(build_string ("")), 0, 0); #endif /* ENABLE_COMPOSITE_CHARS */ }