diff src/mule-charset.c @ 3439:d1754e7f0cea

[xemacs-hg @ 2006-06-03 17:50:39 by aidan] Just-in-time Unicode code point support.
author aidan
date Sat, 03 Jun 2006 17:51:06 +0000
parents facf3239ba30
children 98af8a976fc3
line wrap: on
line diff
--- a/src/mule-charset.c	Fri Jun 02 22:18:08 2006 +0000
+++ b/src/mule-charset.c	Sat Jun 03 17:51:06 2006 +0000
@@ -190,7 +190,7 @@
 	      int type, int columns, int graphic,
 	      Ibyte final, int direction,  Lisp_Object short_name,
 	      Lisp_Object long_name, Lisp_Object doc,
-	      Lisp_Object reg, int overwrite)
+	      Lisp_Object reg, int overwrite, int encode_as_utf_8)
 {
   Lisp_Object obj;
   Lisp_Charset *cs;
@@ -240,6 +240,7 @@
   CHARSET_FINAL		(cs) = final;
   CHARSET_DOC_STRING	(cs) = doc;
   CHARSET_REGISTRY	(cs) = reg;
+  CHARSET_ENCODE_AS_UTF_8 (cs) = encode_as_utf_8 ? 1 : 0;
   CHARSET_CCL_PROGRAM	(cs) = Qnil;
   CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
 
@@ -454,6 +455,12 @@
 		is passed the octets of the character, with the high
 		bit cleared and set depending upon whether the value
 		of the `graphic' property is 0 or 1.
+`encode-as-utf-8'
+		If non-nil, the charset will be written out using the UTF-8
+		escape syntax in ISO 2022-oriented coding systems.  Used for
+		supporting characters we know are part of Unicode but not of
+		any other known character set in escape-quoted and compound
+		text.
 */
        (name, doc_string, props))
 {
@@ -465,6 +472,7 @@
   Lisp_Object charset = Qnil;
   Lisp_Object ccl_program = Qnil;
   Lisp_Object short_name = Qnil, long_name = Qnil;
+  int encode_as_utf_8 = 0;
   Lisp_Object existing_charset;
   int temporary = UNBOUNDP (name);
 
@@ -546,6 +554,11 @@
 	      invalid_constant ("Invalid value for `direction'", value);
 	  }
 
+	else if (EQ (keyword, Qencode_as_utf_8))
+	  {
+	    encode_as_utf_8 = NILP (value) ? 0 : 1;
+	  }
+
 	else if (EQ (keyword, Qfinal))
 	  {
 	    CHECK_CHAR_COERCE_INT (value);
@@ -553,7 +566,6 @@
 	    if (final < '0' || final > '~')
 	      invalid_constant ("Invalid value for `final'", value);
 	  }
-
 	else if (EQ (keyword, Qccl_program))
 	  {
 	    struct ccl_program test_ccl;
@@ -612,7 +624,8 @@
 
   charset = make_charset (id, name, dimension + 2, type, columns, graphic,
 			  final, direction, short_name, long_name,
-			  doc_string, registry, !NILP (existing_charset));
+			  doc_string, registry, !NILP (existing_charset),
+			  encode_as_utf_8);
 
   XCHARSET (charset)->temporary = temporary;
   if (!NILP (ccl_program))
@@ -641,7 +654,7 @@
        (charset, new_name))
 {
   Lisp_Object new_charset = Qnil;
-  int id, dimension, columns, graphic;
+  int id, dimension, columns, graphic, encode_as_utf_8;
   Ibyte final;
   int direction, type;
   Lisp_Object registry, doc_string, short_name, long_name;
@@ -672,10 +685,11 @@
   short_name = CHARSET_SHORT_NAME (cs);
   long_name = CHARSET_LONG_NAME (cs);
   registry = CHARSET_REGISTRY (cs);
+  encode_as_utf_8 = CHARSET_ENCODE_AS_UTF_8 (cs);
 
   new_charset = make_charset (id, new_name, dimension + 2, type, columns,
 			      graphic, final, direction, short_name, long_name,
-			      doc_string, registry, 0);
+			      doc_string, registry, 0, encode_as_utf_8);
 
   CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
   XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
@@ -807,6 +821,8 @@
   if (EQ (prop, Qfinal))       return make_char (CHARSET_FINAL (cs));
   if (EQ (prop, Qchars))       return make_int (CHARSET_CHARS (cs));
   if (EQ (prop, Qregistry))    return CHARSET_REGISTRY (cs);
+  if (EQ (prop, Qencode_as_utf_8))
+    return CHARSET_ENCODE_AS_UTF_8 (cs) ? Qt : Qnil;
   if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
   if (EQ (prop, Qdirection))
     return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
@@ -1040,7 +1056,7 @@
 		  build_string ("ASCII"),
 		  build_msg_string ("ASCII"),
 		  build_msg_string ("ASCII (ISO646 IRV)"),
-		  build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"), 0);
+		  build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"), 0, 0);
   staticpro (&Vcharset_control_1);
   Vcharset_control_1 =
     make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
@@ -1049,7 +1065,7 @@
 		  build_string ("C1"),
 		  build_msg_string ("Control characters"),
 		  build_msg_string ("Control characters 128-191"),
-		  build_string (""), 0);
+		  build_string (""), 0, 0);
   staticpro (&Vcharset_latin_iso8859_1);
   Vcharset_latin_iso8859_1 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
@@ -1058,7 +1074,7 @@
 		  build_string ("Latin-1"),
 		  build_msg_string ("ISO8859-1 (Latin-1)"),
 		  build_msg_string ("ISO8859-1 (Latin-1)"),
-		  build_string ("iso8859-1"), 0);
+		  build_string ("iso8859-1"), 0, 0);
   staticpro (&Vcharset_latin_iso8859_2);
   Vcharset_latin_iso8859_2 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
@@ -1067,7 +1083,7 @@
 		  build_string ("Latin-2"),
 		  build_msg_string ("ISO8859-2 (Latin-2)"),
 		  build_msg_string ("ISO8859-2 (Latin-2)"),
-		  build_string ("iso8859-2"), 0);
+		  build_string ("iso8859-2"), 0, 0);
   staticpro (&Vcharset_latin_iso8859_3);
   Vcharset_latin_iso8859_3 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
@@ -1076,7 +1092,7 @@
 		  build_string ("Latin-3"),
 		  build_msg_string ("ISO8859-3 (Latin-3)"),
 		  build_msg_string ("ISO8859-3 (Latin-3)"),
-		  build_string ("iso8859-3"), 0);
+		  build_string ("iso8859-3"), 0, 0);
   staticpro (&Vcharset_latin_iso8859_4);
   Vcharset_latin_iso8859_4 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
@@ -1085,7 +1101,7 @@
 		  build_string ("Latin-4"),
 		  build_msg_string ("ISO8859-4 (Latin-4)"),
 		  build_msg_string ("ISO8859-4 (Latin-4)"),
-		  build_string ("iso8859-4"), 0);
+		  build_string ("iso8859-4"), 0, 0);
   staticpro (&Vcharset_thai_tis620);
   Vcharset_thai_tis620 =
     make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
@@ -1094,7 +1110,7 @@
 		  build_string ("TIS620"),
 		  build_msg_string ("TIS620 (Thai)"),
 		  build_msg_string ("TIS620.2529 (Thai)"),
-		  build_string ("tis620"),0);
+		  build_string ("tis620"), 0, 0);
   staticpro (&Vcharset_greek_iso8859_7);
   Vcharset_greek_iso8859_7 =
     make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
@@ -1103,7 +1119,7 @@
 		  build_string ("ISO8859-7"),
 		  build_msg_string ("ISO8859-7 (Greek)"),
 		  build_msg_string ("ISO8859-7 (Greek)"),
-		  build_string ("iso8859-7"), 0);
+		  build_string ("iso8859-7"), 0, 0);
   staticpro (&Vcharset_arabic_iso8859_6);
   Vcharset_arabic_iso8859_6 =
     make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
@@ -1112,7 +1128,7 @@
 		  build_string ("ISO8859-6"),
 		  build_msg_string ("ISO8859-6 (Arabic)"),
 		  build_msg_string ("ISO8859-6 (Arabic)"),
-		  build_string ("iso8859-6"), 0);
+		  build_string ("iso8859-6"), 0, 0);
   staticpro (&Vcharset_hebrew_iso8859_8);
   Vcharset_hebrew_iso8859_8 =
     make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
@@ -1121,7 +1137,7 @@
 		  build_string ("ISO8859-8"),
 		  build_msg_string ("ISO8859-8 (Hebrew)"),
 		  build_msg_string ("ISO8859-8 (Hebrew)"),
-		  build_string ("iso8859-8"), 0);
+		  build_string ("iso8859-8"), 0, 0);
   staticpro (&Vcharset_katakana_jisx0201);
   Vcharset_katakana_jisx0201 =
     make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
@@ -1130,7 +1146,7 @@
 		  build_string ("JISX0201 Kana"),
 		  build_msg_string ("JISX0201.1976 (Japanese Kana)"),
 		  build_msg_string ("JISX0201.1976 Japanese Kana"),
-		  build_string ("jisx0201.1976"), 0);
+		  build_string ("jisx0201.1976"), 0, 0);
   staticpro (&Vcharset_latin_jisx0201);
   Vcharset_latin_jisx0201 =
     make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
@@ -1139,7 +1155,7 @@
 		  build_string ("JISX0201 Roman"),
 		  build_msg_string ("JISX0201.1976 (Japanese Roman)"),
 		  build_msg_string ("JISX0201.1976 Japanese Roman"),
-		  build_string ("jisx0201.1976"), 0);
+		  build_string ("jisx0201.1976"), 0, 0);
   staticpro (&Vcharset_cyrillic_iso8859_5);
   Vcharset_cyrillic_iso8859_5 =
     make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
@@ -1148,7 +1164,7 @@
 		  build_string ("ISO8859-5"),
 		  build_msg_string ("ISO8859-5 (Cyrillic)"),
 		  build_msg_string ("ISO8859-5 (Cyrillic)"),
-		  build_string ("iso8859-5"), 0);
+		  build_string ("iso8859-5"), 0, 0);
   staticpro (&Vcharset_latin_iso8859_9);
   Vcharset_latin_iso8859_9 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
@@ -1157,7 +1173,7 @@
 		  build_string ("Latin-5"),
 		  build_msg_string ("ISO8859-9 (Latin-5)"),
 		  build_msg_string ("ISO8859-9 (Latin-5)"),
-		  build_string ("iso8859-9"), 0);
+		  build_string ("iso8859-9"), 0, 0);
   staticpro (&Vcharset_latin_iso8859_15);
   Vcharset_latin_iso8859_15 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2,
@@ -1166,7 +1182,7 @@
 		  build_string ("Latin-9"),
 		  build_msg_string ("ISO8859-15 (Latin-9)"),
 		  build_msg_string ("ISO8859-15 (Latin-9)"),
-		  build_string ("iso8859-15"), 0);
+		  build_string ("iso8859-15"), 0, 0);
   staticpro (&Vcharset_japanese_jisx0208_1978);
   Vcharset_japanese_jisx0208_1978 =
     make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
@@ -1176,7 +1192,7 @@
 		  build_msg_string ("JISX0208.1978 (Japanese)"),
 		  build_msg_string
 		  ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
-		  build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), 0);
+		  build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), 0, 0);
   staticpro (&Vcharset_chinese_gb2312);
   Vcharset_chinese_gb2312 =
     make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
@@ -1185,7 +1201,7 @@
 		  build_string ("GB2312"),
 		  build_msg_string ("GB2312)"),
 		  build_msg_string ("GB2312 Chinese simplified"),
-		  build_string ("gb2312"), 0);
+		  build_string ("gb2312"), 0, 0);
   staticpro (&Vcharset_japanese_jisx0208);
   Vcharset_japanese_jisx0208 =
     make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
@@ -1194,7 +1210,7 @@
 		  build_string ("JISX0208"),
 		  build_msg_string ("JISX0208.1983/1990 (Japanese)"),
 		  build_msg_string ("JISX0208.1983/1990 Japanese Kanji"),
-		  build_string ("jisx0208.19\\(83\\|90\\)"), 0);
+		  build_string ("jisx0208.19\\(83\\|90\\)"), 0, 0);
   staticpro (&Vcharset_korean_ksc5601);
   Vcharset_korean_ksc5601 =
     make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
@@ -1203,7 +1219,7 @@
 		  build_string ("KSC5601"),
 		  build_msg_string ("KSC5601 (Korean"),
 		  build_msg_string ("KSC5601 Korean Hangul and Hanja"),
-		  build_string ("ksc5601"), 0);
+		  build_string ("ksc5601"), 0, 0);
   staticpro (&Vcharset_japanese_jisx0212);
   Vcharset_japanese_jisx0212 =
     make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
@@ -1212,7 +1228,7 @@
 		  build_string ("JISX0212"),
 		  build_msg_string ("JISX0212 (Japanese)"),
 		  build_msg_string ("JISX0212 Japanese Supplement"),
-		  build_string ("jisx0212"), 0);
+		  build_string ("jisx0212"), 0, 0);
 
 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
   staticpro (&Vcharset_chinese_cns11643_1);
@@ -1224,7 +1240,7 @@
 		  build_msg_string ("CNS11643-1 (Chinese traditional)"),
 		  build_msg_string
 		  ("CNS 11643 Plane 1 Chinese traditional"),
-		  build_string (CHINESE_CNS_PLANE_RE("1")), 0);
+		  build_string (CHINESE_CNS_PLANE_RE("1")), 0, 0);
   staticpro (&Vcharset_chinese_cns11643_2);
   Vcharset_chinese_cns11643_2 =
     make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
@@ -1234,7 +1250,7 @@
 		  build_msg_string ("CNS11643-2 (Chinese traditional)"),
 		  build_msg_string
 		  ("CNS 11643 Plane 2 Chinese traditional"),
-		  build_string (CHINESE_CNS_PLANE_RE("2")), 0);
+		  build_string (CHINESE_CNS_PLANE_RE("2")), 0, 0);
   staticpro (&Vcharset_chinese_big5_1);
   Vcharset_chinese_big5_1 =
     make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
@@ -1244,7 +1260,7 @@
 		  build_msg_string ("Big5 (Level-1)"),
 		  build_msg_string
 		  ("Big5 Level-1 Chinese traditional"),
-		  build_string ("big5"), 0);
+		  build_string ("big5"), 0, 0);
   staticpro (&Vcharset_chinese_big5_2);
   Vcharset_chinese_big5_2 =
     make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
@@ -1254,7 +1270,7 @@
 		  build_msg_string ("Big5 (Level-2)"),
 		  build_msg_string
 		  ("Big5 Level-2 Chinese traditional"),
-		  build_string ("big5"), 0);
+		  build_string ("big5"), 0, 0);
 
 
 #ifdef ENABLE_COMPOSITE_CHARS
@@ -1269,7 +1285,7 @@
 		  build_string ("Composite"),
 		  build_msg_string ("Composite characters"),
 		  build_msg_string ("Composite characters"),
-		  build_string (""), 0);
+		  build_string (""), 0, 0);
 #else
   /* We create a hack so that we have a way of storing ESC 0 and ESC 1
      sequences as "characters", so that they will be output correctly. */
@@ -1281,6 +1297,6 @@
 		  build_string ("Composite hack"),
 		  build_msg_string ("Composite characters hack"),
 		  build_msg_string ("Composite characters hack"),
-		  build_string (""), 0);
+		  build_string (""), 0, 0);
 #endif /* ENABLE_COMPOSITE_CHARS */
 }