diff src/mule-charset.c @ 5118:e0db3c197671 ben-lisp-object

merge up to latest default branch, doesn't compile yet
author Ben Wing <ben@xemacs.org>
date Sat, 26 Dec 2009 21:18:49 -0600
parents 3742ea8250b5 0c54de4c4b9d
children d877c14318b3
line wrap: on
line diff
--- a/src/mule-charset.c	Sat Dec 26 00:20:27 2009 -0600
+++ b/src/mule-charset.c	Sat Dec 26 21:18:49 2009 -0600
@@ -35,6 +35,7 @@
 #include "lstream.h"
 #include "mule-ccl.h"
 #include "objects.h"
+#include "specifier.h"
 
 /* The various pre-defined charsets. */
 
@@ -46,7 +47,6 @@
 Lisp_Object Vcharset_latin_iso8859_4;
 Lisp_Object Vcharset_thai_tis620;
 Lisp_Object Vcharset_greek_iso8859_7;
-Lisp_Object Vcharset_arabic_iso8859_6;
 Lisp_Object Vcharset_hebrew_iso8859_8;
 Lisp_Object Vcharset_katakana_jisx0201;
 Lisp_Object Vcharset_latin_jisx0201;
@@ -78,13 +78,13 @@
 
 Lisp_Object Qcharsetp;
 
-/* Qdoc_string, Qdimension, Qchars defined in general.c */
-Lisp_Object Qregistry, Qfinal, Qgraphic;
+/* Qdoc_string, Qdimension, Qchars, Qfinal defined in general.c */
+Lisp_Object Qregistries, Qgraphic, Qregistry;
 Lisp_Object Qdirection;
 Lisp_Object Qreverse_direction_charset;
 Lisp_Object Qshort_name, Qlong_name;
 
-Lisp_Object Qfrom_unicode, Qto_unicode;
+Lisp_Object Qto_unicode;
 
 Lisp_Object
   Qlatin_iso8859_1,
@@ -93,7 +93,6 @@
   Qlatin_iso8859_4,
   Qthai_tis620,
   Qgreek_iso8859_7,
-  Qarabic_iso8859_6,
   Qhebrew_iso8859_8,
   Qkatakana_jisx0201,
   Qlatin_jisx0201,
@@ -128,7 +127,7 @@
   mark_object (cs->short_name);
   mark_object (cs->long_name);
   mark_object (cs->doc_string);
-  mark_object (cs->registry);
+  mark_object (cs->registries);
   mark_object (cs->ccl_program);
   return cs->name;
 }
@@ -158,7 +157,7 @@
 		    CHARSET_COLUMNS (cs),
 		    CHARSET_GRAPHIC (cs),
 		    CHARSET_FINAL (cs));
-  print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
+  print_internal (CHARSET_REGISTRIES (cs), printcharfun, 0);
   write_fmt_string (printcharfun, " 0x%x>", cs->header.uid);
 }
 
@@ -167,7 +166,7 @@
   { XD_INT, offsetof (Lisp_Charset, from_unicode_levels) },
   { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
   { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
-  { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
+  { XD_LISP_OBJECT, offsetof (Lisp_Charset, registries) },
   { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
   { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
   { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
@@ -179,7 +178,7 @@
   { XD_END }
 };
 
-DEFINE_LISP_OBJECT ("charset", charset,
+DEFINE_DUMPABLE_LISP_OBJECT ("charset", charset,
 			       mark_charset, print_charset, 0,
 			       0, 0, charset_description, Lisp_Charset);
 /* Make a new charset. */
@@ -189,7 +188,7 @@
 	      int type, int columns, int graphic,
 	      Ibyte final, int direction,  Lisp_Object short_name,
 	      Lisp_Object long_name, Lisp_Object doc,
-	      Lisp_Object reg, int overwrite)
+	      Lisp_Object reg, int overwrite, int encode_as_utf_8)
 {
   Lisp_Object obj;
   Lisp_Charset *cs;
@@ -238,7 +237,9 @@
   CHARSET_GRAPHIC	(cs) = graphic;
   CHARSET_FINAL		(cs) = final;
   CHARSET_DOC_STRING	(cs) = doc;
-  CHARSET_REGISTRY	(cs) = reg;
+  CHECK_VECTOR(reg);
+  CHARSET_REGISTRIES	(cs) = reg;
+  CHARSET_ENCODE_AS_UTF_8 (cs) = encode_as_utf_8 ? 1 : 0;
   CHARSET_CCL_PROGRAM	(cs) = Qnil;
   CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
 
@@ -269,6 +270,8 @@
     }
 
   recalculate_unicode_precedence ();
+  setup_charset_initial_specifier_tags (obj);
+
   return obj;
 }
 
@@ -417,8 +420,8 @@
 
 `short-name'	Short version of the charset name (ex: Latin-1)
 `long-name'	Long version of the charset name (ex: ISO8859-1 (Latin-1))
-`registry'	A regular expression matching the font registry field for
-		this character set.
+`registries'	A vector of possible XLFD REGISTRY-ENCODING combinations for
+		this character set.  Its elements are literal strings, not regexps.
 `dimension'	Number of octets used to index a character in this charset.
 		Either 1 or 2.  Defaults to 1.
 `columns'	Number of columns used to display a character in this charset.
@@ -453,6 +456,12 @@
 		is passed the octets of the character, with the high
 		bit cleared and set depending upon whether the value
 		of the `graphic' property is 0 or 1.
+`encode-as-utf-8'
+		If non-nil, the charset will be written out using the UTF-8
+		escape syntax in ISO 2022-oriented coding systems.  Used for
+		supporting characters we know are part of Unicode but not of
+		any other known character set in escape-quoted and compound
+		text.
 */
        (name, doc_string, props))
 {
@@ -460,10 +469,11 @@
   Ibyte final = 0;
   int direction = CHARSET_LEFT_TO_RIGHT;
   int type;
-  Lisp_Object registry = Qnil;
+  Lisp_Object registries = Qnil;
   Lisp_Object charset = Qnil;
   Lisp_Object ccl_program = Qnil;
   Lisp_Object short_name = Qnil, long_name = Qnil;
+  int encode_as_utf_8 = 0;
   Lisp_Object existing_charset;
   int temporary = UNBOUNDP (name);
 
@@ -529,10 +539,27 @@
 	      invalid_constant ("Invalid value for `graphic'", value);
 	  }
 
+	else if (EQ (keyword, Qregistries))
+	  {
+	    CHECK_VECTOR (value);
+	    registries = value;
+	  }
+	
 	else if (EQ (keyword, Qregistry))
 	  {
+	    Lisp_Object quoted_registry; 
+
 	    CHECK_STRING (value);
-	    registry = value;
+	    quoted_registry = Fregexp_quote(value);
+	    if (qxestrcmp(XSTRING_DATA(quoted_registry),
+		       XSTRING_DATA(value)))
+	      {
+		warn_when_safe 
+		  (Qregistry, Qwarning,
+		   "Regexps no longer allowed for charset-registry.  "
+		   "Treating %s as string", XSTRING_DATA(value));
+	      }
+	    registries = vector1(value);
 	  }
 
 	else if (EQ (keyword, Qdirection))
@@ -545,6 +572,11 @@
 	      invalid_constant ("Invalid value for `direction'", value);
 	  }
 
+	else if (EQ (keyword, Qencode_as_utf_8))
+	  {
+	    encode_as_utf_8 = NILP (value) ? 0 : 1;
+	  }
+
 	else if (EQ (keyword, Qfinal))
 	  {
 	    CHECK_CHAR_COERCE_INT (value);
@@ -552,14 +584,10 @@
 	    if (final < '0' || final > '~')
 	      invalid_constant ("Invalid value for `final'", value);
 	  }
-
 	else if (EQ (keyword, Qccl_program))
 	  {
-	    struct ccl_program test_ccl;
-
-	    if (setup_ccl_program (&test_ccl, value) < 0)
-	      invalid_argument ("Invalid value for `ccl-program'", value);
-	    ccl_program = value;
+            /* This signals an error if VALUE is not a valid CCL program. */
+	    ccl_program = get_ccl_program (value);
 	  }
 	else
 	  invalid_constant ("Unrecognized property", keyword);
@@ -600,8 +628,8 @@
     }
   if (NILP (doc_string))
     doc_string = build_string ("");
-  if (NILP (registry))
-    registry = build_string ("");
+  if (NILP (registries))
+    registries = make_vector(0, Qnil);
   if (NILP (short_name))
     short_name = XSYMBOL (name)->name;
   if (NILP (long_name))
@@ -611,7 +639,8 @@
 
   charset = make_charset (id, name, dimension + 2, type, columns, graphic,
 			  final, direction, short_name, long_name,
-			  doc_string, registry, !NILP (existing_charset));
+			  doc_string, registries, !NILP (existing_charset),
+			  encode_as_utf_8);
 
   XCHARSET (charset)->temporary = temporary;
   if (!NILP (ccl_program))
@@ -640,10 +669,10 @@
        (charset, new_name))
 {
   Lisp_Object new_charset = Qnil;
-  int id, dimension, columns, graphic;
+  int id, dimension, columns, graphic, encode_as_utf_8;
   Ibyte final;
   int direction, type;
-  Lisp_Object registry, doc_string, short_name, long_name;
+  Lisp_Object registries, doc_string, short_name, long_name;
   Lisp_Charset *cs;
 
   charset = Fget_charset (charset);
@@ -670,11 +699,12 @@
   doc_string = CHARSET_DOC_STRING (cs);
   short_name = CHARSET_SHORT_NAME (cs);
   long_name = CHARSET_LONG_NAME (cs);
-  registry = CHARSET_REGISTRY (cs);
+  registries = CHARSET_REGISTRIES (cs);
+  encode_as_utf_8 = CHARSET_ENCODE_AS_UTF_8 (cs);
 
   new_charset = make_charset (id, new_name, dimension + 2, type, columns,
 			      graphic, final, direction, short_name, long_name,
-			      doc_string, registry, 0);
+			      doc_string, registries, 0, encode_as_utf_8);
 
   CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
   XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
@@ -805,7 +835,9 @@
   if (EQ (prop, Qgraphic))     return make_int (CHARSET_GRAPHIC (cs));
   if (EQ (prop, Qfinal))       return make_char (CHARSET_FINAL (cs));
   if (EQ (prop, Qchars))       return make_int (CHARSET_CHARS (cs));
-  if (EQ (prop, Qregistry))    return CHARSET_REGISTRY (cs);
+  if (EQ (prop, Qregistries))    return CHARSET_REGISTRIES (cs);
+  if (EQ (prop, Qencode_as_utf_8))
+    return CHARSET_ENCODE_AS_UTF_8 (cs) ? Qt : Qnil;
   if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
   if (EQ (prop, Qdirection))
     return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
@@ -838,27 +870,119 @@
   struct ccl_program test_ccl;
 
   charset = Fget_charset (charset);
-  if (setup_ccl_program (&test_ccl, ccl_program) < 0)
-    invalid_argument ("Invalid ccl-program", ccl_program);
-  XCHARSET_CCL_PROGRAM (charset) = ccl_program;
+  XCHARSET_CCL_PROGRAM (charset) = get_ccl_program (ccl_program);
+
   face_property_was_changed (Vdefault_face, Qfont, Qglobal);
   return Qnil;
 }
 
-/* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
-DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
-Set the `registry' property of CHARSET to REGISTRY.
-*/
-       (charset, registry))
+void /* Store REGISTRIES on CHARSET; this helper validates neither argument. */
+set_charset_registries(Lisp_Object charset, Lisp_Object registries)
 {
-  charset = Fget_charset (charset);
-  CHECK_STRING (registry);
-  XCHARSET_REGISTRY (charset) = registry;
+  XCHARSET_REGISTRIES (charset) = registries;
   invalidate_charset_font_caches (charset);
   face_property_was_changed (Vdefault_face, Qfont, Qglobal);
+}
+
+DEFUN ("set-charset-registries", Fset_charset_registries, 2, 3, 0, /*
+Set the `registries' property of CHARSET to REGISTRIES.
+
+REGISTRIES is an ordered vector of strings that describe the X11
+CHARSET_REGISTRY and the CHARSET_ENCODINGs appropriate for this charset.
+Separate each registry from the corresponding encoding with a dash.  The
+strings are not regular expressions, in contrast to the old behavior of
+the `charset-registry' property.
+
+One reason to call this function might be if you're in Japan and you'd
+prefer the backslash to display as a Yen sign; the corresponding syntax
+would be:
+
+(set-charset-registries 'ascii ["jisx0201.1976-0"])
+
+If optional argument FORCE is non-nil, skip the sanity checks on the elements
+of REGISTRIES (each element must still be a string).  Normally the strings
+are checked to make sure they contain no XLFD wild cards and that they
+contain at least one hyphen; the only context in which one might want to
+skip this is in order to use a font which doesn't have a full XLFD--and
+thus, an effective CHARSET_REGISTRY-CHARSET_ENCODING of ""--to display ASCII.
+
+We recommend strongly that you specify a full XLFD, since this makes
+multilingual and variant font handling work much better. To get the full
+XLFD of any font, start xfd with the short name as the pattern argument:
+
+    xfd -fn 8x16kana
+
+and use the text that appears at the top of the window.
+*/
+       (charset, registries, force))
+{
+  int i; /* index into REGISTRIES */
+  charset = Fget_charset (charset);
+  CHECK_VECTOR (registries);
+
+  for (i = 0; i < XVECTOR_LENGTH(registries); ++i)
+    {
+      CHECK_STRING (XVECTOR_DATA(registries)[i]);
+      /* FORCE skips only the format checks below, not the string check above. */
+      if (!NILP(force))
+        {
+          continue;
+        }
+      /* A REGISTRY-ENCODING combination must contain at least one hyphen. */
+      if (NULL == qxestrchr(XSTRING_DATA(XVECTOR_DATA(registries)[i]), '-'))
+	{
+	  invalid_argument("Not an X11 REGISTRY-ENCODING combination", 
+			   XVECTOR_DATA(registries)[i]);
+	}
+      /* '*' and '?' are rejected as XLFD wildcards. */
+      if (qxestrchr(XSTRING_DATA(XVECTOR_DATA(registries)[i]), '*') ||
+	  qxestrchr(XSTRING_DATA(XVECTOR_DATA(registries)[i]), '?'))
+	{
+	  invalid_argument
+	    ("XLFD wildcards not allowed in charset-registries", 
+	     XVECTOR_DATA(registries)[i]);
+
+	}
+    }
+  /* Every element passed (or FORCE was given): install the new vector. */
+  set_charset_registries(charset, registries);
+
+  return Qnil;
+}
 
+DEFUN ("charsets-in-region", Fcharsets_in_region, 2, 3, 0, /*
+Return a list of the names of the charsets found between START and END.
+BUFFER defaults to the current buffer if omitted.
+*/
+       (start, end, buffer))
+{
+  /* This function can GC */
+  struct buffer *buf = decode_buffer (buffer, 1);
+  Charbpos pos, stop;	/* Limits of the region. */
+  Lisp_Object res = Qnil;
+  int charsets[NUM_LEADING_BYTES]; /* 1 once that leading byte's charset is listed */
+  Ibyte lb; /* leading byte of the character at POS */
+  struct gcpro gcpro1;
+
+  memset(charsets, 0, sizeof(charsets));
+  get_buffer_range_char (buf, start, end, &pos, &stop, 0);
+
+  GCPRO1 (res); /* RES is consed onto inside the loop */
+  while (pos < stop)
+    {
+      lb = ichar_leading_byte(BUF_FETCH_CHAR (buf, pos));
+      if (0 == charsets[lb - MIN_LEADING_BYTE]) /* not listed yet */
+	{
+	  charsets[lb - MIN_LEADING_BYTE] = 1;
+	  res = Fcons (XCHARSET_NAME(charset_by_leading_byte(lb)), res);
+	}
+      ++pos;
+    }
+  UNGCPRO;
+
+  return res; /* names are pushed on the front: last-discovered charset first */
+} 
+
 
 /************************************************************************/
 /*                            memory usage                              */
@@ -950,22 +1074,23 @@
   DEFSUBR (Fcharset_property);
   DEFSUBR (Fcharset_id);
   DEFSUBR (Fset_charset_ccl_program);
-  DEFSUBR (Fset_charset_registry);
+  DEFSUBR (Fset_charset_registries);
+  DEFSUBR (Fcharsets_in_region);
 
 #ifdef MEMORY_USAGE_STATS
   DEFSUBR (Fcharset_memory_usage);
 #endif
 
   DEFSYMBOL (Qcharsetp);
-  DEFSYMBOL (Qregistry);
+  DEFSYMBOL (Qregistries);
   DEFSYMBOL (Qfinal);
   DEFSYMBOL (Qgraphic);
+  DEFSYMBOL (Qregistry);
   DEFSYMBOL (Qdirection);
   DEFSYMBOL (Qreverse_direction_charset);
   DEFSYMBOL (Qshort_name);
   DEFSYMBOL (Qlong_name);
 
-  DEFSYMBOL (Qfrom_unicode);
   DEFSYMBOL (Qto_unicode);
 
   DEFSYMBOL (Ql2r);
@@ -979,7 +1104,6 @@
   DEFSYMBOL (Qlatin_iso8859_4);
   DEFSYMBOL (Qthai_tis620);
   DEFSYMBOL (Qgreek_iso8859_7);
-  DEFSYMBOL (Qarabic_iso8859_6);
   DEFSYMBOL (Qhebrew_iso8859_8);
   DEFSYMBOL (Qkatakana_jisx0201);
   DEFSYMBOL (Qlatin_jisx0201);
@@ -1039,7 +1163,7 @@
 		  build_string ("ASCII"),
 		  build_msg_string ("ASCII"),
 		  build_msg_string ("ASCII (ISO646 IRV)"),
-		  build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"), 0);
+		  vector1(build_string("iso8859-1")), 0, 0);
   staticpro (&Vcharset_control_1);
   Vcharset_control_1 =
     make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
@@ -1048,7 +1172,7 @@
 		  build_string ("C1"),
 		  build_msg_string ("Control characters"),
 		  build_msg_string ("Control characters 128-191"),
-		  build_string (""), 0);
+		  vector1(build_string("iso8859-1")), 0, 0);
   staticpro (&Vcharset_latin_iso8859_1);
   Vcharset_latin_iso8859_1 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
@@ -1057,7 +1181,7 @@
 		  build_string ("Latin-1"),
 		  build_msg_string ("ISO8859-1 (Latin-1)"),
 		  build_msg_string ("ISO8859-1 (Latin-1)"),
-		  build_string ("iso8859-1"), 0);
+		  vector1(build_string("iso8859-1")), 0, 0);
   staticpro (&Vcharset_latin_iso8859_2);
   Vcharset_latin_iso8859_2 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
@@ -1066,7 +1190,7 @@
 		  build_string ("Latin-2"),
 		  build_msg_string ("ISO8859-2 (Latin-2)"),
 		  build_msg_string ("ISO8859-2 (Latin-2)"),
-		  build_string ("iso8859-2"), 0);
+		  vector1(build_string("iso8859-2")), 0, 0);
   staticpro (&Vcharset_latin_iso8859_3);
   Vcharset_latin_iso8859_3 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
@@ -1075,7 +1199,7 @@
 		  build_string ("Latin-3"),
 		  build_msg_string ("ISO8859-3 (Latin-3)"),
 		  build_msg_string ("ISO8859-3 (Latin-3)"),
-		  build_string ("iso8859-3"), 0);
+		  vector1(build_string("iso8859-3")), 0, 0);
   staticpro (&Vcharset_latin_iso8859_4);
   Vcharset_latin_iso8859_4 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
@@ -1084,7 +1208,7 @@
 		  build_string ("Latin-4"),
 		  build_msg_string ("ISO8859-4 (Latin-4)"),
 		  build_msg_string ("ISO8859-4 (Latin-4)"),
-		  build_string ("iso8859-4"), 0);
+		  vector1(build_string("iso8859-4")), 0, 0);
   staticpro (&Vcharset_thai_tis620);
   Vcharset_thai_tis620 =
     make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
@@ -1093,7 +1217,7 @@
 		  build_string ("TIS620"),
 		  build_msg_string ("TIS620 (Thai)"),
 		  build_msg_string ("TIS620.2529 (Thai)"),
-		  build_string ("tis620"),0);
+		  vector1(build_string("tis620.2529-1")), 0, 0);
   staticpro (&Vcharset_greek_iso8859_7);
   Vcharset_greek_iso8859_7 =
     make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
@@ -1102,16 +1226,7 @@
 		  build_string ("ISO8859-7"),
 		  build_msg_string ("ISO8859-7 (Greek)"),
 		  build_msg_string ("ISO8859-7 (Greek)"),
-		  build_string ("iso8859-7"), 0);
-  staticpro (&Vcharset_arabic_iso8859_6);
-  Vcharset_arabic_iso8859_6 =
-    make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
-		  CHARSET_TYPE_96, 1, 1, 'G',
-		  CHARSET_RIGHT_TO_LEFT,
-		  build_string ("ISO8859-6"),
-		  build_msg_string ("ISO8859-6 (Arabic)"),
-		  build_msg_string ("ISO8859-6 (Arabic)"),
-		  build_string ("iso8859-6"), 0);
+		  vector1(build_string("iso8859-7")), 0, 0);
   staticpro (&Vcharset_hebrew_iso8859_8);
   Vcharset_hebrew_iso8859_8 =
     make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
@@ -1120,7 +1235,7 @@
 		  build_string ("ISO8859-8"),
 		  build_msg_string ("ISO8859-8 (Hebrew)"),
 		  build_msg_string ("ISO8859-8 (Hebrew)"),
-		  build_string ("iso8859-8"), 0);
+		  vector1(build_string ("iso8859-8")), 0, 0);
   staticpro (&Vcharset_katakana_jisx0201);
   Vcharset_katakana_jisx0201 =
     make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
@@ -1129,7 +1244,7 @@
 		  build_string ("JISX0201 Kana"),
 		  build_msg_string ("JISX0201.1976 (Japanese Kana)"),
 		  build_msg_string ("JISX0201.1976 Japanese Kana"),
-		  build_string ("jisx0201.1976"), 0);
+		  vector1(build_string ("jisx0201.1976-0")), 0, 0);
   staticpro (&Vcharset_latin_jisx0201);
   Vcharset_latin_jisx0201 =
     make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
@@ -1138,7 +1253,7 @@
 		  build_string ("JISX0201 Roman"),
 		  build_msg_string ("JISX0201.1976 (Japanese Roman)"),
 		  build_msg_string ("JISX0201.1976 Japanese Roman"),
-		  build_string ("jisx0201.1976"), 0);
+		  vector1(build_string ("jisx0201.1976-0")), 0, 0);
   staticpro (&Vcharset_cyrillic_iso8859_5);
   Vcharset_cyrillic_iso8859_5 =
     make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
@@ -1147,7 +1262,7 @@
 		  build_string ("ISO8859-5"),
 		  build_msg_string ("ISO8859-5 (Cyrillic)"),
 		  build_msg_string ("ISO8859-5 (Cyrillic)"),
-		  build_string ("iso8859-5"), 0);
+		  vector1(build_string ("iso8859-5")), 0, 0);
   staticpro (&Vcharset_latin_iso8859_9);
   Vcharset_latin_iso8859_9 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
@@ -1156,7 +1271,7 @@
 		  build_string ("Latin-5"),
 		  build_msg_string ("ISO8859-9 (Latin-5)"),
 		  build_msg_string ("ISO8859-9 (Latin-5)"),
-		  build_string ("iso8859-9"), 0);
+		  vector1(build_string ("iso8859-9")), 0, 0);
   staticpro (&Vcharset_latin_iso8859_15);
   Vcharset_latin_iso8859_15 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2,
@@ -1165,7 +1280,7 @@
 		  build_string ("Latin-9"),
 		  build_msg_string ("ISO8859-15 (Latin-9)"),
 		  build_msg_string ("ISO8859-15 (Latin-9)"),
-		  build_string ("iso8859-15"), 0);
+		  vector1(build_string ("iso8859-15")), 0, 0);
   staticpro (&Vcharset_japanese_jisx0208_1978);
   Vcharset_japanese_jisx0208_1978 =
     make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
@@ -1175,7 +1290,8 @@
 		  build_msg_string ("JISX0208.1978 (Japanese)"),
 		  build_msg_string
 		  ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
-		  build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), 0);
+		  vector2(build_string("jisx0208.1978-0"),
+			  build_string("jisc6226.1978-0")), 0, 0);
   staticpro (&Vcharset_chinese_gb2312);
   Vcharset_chinese_gb2312 =
     make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
@@ -1184,7 +1300,8 @@
 		  build_string ("GB2312"),
 		  build_msg_string ("GB2312)"),
 		  build_msg_string ("GB2312 Chinese simplified"),
-		  build_string ("gb2312"), 0);
+		  vector2(build_string("gb2312.1980-0"), 
+			  build_string("gb2312.80&gb8565.88-0")), 0, 0);
   staticpro (&Vcharset_japanese_jisx0208);
   Vcharset_japanese_jisx0208 =
     make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
@@ -1193,7 +1310,8 @@
 		  build_string ("JISX0208"),
 		  build_msg_string ("JISX0208.1983/1990 (Japanese)"),
 		  build_msg_string ("JISX0208.1983/1990 Japanese Kanji"),
-		  build_string ("jisx0208.19\\(83\\|90\\)"), 0);
+		  vector2(build_string("jisx0208.1983-0"),
+			  build_string("jisx0208.1990-0")), 0, 0);
   staticpro (&Vcharset_korean_ksc5601);
   Vcharset_korean_ksc5601 =
     make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
@@ -1202,7 +1320,7 @@
 		  build_string ("KSC5601"),
 		  build_msg_string ("KSC5601 (Korean"),
 		  build_msg_string ("KSC5601 Korean Hangul and Hanja"),
-		  build_string ("ksc5601"), 0);
+		  vector1(build_string("ksc5601.1987-0")), 0, 0);
   staticpro (&Vcharset_japanese_jisx0212);
   Vcharset_japanese_jisx0212 =
     make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
@@ -1211,9 +1329,9 @@
 		  build_string ("JISX0212"),
 		  build_msg_string ("JISX0212 (Japanese)"),
 		  build_msg_string ("JISX0212 Japanese Supplement"),
-		  build_string ("jisx0212"), 0);
+		  vector1(build_string("jisx0212.1990-0")), 0, 0);
 
-#define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
+#define CHINESE_CNS_PLANE(n) "cns11643.1992-" n
   staticpro (&Vcharset_chinese_cns11643_1);
   Vcharset_chinese_cns11643_1 =
     make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
@@ -1223,7 +1341,7 @@
 		  build_msg_string ("CNS11643-1 (Chinese traditional)"),
 		  build_msg_string
 		  ("CNS 11643 Plane 1 Chinese traditional"),
-		  build_string (CHINESE_CNS_PLANE_RE("1")), 0);
+		  vector1(build_string (CHINESE_CNS_PLANE("1"))), 0, 0);
   staticpro (&Vcharset_chinese_cns11643_2);
   Vcharset_chinese_cns11643_2 =
     make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
@@ -1233,7 +1351,7 @@
 		  build_msg_string ("CNS11643-2 (Chinese traditional)"),
 		  build_msg_string
 		  ("CNS 11643 Plane 2 Chinese traditional"),
-		  build_string (CHINESE_CNS_PLANE_RE("2")), 0);
+		  vector1(build_string (CHINESE_CNS_PLANE("2"))), 0, 0);
   staticpro (&Vcharset_chinese_big5_1);
   Vcharset_chinese_big5_1 =
     make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
@@ -1243,7 +1361,7 @@
 		  build_msg_string ("Big5 (Level-1)"),
 		  build_msg_string
 		  ("Big5 Level-1 Chinese traditional"),
-		  build_string ("big5"), 0);
+		  vector1(build_string ("big5.eten-0")), 0, 0);
   staticpro (&Vcharset_chinese_big5_2);
   Vcharset_chinese_big5_2 =
     make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
@@ -1253,7 +1371,7 @@
 		  build_msg_string ("Big5 (Level-2)"),
 		  build_msg_string
 		  ("Big5 Level-2 Chinese traditional"),
-		  build_string ("big5"), 0);
+		  vector1(build_string ("big5.eten-0")), 0, 0);
 
 
 #ifdef ENABLE_COMPOSITE_CHARS
@@ -1268,7 +1386,7 @@
 		  build_string ("Composite"),
 		  build_msg_string ("Composite characters"),
 		  build_msg_string ("Composite characters"),
-		  build_string (""), 0);
+		  vector1(build_string ("")), 0, 0);
 #else
   /* We create a hack so that we have a way of storing ESC 0 and ESC 1
      sequences as "characters", so that they will be output correctly. */
@@ -1280,6 +1398,6 @@
 		  build_string ("Composite hack"),
 		  build_msg_string ("Composite characters hack"),
 		  build_msg_string ("Composite characters hack"),
-		  build_string (""), 0);
+		  vector1(build_string ("")), 0, 0);
 #endif /* ENABLE_COMPOSITE_CHARS */
 }