diff src/mule-charset.c @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents 4d00488244c1
children 026c5bf9c134
line wrap: on
line diff
--- a/src/mule-charset.c	Fri Mar 08 13:33:14 2002 +0000
+++ b/src/mule-charset.c	Wed Mar 13 08:54:06 2002 +0000
@@ -1,6 +1,7 @@
 /* Functions to handle multilingual characters.
    Copyright (C) 1992, 1995 Free Software Foundation, Inc.
    Copyright (C) 1995 Sun Microsystems, Inc.
+   Copyright (C) 2001, 2002 Ben Wing.
 
 This file is part of XEmacs.
 
@@ -29,9 +30,9 @@
 #include "buffer.h"
 #include "chartab.h"
 #include "elhash.h"
-#include "lstream.h"
 #include "device.h"
 #include "faces.h"
+#include "lstream.h"
 #include "mule-ccl.h"
 
 /* The various pre-defined charsets. */
@@ -60,25 +61,12 @@
 Lisp_Object Vcharset_chinese_cns11643_2;
 Lisp_Object Vcharset_chinese_big5_1;
 Lisp_Object Vcharset_chinese_big5_2;
-
-#ifdef ENABLE_COMPOSITE_CHARS
 Lisp_Object Vcharset_composite;
 
-/* Hash tables for composite chars.  One maps string representing
-   composed chars to their equivalent chars; one goes the
-   other way. */
-Lisp_Object Vcomposite_char_char2string_hash_table;
-Lisp_Object Vcomposite_char_string2char_hash_table;
-
-static int composite_char_row_next;
-static int composite_char_col_next;
-
-#endif /* ENABLE_COMPOSITE_CHARS */
-
 struct charset_lookup *chlook;
 
 static const struct lrecord_description charset_lookup_description_1[] = {
-  { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), 128+4*128*2 },
+  { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), NUM_LEADING_BYTES+4*128*2 },
   { XD_END }
 };
 
@@ -87,43 +75,17 @@
   charset_lookup_description_1
 };
 
-/* Table of number of bytes in the string representation of a character
-   indexed by the first byte of that representation.
-
-   rep_bytes_by_first_byte(c) is more efficient than the equivalent
-   canonical computation:
-
-   XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
-
-const Bytecount rep_bytes_by_first_byte[0xA0] =
-{ /* 0x00 - 0x7f are for straight ASCII */
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-  /* 0x80 - 0x8f are for Dimension-1 official charsets */
-  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-  /* 0x90 - 0x9d are for Dimension-2 official charsets */
-  /* 0x9e is for Dimension-1 private charsets */
-  /* 0x9f is for Dimension-2 private charsets */
-  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
-};
-
 Lisp_Object Qcharsetp;
 
 /* Qdoc_string, Qdimension, Qchars defined in general.c */
 Lisp_Object Qregistry, Qfinal, Qgraphic;
 Lisp_Object Qdirection;
 Lisp_Object Qreverse_direction_charset;
-Lisp_Object Qleading_byte;
 Lisp_Object Qshort_name, Qlong_name;
 
-Lisp_Object Qascii,
-  Qcontrol_1,
+Lisp_Object Qfrom_unicode, Qto_unicode;
+
+Lisp_Object
   Qlatin_iso8859_1,
   Qlatin_iso8859_2,
   Qlatin_iso8859_3,
@@ -152,235 +114,6 @@
 
 Lisp_Object Vcharset_hash_table;
 
-/* Composite characters are characters constructed by overstriking two
-   or more regular characters.
-
-   1) The old Mule implementation involves storing composite characters
-      in a buffer as a tag followed by all of the actual characters
-      used to make up the composite character.  I think this is a bad
-      idea; it greatly complicates code that wants to handle strings
-      one character at a time because it has to deal with the possibility
-      of great big ungainly characters.  It's much more reasonable to
-      simply store an index into a table of composite characters.
-
-   2) The current implementation only allows for 16,384 separate
-      composite characters over the lifetime of the XEmacs process.
-      This could become a potential problem if the user
-      edited lots of different files that use composite characters.
-      Due to FSF bogosity, increasing the number of allowable
-      composite characters under Mule would decrease the number
-      of possible faces that can exist.  Mule already has shrunk
-      this to 2048, and further shrinkage would become uncomfortable.
-      No such problems exist in XEmacs.
-
-      Composite characters could be represented as 0x80 C1 C2 C3,
-      where each C[1-3] is in the range 0xA0 - 0xFF.  This allows
-      for slightly under 2^20 (one million) composite characters
-      over the XEmacs process lifetime, and you only need to
-      increase the size of a Mule character from 19 to 21 bits.
-      Or you could use 0x80 C1 C2 C3 C4, allowing for about
-      85 million (slightly over 2^26) composite characters. */
-
-
-/************************************************************************/
-/*                       Basic Emchar functions                         */
-/************************************************************************/
-
-/* Convert a non-ASCII Mule character C into a one-character Mule-encoded
-   string in STR.  Returns the number of bytes stored.
-   Do not call this directly.  Use the macro set_charptr_emchar() instead.
- */
-
-Bytecount
-non_ascii_set_charptr_emchar (Intbyte *str, Emchar c)
-{
-  Intbyte *p;
-  Intbyte lb;
-  int c1, c2;
-  Lisp_Object charset;
-
-  p = str;
-  BREAKUP_CHAR (c, charset, c1, c2);
-  lb = CHAR_LEADING_BYTE (c);
-  if (LEADING_BYTE_PRIVATE_P (lb))
-    *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
-  *p++ = lb;
-  if (EQ (charset, Vcharset_control_1))
-    c1 += 0x20;
-  *p++ = c1 | 0x80;
-  if (c2)
-    *p++ = c2 | 0x80;
-
-  return (p - str);
-}
-
-/* Return the first character from a Mule-encoded string in STR,
-   assuming it's non-ASCII.  Do not call this directly.
-   Use the macro charptr_emchar() instead. */
-
-Emchar
-non_ascii_charptr_emchar (const Intbyte *str)
-{
-  Intbyte i0 = *str, i1, i2 = 0;
-  Lisp_Object charset;
-
-  if (i0 == LEADING_BYTE_CONTROL_1)
-    return (Emchar) (*++str - 0x20);
-
-  if (LEADING_BYTE_PREFIX_P (i0))
-    i0 = *++str;
-
-  i1 = *++str & 0x7F;
-
-  charset = CHARSET_BY_LEADING_BYTE (i0);
-  if (XCHARSET_DIMENSION (charset) == 2)
-    i2 = *++str & 0x7F;
-
-  return MAKE_CHAR (charset, i1, i2);
-}
-
-/* Return whether CH is a valid Emchar, assuming it's non-ASCII.
-   Do not call this directly.  Use the macro valid_char_p() instead. */
-
-int
-non_ascii_valid_char_p (Emchar ch)
-{
-  int f1, f2, f3;
-
-  /* Must have only lowest 19 bits set */
-  if (ch & ~0x7FFFF)
-    return 0;
-
-  f1 = CHAR_FIELD1 (ch);
-  f2 = CHAR_FIELD2 (ch);
-  f3 = CHAR_FIELD3 (ch);
-
-  if (f1 == 0)
-    {
-      Lisp_Object charset;
-
-      if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
-	  (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
-	   f2 > MAX_CHAR_FIELD2_PRIVATE)
-	return 0;
-      if (f3 < 0x20)
-	return 0;
-
-      if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
-					f2 <= MAX_CHAR_FIELD2_PRIVATE))
-	return 1;
-
-      /*
-	 NOTE: This takes advantage of the fact that
-	 FIELD2_TO_OFFICIAL_LEADING_BYTE and
-	 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
-	 */
-      charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
-      if (EQ (charset, Qnil))
-	return 0;
-      return (XCHARSET_CHARS (charset) == 96);
-    }
-  else
-    {
-      Lisp_Object charset;
-
-      if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
-	  (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
-	  f1 > MAX_CHAR_FIELD1_PRIVATE)
-	return 0;
-      if (f2 < 0x20 || f3 < 0x20)
-	return 0;
-
-#ifdef ENABLE_COMPOSITE_CHARS
-      if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
-	{
-	  if (UNBOUNDP (Fgethash (make_int (ch),
-				  Vcomposite_char_char2string_hash_table,
-				  Qunbound)))
-	    return 0;
-	  return 1;
-	}
-#endif /* ENABLE_COMPOSITE_CHARS */
-
-      if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
-	  && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
-	return 1;
-
-      if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
-	charset =
-	  CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
-      else
-	charset =
-	  CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
-
-      if (EQ (charset, Qnil))
-	return 0;
-      return (XCHARSET_CHARS (charset) == 96);
-    }
-}
-
-
-/************************************************************************/
-/*                       Basic string functions                         */
-/************************************************************************/
-
-/* Copy the character pointed to by SRC into DST.  Do not call this
-   directly.  Use the macro charptr_copy_char() instead.
-   Return the number of bytes copied.  */
-
-Bytecount
-non_ascii_charptr_copy_char (const Intbyte *src, Intbyte *dst)
-{
-  Bytecount bytes = REP_BYTES_BY_FIRST_BYTE (*src);
-  Bytecount i;
-  for (i = bytes; i; i--, dst++, src++)
-    *dst = *src;
-  return bytes;
-}
-
-
-/************************************************************************/
-/*                        streams of Emchars                            */
-/************************************************************************/
-
-/* Treat a stream as a stream of Emchar's rather than a stream of bytes.
-   The functions below are not meant to be called directly; use
-   the macros in insdel.h. */
-
-Emchar
-Lstream_get_emchar_1 (Lstream *stream, int ch)
-{
-  Intbyte str[MAX_EMCHAR_LEN];
-  Intbyte *strptr = str;
-  Bytecount bytes;
-
-  str[0] = (Intbyte) ch;
-
-  for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
-    {
-      int c = Lstream_getc (stream);
-      charbpos_checking_assert (c >= 0);
-      *++strptr = (Intbyte) c;
-    }
-  return charptr_emchar (str);
-}
-
-int
-Lstream_fput_emchar (Lstream *stream, Emchar ch)
-{
-  Intbyte str[MAX_EMCHAR_LEN];
-  Bytecount len = set_charptr_emchar (str, ch);
-  return Lstream_write (stream, str, len);
-}
-
-void
-Lstream_funget_emchar (Lstream *stream, Emchar ch)
-{
-  Intbyte str[MAX_EMCHAR_LEN];
-  Bytecount len = set_charptr_emchar (str, ch);
-  Lstream_unread (stream, str, len);
-}
-
 
 /************************************************************************/
 /*                            charset object                            */
@@ -403,7 +136,6 @@
 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
 {
   Lisp_Charset *cs = XCHARSET (obj);
-  char buf[200];
 
   if (print_readably)
     printing_unreadable_object ("#<charset %s 0x%x>",
@@ -411,27 +143,35 @@
 					     name),
 				cs->header.uid);
 
-  write_c_string ("#<charset ", printcharfun);
-  print_internal (CHARSET_NAME (cs), printcharfun, 0);
-  write_c_string (" ", printcharfun);
-  print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
-  write_c_string (" ", printcharfun);
-  print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
-  write_c_string (" ", printcharfun);
-  print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
-  sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
-	   CHARSET_TYPE (cs) == CHARSET_TYPE_94    ? "94" :
-	   CHARSET_TYPE (cs) == CHARSET_TYPE_96    ? "96" :
-	   CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
-	   "96x96",
-	   CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
-	   CHARSET_COLUMNS (cs),
-	   CHARSET_GRAPHIC (cs),
-	   CHARSET_FINAL (cs));
-  write_c_string (buf, printcharfun);
+  write_fmt_string_lisp (printcharfun, "#<charset %s %S %S %S", 4,
+			 CHARSET_NAME (cs), CHARSET_SHORT_NAME (cs),
+			 CHARSET_LONG_NAME (cs), CHARSET_DOC_STRING (cs));
+  write_fmt_string (printcharfun, " %s %s cols=%d g%d final='%c' reg=",
+		    CHARSET_TYPE (cs) == CHARSET_TYPE_94    ? "94" :
+		    CHARSET_TYPE (cs) == CHARSET_TYPE_96    ? "96" :
+		    CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
+		    "96x96",
+		    CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" :
+		    "r2l",
+		    CHARSET_COLUMNS (cs),
+		    CHARSET_GRAPHIC (cs),
+		    CHARSET_FINAL (cs));
   print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
-  sprintf (buf, " 0x%x>", cs->header.uid);
-  write_c_string (buf, printcharfun);
+  write_fmt_string (printcharfun, " 0x%x>", cs->header.uid);
+}
+
+static void
+finalize_charset (void *header, int for_disksave)
+{
+  /* See mule-charset.h, definition of Lisp_Charset. */
+  Lisp_Object charset = wrap_charset ((Lisp_Charset *) header);
+  if (for_disksave && XCHARSET_TO_UNICODE_TABLE (charset))
+    {
+      /* Control-1, ASCII, Composite don't have tables */
+      free_charset_unicode_tables (charset);
+      XCHARSET_TO_UNICODE_TABLE (charset) = 0;
+      XCHARSET_FROM_UNICODE_TABLE (charset) = 0;
+    }
 }
 
 static const struct lrecord_description charset_description[] = {
@@ -442,28 +182,68 @@
   { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
   { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
   { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
+#if 0
+  /* #### XD_UNION not yet implemented!  pdump version of XEmacs will
+     not work! */
+  { XD_UNION, offsetof (Lisp_Charset, to_unicode_table),
+      XD_INDIRECT (offsetof (Lisp_Charset, dimension), 0),
+      to_unicode_description },
+  { XD_UNION, offsetof (Lisp_Charset, from_unicode_table),
+      XD_INDIRECT (offsetof (Lisp_Charset, from_unicode_levels), 0),
+      from_unicode_description },
+#endif
   { XD_END }
 };
 
 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
-                               mark_charset, print_charset, 0, 0, 0, charset_description,
-			       Lisp_Charset);
+                               mark_charset, print_charset, finalize_charset,
+			       0, 0, charset_description, Lisp_Charset);
 
 /* Make a new charset. */
 /* #### SJT Should generic properties be allowed? */
 static Lisp_Object
-make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
-	      unsigned char type, unsigned char columns, unsigned char graphic,
-	      Intbyte final, unsigned char direction,  Lisp_Object short_name,
+make_charset (int id, Lisp_Object name, int rep_bytes,
+	      int type, int columns, int graphic,
+	      Intbyte final, int direction,  Lisp_Object short_name,
 	      Lisp_Object long_name, Lisp_Object doc,
-	      Lisp_Object reg)
+	      Lisp_Object reg, int overwrite)
 {
   Lisp_Object obj;
-  Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
+  Lisp_Charset *cs;
+
+  if (!overwrite)
+    {
+      cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
+      zero_lcrecord (cs);
+      XSETCHARSET (obj, cs);
+
+      if (final)
+	{
+	  /* Some charsets do not have final characters.  This includes
+	     Control-1, Composite, and the two faux private charsets
+	     (ASCII does have one: it is created with final 'B'). */
+	  assert (NILP (chlook->
+			charset_by_attributes[type][final][direction]));
+	  chlook->charset_by_attributes[type][final][direction] = obj;
+	}
 
-  zero_lcrecord (cs);
-
-  XSETCHARSET (obj, cs);
+      assert (NILP (chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE]));
+      chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE] = obj;
+    }
+  else
+    {
+      Lisp_Object ret;
+      /* Actually overwrite the properties of the existing charset.
+	 We do this because until now charsets could never be "deleted",
+	 so parts of the code don't bother to GC charsets. */
+      obj = chlook->charset_by_leading_byte[id - MIN_LEADING_BYTE];
+      cs = XCHARSET (obj);
+      assert (EQ (chlook->charset_by_attributes[type][final][direction],
+		  obj));
+      
+      ret = Fremhash (XCHARSET_NAME (obj), Vcharset_hash_table);
+      assert (!NILP (ret));
+    }
 
   CHARSET_ID		(cs) = id;
   CHARSET_NAME		(cs) = name;
@@ -480,27 +260,33 @@
   CHARSET_CCL_PROGRAM	(cs) = Qnil;
   CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
 
-  CHARSET_DIMENSION     (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
-				CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
-  CHARSET_CHARS         (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
-				CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
+  CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
+			    CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
+  CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
+			CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
 
-  if (final)
+  if (id == LEADING_BYTE_ASCII || id == LEADING_BYTE_CONTROL_1
+#ifdef ENABLE_COMPOSITE_CHARS
+      || id == LEADING_BYTE_COMPOSITE
+#endif
+      )
+    assert (!overwrite);
+  else
     {
-      /* some charsets do not have final characters.  This includes
-	 ASCII, Control-1, Composite, and the two faux private
-	 charsets. */
-      assert (NILP (chlook->charset_by_attributes[type][final][direction]));
-      chlook->charset_by_attributes[type][final][direction] = obj;
+      if (overwrite)
+	free_charset_unicode_tables (obj);
+      init_charset_unicode_tables (obj);
     }
 
-  assert (NILP (chlook->charset_by_leading_byte[id - 128]));
-  chlook->charset_by_leading_byte[id - 128] = obj;
-
   /* Some charsets are "faux" and don't have names or really exist at
      all except in the leading-byte table. */
   if (!NILP (name))
-    Fputhash (name, obj, Vcharset_hash_table);
+    {
+      assert (NILP (Fgethash (name, Vcharset_hash_table, Qnil)));
+      Fputhash (name, obj, Vcharset_hash_table);
+    }
+
+  recalculate_unicode_precedence ();
   return obj;
 }
 
@@ -511,14 +297,16 @@
 
   if (dimension == 1)
     {
-      if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
+      if (chlook->next_allocated_1_byte_leading_byte >
+	  MAX_LEADING_BYTE_PRIVATE_1)
 	lb = 0;
       else
 	lb = chlook->next_allocated_1_byte_leading_byte++;
     }
   else
     {
-      if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
+      if (chlook->next_allocated_2_byte_leading_byte >
+	  MAX_LEADING_BYTE_PRIVATE_2)
 	lb = 0;
       else
 	lb = chlook->next_allocated_2_byte_leading_byte++;
@@ -526,8 +314,7 @@
 
   if (!lb)
     invalid_operation
-      ("No more character sets free for this dimension",
-       make_int (dimension));
+      ("No more character sets free for this dimension", make_int (dimension));
 
   return lb;
 }
@@ -671,21 +458,31 @@
 */
        (name, doc_string, props))
 {
-  int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
+  int id, dimension = 1, chars = 94, graphic = 0, columns = -1;
+  Intbyte final = 0;
   int direction = CHARSET_LEFT_TO_RIGHT;
   int type;
   Lisp_Object registry = Qnil;
-  Lisp_Object charset;
+  Lisp_Object charset = Qnil;
   Lisp_Object ccl_program = Qnil;
   Lisp_Object short_name = Qnil, long_name = Qnil;
+  Lisp_Object existing_charset;
+  int temporary = UNBOUNDP (name);
 
-  CHECK_SYMBOL (name);
+  /* NOTE: name == Qunbound is a directive from the iso2022 code to
+     create a temporary charset for an unknown final.  We allow the final
+     to be overwritten with a real charset later on. */
+
   if (!NILP (doc_string))
     CHECK_STRING (doc_string);
+  if (!UNBOUNDP (name))
+    {
+      CHECK_SYMBOL (name);
 
-  charset = Ffind_charset (name);
-  if (!NILP (charset))
-    invalid_operation ("Cannot redefine existing charset", name);
+      charset = Ffind_charset (name);
+      if (!NILP (charset))
+	invalid_operation ("Cannot redefine existing charset", name);
+    }
 
   {
     EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
@@ -766,7 +563,6 @@
 	      invalid_argument ("Invalid value for 'ccl-program", value);
 	    ccl_program = value;
 	  }
-
 	else
 	  invalid_constant ("Unrecognized property", keyword);
       }
@@ -784,32 +580,49 @@
   else
     type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
 
-  if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
-      !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
+  existing_charset = CHARSET_BY_ATTRIBUTES (type, final,
+					    CHARSET_LEFT_TO_RIGHT);
+  if (NILP (existing_charset))
+    existing_charset = CHARSET_BY_ATTRIBUTES (type, final,
+					      CHARSET_RIGHT_TO_LEFT);
+
+  if (!NILP (existing_charset) && !XCHARSET (existing_charset)->temporary)
     invalid_argument
       ("Character set already defined for this DIMENSION/CHARS/FINAL combo",
-       Qunbound);
+       existing_charset);
+
+  if (!NILP (existing_charset))
+    /* Reuse same leading byte */
+    id = XCHARSET_ID (existing_charset);
+  else
+    id = get_unallocated_leading_byte (dimension);
 
-  id = get_unallocated_leading_byte (dimension);
+  if (temporary)
+    {
+      Intbyte tempname[80];
 
+      qxesprintf (tempname, "___temporary___%d__", id);
+      name = intern_int (tempname);
+    }
   if (NILP (doc_string))
     doc_string = build_string ("");
-
   if (NILP (registry))
     registry = build_string ("");
-
   if (NILP (short_name))
     XSETSTRING (short_name, XSYMBOL (name)->name);
-
   if (NILP (long_name))
     long_name = doc_string;
-
   if (columns == -1)
     columns = dimension;
+
   charset = make_charset (id, name, dimension + 2, type, columns, graphic,
-			  final, direction, short_name, long_name, doc_string, registry);
+			  final, direction, short_name, long_name,
+			  doc_string, registry, !NILP (existing_charset));
+
+  XCHARSET (charset)->temporary = temporary;
   if (!NILP (ccl_program))
     XCHARSET_CCL_PROGRAM (charset) = ccl_program;
+
   return charset;
 }
 
@@ -821,7 +634,8 @@
        (charset, new_name))
 {
   Lisp_Object new_charset = Qnil;
-  int id, dimension, columns, graphic, final;
+  int id, dimension, columns, graphic;
+  Intbyte final;
   int direction, type;
   Lisp_Object registry, doc_string, short_name, long_name;
   Lisp_Charset *cs;
@@ -854,7 +668,7 @@
 
   new_charset = make_charset (id, new_name, dimension + 2, type, columns,
 			      graphic, final, direction, short_name, long_name,
-			      doc_string, registry);
+			      doc_string, registry, 0);
 
   CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
   XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
@@ -1058,188 +872,68 @@
 
 
 /************************************************************************/
-/*              Lisp primitives for working with characters             */
+/*                            memory usage                              */
 /************************************************************************/
 
-DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
-Make a character from CHARSET and octets ARG1 and ARG2.
-ARG2 is required only for characters from two-dimensional charsets.
-For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
-character s with caron.
-*/
-       (charset, arg1, arg2))
-{
-  Lisp_Charset *cs;
-  int a1, a2;
-  int lowlim, highlim;
-
-  charset = Fget_charset (charset);
-  cs = XCHARSET (charset);
+#ifdef MEMORY_USAGE_STATS
 
-  if      (EQ (charset, Vcharset_ascii))     lowlim =  0, highlim = 127;
-  else if (EQ (charset, Vcharset_control_1)) lowlim =  0, highlim =  31;
-  else if (CHARSET_CHARS (cs) == 94)         lowlim = 33, highlim = 126;
-  else	/* CHARSET_CHARS (cs) == 96) */	     lowlim = 32, highlim = 127;
-
-  CHECK_INT (arg1);
-  /* It is useful (and safe, according to Olivier Galibert) to strip
-     the 8th bit off ARG1 and ARG2 because it allows programmers to
-     write (make-char 'latin-iso8859-2 CODE) where code is the actual
-     Latin 2 code of the character.  */
-  a1 = XINT (arg1) & 0x7f;
-  if (a1 < lowlim || a1 > highlim)
-    args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
-
-  if (CHARSET_DIMENSION (cs) == 1)
-    {
-      if (!NILP (arg2))
-        invalid_argument
-          ("Charset is of dimension one; second octet must be nil", arg2);
-      return make_char (MAKE_CHAR (charset, a1, 0));
-    }
+struct charset_stats
+{
+  int from_unicode;
+  int to_unicode;
+  int other;
+};
 
-  CHECK_INT (arg2);
-  a2 = XINT (arg2) & 0x7f;
-  if (a2 < lowlim || a2 > highlim)
-    args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
-
-  return make_char (MAKE_CHAR (charset, a1, a2));
-}
-
-DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
-Return the character set of CHARACTER.
-*/
-       (character))
+static void
+compute_charset_usage (Lisp_Object charset, struct charset_stats *stats,
+		      struct overhead_stats *ovstats)
 {
-  CHECK_CHAR_COERCE_INT (character);
-
-  return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
-			(CHAR_LEADING_BYTE (XCHAR (character))));
-}
-
-DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
-Return the octet numbered N (should be 0 or 1) of CHARACTER.
-N defaults to 0 if omitted.
-*/
-       (character, n))
-{
-  Lisp_Object charset;
-  int octet0, octet1;
-
-  CHECK_CHAR_COERCE_INT (character);
-
-  BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
-
-  if (NILP (n) || EQ (n, Qzero))
-    return make_int (octet0);
-  else if (EQ (n, make_int (1)))
-    return make_int (octet1);
-  else
-    invalid_constant ("Octet number must be 0 or 1", n);
+  struct Lisp_Charset *c = XCHARSET (charset);
+  xzero (*stats);
+  stats->other   += malloced_storage_size (c, sizeof (*c), ovstats);
+  stats->from_unicode += compute_from_unicode_table_size (charset, ovstats);
+  stats->to_unicode += compute_to_unicode_table_size (charset, ovstats);
 }
 
-DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
-Return list of charset and one or two position-codes of CHARACTER.
-*/
-       (character))
-{
-  /* This function can GC */
-  struct gcpro gcpro1, gcpro2;
-  Lisp_Object charset = Qnil;
-  Lisp_Object rc = Qnil;
-  int c1, c2;
-
-  GCPRO2 (charset, rc);
-  CHECK_CHAR_COERCE_INT (character);
+DEFUN ("charset-memory-usage", Fcharset_memory_usage, 1, 1, 0, /*
+Return stats about the memory usage of charset CHARSET.
+The values returned are in the form of an alist of usage types and
+byte counts.  The byte counts attempt to encompass all the memory used
+by the charset itself (separate from memory that is only logically
+associated with it), including internal structures and any malloc()
+overhead associated with them.  In practice, the byte counts are
+underestimated for various reasons, e.g. because certain memory usage
+is very hard to determine \(e.g. the amount of memory used inside the
+Xt library or inside the X server).
 
-  BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
+Multiple slices of the total memory usage may be returned, separated
+by a nil.  Each slice represents a particular view of the memory, a
+particular way of partitioning it into groups.  Within a slice, there
+is no overlap between the groups of memory, and each slice collectively
+represents all the memory concerned.
+*/
+       (charset))
+{
+  struct charset_stats stats;
+  struct overhead_stats ovstats;
+  Lisp_Object val = Qnil;
 
-  if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
-    {
-      rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
-    }
-  else
-    {
-      rc = list2 (XCHARSET_NAME (charset), make_int (c1));
-    }
-  UNGCPRO;
+  charset = Fget_charset (charset);
+  xzero (ovstats);
+  compute_charset_usage (charset, &stats, &ovstats);
 
-  return rc;
+  val = acons (Qfrom_unicode,       make_int (stats.from_unicode),      val);
+  val = acons (Qto_unicode,         make_int (stats.to_unicode),        val);
+  val = Fcons (Qnil, val);
+  val = acons (Qactually_requested, make_int (ovstats.was_requested),   val);
+  val = acons (Qmalloc_overhead,    make_int (ovstats.malloc_overhead), val);
+  val = acons (Qgap_overhead,       make_int (ovstats.gap_overhead),    val);
+  val = acons (Qdynarr_overhead,    make_int (ovstats.dynarr_overhead), val);
+
+  return Fnreverse (val);
 }
 
-
-#ifdef ENABLE_COMPOSITE_CHARS
-/************************************************************************/
-/*                     composite character functions                    */
-/************************************************************************/
-
-Emchar
-lookup_composite_char (Intbyte *str, int len)
-{
-  Lisp_Object lispstr = make_string (str, len);
-  Lisp_Object ch = Fgethash (lispstr,
-			     Vcomposite_char_string2char_hash_table,
-			     Qunbound);
-  Emchar emch;
-
-  if (UNBOUNDP (ch))
-    {
-      if (composite_char_row_next >= 128)
-	invalid_operation ("No more composite chars available", lispstr);
-      emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
-			composite_char_col_next);
-      Fputhash (make_char (emch), lispstr,
-	        Vcomposite_char_char2string_hash_table);
-      Fputhash (lispstr, make_char (emch),
-		Vcomposite_char_string2char_hash_table);
-      composite_char_col_next++;
-      if (composite_char_col_next >= 128)
-	{
-	  composite_char_col_next = 32;
-	  composite_char_row_next++;
-	}
-    }
-  else
-    emch = XCHAR (ch);
-  return emch;
-}
-
-Lisp_Object
-composite_char_string (Emchar ch)
-{
-  Lisp_Object str = Fgethash (make_char (ch),
-			      Vcomposite_char_char2string_hash_table,
-			      Qunbound);
-  assert (!UNBOUNDP (str));
-  return str;
-}
-
-xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
-Convert a string into a single composite character.
-The character is the result of overstriking all the characters in
-the string.
-*/
-       (string))
-{
-  CHECK_STRING (string);
-  return make_char (lookup_composite_char (XSTRING_DATA (string),
-					   XSTRING_LENGTH (string)));
-}
-
-xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
-Return a string of the characters comprising a composite character.
-*/
-       (ch))
-{
-  Emchar emch;
-
-  CHECK_CHAR (ch);
-  emch = XCHAR (ch);
-  if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
-    invalid_argument ("Must be composite char", ch);
-  return composite_char_string (emch);
-}
-#endif /* ENABLE_COMPOSITE_CHARS */
+#endif /* MEMORY_USAGE_STATS */
 
 
 /************************************************************************/
@@ -1269,14 +963,8 @@
   DEFSUBR (Fset_charset_ccl_program);
   DEFSUBR (Fset_charset_registry);
 
-  DEFSUBR (Fmake_char);
-  DEFSUBR (Fchar_charset);
-  DEFSUBR (Fchar_octet);
-  DEFSUBR (Fsplit_char);
-
-#ifdef ENABLE_COMPOSITE_CHARS
-  DEFSUBR (Fmake_composite_char);
-  DEFSUBR (Fcomposite_char_string);
+#ifdef MEMORY_USAGE_STATS
+  DEFSUBR (Fcharset_memory_usage);
 #endif
 
   DEFSYMBOL (Qcharsetp);
@@ -1288,13 +976,14 @@
   DEFSYMBOL (Qshort_name);
   DEFSYMBOL (Qlong_name);
 
+  DEFSYMBOL (Qfrom_unicode);
+  DEFSYMBOL (Qto_unicode);
+
   DEFSYMBOL (Ql2r);
   DEFSYMBOL (Qr2l);
 
   /* Charsets, compatible with FSF 20.3
      Naming convention is Script-Charset[-Edition] */
-  DEFSYMBOL (Qascii);
-  DEFSYMBOL (Qcontrol_1);
   DEFSYMBOL (Qlatin_iso8859_1);
   DEFSYMBOL (Qlatin_iso8859_2);
   DEFSYMBOL (Qlatin_iso8859_3);
@@ -1321,6 +1010,23 @@
   DEFSYMBOL (Qcomposite);
 }
 
+static int
+init_charset_unicode_tables_mapper (Lisp_Object key, Lisp_Object value,
+				    void *closure)
+{
+  init_charset_unicode_tables (value);
+  return 0;
+}
+
+void
+init_mule_charset (void)
+{
+  /* See mule-charset.h, definition of Lisp_Charset. */
+  if (initialized)
+    elisp_maphash (init_charset_unicode_tables_mapper, Vcharset_hash_table,
+		   0);
+}
+
 void
 vars_of_mule_charset (void)
 {
@@ -1341,15 +1047,15 @@
 
   chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
   chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
+
+  staticpro (&Vcharset_hash_table);
+  Vcharset_hash_table =
+    make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
 }
 
 void
 complex_vars_of_mule_charset (void)
 {
-  staticpro (&Vcharset_hash_table);
-  Vcharset_hash_table =
-    make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
-
   /* Predefined character sets.  We store them into variables for
      ease of access. */
 
@@ -1359,181 +1065,181 @@
 		  CHARSET_TYPE_94, 1, 0, 'B',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("ASCII"),
-		  build_string ("ASCII)"),
-		  build_string ("ASCII (ISO646 IRV)"),
-		  build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
+		  build_msg_string ("ASCII"),
+		  build_msg_string ("ASCII (ISO646 IRV)"),
+		  build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"), 0);
   staticpro (&Vcharset_control_1);
   Vcharset_control_1 =
     make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
 		  CHARSET_TYPE_94, 1, 1, 0,
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("C1"),
-		  build_string ("Control characters"),
-		  build_string ("Control characters 128-191"),
-		  build_string (""));
+		  build_msg_string ("Control characters"),
+		  build_msg_string ("Control characters 128-191"),
+		  build_string (""), 0);
   staticpro (&Vcharset_latin_iso8859_1);
   Vcharset_latin_iso8859_1 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
 		  CHARSET_TYPE_96, 1, 1, 'A',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("Latin-1"),
-		  build_string ("ISO8859-1 (Latin-1)"),
-		  build_string ("ISO8859-1 (Latin-1)"),
-		  build_string ("iso8859-1"));
+		  build_msg_string ("ISO8859-1 (Latin-1)"),
+		  build_msg_string ("ISO8859-1 (Latin-1)"),
+		  build_string ("iso8859-1"), 0);
   staticpro (&Vcharset_latin_iso8859_2);
   Vcharset_latin_iso8859_2 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
 		  CHARSET_TYPE_96, 1, 1, 'B',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("Latin-2"),
-		  build_string ("ISO8859-2 (Latin-2)"),
-		  build_string ("ISO8859-2 (Latin-2)"),
-		  build_string ("iso8859-2"));
+		  build_msg_string ("ISO8859-2 (Latin-2)"),
+		  build_msg_string ("ISO8859-2 (Latin-2)"),
+		  build_string ("iso8859-2"), 0);
   staticpro (&Vcharset_latin_iso8859_3);
   Vcharset_latin_iso8859_3 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
 		  CHARSET_TYPE_96, 1, 1, 'C',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("Latin-3"),
-		  build_string ("ISO8859-3 (Latin-3)"),
-		  build_string ("ISO8859-3 (Latin-3)"),
-		  build_string ("iso8859-3"));
+		  build_msg_string ("ISO8859-3 (Latin-3)"),
+		  build_msg_string ("ISO8859-3 (Latin-3)"),
+		  build_string ("iso8859-3"), 0);
   staticpro (&Vcharset_latin_iso8859_4);
   Vcharset_latin_iso8859_4 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
 		  CHARSET_TYPE_96, 1, 1, 'D',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("Latin-4"),
-		  build_string ("ISO8859-4 (Latin-4)"),
-		  build_string ("ISO8859-4 (Latin-4)"),
-		  build_string ("iso8859-4"));
+		  build_msg_string ("ISO8859-4 (Latin-4)"),
+		  build_msg_string ("ISO8859-4 (Latin-4)"),
+		  build_string ("iso8859-4"), 0);
   staticpro (&Vcharset_thai_tis620);
   Vcharset_thai_tis620 =
     make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
 		  CHARSET_TYPE_96, 1, 1, 'T',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("TIS620"),
-		  build_string ("TIS620 (Thai)"),
-		  build_string ("TIS620.2529 (Thai)"),
-		  build_string ("tis620"));
+		  build_msg_string ("TIS620 (Thai)"),
+		  build_msg_string ("TIS620.2529 (Thai)"),
+		  build_string ("tis620"),0);
   staticpro (&Vcharset_greek_iso8859_7);
   Vcharset_greek_iso8859_7 =
     make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
 		  CHARSET_TYPE_96, 1, 1, 'F',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("ISO8859-7"),
-		  build_string ("ISO8859-7 (Greek)"),
-		  build_string ("ISO8859-7 (Greek)"),
-		  build_string ("iso8859-7"));
+		  build_msg_string ("ISO8859-7 (Greek)"),
+		  build_msg_string ("ISO8859-7 (Greek)"),
+		  build_string ("iso8859-7"), 0);
   staticpro (&Vcharset_arabic_iso8859_6);
   Vcharset_arabic_iso8859_6 =
     make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
 		  CHARSET_TYPE_96, 1, 1, 'G',
 		  CHARSET_RIGHT_TO_LEFT,
 		  build_string ("ISO8859-6"),
-		  build_string ("ISO8859-6 (Arabic)"),
-		  build_string ("ISO8859-6 (Arabic)"),
-		  build_string ("iso8859-6"));
+		  build_msg_string ("ISO8859-6 (Arabic)"),
+		  build_msg_string ("ISO8859-6 (Arabic)"),
+		  build_string ("iso8859-6"), 0);
   staticpro (&Vcharset_hebrew_iso8859_8);
   Vcharset_hebrew_iso8859_8 =
     make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
 		  CHARSET_TYPE_96, 1, 1, 'H',
 		  CHARSET_RIGHT_TO_LEFT,
 		  build_string ("ISO8859-8"),
-		  build_string ("ISO8859-8 (Hebrew)"),
-		  build_string ("ISO8859-8 (Hebrew)"),
-		  build_string ("iso8859-8"));
+		  build_msg_string ("ISO8859-8 (Hebrew)"),
+		  build_msg_string ("ISO8859-8 (Hebrew)"),
+		  build_string ("iso8859-8"), 0);
   staticpro (&Vcharset_katakana_jisx0201);
   Vcharset_katakana_jisx0201 =
     make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
 		  CHARSET_TYPE_94, 1, 1, 'I',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("JISX0201 Kana"),
-		  build_string ("JISX0201.1976 (Japanese Kana)"),
-		  build_string ("JISX0201.1976 Japanese Kana"),
-		  build_string ("jisx0201.1976"));
+		  build_msg_string ("JISX0201.1976 (Japanese Kana)"),
+		  build_msg_string ("JISX0201.1976 Japanese Kana"),
+		  build_string ("jisx0201.1976"), 0);
   staticpro (&Vcharset_latin_jisx0201);
   Vcharset_latin_jisx0201 =
     make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
 		  CHARSET_TYPE_94, 1, 0, 'J',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("JISX0201 Roman"),
-		  build_string ("JISX0201.1976 (Japanese Roman)"),
-		  build_string ("JISX0201.1976 Japanese Roman"),
-		  build_string ("jisx0201.1976"));
+		  build_msg_string ("JISX0201.1976 (Japanese Roman)"),
+		  build_msg_string ("JISX0201.1976 Japanese Roman"),
+		  build_string ("jisx0201.1976"), 0);
   staticpro (&Vcharset_cyrillic_iso8859_5);
   Vcharset_cyrillic_iso8859_5 =
     make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
 		  CHARSET_TYPE_96, 1, 1, 'L',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("ISO8859-5"),
-		  build_string ("ISO8859-5 (Cyrillic)"),
-		  build_string ("ISO8859-5 (Cyrillic)"),
-		  build_string ("iso8859-5"));
+		  build_msg_string ("ISO8859-5 (Cyrillic)"),
+		  build_msg_string ("ISO8859-5 (Cyrillic)"),
+		  build_string ("iso8859-5"), 0);
   staticpro (&Vcharset_latin_iso8859_9);
   Vcharset_latin_iso8859_9 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
 		  CHARSET_TYPE_96, 1, 1, 'M',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("Latin-5"),
-		  build_string ("ISO8859-9 (Latin-5)"),
-		  build_string ("ISO8859-9 (Latin-5)"),
-		  build_string ("iso8859-9"));
+		  build_msg_string ("ISO8859-9 (Latin-5)"),
+		  build_msg_string ("ISO8859-9 (Latin-5)"),
+		  build_string ("iso8859-9"), 0);
   staticpro (&Vcharset_latin_iso8859_15);
   Vcharset_latin_iso8859_15 =
     make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2,
 		  CHARSET_TYPE_96, 1, 1, 'b',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("Latin-9"),
-		  build_string ("ISO8859-15 (Latin-9)"),
-		  build_string ("ISO8859-15 (Latin-9)"),
-		  build_string ("iso8859-15"));
+		  build_msg_string ("ISO8859-15 (Latin-9)"),
+		  build_msg_string ("ISO8859-15 (Latin-9)"),
+		  build_string ("iso8859-15"), 0);
   staticpro (&Vcharset_japanese_jisx0208_1978);
   Vcharset_japanese_jisx0208_1978 =
     make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
 		  CHARSET_TYPE_94X94, 2, 0, '@',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("JISX0208.1978"),
-		  build_string ("JISX0208.1978 (Japanese)"),
-		  build_string
+		  build_msg_string ("JISX0208.1978 (Japanese)"),
+		  build_msg_string
 		  ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
-		  build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
+		  build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"), 0);
   staticpro (&Vcharset_chinese_gb2312);
   Vcharset_chinese_gb2312 =
     make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
 		  CHARSET_TYPE_94X94, 2, 0, 'A',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("GB2312"),
-		  build_string ("GB2312)"),
-		  build_string ("GB2312 Chinese simplified"),
-		  build_string ("gb2312"));
+		  build_msg_string ("GB2312"), /* no stray ')', as for "ASCII" */
+		  build_msg_string ("GB2312 Chinese simplified"),
+		  build_string ("gb2312"), 0);
   staticpro (&Vcharset_japanese_jisx0208);
   Vcharset_japanese_jisx0208 =
     make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
 		  CHARSET_TYPE_94X94, 2, 0, 'B',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("JISX0208"),
-		  build_string ("JISX0208.1983/1990 (Japanese)"),
-		  build_string ("JISX0208.1983/1990 Japanese Kanji"),
-		  build_string ("jisx0208.19\\(83\\|90\\)"));
+		  build_msg_string ("JISX0208.1983/1990 (Japanese)"),
+		  build_msg_string ("JISX0208.1983/1990 Japanese Kanji"),
+		  build_string ("jisx0208.19\\(83\\|90\\)"), 0);
   staticpro (&Vcharset_korean_ksc5601);
   Vcharset_korean_ksc5601 =
     make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
 		  CHARSET_TYPE_94X94, 2, 0, 'C',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("KSC5601"),
-		  build_string ("KSC5601 (Korean"),
-		  build_string ("KSC5601 Korean Hangul and Hanja"),
-		  build_string ("ksc5601"));
+		  build_msg_string ("KSC5601 (Korean)"), /* parenthesis balanced */
+		  build_msg_string ("KSC5601 Korean Hangul and Hanja"),
+		  build_string ("ksc5601"), 0);
   staticpro (&Vcharset_japanese_jisx0212);
   Vcharset_japanese_jisx0212 =
     make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
 		  CHARSET_TYPE_94X94, 2, 0, 'D',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("JISX0212"),
-		  build_string ("JISX0212 (Japanese)"),
-		  build_string ("JISX0212 Japanese Supplement"),
-		  build_string ("jisx0212"));
+		  build_msg_string ("JISX0212 (Japanese)"),
+		  build_msg_string ("JISX0212 Japanese Supplement"),
+		  build_string ("jisx0212"), 0);
 
 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
   staticpro (&Vcharset_chinese_cns11643_1);
@@ -1542,40 +1248,40 @@
 		  CHARSET_TYPE_94X94, 2, 0, 'G',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("CNS11643-1"),
-		  build_string ("CNS11643-1 (Chinese traditional)"),
-		  build_string
+		  build_msg_string ("CNS11643-1 (Chinese traditional)"),
+		  build_msg_string
 		  ("CNS 11643 Plane 1 Chinese traditional"),
-		  build_string (CHINESE_CNS_PLANE_RE("1")));
+		  build_string (CHINESE_CNS_PLANE_RE("1")), 0);
   staticpro (&Vcharset_chinese_cns11643_2);
   Vcharset_chinese_cns11643_2 =
     make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
 		  CHARSET_TYPE_94X94, 2, 0, 'H',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("CNS11643-2"),
-		  build_string ("CNS11643-2 (Chinese traditional)"),
-		  build_string
+		  build_msg_string ("CNS11643-2 (Chinese traditional)"),
+		  build_msg_string
 		  ("CNS 11643 Plane 2 Chinese traditional"),
-		  build_string (CHINESE_CNS_PLANE_RE("2")));
+		  build_string (CHINESE_CNS_PLANE_RE("2")), 0);
   staticpro (&Vcharset_chinese_big5_1);
   Vcharset_chinese_big5_1 =
     make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
 		  CHARSET_TYPE_94X94, 2, 0, '0',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("Big5"),
-		  build_string ("Big5 (Level-1)"),
-		  build_string
+		  build_msg_string ("Big5 (Level-1)"),
+		  build_msg_string
 		  ("Big5 Level-1 Chinese traditional"),
-		  build_string ("big5"));
+		  build_string ("big5"), 0);
   staticpro (&Vcharset_chinese_big5_2);
   Vcharset_chinese_big5_2 =
     make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
 		  CHARSET_TYPE_94X94, 2, 0, '1',
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("Big5"),
-		  build_string ("Big5 (Level-2)"),
-		  build_string
+		  build_msg_string ("Big5 (Level-2)"),
+		  build_msg_string
 		  ("Big5 Level-2 Chinese traditional"),
-		  build_string ("big5"));
+		  build_string ("big5"), 0);
 
 
 #ifdef ENABLE_COMPOSITE_CHARS
@@ -1588,20 +1294,20 @@
 		  CHARSET_TYPE_96X96, 2, 0, 0,
 		  CHARSET_LEFT_TO_RIGHT,
 		  build_string ("Composite"),
-		  build_string ("Composite characters"),
-		  build_string ("Composite characters"),
-		  build_string (""));
-
-  /* #### not dumped properly */
-  composite_char_row_next = 32;
-  composite_char_col_next = 32;
-
-  Vcomposite_char_string2char_hash_table =
-    make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
-  Vcomposite_char_char2string_hash_table =
-    make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
-  staticpro (&Vcomposite_char_string2char_hash_table);
-  staticpro (&Vcomposite_char_char2string_hash_table);
+		  build_msg_string ("Composite characters"),
+		  build_msg_string ("Composite characters"),
+		  build_string (""), 0);
+#else
+  /* We create a hack so that we have a way of storing ESC 0 and ESC 1
+     sequences as "characters", so that they will be output correctly. */
+  staticpro (&Vcharset_composite);
+  Vcharset_composite =
+    make_charset (LEADING_BYTE_COMPOSITE_REPLACEMENT, Qcomposite, 2,
+		  CHARSET_TYPE_96, 1, 1, '|',
+		  CHARSET_LEFT_TO_RIGHT,
+		  build_string ("Composite hack"),
+		  build_msg_string ("Composite characters hack"),
+		  build_msg_string ("Composite characters hack"),
+		  build_string (""), 0);
 #endif /* ENABLE_COMPOSITE_CHARS */
-
 }