xemacs-beta: src/mule-coding.c comparison

comparison src/mule-coding.c @ 70:131b0175ea99 r20-0b30

Import from CVS: tag r20-0b30

author	cvs
date	Mon, 13 Aug 2007 09:02:59 +0200
parents
children	54cc21c15cbb

comparison

equal deleted inserted replaced

-:804d1389bcd6
+:131b0175ea99
+/* Code conversion functions.
+Copyright (C) 1991, 1995 Free Software Foundation, Inc.
+Copyright (C) 1995 Sun Microsystems, Inc.
+This file is part of XEmacs.
+XEmacs is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+XEmacs is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+You should have received a copy of the GNU General Public License
+along with XEmacs; see the file COPYING.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* Synched up with: Mule 2.3.   Not in FSF. */
+/* Rewritten by Ben Wing <wing@666.com>. */
+#include <config.h>
+#include "lisp.h"
+#include "buffer.h"
+#include "elhash.h"
+#include "insdel.h"
+#include "lstream.h"
+#include "mule-coding.h"
+Lisp_Object Qfile_coding_system, Qcoding_system_error;
+Lisp_Object Vkeyboard_coding_system;
+Lisp_Object Vterminal_coding_system;
+Lisp_Object Vprocess_input_coding_system;
+Lisp_Object Vprocess_output_coding_system;
+Lisp_Object Vpathname_coding_system;
+/* Table of symbols identifying each coding category. */
+Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST + 1];
+/* Coding system currently associated with each coding category. */
+Lisp_Object coding_category_system[CODING_CATEGORY_LAST + 1];
+/* Table of all coding categories in decreasing order of priority.
+This describes a permutation of the possible coding categories. */
+int coding_category_by_priority[CODING_CATEGORY_LAST + 1];
+Lisp_Object Qcoding_system_p;
+Lisp_Object Qbig5, Qshift_jis, Qno_conversion, Qccl, Qiso2022;
+/* Qinternal in general.c */
+Lisp_Object Qmnemonic, Qeol_type;
+Lisp_Object Qcr, Qcrlf, Qlf;
+Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf;
+Lisp_Object Qpost_read_conversion;
+Lisp_Object Qpre_write_conversion;
+Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3;
+Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output;
+Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output;
+Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift;
+Lisp_Object Qno_iso6429, Qescape_quoted;
+Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion;
+Lisp_Object Qencode, Qdecode;
+Lisp_Object Qctext;
+Lisp_Object Vcoding_system_hashtable;
+/* Additional information used by the ISO2022 decoder and detector. */
+struct iso2022_decoder
+{
+/* CHARSET holds the character sets currently assigned to the G0
+through G3 variables.  It is initialized from the array
+INITIAL_CHARSET in CODESYS. */
+Lisp_Object charset[4];
+/* Which registers are currently invoked into the left (GL) and
+right (GR) halves of the 8-bit encoding space? */
+int register_left, register_right;
+/* ISO_ESC holds a value indicating part of an escape sequence
+that has already been seen. */
+enum iso_esc_flag esc;
+/* This records the bytes we've seen so far in an escape sequence,
+in case the sequence is invalid (we spit out the bytes unchanged). */
+unsigned char esc_bytes[8];
+/* Index for next byte to store in ISO escape sequence. */
+int esc_bytes_index;
+/* Stuff seen so far when composing a string. */
+unsigned_char_dynarr *composite_chars;
+/* If we saw an invalid designation sequence for a particular
+register, we flag it here and switch to ASCII.  The next time we
+see a valid designation for this register, we turn off the flag
+and do the designation normally, but pretend the sequence was
+invalid.  The effect of all this is that (most of the time) the
+escape sequences for both the switch to the unknown charset, and
+the switch back to the known charset, get inserted literally into
+the buffer and saved out as such.  The hope is that we can
+preserve the escape sequences so that the resulting written out
+file makes sense.  If we don't do any of this, the designation
+to the invalid charset will be preserved but that switch back
+to the known charset will probably get eaten because it was
+the same charset that was already present in the register. */
+unsigned char invalid_designated[4];
+/* We try to do similar things as above for direction-switching
+sequences.  If we encountered a direction switch while an
+invalid designation was present, or an invalid designation
+just after a direction switch (i.e. no valid designation
+encountered yet), we insert the direction-switch escape
+sequence literally into the output stream, and later on
+insert the corresponding direction-restoring escape sequence
+literally also. */
+int switched_dir_and_no_valid_charset_yet :1;
+int invalid_switch_dir :1;
+/* Tells the decoder to output the escape sequence literally
+even though it was valid.  Used in the games we play to
+avoid lossage when we encounter invalid designations. */
+int output_literally :1;
+/* We encountered a direction switch followed by an invalid
+designation.  We didn't output the direction switch
+literally because we didn't know about the invalid designation;
+but we have to do so now. */
+int output_direction_sequence :1;
+};
+Lisp_Object Fcopy_coding_system (Lisp_Object old_coding_system,
+				 Lisp_Object new_name);
+struct detection_state;
+static int detect_coding_shift_jis (struct detection_state *st,
+				    CONST unsigned char *src,
+				    unsigned int n);
+static void decode_coding_shift_jis (Lstream *decoding,
+				     CONST unsigned char *src,
+				     unsigned_char_dynarr *dst,
+				     unsigned int n);
+static void encode_coding_shift_jis (Lstream *encoding,
+				     CONST unsigned char *src,
+				     unsigned_char_dynarr *dst,
+				     unsigned int n);
+static int detect_coding_big5 (struct detection_state *st,
+			       CONST unsigned char *src,
+			       unsigned int n);
+static void decode_coding_big5 (Lstream *decoding,
+				CONST unsigned char *src,
+				unsigned_char_dynarr *dst, unsigned int n);
+static void encode_coding_big5 (Lstream *encoding,
+				CONST unsigned char *src,
+				unsigned_char_dynarr *dst, unsigned int n);
+static int postprocess_iso2022_mask (int mask);
+static void reset_iso2022 (Lisp_Object coding_system,
+			   struct iso2022_decoder *iso);
+static int detect_coding_iso2022 (struct detection_state *st,
+				  CONST unsigned char *src,
+				  unsigned int n);
+static void decode_coding_iso2022 (Lstream *decoding,
+				   CONST unsigned char *src,
+				   unsigned_char_dynarr *dst, unsigned int n);
+static void encode_coding_iso2022 (Lstream *encoding,
+				   CONST unsigned char *src,
+				   unsigned_char_dynarr *dst, unsigned int n);
+static void decode_coding_no_conversion (Lstream *decoding,
+					 CONST unsigned char *src,
+					 unsigned_char_dynarr *dst,
+					 unsigned int n);
+static void encode_coding_no_conversion (Lstream *encoding,
+					 CONST unsigned char *src,
+					 unsigned_char_dynarr *dst,
+					 unsigned int n);
+static void mule_decode (Lstream *decoding, CONST unsigned char *src,
+			 unsigned_char_dynarr *dst, unsigned int n);
+static void mule_encode (Lstream *encoding, CONST unsigned char *src,
+			 unsigned_char_dynarr *dst, unsigned int n);
+struct codesys_prop
+{
+Lisp_Object sym;
+int prop_type;
+};
+typedef struct codesys_prop_dynarr_type
+{
+Dynarr_declare (struct codesys_prop);
+} codesys_prop_dynarr;
+codesys_prop_dynarr *the_codesys_prop_dynarr;
+enum codesys_prop_enum
+{
+CODESYS_PROP_ALL_OK,
+CODESYS_PROP_ISO2022,
+CODESYS_PROP_CCL
+};
+/************************************************************************/
+/*                       Coding system functions                        */
+/************************************************************************/
+static Lisp_Object mark_coding_system (Lisp_Object, void (*) (Lisp_Object));
+static void print_coding_system (Lisp_Object, Lisp_Object, int);
+static void finalize_coding_system (void *header, int for_disksave);
+DEFINE_LRECORD_IMPLEMENTATION ("coding-system", coding_system,
+			       mark_coding_system, print_coding_system,
+			       finalize_coding_system,
+			       0, 0, struct Lisp_Coding_System);
+static Lisp_Object
+mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
+{
+struct Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);
+int i;
+(markobj) (CODING_SYSTEM_NAME (codesys));
+(markobj) (CODING_SYSTEM_DOC_STRING (codesys));
+(markobj) (CODING_SYSTEM_MNEMONIC (codesys));
+(markobj) (CODING_SYSTEM_EOL_LF (codesys));
+(markobj) (CODING_SYSTEM_EOL_CRLF (codesys));
+(markobj) (CODING_SYSTEM_EOL_CR (codesys));
+switch (CODING_SYSTEM_TYPE (codesys))
+{
+case CODESYS_ISO2022:
+for (i = 0; i < 4; i++)
+	(markobj) (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i));
+if (codesys->iso2022.input_conv)
+	{
+	  for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++)
+	    {
+	      struct charset_conversion_spec *ccs =
+		Dynarr_atp (codesys->iso2022.input_conv, i);
+	      (markobj) (ccs->from_charset);
+	      (markobj) (ccs->to_charset);
+	    }
+	}
+if (codesys->iso2022.output_conv)
+	{
+	  for (i = 0; i < Dynarr_length (codesys->iso2022.output_conv); i++)
+	    {
+	      struct charset_conversion_spec *ccs =
+		Dynarr_atp (codesys->iso2022.output_conv, i);
+	      (markobj) (ccs->from_charset);
+	      (markobj) (ccs->to_charset);
+	    }
+	}
+break;
+case CODESYS_CCL:
+(markobj) (CODING_SYSTEM_CCL_DECODE (codesys));
+(markobj) (CODING_SYSTEM_CCL_ENCODE (codesys));
+break;
+default:
+break;
+}
+(markobj) (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys));
+return CODING_SYSTEM_POST_READ_CONVERSION (codesys);
+}
+static void
+print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
+		     int escapeflag)
+{
+struct Lisp_Coding_System *c = XCODING_SYSTEM (obj);
+if (print_readably)
+error ("printing unreadable object #<coding_system 0x%x>",
+	   c->header.uid);
+write_c_string ("#<coding_system ", printcharfun);
+print_internal (c->name, printcharfun, 1);
+write_c_string (">", printcharfun);
+}
+static void
+finalize_coding_system (void *header, int for_disksave)
+{
+struct Lisp_Coding_System *c = (struct Lisp_Coding_System *) header;
+/* Since coding systems never go away, this function is not
+necessary.  But it would be necessary if we changed things
+so that coding systems could go away. */
+if (!for_disksave) /* see comment in lstream.c */
+{
+switch (CODING_SYSTEM_TYPE (c))
+	{
+	case CODESYS_ISO2022:
+	  if (c->iso2022.input_conv)
+	    {
+	      Dynarr_free (c->iso2022.input_conv);
+	      c->iso2022.input_conv = 0;
+	    }
+	  if (c->iso2022.output_conv)
+	    {
+	      Dynarr_free (c->iso2022.output_conv);
+	      c->iso2022.output_conv = 0;
+	    }
+	  break;
+	default:
+	  break;
+	}
+}
+}
+static int
+symbol_to_eol_type (Lisp_Object symbol)
+{
+CHECK_SYMBOL (symbol);
+if (NILP (symbol))           return EOL_AUTODETECT;
+else if (EQ (symbol, Qlf))   return EOL_LF;
+else if (EQ (symbol, Qcrlf)) return EOL_CRLF;
+else if (EQ (symbol, Qcr))   return EOL_CR;
+else
+signal_simple_error ("Unrecognized eol type", symbol);
+return 0; /* not reached */
+}
+static Lisp_Object
+eol_type_to_symbol (int eol_type)
+{
+switch (eol_type)
+{
+case EOL_LF:         return Qlf;
+case EOL_CRLF:       return Qcrlf;
+case EOL_CR:         return Qcr;
+case EOL_AUTODETECT: return Qnil;
+default:             abort ();
+}
+return Qnil; /* not reached */
+}
+static void
+setup_eol_coding_systems (struct Lisp_Coding_System *codesys)
+{
+Lisp_Object codesys_obj = Qnil;
+int len = string_length (XSYMBOL (CODING_SYSTEM_NAME (codesys))->name);
+char *codesys_name = (char *) alloca (len + 7);
+Lisp_Object codesys_name_sym, sub_codesys_obj;
+/* kludge */
+XSETCODING_SYSTEM (codesys_obj, codesys);
+memcpy (codesys_name,
+	  string_data (XSYMBOL (CODING_SYSTEM_NAME (codesys))->name), len);
+#define DEFINE_SUB_CODESYS(op_sys, Type) do {	\
+strcpy (codesys_name + len, "-" op_sys);	\
+codesys_name_sym = intern (codesys_name);	\
+sub_codesys_obj = Fcopy_coding_system (codesys_obj, codesys_name_sym); \
+XCODING_SYSTEM_EOL_TYPE (sub_codesys_obj) = Type; \
+CODING_SYSTEM_##Type (codesys) = sub_codesys_obj; \
+} while (0)
+DEFINE_SUB_CODESYS("unix", EOL_LF);
+DEFINE_SUB_CODESYS("dos",  EOL_CRLF);
+DEFINE_SUB_CODESYS("mac",  EOL_CR);
+}
+DEFUN ("coding-system-p", Fcoding_system_p, 1, 1, 0, /*
+T if OBJECT is a coding system.
+A coding system is an object that defines how text containing multiple
+character sets is encoded into a stream of (typically 8-bit) bytes.
+The coding system is used to decode the stream into a series of
+characters (which may be from multiple charsets) when the text is read
+from a file or process, and is used to encode the text back into the
+same format when it is written out to a file or process.
+For example, many ISO2022-compliant coding systems (such as Compound
+Text, which is used for inter-client data under the X Window System)
+use escape sequences to switch between different charsets -- Japanese
+Kanji, for example, is invoked with \"ESC $ ( B\"; ASCII is invoked
+with \"ESC ( B\"; and Cyrillic is invoked with \"ESC - L\".  See
+`make-coding-system' for more information.
+Coding systems are normally identified using a symbol, and the
+symbol is accepted in place of the actual coding system object whenever
+a coding system is called for. (This is similar to how faces work.)
+*/
+(object))
+{
+return CODING_SYSTEMP (object) ? Qt : Qnil;
+}
+DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /*
+Retrieve the coding system of the given name.
+If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply
+returned.  Otherwise, CODING-SYSTEM-OR-NAME should be a symbol.
+If there is no such coding system, nil is returned.  Otherwise the
+associated coding system object is returned.
+*/
+(coding_system_or_name))
+{
+if (NILP (coding_system_or_name))
+coding_system_or_name = Qbinary;
+if (CODING_SYSTEMP (coding_system_or_name))
+return coding_system_or_name;
+CHECK_SYMBOL (coding_system_or_name);
+return Fgethash (coding_system_or_name, Vcoding_system_hashtable, Qnil);
+}
+DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
+Retrieve the coding system of the given name.
+Same as `find-coding-system' except that if there is no such
+coding system, an error is signaled instead of returning nil.
+*/
+(name))
+{
+Lisp_Object coding_system = Ffind_coding_system (name);
+if (NILP (coding_system))
+signal_simple_error ("No such coding system", name);
+return coding_system;
+}
+/* We store the coding systems in hash tables with the names as the key and the
+actual coding system object as the value.  Occasionally we need to use them
+in a list format.  These routines provide us with that. */
+struct coding_system_list_closure
+{
+Lisp_Object *coding_system_list;
+};
+static void
+add_coding_system_to_list_mapper (CONST void *hash_key, void *hash_contents,
+				  void *coding_system_list_closure)
+{
+/* This function can GC */
+Lisp_Object key, contents;
+Lisp_Object *coding_system_list;
+struct coding_system_list_closure *chcl = coding_system_list_closure;
+CVOID_TO_LISP (key, hash_key);
+VOID_TO_LISP (contents, hash_contents);
+coding_system_list = chcl->coding_system_list;
+*coding_system_list = Fcons (XCODING_SYSTEM (contents)->name,
+			       *coding_system_list);
+}
+DEFUN ("coding-system-list", Fcoding_system_list, 0, 0, 0, /*
+Return a list of the names of all defined coding systems.
+*/
+())
+{
+Lisp_Object coding_system_list = Qnil;
+struct gcpro gcpro1;
+struct coding_system_list_closure coding_system_list_closure;
+GCPRO1 (coding_system_list);
+coding_system_list_closure.coding_system_list = &coding_system_list;
+elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hashtable,
+		 &coding_system_list_closure);
+UNGCPRO;
+return coding_system_list;
+}
+DEFUN ("coding-system-name", Fcoding_system_name, 1, 1, 0, /*
+Return the name of the given coding system.
+*/
+(coding_system))
+{
+coding_system = Fget_coding_system (coding_system);
+return (XCODING_SYSTEM_NAME (coding_system));
+}
+static struct Lisp_Coding_System *
+allocate_coding_system (int type, Lisp_Object name)
+{
+struct Lisp_Coding_System *codesys;
+codesys = (struct Lisp_Coding_System *)
+alloc_lcrecord (sizeof (struct Lisp_Coding_System), lrecord_coding_system);
+zero_lcrecord (codesys);
+CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = Qnil;
+CODING_SYSTEM_POST_READ_CONVERSION (codesys) = Qnil;
+CODING_SYSTEM_EOL_TYPE (codesys) = EOL_AUTODETECT;
+CODING_SYSTEM_EOL_CRLF (codesys) = Qnil;
+CODING_SYSTEM_EOL_CR   (codesys) = Qnil;
+CODING_SYSTEM_EOL_LF   (codesys) = Qnil;
+CODING_SYSTEM_TYPE     (codesys) = type;
+if (type == CODESYS_ISO2022)
+{
+int i;
+for (i = 0; i < 4; i++)
+	CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i) = Qnil;
+}
+else if (type == CODESYS_CCL)
+{
+CODING_SYSTEM_CCL_DECODE (codesys) = Qnil;
+CODING_SYSTEM_CCL_ENCODE (codesys) = Qnil;
+}
+CODING_SYSTEM_NAME (codesys) = name;
+return codesys;
+}
+/* Given a list of charset conversion specs as specified in a Lisp
+program, parse it into STORE_HERE. */
+static void
+parse_charset_conversion_specs (charset_conversion_spec_dynarr *store_here,
+				Lisp_Object spec_list)
+{
+Lisp_Object rest;
+EXTERNAL_LIST_LOOP (rest, spec_list)
+{
+Lisp_Object car = XCAR (rest);
+Lisp_Object from, to;
+struct charset_conversion_spec spec;
+if (!CONSP (car) || !CONSP (XCDR (car)) || !NILP (XCDR (XCDR (car))))
+	signal_simple_error ("Invalid charset conversion spec", car);
+from = Fget_charset (XCAR (car));
+to = Fget_charset (XCAR (XCDR (car)));
+if (XCHARSET_TYPE (from) != XCHARSET_TYPE (to))
+	signal_simple_error_2
+	  ("Attempted conversion between different charset types",
+	   from, to);
+spec.from_charset = from;
+spec.to_charset = to;
+Dynarr_add (store_here, spec);
+}
+}
+/* Given a dynarr LOAD_HERE of internally-stored charset conversion
+specs, return the equivalent as the Lisp programmer would see it.
+If LOAD_HERE is 0, return Qnil. */
+static Lisp_Object
+unparse_charset_conversion_specs (charset_conversion_spec_dynarr *load_here)
+{
+int i;
+Lisp_Object result = Qnil;
+if (!load_here)
+return Qnil;
+for (i = 0; i < Dynarr_length (load_here); i++)
+{
+struct charset_conversion_spec *ccs =
+	Dynarr_atp (load_here, i);
+result = Fcons (list2 (ccs->from_charset, ccs->to_charset), result);
+}
+return Fnreverse (result);
+}
+DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /*
+Register symbol NAME as a coding system.
+TYPE describes the conversion method used and should be one of
+nil or 'autodetect
+Automatic conversion.  XEmacs attempts to detect the coding system
+used in the file.
+'no-conversion
+No conversion.  Use this for binary files and such.  On output,
+graphic characters that are not in ASCII or Latin-1 will be
+replaced by a ?. (For a no-conversion-encoded buffer, these
+characters will only be present if you explicitly insert them.)
+'shift-jis
+Shift-JIS (a Japanese encoding commonly used in PC operating systems).
+'iso2022
+Any ISO2022-compliant encoding.  Among other things, this includes
+JIS (the Japanese encoding commonly used for e-mail), EUC (the
+standard Unix encoding for Japanese and other languages), and
+Compound Text (the encoding used in X11).  You can specify more
+specific information about the conversion with the FLAGS argument.
+'big5
+Big5 (the encoding commonly used for Taiwanese).
+'ccl
+The conversion is performed using a user-written pseudo-code
+program.  CCL (Code Conversion Language) is the name of this
+pseudo-code.
+'internal
+Write out or read in the raw contents of the memory representing
+the buffer's text.  This is primarily useful for debugging
+purposes, and is only enabled when XEmacs has been compiled with
+DEBUG_XEMACS defined (via the --debug configure option).
+WARNING: Reading in a file using 'internal conversion can result
+in an internal inconsistency in the memory representing a
+buffer's text, which will produce unpredictable results and may
+cause XEmacs to crash.  Under normal circumstances you should
+never use 'internal conversion.
+DOC-STRING is a string describing the coding system.
+PROPS is a property list, describing the specific nature of the
+character set.  Recognized properties are:
+'mnemonic
+String to be displayed in the modeline when this coding system is
+active.
+'eol-type
+End-of-line conversion to be used.  It should be one of
+	nil
+		Automatically detect the end-of-line type (LF, CRLF,
+		or CR).  Also generate subsidiary coding systems named
+		`NAME-unix', `NAME-dos', and `NAME-mac', that are
+		identical to this coding system but have an EOL-TYPE
+		value of 'lf, 'crlf, and 'cr, respectively.
+	'lf
+		The end of a line is marked externally using ASCII LF.
+		Since this is also the way that XEmacs represents an
+		end-of-line internally, specifying this option results
+		in no end-of-line conversion.  This is the standard
+		format for Unix text files.
+	'crlf
+		The end of a line is marked externally using ASCII
+		CRLF.  This is the standard format for MS-DOS text
+		files.
+	'cr
+		The end of a line is marked externally using ASCII CR.
+		This is the standard format for Macintosh text files.
+	t
+		Automatically detect the end-of-line type but do not
+		generate subsidiary coding systems.  (This value is
+		converted to nil when stored internally, and
+		`coding-system-property' will return nil.)
+'post-read-conversion
+Function called after a file has been read in, to perform the
+decoding.  Called with two arguments, BEG and END, denoting
+a region of the current buffer to be decoded.
+'pre-write-conversion
+Function called before a file is written out, to perform the
+encoding.  Called with two arguments, BEG and END, denoting
+a region of the current buffer to be encoded.
+The following additional properties are recognized if TYPE is 'iso2022:
+'charset-g0
+'charset-g1
+'charset-g2
+'charset-g3
+The character set initially designated to the G0 - G3 registers.
+The value should be one of
+-- A charset object (designate that character set)
+	  -- nil (do not ever use this register)
+	  -- t (no character set is initially designated to
+		the register, but may be later on; this automatically
+		sets the corresponding `force-g*-on-output' property)
+'force-g0-on-output
+'force-g1-on-output
+'force-g2-on-output
+'force-g2-on-output
+If non-nil, send an explicit designation sequence on output before
+using the specified register.
+'short
+If non-nil, use the short forms \"ESC $ @\", \"ESC $ A\", and
+\"ESC $ B\" on output in place of the full designation sequences
+\"ESC $ ( @\", \"ESC $ ( A\", and \"ESC $ ( B\".
+'no-ascii-eol
+If non-nil, don't designate ASCII to G0 at each end of line on output.
+Setting this to non-nil also suppresses other state-resetting that
+normally happens at the end of a line.
+'no-ascii-cntl
+If non-nil, don't designate ASCII to G0 before control chars on output.
+'seven
+If non-nil, use 7-bit environment on output.  Otherwise, use 8-bit
+environment.
+'lock-shift
+If non-nil, use locking-shift (SO/SI) instead of single-shift
+or designation by escape sequence.
+'no-iso6429
+If non-nil, don't use ISO6429's direction specification.
+'escape-quoted
+If non-nil, literal control characters that are the same as
+the beginning of a recognized ISO2022 or ISO6429 escape sequence
+(in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E),
+SS3 (0x8F), and CSI (0x9B)) are \"quoted\" with an escape character
+so that they can be properly distinguished from an escape sequence.
+(Note that doing this results in a non-portable encoding.) This
+encoding flag is used for byte-compiled files.  Note that ESC
+is a good choice for a quoting character because there are no
+escape sequences whose second byte is a character from the Control-0
+or Control-1 character sets; this is explicitly disallowed by the
+ISO2022 standard.
+'input-charset-conversion
+A list of conversion specifications, specifying conversion of
+characters in one charset to another when decoding is performed.
+Each specification is a list of two elements: the source charset,
+and the destination charset.
+'output-charset-conversion
+A list of conversion specifications, specifying conversion of
+characters in one charset to another when encoding is performed.
+The form of each specification is the same as for
+'input-charset-conversion.
+The following additional properties are recognized (and required)
+if TYPE is 'ccl:
+'decode
+CCL program used for decoding (converting to internal format).
+'encode
+CCL program used for encoding (converting to external format).
+*/
+(name, type, doc_string, props))
+{
+struct Lisp_Coding_System *codesys;
+Lisp_Object rest, key, value;
+int ty;
+int need_to_setup_eol_systems = 1;
+/* Convert type to constant */
+if (NILP (type) || EQ (type, Qautodetect)) { ty = CODESYS_AUTODETECT; }
+else if (EQ (type, Qshift_jis))            { ty = CODESYS_SHIFT_JIS; }
+else if (EQ (type, Qiso2022))              { ty = CODESYS_ISO2022; }
+else if (EQ (type, Qbig5))                 { ty = CODESYS_BIG5; }
+else if (EQ (type, Qccl))                  { ty = CODESYS_CCL; }
+else if (EQ (type, Qno_conversion))        { ty = CODESYS_NO_CONVERSION; }
+#ifdef DEBUG_XEMACS
+else if (EQ (type, Qinternal))             { ty = CODESYS_INTERNAL; }
+#endif
+else
+signal_simple_error ("Invalid coding system type", type);
+CHECK_SYMBOL (name);
+codesys = allocate_coding_system (ty, name);
+if (NILP (doc_string))
+doc_string = build_string ("");
+else
+CHECK_STRING (doc_string);
+CODING_SYSTEM_DOC_STRING (codesys) = doc_string;
+EXTERNAL_PROPERTY_LIST_LOOP (rest, key, value, props)
+{
+if (EQ (key, Qmnemonic))
+	{
+if (!NILP (value))
+	    CHECK_STRING (value);
+	  CODING_SYSTEM_MNEMONIC (codesys) = value;
+	}
+else if (EQ (key, Qeol_type))
+	{
+	  need_to_setup_eol_systems = NILP (value);
+	  if (EQ (value, Qt))
+	    value = Qnil;
+	  CODING_SYSTEM_EOL_TYPE (codesys) = symbol_to_eol_type (value);
+	}
+else if (EQ (key, Qpost_read_conversion)) CODING_SYSTEM_POST_READ_CONVERSION (codesys) = value;
+else if (EQ (key, Qpre_write_conversion)) CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = value;
+else if (ty == CODESYS_ISO2022)
+	{
+#define FROB_INITIAL_CHARSET(charset_num) \
+CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, charset_num) = \
+((EQ (value, Qt) || EQ (value, Qnil)) ? value : Fget_charset (value))
+	  if      (EQ (key, Qcharset_g0)) FROB_INITIAL_CHARSET (0);
+	  else if (EQ (key, Qcharset_g1)) FROB_INITIAL_CHARSET (1);
+	  else if (EQ (key, Qcharset_g2)) FROB_INITIAL_CHARSET (2);
+	  else if (EQ (key, Qcharset_g3)) FROB_INITIAL_CHARSET (3);
+#define FROB_FORCE_CHARSET(charset_num) \
+CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (codesys, charset_num) = !NILP (value)
+	  else if (EQ (key, Qforce_g0_on_output)) FROB_FORCE_CHARSET (0);
+	  else if (EQ (key, Qforce_g1_on_output)) FROB_FORCE_CHARSET (1);
+	  else if (EQ (key, Qforce_g2_on_output)) FROB_FORCE_CHARSET (2);
+	  else if (EQ (key, Qforce_g3_on_output)) FROB_FORCE_CHARSET (3);
+#define FROB_BOOLEAN_PROPERTY(prop) \
+CODING_SYSTEM_ISO2022_##prop (codesys) = !NILP (value)
+	  else if (EQ (key, Qshort))         FROB_BOOLEAN_PROPERTY (SHORT);
+	  else if (EQ (key, Qno_ascii_eol))  FROB_BOOLEAN_PROPERTY (NO_ASCII_EOL);
+	  else if (EQ (key, Qno_ascii_cntl)) FROB_BOOLEAN_PROPERTY (NO_ASCII_CNTL);
+	  else if (EQ (key, Qseven))         FROB_BOOLEAN_PROPERTY (SEVEN);
+	  else if (EQ (key, Qlock_shift))    FROB_BOOLEAN_PROPERTY (LOCK_SHIFT);
+	  else if (EQ (key, Qno_iso6429))    FROB_BOOLEAN_PROPERTY (NO_ISO6429);
+	  else if (EQ (key, Qescape_quoted)) FROB_BOOLEAN_PROPERTY (ESCAPE_QUOTED);
+	  else if (EQ (key, Qinput_charset_conversion))
+	    {
+	      codesys->iso2022.input_conv =
+		Dynarr_new (struct charset_conversion_spec);
+	      parse_charset_conversion_specs (codesys->iso2022.input_conv,
+					      value);
+	    }
+	  else if (EQ (key, Qoutput_charset_conversion))
+	    {
+	      codesys->iso2022.output_conv =
+		Dynarr_new (struct charset_conversion_spec);
+	      parse_charset_conversion_specs (codesys->iso2022.output_conv,
+					      value);
+	    }
+	  else
+	    signal_simple_error ("Unrecognized property", key);
+	}
+else if (EQ (type, Qccl))
+	{
+	  if (EQ (key, Qdecode))
+	    {
+	      CHECK_VECTOR (value);
+	      CODING_SYSTEM_CCL_DECODE (codesys) = value;
+	    }
+	  else if (EQ (key, Qencode))
+	    {
+	      CHECK_VECTOR (value);
+	      CODING_SYSTEM_CCL_ENCODE (codesys) = value;
+	    }
+	  else
+	    signal_simple_error ("Unrecognized property", key);
+	}
+else
+	signal_simple_error ("Unrecognized property", key);
+}
+if (need_to_setup_eol_systems)
+setup_eol_coding_systems (codesys);
+{
+Lisp_Object codesys_obj;
+XSETCODING_SYSTEM (codesys_obj, codesys);
+Fputhash (name, codesys_obj, Vcoding_system_hashtable);
+return codesys_obj;
+}
+}
+DEFUN ("copy-coding-system", Fcopy_coding_system, 2, 2, 0, /*
+Copy OLD-CODING-SYSTEM to NEW-NAME.
+If NEW-NAME does not name an existing coding system, a new one will
+be created.
+*/
+(old_coding_system, new_name))
+{
+Lisp_Object new_coding_system;
+old_coding_system = Fget_coding_system (old_coding_system);
+new_coding_system = Ffind_coding_system (new_name);
+if (NILP (new_coding_system))
+{
+XSETCODING_SYSTEM (new_coding_system,
+			 allocate_coding_system
+			 (XCODING_SYSTEM_TYPE (old_coding_system),
+			  new_name));
+Fputhash (new_name, new_coding_system, Vcoding_system_hashtable);
+}
+{
+struct Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system);
+struct Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system);
+memcpy (((char *) to  ) + sizeof (to->header),
+	    ((char *) from) + sizeof (from->header),
+	    sizeof (*from) - sizeof (from->header));
+to->name = new_name;
+}
+return new_coding_system;
+}
+static Lisp_Object
+subsidiary_coding_system (Lisp_Object coding_system, int eol_type)
+{
+struct Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
+Lisp_Object new_coding_system;
+if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
+return coding_system;
+if (eol_type == EOL_AUTODETECT)
+return coding_system;
+switch (eol_type)
+{
+case EOL_LF:   new_coding_system = CODING_SYSTEM_EOL_LF   (cs); break;
+case EOL_CR:   new_coding_system = CODING_SYSTEM_EOL_CR   (cs); break;
+case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break;
+default:       abort ();
+}
+return NILP (new_coding_system) ? coding_system : new_coding_system;
+}
+DEFUN ("subsidiary-coding-system", Fsubsidiary_coding_system, 2, 2, 0, /*
+Return the subsidiary coding system of CODING-SYSTEM with eol type EOL-TYPE.
+*/
+(coding_system, eol_type))
+{
+coding_system = Fget_coding_system (coding_system);
+return subsidiary_coding_system (coding_system,
+				   symbol_to_eol_type (eol_type));
+}
+/************************************************************************/
+/*                         Coding system accessors                      */
+/************************************************************************/
+DEFUN ("coding-system-doc-string", Fcoding_system_doc_string, 1, 1, 0, /*
+Return the doc string for CODING-SYSTEM.
+*/
+(coding_system))
+{
+coding_system = Fget_coding_system (coding_system);
+return XCODING_SYSTEM_DOC_STRING (coding_system);
+}
+DEFUN ("coding-system-type", Fcoding_system_type, 1, 1, 0, /*
+Return the type of CODING-SYSTEM.
+*/
+(coding_system))
+{
+switch (XCODING_SYSTEM_TYPE (Fget_coding_system (coding_system)))
+{
+case CODESYS_AUTODETECT:	return Qautodetect;
+case CODESYS_SHIFT_JIS:	return Qshift_jis;
+case CODESYS_ISO2022:	return Qiso2022;
+case CODESYS_BIG5:		return Qbig5;
+case CODESYS_CCL:		return Qccl;
+case CODESYS_NO_CONVERSION:	return Qno_conversion;
+#ifdef DEBUG_XEMACS
+case CODESYS_INTERNAL:	return Qinternal;
+#endif
+default:
+abort ();
+}
+return Qnil; /* not reached */
+}
+DEFUN ("coding-system-property", Fcoding_system_property, 2, 2, 0, /*
+Return the PROP property of CODING-SYSTEM.
+*/
+(coding_system, prop))
+{
+int i, ok = 0;
+enum coding_system_type type;
+coding_system = Fget_coding_system (coding_system);
+CHECK_SYMBOL (prop);
+type = XCODING_SYSTEM_TYPE (coding_system);
+for (i = 0; !ok && i < Dynarr_length (the_codesys_prop_dynarr); i++)
+if (EQ (Dynarr_at (the_codesys_prop_dynarr, i).sym, prop))
+{
+	ok = 1;
+	switch (Dynarr_at (the_codesys_prop_dynarr, i).prop_type)
+	  {
+	  case CODESYS_PROP_ALL_OK:
+	    break;
+	  case CODESYS_PROP_ISO2022:
+	    if (type != CODESYS_ISO2022)
+	      signal_simple_error
+		("Property only valid in ISO2022 coding systems",
+		 prop);
+	    break;
+	  case CODESYS_PROP_CCL:
+	    if (type != CODESYS_CCL)
+	      signal_simple_error
+		("Property only valid in CCL coding systems",
+		 prop);
+	    break;
+	  default:
+	    abort ();
+	  }
+}
+if (!ok)
+signal_simple_error ("Unrecognized property", prop);
+if (EQ (prop, Qname))
+return XCODING_SYSTEM_NAME (coding_system);
+else if (EQ (prop, Qtype))
+return Fcoding_system_type (coding_system);
+else if (EQ (prop, Qdoc_string))
+return XCODING_SYSTEM_DOC_STRING (coding_system);
+else if (EQ (prop, Qmnemonic))
+return XCODING_SYSTEM_MNEMONIC (coding_system);
+else if (EQ (prop, Qeol_type))
+return eol_type_to_symbol (XCODING_SYSTEM_EOL_TYPE (coding_system));
+else if (EQ (prop, Qeol_lf))
+return XCODING_SYSTEM_EOL_LF (coding_system);
+else if (EQ (prop, Qeol_crlf))
+return XCODING_SYSTEM_EOL_CRLF (coding_system);
+else if (EQ (prop, Qeol_cr))
+return XCODING_SYSTEM_EOL_CR (coding_system);
+else if (EQ (prop, Qpost_read_conversion))
+return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system);
+else if (EQ (prop, Qpre_write_conversion))
+return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system);
+else if (type == CODESYS_ISO2022)
+{
+#define INITIAL_CHARSET(charset_num) \
+(XCHARSET_NAME (XCODING_SYSTEM_ISO2022_INITIAL_CHARSET \
+		  (coding_system, charset_num)))
+if      (EQ (prop, Qcharset_g0)) return INITIAL_CHARSET (0);
+else if (EQ (prop, Qcharset_g1)) return INITIAL_CHARSET (1);
+else if (EQ (prop, Qcharset_g2)) return INITIAL_CHARSET (2);
+else if (EQ (prop, Qcharset_g3)) return INITIAL_CHARSET (3);
+#define FORCE_CHARSET(charset_num) \
+(XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT \
+(coding_system, charset_num) ? Qt : Qnil)
+else if (EQ (prop, Qforce_g0_on_output)) return FORCE_CHARSET (0);
+else if (EQ (prop, Qforce_g1_on_output)) return FORCE_CHARSET (1);
+else if (EQ (prop, Qforce_g2_on_output)) return FORCE_CHARSET (2);
+else if (EQ (prop, Qforce_g3_on_output)) return FORCE_CHARSET (3);
+#define LISP_BOOLEAN(prop) \
+(XCODING_SYSTEM_ISO2022_##prop (coding_system) ? Qt : Qnil)
+else if (EQ (prop, Qshort))         return LISP_BOOLEAN (SHORT);
+else if (EQ (prop, Qno_ascii_eol))  return LISP_BOOLEAN (NO_ASCII_EOL);
+else if (EQ (prop, Qno_ascii_cntl)) return LISP_BOOLEAN (NO_ASCII_CNTL);
+else if (EQ (prop, Qseven))         return LISP_BOOLEAN (SEVEN);
+else if (EQ (prop, Qlock_shift))    return LISP_BOOLEAN (LOCK_SHIFT);
+else if (EQ (prop, Qno_iso6429))    return LISP_BOOLEAN (NO_ISO6429);
+else if (EQ (prop, Qescape_quoted)) return LISP_BOOLEAN (ESCAPE_QUOTED);
+else if (EQ (prop, Qinput_charset_conversion))
+	return
+	  unparse_charset_conversion_specs
+	    (XCODING_SYSTEM (coding_system)->iso2022.input_conv);
+else if (EQ (prop, Qoutput_charset_conversion))
+	return
+	  unparse_charset_conversion_specs
+	    (XCODING_SYSTEM (coding_system)->iso2022.output_conv);
+else
+	abort ();
+}
+else if (type == CODESYS_CCL)
+{
+if (EQ (prop, Qdecode))
+	return XCODING_SYSTEM_CCL_DECODE (coding_system);
+else if (EQ (prop, Qencode))
+	return XCODING_SYSTEM_CCL_ENCODE (coding_system);
+else
+	abort ();
+}
+else
+abort ();
+return Qnil; /* not reached */
+}
+/************************************************************************/
+/*                       Coding category functions                      */
+/************************************************************************/
+static int
+decode_coding_category (Lisp_Object symbol)
+{
+int i;
+CHECK_SYMBOL (symbol);
+for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+if (EQ (coding_category_symbol[i], symbol))
+return i;
+signal_simple_error ("Unrecognized coding category", symbol);
+return 0; /* not reached */
+}
+DEFUN ("coding-category-list", Fcoding_category_list, 0, 0, 0, /*
+Return a list of all recognized coding categories.
+*/
+())
+{
+int i;
+Lisp_Object list = Qnil;
+for (i = CODING_CATEGORY_LAST; i >= 0; i--)
+list = Fcons (coding_category_symbol[i], list);
+return list;
+}
+DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /*
+Change the priority order of the coding categories.
+LIST should be list of coding categories, in descending order of
+priority.  Unspecified coding categories will be lower in priority
+than all specified ones, in the same relative order they were in
+previously.
+*/
+(list))
+{
+int category_to_priority[CODING_CATEGORY_LAST + 1];
+int i, j;
+Lisp_Object rest;
+/* First generate a list that maps coding categories to priorities. */
+for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+category_to_priority[i] = -1;
+/* Highest priority comes from the specified list. */
+i = 0;
+EXTERNAL_LIST_LOOP (rest, list)
+{
+int cat = decode_coding_category (XCAR (rest));
+if (category_to_priority[cat] >= 0)
+	signal_simple_error ("Duplicate coding category in list", XCAR (rest));
+category_to_priority[cat] = i++;
+}
+/* Now go through the existing categories by priority to retrieve
+the categories not yet specified and preserve their priority
+order. */
+for (j = 0; j <= CODING_CATEGORY_LAST; j++)
+{
+int cat = coding_category_by_priority[j];
+if (category_to_priority[cat] < 0)
+	category_to_priority[cat] = i++;
+}
+/* Now we need to construct the inverse of the mapping we just
+constructed. */
+for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+coding_category_by_priority[category_to_priority[i]] = i;
+/* Phew!  That was confusing. */
+return Qnil;
+}
+DEFUN ("coding-priority-list", Fcoding_priority_list, 0, 0, 0, /*
+Return a list of coding categories in descending order of priority.
+*/
+())
+{
+int i;
+Lisp_Object list = Qnil;
+for (i = CODING_CATEGORY_LAST; i >= 0; i--)
+list = Fcons (coding_category_symbol[coding_category_by_priority[i]],
+		  list);
+return list;
+}
+DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /*
+Change the coding system associated with a coding category.
+*/
+(coding_category, coding_system))
+{
+int cat = decode_coding_category (coding_category);
+coding_system = Fget_coding_system (coding_system);
+coding_category_system[cat] = coding_system;
+return Qnil;
+}
+DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /*
+Return the coding system associated with a coding category.
+*/
+(coding_category))
+{
+int cat = decode_coding_category (coding_category);
+Lisp_Object sys = coding_category_system[cat];
+if (!NILP (sys))
+return XCODING_SYSTEM_NAME (sys);
+return Qnil;
+}
+/************************************************************************/
+/*                     Detecting the encoding of data                   */
+/************************************************************************/
+struct detection_state
+{
+int eol_type;
+int seen_non_ascii;
+int mask;
+struct
+{
+int mask;
+int in_second_byte;
+}
+big5;
+struct
+{
+int mask;
+int in_second_byte;
+}
+shift_jis;
+struct
+{
+int mask;
+int initted;
+struct iso2022_decoder iso;
+unsigned int flags;
+int high_byte_count;
+int saw_single_shift:1;
+}
+iso2022;
+struct
+{
+int seen_anything;
+int just_saw_cr;
+}
+eol;
+};
+static int
+acceptable_control_char_p (int c)
+{
+switch (c)
+{
+/* Allow and ignore control characters that you might
+	 reasonably see in a text file */
+case '\r':
+case '\n':
+case '\t':
+case  7: /* bell */
+case  8: /* backspace */
+case 11: /* vertical tab */
+case 12: /* form feed */
+case 26: /* MS-DOS C-z junk */
+return 1;
+default:
+return 0;
+}
+}
+static int
+mask_has_at_most_one_bit_p (int mask)
+{
+/* Perhaps the only thing useful you learn from intensive Microsoft
+technical interviews */
+return (mask & (mask - 1)) == 0;
+}
+static int
+detect_eol_type (struct detection_state *st, CONST unsigned char *src,
+		 unsigned int n)
+{
+int c;
+while (n--)
+{
+c = *src++;
+if (c == '\r')
+	st->eol.just_saw_cr = 1;
+else
+	{
+	  if (c == '\n')
+	    {
+	      if (st->eol.just_saw_cr)
+		return EOL_CRLF;
+	      else if (st->eol.seen_anything)
+		return EOL_LF;
+	    }
+	  else if (st->eol.just_saw_cr)
+	    return EOL_CR;
+	  st->eol.just_saw_cr = 0;
+	}
+st->eol.seen_anything = 1;
+}
+return EOL_AUTODETECT;
+}
+/* Attempt to determine the encoding and EOL type of the given text.
+Before calling this function for the first type, you must initialize
+st->eol_type as appropriate and initialize st->mask to ~0.
+st->eol_type holds the determined EOL type, or EOL_AUTODETECT if
+not yet known.
+st->mask holds the determined coding category mask, or ~0 if only
+ASCII has been seen so far.
+Returns:
+0 == st->eol_type is EOL_AUTODETECT and/or more than coding category
+is present in st->mask
+1 == definitive answers are here for both st->eol_type and st->mask
+*/
+static int
+detect_coding_type (struct detection_state *st, CONST unsigned char *src,
+		    unsigned int n, int just_do_eol)
+{
+int c;
+if (st->eol_type == EOL_AUTODETECT)
+st->eol_type = detect_eol_type (st, src, n);
+if (just_do_eol)
+return st->eol_type != EOL_AUTODETECT;
+if (!st->seen_non_ascii)
+{
+for (; n; n--, src++)
+	{
+	  c = *src;
+	  if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80)
+	    {
+	      st->seen_non_ascii = 1;
+	      st->shift_jis.mask = ~0;
+	      st->big5.mask = ~0;
+	      st->iso2022.mask = ~0;
+	      break;
+	    }
+	}
+}
+if (!n)
+return 0;
+if (!mask_has_at_most_one_bit_p (st->iso2022.mask))
+st->iso2022.mask = detect_coding_iso2022 (st, src, n);
+if (!mask_has_at_most_one_bit_p (st->shift_jis.mask))
+st->shift_jis.mask = detect_coding_shift_jis (st, src, n);
+if (!mask_has_at_most_one_bit_p (st->big5.mask))
+st->big5.mask = detect_coding_big5 (st, src, n);
+st->mask = st->iso2022.mask | st->shift_jis.mask | st->big5.mask;
+{
+int retval = mask_has_at_most_one_bit_p (st->mask);
+st->mask |= CODING_CATEGORY_NO_CONVERSION_MASK;
+return retval && st->eol_type != EOL_AUTODETECT;
+}
+}
+static Lisp_Object
+coding_system_from_mask (int mask)
+{
+if (mask == ~0)
+{
+/* If the file was entirely or basically ASCII, use the
+	 default value of `file-coding-system'. */
+Lisp_Object retval =
+	XBUFFER (Vbuffer_defaults)->file_coding_system;
+if (!NILP (retval))
+	{
+	  retval = Ffind_coding_system (retval);
+	  if (NILP (retval))
+	    {
+	      warn_when_safe
+		(Qbad_variable, Qwarning,
+		 "Invalid `default-file-coding-system', set to nil");
+	      XBUFFER (Vbuffer_defaults)->file_coding_system = Qnil;
+	    }
+	}
+if (NILP (retval))
+	retval = Fget_coding_system (Qno_conversion);
+return retval;
+}
+else
+{
+int i;
+int cat = -1;
+mask = postprocess_iso2022_mask (mask);
+/* Look through the coding categories by priority and find
+	 the first one that is allowed. */
+for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+	{
+	  cat = coding_category_by_priority[i];
+	  if ((mask & (1 << cat)) &&
+	      !NILP (coding_category_system[cat]))
+	    break;
+	}
+if (cat >= 0)
+	return coding_category_system[cat];
+else
+	return Fget_coding_system (Qno_conversion);
+}
+}
+/* Given a seekable read stream and potential coding system and EOL type
+as specified, do any autodetection that is called for.  If the
+coding system and/or EOL type are not autodetect, they will be left
+alone; but this function will never return an autodetect coding system
+or EOL type.
+This function does not automatically fetch subsidiary coding systems;
+that should be unnecessary with the explicit eol-type argument. */
+static void
+determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
+			      int *eol_type_in_out)
+{
+struct detection_state decst;
+if (*eol_type_in_out == EOL_AUTODETECT)
+*eol_type_in_out = XCODING_SYSTEM_EOL_TYPE (*codesys_in_out);
+memset (&decst, 0, sizeof (decst));
+decst.eol_type = *eol_type_in_out;
+decst.mask = ~0;
+/* If autodetection is called for, do it now. */
+if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT ||
+*eol_type_in_out == EOL_AUTODETECT)
+{
+while (1)
+	{
+	  unsigned char random_buffer[4096];
+	  int nread;
+	  nread = Lstream_read (stream, random_buffer, sizeof (random_buffer));
+	  if (!nread)
+	    break;
+	  if (detect_coding_type (&decst, random_buffer, nread,
+				  XCODING_SYSTEM_TYPE (*codesys_in_out) !=
+				  CODESYS_AUTODETECT))
+	    break;
+	}
+*eol_type_in_out = decst.eol_type;
+if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT)
+	*codesys_in_out = coding_system_from_mask (decst.mask);
+}
+/* If we absolutely can't determine the EOL type, just assume LF. */
+if (*eol_type_in_out == EOL_AUTODETECT)
+*eol_type_in_out = EOL_LF;
+Lstream_rewind (stream);
+}
+DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /*
+Detect coding system of the text in the region between START and END.
+Returned value is a list of possible coding systems ordered by priority.
+If only ASCII characters are found, it returns 'autodetect or one of its
+subsidiary coding systems according to a detected end-of-line type.
+Optional arg BUFFER defaults to the current buffer.
+*/
+(start, end, buffer))
+{
+Lisp_Object val = Qnil;
+struct buffer *buf = decode_buffer (buffer, 0);
+Bufpos b, e;
+Lisp_Object instream;
+struct detection_state decst;
+get_buffer_range_char (buf, start, end, &b, &e, 0);
+instream = make_lisp_buffer_input_stream (buf, b, e, 0);
+instream = make_encoding_input_stream (XLSTREAM (instream),
+					 Fget_coding_system (Qbinary));
+memset (&decst, 0, sizeof (decst));
+decst.eol_type = EOL_AUTODETECT;
+decst.mask = ~0;
+while (1)
+{
+unsigned char random_buffer[4096];
+int nread;
+nread = Lstream_read (XLSTREAM (instream), random_buffer,
+			    sizeof (random_buffer));
+if (!nread)
+	break;
+if (detect_coding_type (&decst, random_buffer, nread, 0))
+	break;
+}
+if (decst.mask == ~0)
+{
+val = subsidiary_coding_system (Fget_coding_system (Qautodetect),
+				      decst.eol_type);
+}
+else
+{
+int i;
+val = Qnil;
+decst.mask = postprocess_iso2022_mask (decst.mask);
+for (i = CODING_CATEGORY_LAST; i >= 0; i--)
+	{
+	  int sys = coding_category_by_priority[i];
+	  if (decst.mask & (1 << sys))
+	    {
+	      Lisp_Object codesys = coding_category_system[sys];
+	      if (!NILP (codesys))
+		codesys = subsidiary_coding_system (codesys, decst.eol_type);
+	      val = Fcons (codesys, val);
+	    }
+	}
+}
+return val;
+}
+/************************************************************************/
+/*           Converting to internal Mule format ("decoding")            */
+/************************************************************************/
+/* A decoding stream is a stream used for decoding text (i.e.
+converting from some external format to internal format).
+The decoding-stream object keeps track of the actual coding
+stream, the stream that is at the other end, and data that
+needs to be persistent across the lifetime of the stream. */
+/* Handle the EOL stuff related to just-read-in character C.  EOL is
+the EOL type of the coding stream.  FLAGS is the current value of
+FLAGS in the coding stream, and may be modified by this macro.
+(The macro only looks at the CODING_STATE_CR flag.) DST is the
+Dynarr to which the decoded bytes are to be written.  You need to
+also define a local goto label "label_continue_loop" that is at the
+end of the main character-reading loop.
+If C is a CR character, then this macro handles it entirely and
+jumps to label_continue_loop.  Otherwise, this macro does not add
+anything to DST, and continues normally.  You should continue
+processing C normally after this macro. */
+#define DECODE_HANDLE_EOL_TYPE(eol, c, flags, dst)		\
+do {								\
+if (c == '\r')						\
+{								\
+if (eol == EOL_CR)					\
+	Dynarr_add (dst, '\n');					\
+else if (eol != EOL_CRLF || flags & CODING_STATE_CR)	\
+	Dynarr_add (dst, c);					\
+else							\
+	flags |= CODING_STATE_CR;				\
+goto label_continue_loop;					\
+}								\
+else if (flags & CODING_STATE_CR)				\
+{	/* eol == CODING_SYSTEM_EOL_CRLF */			\
+if (c != '\n')						\
+	Dynarr_add (dst, '\r');					\
+flags &= ~CODING_STATE_CR;				\
+}								\
+} while (0)
+/* C should be a binary character in the range 0 - 255; convert
+to internal format and add to Dynarr DST. */
+#define DECODE_ADD_BINARY_CHAR(c, dst)		\
+do {						\
+if (BYTE_ASCII_P (c))				\
+Dynarr_add (dst, c);			\
+else if (BYTE_C1_P (c))			\
+{						\
+Dynarr_add (dst, LEADING_BYTE_CONTROL_1);	\
+Dynarr_add (dst, c + 0x20);		\
+}						\
+else						\
+{						\
+Dynarr_add (dst, LEADING_BYTE_LATIN_1);	\
+Dynarr_add (dst, c);			\
+}						\
+} while (0)
+#define DECODE_OUTPUT_PARTIAL_CHAR(ch)	\
+do {					\
+if (ch)				\
+{					\
+DECODE_ADD_BINARY_CHAR (ch, dst);	\
+ch = 0;				\
+}					\
+} while (0)
+#define DECODE_HANDLE_END_OF_CONVERSION(flags, ch, dst)\
+do {							\
+DECODE_OUTPUT_PARTIAL_CHAR (ch);			\
+if (flags & CODING_STATE_END)				\
+{							\
+if (flags & CODING_STATE_CR)			\
+	Dynarr_add (dst, '\r');				\
+}							\
+} while (0)
+#define DECODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, decoding)
+struct decoding_stream
+{
+/* Coding system that governs the conversion. */
+struct Lisp_Coding_System *codesys;
+/* Stream that we read the encoded data from or
+write the decoded data to. */
+Lstream *other_end;
+/* If we are reading, then we can return only a fixed amount of
+data, so if the conversion resulted in too much data, we store it
+here for retrieval the next time around. */
+unsigned_char_dynarr *runoff;
+/* FLAGS holds flags indicating the current state of the decoding.
+Some of these flags are dependent on the coding system. */
+unsigned int flags;
+/* CH holds a partially built-up character.  Since we only deal
+with one- and two-byte characters at the moment, we only use
+this to store the first byte of a two-byte character. */
+unsigned int ch;
+/* EOL_TYPE specifies the type of end-of-line conversion that
+currently applies.  We need to keep this separate from the
+EOL type stored in CODESYS because the latter might indicate
+automatic EOL-type detection while the former will always
+indicate a particular EOL type. */
+int eol_type;
+/* Additional ISO2022 information.  We define the structure above
+because it's also needed by the detection routines. */
+struct iso2022_decoder iso2022;
+/* Additional information (the state of the running CCL program)
+used by the CCL decoder. */
+struct ccl_program ccl;
+struct detection_state decst;
+};
+static int decoding_reader     (Lstream *stream,       unsigned char *data, int size);
+static int decoding_writer     (Lstream *stream, CONST unsigned char *data, int size);
+static int decoding_rewinder   (Lstream *stream);
+static int decoding_seekable_p (Lstream *stream);
+static int decoding_flusher    (Lstream *stream);
+static int decoding_closer     (Lstream *stream);
+static Lisp_Object decoding_marker (Lisp_Object stream,
+				    void (*markobj) (Lisp_Object));
+DEFINE_LSTREAM_IMPLEMENTATION ("decoding", lstream_decoding,
+			       sizeof (struct decoding_stream));
+static Lisp_Object
+decoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
+{
+Lstream *str = DECODING_STREAM_DATA (XLSTREAM (stream))->other_end;
+Lisp_Object str_obj;
+/* We do not need to mark the coding systems or charsets stored
+within the stream because they are stored in a global list
+and automatically marked. */
+XSETLSTREAM (str_obj, str);
+(markobj) (str_obj);
+if (str->imp->marker)
+return (str->imp->marker) (str_obj, markobj);
+else
+return Qnil;
+}
+/* Read SIZE bytes of data and store it into DATA.  We are a decoding stream
+so we read data from the other end, decode it, and store it into DATA. */
+static int
+decoding_reader (Lstream *stream, unsigned char *data, int size)
+{
+struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+unsigned char *orig_data = data;
+int read_size;
+int error_occurred = 0;
+/* We need to interface to mule_decode(), which expects to take some
+amount of data and store the result into a Dynarr.  We have
+mule_decode() store into str->runoff, and take data from there
+as necessary. */
+/* We loop until we have enough data, reading chunks from the other
+end and decoding it. */
+while (1)
+{
+/* Take data from the runoff if we can.  Make sure to take at
+	 most SIZE bytes, and delete the data from the runoff. */
+if (Dynarr_length (str->runoff) > 0)
+	{
+	  int chunk = min (size, Dynarr_length (str->runoff));
+	  memcpy (data, Dynarr_atp (str->runoff, 0), chunk);
+	  Dynarr_delete_many (str->runoff, 0, chunk);
+	  data += chunk;
+	  size -= chunk;
+	}
+if (size == 0)
+	break; /* No more room for data */
+if (str->flags & CODING_STATE_END)
+	/* This means that on the previous iteration, we hit the EOF on
+	   the other end.  We loop once more so that mule_decode() can
+	   output any final stuff it may be holding, or any "go back
+	   to a sane state" escape sequences. (This latter makes sense
+	   during encoding.) */
+	break;
+/* Exhausted the runoff, so get some more.  DATA has at least
+	 SIZE bytes left of storage in it, so it's OK to read directly
+	 into it.  (We'll be overwriting above, after we've decoded it
+	 into the runoff.) */
+read_size = Lstream_read (str->other_end, data, size);
+if (read_size < 0)
+	{
+	  error_occurred = 1;
+	  break;
+	}
+if (read_size == 0)
+	/* There might be some more end data produced in the translation.
+	   See the comment above. */
+	str->flags |= CODING_STATE_END;
+mule_decode (stream, data, str->runoff, read_size);
+}
+if (data - orig_data == 0)
+return error_occurred ? -1 : 0;
+else
+return data - orig_data;
+}
+static int
+decoding_writer (Lstream *stream, CONST unsigned char *data, int size)
+{
+struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+int retval;
+/* Decode all our data into the runoff, and then attempt to write
+it all out to the other end.  Remove whatever chunk we succeeded
+in writing. */
+mule_decode (stream, data, str->runoff, size);
+retval = Lstream_write (str->other_end, Dynarr_atp (str->runoff, 0),
+			  Dynarr_length (str->runoff));
+if (retval > 0)
+Dynarr_delete_many (str->runoff, 0, retval);
+/* Do NOT return retval.  The return value indicates how much
+of the incoming data was written, not how many bytes were
+written. */
+return size;
+}
+static void
+reset_decoding_stream (struct decoding_stream *str)
+{
+if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_ISO2022)
+{
+Lisp_Object coding_system = Qnil;
+XSETCODING_SYSTEM (coding_system, str->codesys);
+reset_iso2022 (coding_system, &str->iso2022);
+}
+else if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_CCL)
+{
+set_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys),
+		       0, 0, 0);
+}
+str->flags = str->ch = 0;
+}
+static int
+decoding_rewinder (Lstream *stream)
+{
+struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+reset_decoding_stream (str);
+Dynarr_reset (str->runoff);
+return Lstream_rewind (str->other_end);
+}
+static int
+decoding_seekable_p (Lstream *stream)
+{
+struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+return Lstream_seekable_p (str->other_end);
+}
+static int
+decoding_flusher (Lstream *stream)
+{
+struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+return Lstream_flush (str->other_end);
+}
+static int
+decoding_closer (Lstream *stream)
+{
+struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+if (stream->flags & LSTREAM_FL_WRITE)
+{
+str->flags |= CODING_STATE_END;
+decoding_writer (stream, 0, 0);
+}
+Dynarr_free (str->runoff);
+if (str->iso2022.composite_chars)
+Dynarr_free (str->iso2022.composite_chars);
+return Lstream_close (str->other_end);
+}
+Lisp_Object
+decoding_stream_coding_system (Lstream *stream)
+{
+Lisp_Object coding_system = Qnil;
+struct decoding_stream *str = DECODING_STREAM_DATA (stream);
+XSETCODING_SYSTEM (coding_system, str->codesys);
+return subsidiary_coding_system (coding_system, str->eol_type);
+}
+void
+set_decoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
+{
+struct Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
+struct decoding_stream *str = DECODING_STREAM_DATA (lstr);
+str->codesys = cs;
+if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
+str->eol_type = CODING_SYSTEM_EOL_TYPE (cs);
+reset_decoding_stream (str);
+}
+/* WARNING WARNING WARNING WARNING!!!!!  If you open up a decoding
+stream for writing, no automatic code detection will be performed.
+The reason for this is that automatic code detection requires a
+seekable input.  Things will also fail if you open a decoding
+stream for reading using a non-fully-specified coding system and
+a non-seekable input stream. */
+static Lisp_Object
+make_decoding_stream_1 (Lstream *stream, Lisp_Object codesys,
+			CONST char *mode)
+{
+Lstream *lstr = Lstream_new (lstream_decoding, mode);
+struct decoding_stream *str = DECODING_STREAM_DATA (lstr);
+Lisp_Object obj;
+memset (str, 0, sizeof (*str));
+str->other_end = stream;
+str->runoff = (unsigned_char_dynarr *) Dynarr_new (unsigned char);
+str->eol_type = EOL_AUTODETECT;
+if (!strcmp (mode, "r")
+&& Lstream_seekable_p (stream))
+/* We can determine the coding system now. */
+determine_real_coding_system (stream, &codesys, &str->eol_type);
+set_decoding_stream_coding_system (lstr, codesys);
+str->decst.eol_type = str->eol_type;
+str->decst.mask = ~0;
+XSETLSTREAM (obj, lstr);
+return obj;
+}
+Lisp_Object
+make_decoding_input_stream (Lstream *stream, Lisp_Object codesys)
+{
+return make_decoding_stream_1 (stream, codesys, "r");
+}
+Lisp_Object
+make_decoding_output_stream (Lstream *stream, Lisp_Object codesys)
+{
+return make_decoding_stream_1 (stream, codesys, "w");
+}
+/* Note: the decode_coding_? functions all take the same
+arguments as mule_decode(), which is to say some SRC data of
+size N, which is to be stored into dynamic array DST.
+DECODING is the stream within which the decoding is
+taking place, but no data is actually read from or
+written to that stream; that is handled in decoding_reader()
+or decoding_writer().  This allows the same functions to
+be used for both reading and writing. */
+static void
+mule_decode (Lstream *decoding, CONST unsigned char *src,
+	     unsigned_char_dynarr *dst, unsigned int n)
+{
+struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+/* If necessary, do encoding-detection now.  We do this when
+we're a writing stream or a non-seekable reading stream,
+meaning that we can't just process the whole input,
+rewind, and start over. */
+if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_AUTODETECT ||
+str->eol_type == EOL_AUTODETECT)
+{
+Lisp_Object codesys = Qnil;
+XSETCODING_SYSTEM (codesys, str->codesys);
+detect_coding_type (&str->decst, src, n,
+			  CODING_SYSTEM_TYPE (str->codesys) !=
+			  CODESYS_AUTODETECT);
+if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_AUTODETECT &&
+	  str->decst.mask != ~0)
+	/* #### This is cheesy.  What we really ought to do is
+	   buffer up a certain amount of data so as to get a
+	   less random result. */
+	codesys = coding_system_from_mask (str->decst.mask);
+str->eol_type = str->decst.eol_type;
+if (XCODING_SYSTEM (codesys) != str->codesys)
+	{
+	  /* Preserve the CODING_STATE_END flag in case it was set.
+	     If we erase it, bad things might happen. */
+	  int was_end = str->flags & CODING_STATE_END;
+set_decoding_stream_coding_system (decoding, codesys);
+	  if (was_end)
+	    str->flags |= CODING_STATE_END;
+	}
+}
+switch (CODING_SYSTEM_TYPE (str->codesys))
+{
+#ifdef DEBUG_XEMACS
+case CODESYS_INTERNAL:
+Dynarr_add_many (dst, src, n);
+break;
+#endif
+case CODESYS_AUTODETECT:
+/* If we got this far and still haven't decided on the coding
+	 system, then do no conversion. */
+case CODESYS_NO_CONVERSION:
+decode_coding_no_conversion (decoding, src, dst, n);
+break;
+case CODESYS_SHIFT_JIS:
+decode_coding_shift_jis (decoding, src, dst, n);
+break;
+case CODESYS_BIG5:
+decode_coding_big5 (decoding, src, dst, n);
+break;
+case CODESYS_CCL:
+ccl_driver (&str->ccl, src, dst, n, (str->flags) & CODING_STATE_END);
+break;
+case CODESYS_ISO2022:
+decode_coding_iso2022 (decoding, src, dst, n);
+break;
+default:
+abort ();
+}
+}
+static Lisp_Object
+close_both_streams (Lisp_Object cons)
+{
+Lisp_Object instream  = XCAR (cons);
+Lisp_Object outstream = XCDR (cons);
+Lstream_close (XLSTREAM (outstream));
+Lstream_close (XLSTREAM (instream));
+return Qnil;
+}
+DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4, 0, /*
+Decode the text between START and END which is encoded in CODING-SYSTEM.
+This is useful if you've read in encoded text from a file without decoding
+it (e.g. you read in a JIS-formatted file but used the `binary' or
+`no-conversion' coding system, so that it shows up as \"^[$B!<!+^[(B\").
+Return length of decoded text.
+BUFFER defaults to the current buffer if unspecified.
+*/
+(start, end, coding_system, buffer))
+{
+Bufpos b, e;
+struct buffer *buf = decode_buffer (buffer, 0);
+Lisp_Object instream, outstream;
+int speccount = specpdl_depth ();
+struct gcpro gcpro1, gcpro2;
+get_buffer_range_char (buf, start, end, &b, &e, 0);
+coding_system = Fget_coding_system (coding_system);
+instream = make_lisp_buffer_input_stream (buf, b, e, 0);
+outstream = make_lisp_buffer_output_stream (buf, b, 0);
+outstream = make_decoding_output_stream (XLSTREAM (outstream),
+					   coding_system);
+outstream = make_encoding_output_stream (XLSTREAM (outstream),
+					   Fget_coding_system (Qbinary));
+GCPRO2 (instream, outstream);
+record_unwind_protect (close_both_streams, Fcons (instream, outstream));
+/* The chain of streams looks like this:
+[BUFFER] <----- send through
+------> [ENCODE AS BINARY]
+		             ------> [DECODE AS SPECIFIED]
+			             ------> [BUFFER]
+*/
+{
+char tempbuf[1024]; /* some random amount */
+Lstream *in  = XLSTREAM(instream);
+Lstream *out = XLSTREAM(outstream);
+Bufpos newpos, even_newer_pos;
+while (1)
+{
+Bufpos oldpos = lisp_buffer_stream_startpos (in);
+int size_in_bytes = Lstream_read (in, tempbuf, sizeof (tempbuf));
+if (!size_in_bytes)
+break;
+newpos = lisp_buffer_stream_startpos (in);
+Lstream_write (out, tempbuf, size_in_bytes);
+even_newer_pos = lisp_buffer_stream_startpos (in);
+buffer_delete_range (buf, even_newer_pos - (newpos - oldpos),
+even_newer_pos, 0);
+}
+}
+unbind_to (speccount, Qnil);
+UNGCPRO;
+return Qnil;
+}
+/************************************************************************/
+/*           Converting to an external encoding ("encoding")            */
+/************************************************************************/
+/* An encoding stream is an output stream.  When you create the
+stream, you specify the coding system that governs the encoding
+and another stream that the resulting encoded data is to be
+sent to, and then start sending data to it. */
+#define ENCODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, encoding)
+struct encoding_stream
+{
+/* Coding system that governs the conversion. */
+struct Lisp_Coding_System *codesys;
+/* Stream that we read the encoded data from or
+write the decoded data to. */
+Lstream *other_end;
+/* If we are reading, then we can return only a fixed amount of
+data, so if the conversion resulted in too much data, we store it
+here for retrieval the next time around. */
+unsigned_char_dynarr *runoff;
+/* FLAGS holds flags indicating the current state of the encoding.
+Some of these flags are dependent on the coding system. */
+unsigned int flags;
+/* CH holds a partially built-up character.  Since we only deal
+with one- and two-byte characters at the moment, we only use
+this to store the first byte of a two-byte character. */
+unsigned int ch;
+/* Additional information used by the ISO2022 encoder. */
+struct
+{
+/* CHARSET holds the character sets currently assigned to the G0
+	 through G3 registers.  It is initialized from the array
+	 INITIAL_CHARSET in CODESYS. */
+Lisp_Object charset[4];
+/* Which registers are currently invoked into the left (GL) and
+	 right (GR) halves of the 8-bit encoding space? */
+int register_left, register_right;
+/* Whether we need to explicitly designate the charset in the
+	 G? register before using it.  It is initialized from the
+	 array FORCE_CHARSET_ON_OUTPUT in CODESYS. */
+unsigned char force_charset_on_output[4];
+/* Other state variables that need to be preserved across
+	 invocations. */
+Lisp_Object current_charset;
+int current_half;
+int current_char_boundary;
+} iso2022;
+/* Additional information (the state of the running CCL program)
+used by the CCL encoder. */
+struct ccl_program ccl;
+};
+static int encoding_reader (Lstream *stream, unsigned char *data, int size);
+static int encoding_writer (Lstream *stream, CONST unsigned char *data,
+			    int size);
+static int encoding_rewinder   (Lstream *stream);
+static int encoding_seekable_p (Lstream *stream);
+static int encoding_flusher    (Lstream *stream);
+static int encoding_closer     (Lstream *stream);
+static Lisp_Object encoding_marker (Lisp_Object stream,
+				    void (*markobj) (Lisp_Object));
+DEFINE_LSTREAM_IMPLEMENTATION ("encoding", lstream_encoding,
+			       sizeof (struct encoding_stream));
+static Lisp_Object
+encoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
+{
+Lstream *str = ENCODING_STREAM_DATA (XLSTREAM (stream))->other_end;
+Lisp_Object str_obj;
+/* We do not need to mark the coding systems or charsets stored
+within the stream because they are stored in a global list
+and automatically marked. */
+XSETLSTREAM (str_obj, str);
+(markobj) (str_obj);
+if (str->imp->marker)
+return (str->imp->marker) (str_obj, markobj);
+else
+return Qnil;
+}
+/* Read SIZE bytes of data and store it into DATA.  We are a encoding stream
+so we read data from the other end, encode it, and store it into DATA. */
+static int
+encoding_reader (Lstream *stream, unsigned char *data, int size)
+{
+struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+unsigned char *orig_data = data;
+int read_size;
+int error_occurred = 0;
+/* We need to interface to mule_encode(), which expects to take some
+amount of data and store the result into a Dynarr.  We have
+mule_encode() store into str->runoff, and take data from there
+as necessary. */
+/* We loop until we have enough data, reading chunks from the other
+end and encoding it. */
+while (1)
+{
+/* Take data from the runoff if we can.  Make sure to take at
+	 most SIZE bytes, and delete the data from the runoff. */
+if (Dynarr_length (str->runoff) > 0)
+	{
+	  int chunk = min (size, Dynarr_length (str->runoff));
+	  memcpy (data, Dynarr_atp (str->runoff, 0), chunk);
+	  Dynarr_delete_many (str->runoff, 0, chunk);
+	  data += chunk;
+	  size -= chunk;
+	}
+if (size == 0)
+	break; /* No more room for data */
+if (str->flags & CODING_STATE_END)
+	/* This means that on the previous iteration, we hit the EOF on
+	   the other end.  We loop once more so that mule_encode() can
+	   output any final stuff it may be holding, or any "go back
+	   to a sane state" escape sequences. (This latter makes sense
+	   during encoding.) */
+	break;
+/* Exhausted the runoff, so get some more.  DATA at least SIZE bytes
+	 left of storage in it, so it's OK to read directly into it.
+	 (We'll be overwriting above, after we've encoded it into the
+	 runoff.) */
+read_size = Lstream_read (str->other_end, data, size);
+if (read_size < 0)
+	{
+	  error_occurred = 1;
+	  break;
+	}
+if (read_size == 0)
+	/* There might be some more end data produced in the translation.
+	   See the comment above. */
+	str->flags |= CODING_STATE_END;
+mule_encode (stream, data, str->runoff, read_size);
+}
+if (data - orig_data == 0)
+return error_occurred ? -1 : 0;
+else
+return data - orig_data;
+}
+static int
+encoding_writer (Lstream *stream, CONST unsigned char *data, int size)
+{
+struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+int retval;
+/* Encode all our data into the runoff, and then attempt to write
+it all out to the other end.  Remove whatever chunk we succeeded
+in writing. */
+mule_encode (stream, data, str->runoff, size);
+retval = Lstream_write (str->other_end, Dynarr_atp (str->runoff, 0),
+			  Dynarr_length (str->runoff));
+if (retval > 0)
+Dynarr_delete_many (str->runoff, 0, retval);
+/* Do NOT return retval.  The return value indicates how much
+of the incoming data was written, not how many bytes were
+written. */
+return size;
+}
+static void
+reset_encoding_stream (struct encoding_stream *str)
+{
+if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_ISO2022)
+{
+int i;
+for (i = 0; i < 4; i++)
+	str->iso2022.charset[i] =
+	  CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, i);
+for (i = 0; i < 4; i++)
+	str->iso2022.force_charset_on_output[i] =
+	  CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (str->codesys, i);
+str->iso2022.register_left = 0;
+str->iso2022.register_right = 1;
+str->iso2022.current_charset = Qnil;
+str->iso2022.current_half = 0;
+str->iso2022.current_char_boundary = 1;
+}
+else if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_CCL)
+{
+set_ccl_program (&str->ccl, CODING_SYSTEM_CCL_ENCODE (str->codesys), 0, 0, 0);
+}
+str->flags = str->ch = 0;
+}
+static int
+encoding_rewinder (Lstream *stream)
+{
+struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+reset_encoding_stream (str);
+Dynarr_reset (str->runoff);
+return Lstream_rewind (str->other_end);
+}
+static int
+encoding_seekable_p (Lstream *stream)
+{
+struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+return Lstream_seekable_p (str->other_end);
+}
+static int
+encoding_flusher (Lstream *stream)
+{
+struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+return Lstream_flush (str->other_end);
+}
+static int
+encoding_closer (Lstream *stream)
+{
+struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+if (stream->flags & LSTREAM_FL_WRITE)
+{
+str->flags |= CODING_STATE_END;
+encoding_writer (stream, 0, 0);
+}
+Dynarr_free (str->runoff);
+return Lstream_close (str->other_end);
+}
+Lisp_Object
+encoding_stream_coding_system (Lstream *stream)
+{
+Lisp_Object coding_system = Qnil;
+struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
+XSETCODING_SYSTEM (coding_system, str->codesys);
+return coding_system;
+}
+void
+set_encoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
+{
+struct Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
+struct encoding_stream *str = ENCODING_STREAM_DATA (lstr);
+str->codesys = cs;
+reset_encoding_stream (str);
+}
+static Lisp_Object
+make_encoding_stream_1 (Lstream *stream, Lisp_Object codesys,
+			CONST char *mode)
+{
+Lstream *lstr = Lstream_new (lstream_encoding, mode);
+struct encoding_stream *str = ENCODING_STREAM_DATA (lstr);
+Lisp_Object obj;
+memset (str, 0, sizeof (*str));
+str->runoff = (unsigned_char_dynarr *) Dynarr_new (unsigned char);
+str->other_end = stream;
+set_encoding_stream_coding_system (lstr, codesys);
+XSETLSTREAM (obj, lstr);
+return obj;
+}
+Lisp_Object
+make_encoding_input_stream (Lstream *stream, Lisp_Object codesys)
+{
+return make_encoding_stream_1 (stream, codesys, "r");
+}
+Lisp_Object
+make_encoding_output_stream (Lstream *stream, Lisp_Object codesys)
+{
+return make_encoding_stream_1 (stream, codesys, "w");
+}
+/* Convert N bytes of internally-formatted data stored in SRC to an
+external format, according to the encoding stream ENCODING.
+Store the encoded data into DST. */
+static void
+mule_encode (Lstream *encoding, CONST unsigned char *src,
+	     unsigned_char_dynarr *dst, unsigned int n)
+{
+struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+switch (CODING_SYSTEM_TYPE (str->codesys))
+{
+#ifdef DEBUG_XEMACS
+case CODESYS_INTERNAL:
+Dynarr_add_many (dst, src, n);
+break;
+#endif
+case CODESYS_AUTODETECT:
+/* If we got this far and still haven't decided on the coding
+	 system, then do no conversion. */
+case CODESYS_NO_CONVERSION:
+encode_coding_no_conversion (encoding, src, dst, n);
+break;
+case CODESYS_SHIFT_JIS:
+encode_coding_shift_jis (encoding, src, dst, n);
+break;
+case CODESYS_BIG5:
+encode_coding_big5 (encoding, src, dst, n);
+break;
+case CODESYS_CCL:
+ccl_driver (&str->ccl, src, dst, n, (str->flags) & CODING_STATE_END);
+break;
+case CODESYS_ISO2022:
+encode_coding_iso2022 (encoding, src, dst, n);
+break;
+default:
+abort ();
+}
+}
+DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4, 0, /*
+Encode the text between START and END using CODING-SYSTEM.
+This will, for example, convert Japanese characters into stuff such as
+\"^[$B!<!+^[(B\" if you use the JIS encoding.  Return length of encoded
+text.  BUFFER defaults to the current buffer if unspecified.
+*/
+(start, end, coding_system, buffer))
+{
+Bufpos b, e;
+struct buffer *buf = decode_buffer (buffer, 0);
+Lisp_Object instream, outstream;
+char tempbuf[1024]; /* some random amount */
+int speccount = specpdl_depth ();
+struct gcpro gcpro1, gcpro2;
+get_buffer_range_char (buf, start, end, &b, &e, 0);
+coding_system = Fget_coding_system (coding_system);
+instream = make_lisp_buffer_input_stream (buf, b, e, 0);
+outstream = make_lisp_buffer_output_stream (buf, b, 0);
+outstream = make_decoding_output_stream (XLSTREAM (outstream),
+					   Fget_coding_system (Qbinary));
+outstream = make_encoding_output_stream (XLSTREAM (outstream),
+					   coding_system);
+GCPRO2 (instream, outstream);
+record_unwind_protect (close_both_streams, Fcons (instream, outstream));
+/* The chain of streams looks like this:
+[BUFFER] <----- send through
+------> [ENCODE AS SPECIFIED]
+		             ------> [DECODE AS BINARY]
+			             ------> [BUFFER]
+*/
+while (1)
+{
+int size_in_bytes;
+Bufpos oldpos, newpos, even_newer_pos;
+oldpos = lisp_buffer_stream_startpos (XLSTREAM (instream));
+size_in_bytes = Lstream_read (XLSTREAM (instream), tempbuf,
+				    sizeof (tempbuf));
+if (!size_in_bytes)
+	break;
+newpos = lisp_buffer_stream_startpos (XLSTREAM (instream));
+Lstream_write (XLSTREAM (outstream), tempbuf, size_in_bytes);
+even_newer_pos = lisp_buffer_stream_startpos (XLSTREAM (instream));
+buffer_delete_range (buf, even_newer_pos - (newpos - oldpos),
+			   even_newer_pos, 0);
+}
+{
+Charcount retlen =
+lisp_buffer_stream_startpos (XLSTREAM (instream)) - b;
+unbind_to (speccount, Qnil);
+UNGCPRO;
+return make_int (retlen);
+}
+}
+/************************************************************************/
+/*                          Shift-JIS methods                           */
+/************************************************************************/
+/* Shift-JIS is a coding system encoding three character sets: ASCII, right
+half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
+as is.  A character of JISX0201-Kana (TYPE94 character set) is
+encoded by "position-code + 0x80".  A character of JISX0208
+(TYPE94x94 character set) is encoded in 2-byte but two
+position-codes are divided and shifted so that it fit in the range
+below.
+--- CODE RANGE of Shift-JIS ---
+(character set)	(range)
+ASCII		0x00 .. 0x7F
+JISX0201-Kana	0xA0 .. 0xDF
+JISX0208 (1st byte)	0x80 .. 0x9F and 0xE0 .. 0xEF
+	    (2nd byte)	0x40 .. 0x7E and 0x80 .. 0xFC
+-------------------------------
+*/
+/* Is this the first byte of a Shift-JIS two-byte char? */
+#define BYTE_SHIFT_JIS_TWO_BYTE_1_P(c)					\
+(((c) >= 0x81 && (c) <= 0x9F) || ((c) >= 0xE0 && (c) <= 0xEF))
+/* Is this the second byte of a Shift-JIS two-byte char? */
+#define BYTE_SHIFT_JIS_TWO_BYTE_2_P(c)					\
+(((c) >= 0x40 && (c) <= 0x7E) || ((c) >= 0x80 && (c) <= 0xFC))
+#define BYTE_SHIFT_JIS_KATAKANA_P(c)					\
+((c) >= 0xA1 && (c) <= 0xDF)
+/* Code conversion macros.  These are macros because they are used in
+inner loops during code conversion.
+Note that temporary variables in macros introduce the classic
+dynamic-scoping problems with variable names.  We use capital-
+lettered variables in the assumption that XEmacs does not use
+capital letters in variables except in a very formalized way
+(e.g. Qstring). */
+/* Convert shift-JIS code (sj1, sj2) into internal string
+representation (c1, c2). (The leading byte is assumed.) */
+#define DECODE_SHIFT_JIS(sj1, sj2, c1, c2) do	\
+{						\
+int I1 = sj1, I2 = sj2;			\
+if (I2 >= 0x9f)				\
+{						\
+if (I1 >= 0xe0)				\
+	c1 = (I1 << 1) - 0xe0;			\
+else					\
+	c1 = (I1 << 1) - 0x60;			\
+c2 = I2 + 2;				\
+}						\
+else						\
+{						\
+if (I1 >= 0xe0)				\
+	c1 = (I1 << 1) - 0xe1;			\
+else					\
+	c1 = (I1 << 1) - 0x61;			\
+if (I2 >= 0x7f)				\
+	c2 = I2 + 0x60;				\
+else					\
+	c2 = I2 + 0x61;				\
+}						\
+} while (0)
+/* Convert the internal string representation of a Shift-JIS character
+(c1, c2) into Shift-JIS code (sj1, sj2).  The leading byte is
+assumed. */
+#define ENCODE_SHIFT_JIS(c1, c2, sj1, sj2) do	\
+{						\
+int I1 = c1, I2 = sj2;			\
+if (I1 & 1)					\
+{						\
+if (I1 < 0xdf)				\
+	sj1 = (I1 >> 1) + 0x31;			\
+else					\
+	sj1 = (I1 >> 1) + 0x71;			\
+if (I2 >= 0xe0)				\
+	sj2 = I2 - 0x60;			\
+else					\
+	sj2 = I2 - 0x61;			\
+}						\
+else						\
+{						\
+if (I1 < 0xdf)				\
+	sj1 = (I1 >> 1) + 0x30;			\
+else					\
+	sj1 = (I1 >> 1) + 0x70;			\
+sj2 = I2 - 2;				\
+}						\
+} while (0)
+static int
+detect_coding_shift_jis (struct detection_state *st, CONST unsigned char *src,
+			 unsigned int n)
+{
+int c;
+while (n--)
+{
+c = *src++;
+if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+	return 0;
+if (st->shift_jis.in_second_byte)
+	{
+	  st->shift_jis.in_second_byte = 0;
+	  if (c < 0x40)
+	    return 0;
+	}
+else if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
+	st->shift_jis.in_second_byte = 1;
+}
+return CODING_CATEGORY_SHIFT_JIS_MASK;
+}
+/* Convert Shift-JIS data to internal format. */
+static void
+decode_coding_shift_jis (Lstream *decoding, CONST unsigned char *src,
+			 unsigned_char_dynarr *dst, unsigned int n)
+{
+unsigned char c;
+unsigned int flags, ch;
+int eol;
+struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+CODING_STREAM_DECOMPOSE (str, flags, ch);
+eol = str->eol_type;
+while (n--)
+{
+c = *src++;
+if (ch)
+	{
+	  /* Previous character was first byte of Shift-JIS Kanji char. */
+	  if (BYTE_SHIFT_JIS_TWO_BYTE_2_P (c))
+	    {
+	      unsigned char e1, e2;
+	      Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
+	      DECODE_SHIFT_JIS (ch, c, e1, e2);
+	      Dynarr_add (dst, e1);
+	      Dynarr_add (dst, e2);
+	    }
+	  else
+	    {
+	      DECODE_ADD_BINARY_CHAR (ch, dst);
+	      DECODE_ADD_BINARY_CHAR (c, dst);
+	    }
+	  ch = 0;
+	}
+else
+	{
+	  DECODE_HANDLE_EOL_TYPE (eol, c, flags, dst);
+	  if (BYTE_SHIFT_JIS_TWO_BYTE_1_P (c))
+	    ch = c;
+	  else if (BYTE_SHIFT_JIS_KATAKANA_P (c))
+	    {
+	      Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0201_KANA);
+	      Dynarr_add (dst, c);
+	    }
+	  else
+	    DECODE_ADD_BINARY_CHAR (c, dst);
+	}
+label_continue_loop:;
+}
+DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
+CODING_STREAM_COMPOSE (str, flags, ch);
+}
+/* Convert internally-formatted data to Shift-JIS. */
+static void
+encode_coding_shift_jis (Lstream *encoding, CONST unsigned char *src,
+			 unsigned_char_dynarr *dst, unsigned int n)
+{
+unsigned char c;
+struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+unsigned int flags, ch;
+int eol;
+CODING_STREAM_DECOMPOSE (str, flags, ch);
+eol = CODING_SYSTEM_EOL_TYPE (str->codesys);
+while (n--)
+{
+c = *src++;
+if (c == '\n')
+	{
+	  if (eol != EOL_LF && eol != EOL_AUTODETECT)
+	    Dynarr_add (dst, '\r');
+	  if (eol != EOL_CR)
+	    Dynarr_add (dst, '\n');
+	  ch = 0;
+	}
+else if (BYTE_ASCII_P (c))
+	{
+	  Dynarr_add (dst, c);
+	  ch = 0;
+	}
+else if (BUFBYTE_LEADING_BYTE_P (c))
+	ch = (c == LEADING_BYTE_JAPANESE_JISX0201_KANA ||
+	      c == LEADING_BYTE_JAPANESE_JISX0208_1978 ||
+	      c == LEADING_BYTE_JAPANESE_JISX0208) ? c : 0;
+else if (ch)
+	{
+	  if (ch == LEADING_BYTE_JAPANESE_JISX0201_KANA)
+	    {
+	      Dynarr_add (dst, c);
+	      ch = 0;
+	    }
+	  else if (ch == LEADING_BYTE_JAPANESE_JISX0208_1978 ||
+		   ch == LEADING_BYTE_JAPANESE_JISX0208)
+	    ch = c;
+	  else
+	    {
+	      unsigned char j1, j2;
+	      ENCODE_SHIFT_JIS (ch, c, j1, j2);
+	      Dynarr_add (dst, j1);
+	      Dynarr_add (dst, j2);
+	      ch = 0;
+	    }
+	}
+}
+CODING_STREAM_COMPOSE (str, flags, ch);
+}
+DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /*
+Decode a JISX0208 character of Shift-JIS coding-system.
+CODE is the character code in Shift-JIS as a cons of type bytes.
+Return the corresponding character.
+*/
+(code))
+{
+unsigned char c1, c2, s1, s2;
+CHECK_CONS (code);
+CHECK_INT (XCAR (code));
+CHECK_INT (XCDR (code));
+s1 = XINT (XCAR (code));
+s2 = XINT (XCDR (code));
+if (BYTE_SHIFT_JIS_TWO_BYTE_1_P (s1) &&
+BYTE_SHIFT_JIS_TWO_BYTE_2_P (s2))
+{
+DECODE_SHIFT_JIS (s1, s2, c1, c2);
+return make_char (MAKE_CHAR (Vcharset_japanese_jisx0208,
+				   c1 & 0x7F, c2 & 0x7F));
+}
+else
+return Qnil;
+}
+DEFUN ("encode-shift-jis-char", Fencode_shift_jis_char, 1, 1, 0, /*
+Encode a JISX0208 character CHAR to SHIFT-JIS coding-system.
+Return the corresponding character code in SHIFT-JIS as a cons of two bytes.
+*/
+(ch))
+{
+Lisp_Object charset;
+int c1, c2, s1, s2;
+CHECK_CHAR_COERCE_INT (ch);
+BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
+if (EQ (charset, Vcharset_japanese_jisx0208))
+{
+ENCODE_SHIFT_JIS (c1 | 0x80, c2 | 0x80, s1, s2);
+return Fcons (make_int (s1), make_int (s2));
+}
+else
+return Qnil;
+}
+/************************************************************************/
+/*                            Big5 methods                              */
+/************************************************************************/
+/* BIG5 is a coding system encoding two character sets: ASCII and
+Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
+character set and is encoded in two-byte.
+--- CODE RANGE of BIG5 ---
+(character set)	(range)
+ASCII		0x00 .. 0x7F
+Big5 (1st byte)	0xA1 .. 0xFE
+	(2nd byte)	0x40 .. 0x7E and 0xA1 .. 0xFE
+--------------------------
+Since the number of characters in Big5 is larger than maximum
+characters in Emacs' charset (96x96), it can't be handled as one
+charset.  So, in Emacs, Big5 is devided into two: `charset-big5-1'
+and `charset-big5-2'.  Both <type>s are TYPE94x94.  The former
+contains frequently used characters and the latter contains less
+frequently used characters.  */
+#define BYTE_BIG5_TWO_BYTE_1_P(c) \
+((c) >= 0xA1 && (c) <= 0xFE)
+/* Is this the second byte of a Shift-JIS two-byte char? */
+#define BYTE_BIG5_TWO_BYTE_2_P(c) \
+(((c) >= 0x40 && (c) <= 0x7E) || ((c) >= 0xA1 && (c) <= 0xFE))
+/* Number of Big5 characters which have the same code in 1st byte.  */
+#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)
+/* Code conversion macros.  These are macros because they are used in
+inner loops during code conversion.
+Note that temporary variables in macros introduce the classic
+dynamic-scoping problems with variable names.  We use capital-
+lettered variables in the assumption that XEmacs does not use
+capital letters in variables except in a very formalized way
+(e.g. Qstring). */
+/* Convert Big5 code (b1, b2) into its internal string representation
+(lb, c1, c2). */
+/* There is a much simpler way to split the Big5 charset into two.
+For the moment I'm going to leave the algorithm as-is because it
+claims to separate out the most-used characters into a single
+charset, which perhaps will lead to optimizations in various
+places.
+The way the algorithm works is something like this:
+Big5 can be viewed as a 94x157 charset, where the row is
+encoded into the bytes 0xA1 .. 0xFE and the column is encoded
+into the bytes 0x40 .. 0x7E and 0xA1 .. 0xFE.  As for frequency,
+the split between low and high column numbers is apparently
+meaningless; ascending rows produce less and less frequent chars.
+Therefore, we assign the lower half of rows (0xA1 .. 0xC8) to
+the first charset, and the upper half (0xC9 .. 0xFE) to the
+second.  To do the conversion, we convert the character into
+a single number where 0 .. 156 is the first row, 157 .. 313
+is the second, etc.  That way, the characters are ordered by
+decreasing frequency.  Then we just chop the space in two
+and coerce the result into a 94x94 space.
+*/
+#define DECODE_BIG5(b1, b2, lb, c1, c2) do				\
+{									\
+int B1 = b1, B2 = b2;							\
+unsigned int I							\
+= (B1 - 0xA1) * BIG5_SAME_ROW + B2 - (B2 < 0x7F ? 0x40 : 0x62);	\
+									\
+if (B1 < 0xC9)							\
+{									\
+lb = LEADING_BYTE_CHINESE_BIG5_1;					\
+}									\
+else									\
+{									\
+lb = LEADING_BYTE_CHINESE_BIG5_2;					\
+I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);				\
+}									\
+c1 = I / (0xFF - 0xA1) + 0xA1;					\
+c2 = I % (0xFF - 0xA1) + 0xA1;					\
+} while (0)
+/* Convert the internal string representation of a Big5 character
+(lb, c1, c2) into Big5 code (b1, b2). */
+#define ENCODE_BIG5(lb, c1, c2, b1, b2) do				\
+{									\
+unsigned int I = ((c1) - 0xA1) * (0xFF - 0xA1) + ((c2) - 0xA1);	\
+									\
+if (lb == LEADING_BYTE_CHINESE_BIG5_2)				\
+{									\
+I += BIG5_SAME_ROW * (0xC9 - 0xA1);				\
+}									\
+b1 = I / BIG5_SAME_ROW + 0xA1;					\
+b2 = I % BIG5_SAME_ROW;						\
+b2 += b2 < 0x3F ? 0x40 : 0x62;					\
+} while (0)
+static int
+detect_coding_big5 (struct detection_state *st, CONST unsigned char *src,
+		    unsigned int n)
+{
+int c;
+while (n--)
+{
+c = *src++;
+if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO ||
+	  (c >= 0x80 && c <= 0xA0))
+	return 0;
+if (st->big5.in_second_byte)
+	{
+	  st->big5.in_second_byte = 0;
+	  if (c < 0x40 || (c >= 0x80 && c <= 0xA0))
+	    return 0;
+	}
+else if (c >= 0xA1)
+	st->big5.in_second_byte = 1;
+}
+return CODING_CATEGORY_BIG5_MASK;
+}
+/* Convert Big5 data to internal format. */
+static void
+decode_coding_big5 (Lstream *decoding, CONST unsigned char *src,
+		    unsigned_char_dynarr *dst, unsigned int n)
+{
+unsigned char c;
+unsigned int flags, ch, eol;
+struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+CODING_STREAM_DECOMPOSE (str, flags, ch);
+eol = str->eol_type;
+while (n--)
+{
+c = *src++;
+if (ch)
+	{
+	  /* Previous character was first byte of Big5 char. */
+	  if (BYTE_BIG5_TWO_BYTE_2_P (c))
+	    {
+	      unsigned char b1, b2, b3;
+	      DECODE_BIG5 (ch, c, b1, b2, b3);
+	      Dynarr_add (dst, b1);
+	      Dynarr_add (dst, b2);
+	      Dynarr_add (dst, b3);
+	    }
+	  else
+	    {
+	      DECODE_ADD_BINARY_CHAR (ch, dst);
+	      DECODE_ADD_BINARY_CHAR (c, dst);
+	    }
+	  ch = 0;
+	}
+else
+	{
+	  DECODE_HANDLE_EOL_TYPE (eol, c, flags, dst);
+	  if (BYTE_BIG5_TWO_BYTE_1_P (c))
+	    ch = c;
+	  else
+	    DECODE_ADD_BINARY_CHAR (c, dst);
+	}
+label_continue_loop:;
+}
+DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
+CODING_STREAM_COMPOSE (str, flags, ch);
+}
+/* Convert internally-formatted data to Big5. */
+static void
+encode_coding_big5 (Lstream *encoding, CONST unsigned char *src,
+		    unsigned_char_dynarr *dst, unsigned int n)
+{
+unsigned char c;
+struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+unsigned int flags, ch, eol;
+CODING_STREAM_DECOMPOSE (str, flags, ch);
+eol = CODING_SYSTEM_EOL_TYPE (str->codesys);
+while (n--)
+{
+c = *src++;
+if (c == '\n')
+	{
+	  if (eol != EOL_LF && eol != EOL_AUTODETECT)
+	    Dynarr_add (dst, '\r');
+	  if (eol != EOL_CR)
+	    Dynarr_add (dst, '\n');
+	}
+else if (BYTE_ASCII_P (c))
+	{
+	  /* ASCII. */
+	  Dynarr_add (dst, c);
+	}
+else if (BUFBYTE_LEADING_BYTE_P (c))
+	{
+	  if (c == LEADING_BYTE_CHINESE_BIG5_1 ||
+	      c == LEADING_BYTE_CHINESE_BIG5_2)
+	    {
+	      /* A recognized leading byte. */
+	      ch = c;
+	      continue; /* not done with this character. */
+	    }
+	  /* otherwise just ignore this character. */
+	}
+else if (ch == LEADING_BYTE_CHINESE_BIG5_1 ||
+	       ch == LEADING_BYTE_CHINESE_BIG5_2)
+	{
+	  /* Previous char was a recognized leading byte. */
+	  ch = (ch << 8) | c;
+	  continue; /* not done with this character. */
+	}
+else if (ch)
+	{
+	  /* Encountering second byte of a Big5 character. */
+	  unsigned char b1, b2;
+	  ENCODE_BIG5 (ch >> 8, ch & 0xFF, c, b1, b2);
+	  Dynarr_add (dst, b1);
+	  Dynarr_add (dst, b2);
+	}
+ch = 0;
+}
+CODING_STREAM_COMPOSE (str, flags, ch);
+}
+DEFUN ("decode-big5-char", Fdecode_big5_char, 1, 1, 0, /*
+Decode a Big5 character CODE of BIG5 coding-system.
+CODE is the character code in BIG5.
+Return the corresponding character.
+*/
+(code))
+{
+unsigned char c1, c2, b1, b2;
+Lisp_Object charset;
+CHECK_CONS (code);
+CHECK_INT (XCAR (code));
+CHECK_INT (XCDR (code));
+b1 = XINT (XCAR (code));
+b2 = XINT (XCDR (code));
+if (BYTE_BIG5_TWO_BYTE_1_P (b1) &&
+BYTE_BIG5_TWO_BYTE_2_P (b2))
+{
+DECODE_BIG5 (b1, b2, XCHARSET_LEADING_BYTE (charset), c1, c2);
+return make_char (MAKE_CHAR (charset, c1 & 0x7F, c2 & 0x7F));
+}
+else
+return Qnil;
+}
+DEFUN ("encode-big5-char", Fencode_big5_char, 1, 1, 0, /*
+Encode the Big5 character CH to BIG5 coding-system.
+Return the corresponding character code in Big5.
+*/
+(ch))
+{
+Lisp_Object charset;
+int c1, c2, b1, b2;
+CHECK_CHAR_COERCE_INT (ch);
+BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
+if (EQ (charset, Vcharset_chinese_big5_1) ||
+EQ (charset, Vcharset_chinese_big5_2))
+{
+ENCODE_BIG5 (XCHARSET_LEADING_BYTE (charset), c1 | 0x80, c2 | 0x80,
+		   b1, b2);
+return Fcons (make_int (b1), make_int (b2));
+}
+else
+return Qnil;
+}
+/************************************************************************/
+/*                           ISO2022 methods                            */
+/************************************************************************/
+/* The following note describes the coding system ISO2022 briefly.
+Since the intention of this note is to help understanding of the
+programs in this file, some parts are NOT ACCURATE or OVERLY
+SIMPLIFIED.  For thorough understanding, please refer to the
+original document of ISO2022.
+ISO2022 provides many mechanisms to encode several character sets
+in 7-bit and 8-bit environments.  If one chooses 7-bit environment,
+all text is encoded by codes of less than 128.  This may make the
+encoded text a little bit longer, but the text get more stability
+to pass through several gateways (some of them strip off MSB).
+There are two kind of character sets: control character set and
+graphic character set.  The former contains control characters such
+as `newline' and `escape' to provide control functions (control
+functions are provided also by escape sequence).  The latter
+contains graphic characters such as 'A' and '-'.  Emacs recognizes
+two control character sets and many graphic character sets.
+Graphic character sets are classified into one of four types,
+according to the dimension and number of characters in the set:
+TYPE94, TYPE96, TYPE94x94, and TYPE96x96.  In addition, each
+character set is assigned an identification byte, unique for each
+type, called "final character" (denoted as <F> hereafter).  The <F>
+of each character set is decided by ECMA(*) when it is registered
+in ISO.  Code range of <F> is 0x30..0x7F (0x30..0x3F are for
+private use only).
+Note (*): ECMA = European Computer Manufacturers Association
+Here are examples of graphic character set [NAME(<F>)]:
+	o TYPE94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
+	o TYPE96 -- right-half-of-ISO8859-1('A'), ...
+	o TYPE94x94 -- GB2312('A'), JISX0208('B'), ...
+	o TYPE96x96 -- none for the moment
+A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
+	C0 [0x00..0x1F] -- control character plane 0
+	GL [0x20..0x7F] -- graphic character plane 0
+	C1 [0x80..0x9F] -- control character plane 1
+	GR [0xA0..0xFF] -- graphic character plane 1
+A control character set is directly designated and invoked to C0 or
+C1 by an escape sequence.  The most common case is that:
+- ISO646's  control character set is designated/invoked to C0, and
+- ISO6429's control character set is designated/invoked to C1,
+and usually these designations/invocations are omitted in encoded
+text.  In a 7-bit environment, only C0 can be used, and a control
+character for C1 is encoded by an appropriate escape sequence to
+fit into the environment.  All control characters for C1 are
+defined to have corresponding escape sequences.
+A graphic character set is at first designated to one of four
+graphic registers (G0 through G3), then these graphic registers are
+invoked to GL or GR.  These designations and invocations can be
+done independently.  The most common case is that G0 is invoked to
+GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
+these invocations and designations are omitted in encoded text.
+In a 7-bit environment, only GL can be used.
+When a graphic character set of TYPE94 or TYPE94x94 is invoked to
+GL, codes 0x20 and 0x7F of the GL area work as control characters
+SPACE and DEL respectively, and code 0xA0 and 0xFF of GR area
+should not be used.
+There are two ways of invocation: locking-shift and single-shift.
+With locking-shift, the invocation lasts until the next different
+invocation, whereas with single-shift, the invocation works only
+for the following character and doesn't affect locking-shift.
+Invocations are done by the following control characters or escape
+sequences.
+----------------------------------------------------------------------
+abbrev  function	             cntrl escape seq	description
+----------------------------------------------------------------------
+SI/LS0  (shift-in)		     0x0F  none		invoke G0 into GL
+SO/LS1  (shift-out)		     0x0E  none		invoke G1 into GL
+LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR
+LS2     (locking-shift-2)	     none  ESC 'n'	invoke G2 into GL
+LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR
+LS3     (locking-shift-3)	     none  ESC 'o'	invoke G3 into GL
+LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR
+SS2     (single-shift-2)	     0x8E  ESC 'N'	invoke G2 for one char
+SS3     (single-shift-3)	     0x8F  ESC 'O'	invoke G3 for one char
+----------------------------------------------------------------------
+The first four are for locking-shift.  Control characters for these
+functions are defined by macros ISO_CODE_XXX in `coding.h'.
+Designations are done by the following escape sequences.
+----------------------------------------------------------------------
+escape sequence	description
+----------------------------------------------------------------------
+ESC '(' <F>		designate TYPE94<F> to G0
+ESC ')' <F>		designate TYPE94<F> to G1
+ESC '*' <F>		designate TYPE94<F> to G2
+ESC '+' <F>		designate TYPE94<F> to G3
+ESC ',' <F>		designate TYPE96<F> to G0 (*)
+ESC '-' <F>		designate TYPE96<F> to G1
+ESC '.' <F>		designate TYPE96<F> to G2
+ESC '/' <F>		designate TYPE96<F> to G3
+ESC '$' '(' <F>	designate TYPE94x94<F> to G0 (**)
+ESC '$' ')' <F>	designate TYPE94x94<F> to G1
+ESC '$' '*' <F>	designate TYPE94x94<F> to G2
+ESC '$' '+' <F>	designate TYPE94x94<F> to G3
+ESC '$' ',' <F>	designate TYPE96x96<F> to G0 (*)
+ESC '$' '-' <F>	designate TYPE96x96<F> to G1
+ESC '$' '.' <F>	designate TYPE96x96<F> to G2
+ESC '$' '/' <F>	designate TYPE96x96<F> to G3
+----------------------------------------------------------------------
+In this list, "TYPE94<F>" means a graphic character set of type TYPE94
+and final character <F>, and etc.
+Note (*): Although these designations are not allowed in ISO2022,
+Emacs accepts them on decoding, and produces them on encoding
+TYPE96 or TYPE96x96 character set in a coding system which is
+characterized as 7-bit environment, non-locking-shift, and
+non-single-shift.
+Note (**): If <F> is '@', 'A', or 'B', the intermediate character
+'(' can be omitted.  We call this as "short-form" here after.
+Now you may notice that there are a lot of ways for encoding the
+same multilingual text in ISO2022.  Actually, there exist many
+coding systems such as Compound Text (used in X's inter client
+communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
+(used in Korean internet), EUC (Extended UNIX Code, used in Asian
+localized platforms), and all of these are variants of ISO2022.
+In addition to the above, Emacs handles two more kinds of escape
+sequences: ISO6429's direction specification and Emacs' private
+sequence for specifying character composition.
+ISO6429's direction specification takes the following format:
+	o CSI ']'      -- end of the current direction
+	o CSI '0' ']'  -- end of the current direction
+	o CSI '1' ']'  -- start of left-to-right text
+	o CSI '2' ']'  -- start of right-to-left text
+The control character CSI (0x9B: control sequence introducer) is
+abbreviated to the escape sequence ESC '[' in 7-bit environment.
+Character composition specification takes the following format:
+	o ESC '0' -- start character composition
+	o ESC '1' -- end character composition
+Since these are not standard escape sequences of any ISO, the use
+of them for these meanings is restricted to Emacs only.  */
+static void
+reset_iso2022 (Lisp_Object coding_system, struct iso2022_decoder *iso)
+{
+int i;
+for (i = 0; i < 4; i++)
+{
+if (!NILP (coding_system))
+	iso->charset[i] =
+	  XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, i);
+else
+	iso->charset[i] = Qt;
+iso->invalid_designated[i] = 0;
+}
+iso->esc = ISO_ESC_NOTHING;
+iso->esc_bytes_index = 0;
+iso->register_left = 0;
+iso->register_right = 1;
+iso->switched_dir_and_no_valid_charset_yet = 0;
+iso->invalid_switch_dir = 0;
+iso->output_direction_sequence = 0;
+iso->output_literally = 0;
+if (iso->composite_chars)
+Dynarr_reset (iso->composite_chars);
+}
+static int
+fit_to_be_escape_quoted (unsigned char c)
+{
+switch (c)
+{
+case ISO_CODE_ESC:
+case ISO_CODE_CSI:
+case ISO_CODE_SS2:
+case ISO_CODE_SS3:
+case ISO_CODE_SO:
+case ISO_CODE_SI:
+return 1;
+default:
+return 0;
+}
+}
+/* Parse one byte of an ISO2022 escape sequence.
+If the result is an invalid escape sequence, return 0 and
+do not change anything in STR.  Otherwise, if the result is
+an incomplete escape sequence, update ISO2022.ESC and
+ISO2022.ESC_BYTES and return -1.  Otherwise, update
+all the state variables (but not ISO2022.ESC_BYTES) and
+return 1.
+If CHECK_INVALID_CHARSETS is non-zero, check for designation
+or invocation of an invalid character set and treat that as
+an unrecognized escape sequence. */
+static int
+parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
+		   unsigned char c, unsigned int *flags,
+		   int check_invalid_charsets)
+{
+/* (1) If we're at the end of a designation sequence, CS is the
+charset being designated and REG is the register to designate
+it to.
+(2) If we're at the end of a locking-shift sequence, REG is
+the register to invoke and HALF (0 == left, 1 == right) is
+the half to invoke it into.
+(3) If we're at the end of a single-shift sequence, REG is
+the register to invoke. */
+Lisp_Object cs = Qnil;
+int reg, half;
+/* NOTE: This code does goto's all over the fucking place.
+The reason for this is that we're basically implementing
+a state machine here, and hierarchical languages like C
+don't really provide a clean way of doing this. */
+if (! (*flags & CODING_STATE_ESCAPE))
+/* At beginning of escape sequence; we need to reset our
+escape-state variables. */
+iso->esc = ISO_ESC_NOTHING;
+iso->output_literally = 0;
+iso->output_direction_sequence = 0;
+switch (iso->esc)
+{
+case ISO_ESC_NOTHING:
+iso->esc_bytes_index = 0;
+switch (c)
+	{
+	case ISO_CODE_ESC:	/* Start escape sequence */
+	  *flags |= CODING_STATE_ESCAPE;
+	  iso->esc = ISO_ESC;
+	  goto not_done;
+	case ISO_CODE_CSI:      /* ISO6429 (specifying directionality) */
+	  *flags |= CODING_STATE_ESCAPE;
+	  iso->esc = ISO_ESC_5_11;
+	  goto not_done;
+	case ISO_CODE_SO:	/* locking shift 1 */
+	  reg = 1; half = 0;
+	  goto locking_shift;
+	case ISO_CODE_SI:	/* locking shift 0 */
+	  reg = 0; half = 0;
+	  goto locking_shift;
+	case ISO_CODE_SS2:	/* single shift */
+	  reg = 2;
+	  goto single_shift;
+	case ISO_CODE_SS3:	/* single shift */
+	  reg = 3;
+	  goto single_shift;
+	default:			/* Other control characters */
+	  return 0;
+	}
+case ISO_ESC:
+switch (c)
+	{
+	  /**** single shift ****/
+	case 'N':	/* single shift 2 */
+	  reg = 2;
+	  goto single_shift;
+	case 'O':	/* single shift 3 */
+	  reg = 3;
+	  goto single_shift;
+	  /**** locking shift ****/
+	case '~':	/* locking shift 1 right */
+	  reg = 1; half = 1;
+	  goto locking_shift;
+	case 'n':	/* locking shift 2 */
+	  reg = 2; half = 0;
+	  goto locking_shift;
+	case '}':	/* locking shift 2 right */
+	  reg = 2; half = 1;
+	  goto locking_shift;
+	case 'o':	/* locking shift 3 */
+	  reg = 3; half = 0;
+	  goto locking_shift;
+	case '|':	/* locking shift 3 right */
+	  reg = 3; half = 1;
+	  goto locking_shift;
+	  /**** composite ****/
+	case '0':
+	  iso->esc = ISO_ESC_START_COMPOSITE;
+	  *flags = (*flags & CODING_STATE_ISO2022_LOCK) |
+	    CODING_STATE_COMPOSITE;
+	  return 1;
+	case '1':
+	  iso->esc = ISO_ESC_END_COMPOSITE;
+	  *flags = (*flags & CODING_STATE_ISO2022_LOCK) &
+	    ~CODING_STATE_COMPOSITE;
+	  return 1;
+	  /**** directionality ****/
+	case '[':
+	  iso->esc = ISO_ESC_5_11;
+	  goto not_done;
+	  /**** designation ****/
+	case '$':	/* multibyte charset prefix */
+	  iso->esc = ISO_ESC_2_4;
+	  goto not_done;
+	default:
+	  if (0x28 <= c && c <= 0x2F)
+	    {
+	      iso->esc = c - 0x28 + ISO_ESC_2_8;
+	      goto not_done;
+	    }
+	  /* This function is called with CODESYS equal to nil when
+	     doing coding-system detection. */
+	  if (!NILP (codesys)
+	      && XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+	      && fit_to_be_escape_quoted (c))
+	    {
+	      iso->esc = ISO_ESC_LITERAL;
+	      *flags &= CODING_STATE_ISO2022_LOCK;
+	      return 1;
+	    }
+	  /* bzzzt! */
+	  return 0;
+	}
+/**** directionality ****/
+case ISO_ESC_5_11:		/* ISO6429 direction control */
+if (c == ']')
+	{
+	  *flags &= (CODING_STATE_ISO2022_LOCK & ~CODING_STATE_R2L);
+	  goto directionality;
+	}
+if      (c == '0') iso->esc = ISO_ESC_5_11_0;
+else if (c == '1') iso->esc = ISO_ESC_5_11_1;
+else if (c == '2') iso->esc = ISO_ESC_5_11_2;
+else               return 0;
+goto not_done;
+case ISO_ESC_5_11_0:
+if (c == ']')
+	{
+	  *flags &= (CODING_STATE_ISO2022_LOCK & ~CODING_STATE_R2L);
+	  goto directionality;
+	}
+return 0;
+case ISO_ESC_5_11_1:
+if (c == ']')
+	{
+	  *flags = (CODING_STATE_ISO2022_LOCK & ~CODING_STATE_R2L);
+	  goto directionality;
+	}
+return 0;
+case ISO_ESC_5_11_2:
+if (c == ']')
+	{
+	  *flags = (*flags & CODING_STATE_ISO2022_LOCK) | CODING_STATE_R2L;
+	  goto directionality;
+	}
+return 0;
+directionality:
+iso->esc = ISO_ESC_DIRECTIONALITY;
+/* Various junk here to attempt to preserve the direction sequences
+	 literally in the text if they would otherwise be swallowed due
+	 to invalid designations that don't show up as actual charset
+	 changes in the text. */
+if (iso->invalid_switch_dir)
+	{
+	  /* We already inserted a direction switch literally into the
+	     text.  We assume (#### this may not be right) that the
+	     next direction switch is the one going the other way,
+	     and we need to output that literally as well. */
+	  iso->output_literally = 1;
+	  iso->invalid_switch_dir = 0;
+	}
+else
+	{
+	  int jj;
+	  /* If we are in the thrall of in invalid designation,
+	   then stick the directionality sequence literally into the
+	   output stream so it ends up in the original text again. */
+	  for (jj = 0; jj < 4; jj++)
+	    if (iso->invalid_designated[jj])
+	      break;
+	  if (jj < 4)
+	    {
+	      iso->output_literally = 1;
+	      iso->invalid_switch_dir = 1;
+	    }
+	  else
+	    /* Indicate that we haven't yet seen a valid designation,
+	       so that if a switch-dir is directly followed by an
+	       invalid designation, both get inserted literally. */
+	    iso->switched_dir_and_no_valid_charset_yet = 1;
+	}
+return 1;
+/**** designation ****/
+case ISO_ESC_2_4:
+if (0x28 <= c && c <= 0x2F)
+	{
+	  iso->esc = c - 0x28 + ISO_ESC_2_4_8;
+	  goto not_done;
+	}
+if (0x40 <= c && c <= 0x42)
+	{
+	  cs = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94, c,
+				      *flags & CODING_STATE_R2L ?
+				      CHARSET_RIGHT_TO_LEFT :
+				      CHARSET_LEFT_TO_RIGHT);
+	  reg = 0;
+	  goto designated;
+	}
+return 0;
+default:
+{
+	int type;
+	if (c < '0' || c > '~')
+	  return 0; /* bad final byte */
+	if (iso->esc >= ISO_ESC_2_8 &&
+	    iso->esc <= ISO_ESC_2_15)
+	  {
+	    if (iso->esc >= ISO_ESC_2_12)
+	      type = CHARSET_TYPE_96;
+	    else
+	      type = CHARSET_TYPE_94;
+	    reg = (iso->esc - ISO_ESC_2_8) & 3;
+	  }
+	else if (iso->esc >= ISO_ESC_2_4_8 &&
+		 iso->esc <= ISO_ESC_2_4_15)
+	  {
+	    if (iso->esc >= ISO_ESC_2_4_12)
+	      type = CHARSET_TYPE_96X96;
+	    else
+	      type = CHARSET_TYPE_94X94;
+	    reg = (iso->esc - ISO_ESC_2_4_8) & 3;
+	  }
+	cs = CHARSET_BY_ATTRIBUTES (type, c,
+				    *flags & CODING_STATE_R2L ?
+				    CHARSET_RIGHT_TO_LEFT :
+				    CHARSET_LEFT_TO_RIGHT);
+	goto designated;
+}
+}
+not_done:
+iso->esc_bytes[iso->esc_bytes_index++] = (unsigned char) c;
+return -1;
+single_shift:
+if (check_invalid_charsets && !CHARSETP (iso->charset[reg]))
+/* can't invoke something that ain't there. */
+return 0;
+iso->esc = ISO_ESC_SINGLE_SHIFT;
+*flags &= CODING_STATE_ISO2022_LOCK;
+if (reg == 2)
+*flags |= CODING_STATE_SS2;
+else
+*flags |= CODING_STATE_SS3;
+return 1;
+locking_shift:
+if (check_invalid_charsets &&
+!CHARSETP (iso->charset[reg]))
+/* can't invoke something that ain't there. */
+return 0;
+if (half)
+iso->register_right = reg;
+else
+iso->register_left = reg;
+*flags &= CODING_STATE_ISO2022_LOCK;
+iso->esc = ISO_ESC_LOCKING_SHIFT;
+return 1;
+designated:
+if (NILP (cs) && check_invalid_charsets)
+{
+iso->invalid_designated[reg] = 1;
+iso->charset[reg] = Vcharset_ascii;
+iso->esc = ISO_ESC_DESIGNATE;
+*flags &= CODING_STATE_ISO2022_LOCK;
+iso->output_literally = 1;
+if (iso->switched_dir_and_no_valid_charset_yet)
+	{
+	  /* We encountered a switch-direction followed by an
+	     invalid designation.  Ensure that the switch-direction
+	     gets outputted; otherwise it will probably get eaten
+	     when the text is written out again. */
+	  iso->switched_dir_and_no_valid_charset_yet = 0;
+	  iso->output_direction_sequence = 1;
+	  /* And make sure that the switch-dir going the other
+	     way gets outputted, as well. */
+	  iso->invalid_switch_dir = 1;
+	}
+return 1;
+}
+/* This function is called with CODESYS equal to nil when
+doing coding-system detection. */
+if (!NILP (codesys))
+{
+charset_conversion_spec_dynarr *dyn =
+	XCODING_SYSTEM (codesys)->iso2022.input_conv;
+if (dyn)
+	{
+	  int i;
+	  for (i = 0; i < Dynarr_length (dyn); i++)
+	    {
+	      struct charset_conversion_spec *spec = Dynarr_atp (dyn, i);
+	      if (EQ (cs, spec->from_charset))
+		cs = spec->to_charset;
+	    }
+	}
+}
+iso->charset[reg] = cs;
+iso->esc = ISO_ESC_DESIGNATE;
+*flags &= CODING_STATE_ISO2022_LOCK;
+if (iso->invalid_designated[reg])
+{
+iso->invalid_designated[reg] = 0;
+iso->output_literally = 1;
+}
+if (iso->switched_dir_and_no_valid_charset_yet)
+iso->switched_dir_and_no_valid_charset_yet = 0;
+return 1;
+}
+static int
+detect_coding_iso2022 (struct detection_state *st,  CONST unsigned char *src,
+		       unsigned int n)
+{
+int c;
+int mask;
+/* #### There are serious deficiencies in the recognition mechanism
+here.  This needs to be much smarter if it's going to cut it. */
+if (!st->iso2022.initted)
+{
+reset_iso2022 (Qnil, &st->iso2022.iso);
+st->iso2022.mask = (CODING_CATEGORY_ISO_7_MASK |
+			  CODING_CATEGORY_ISO_8_DESIGNATE_MASK |
+			  CODING_CATEGORY_ISO_8_1_MASK |
+			  CODING_CATEGORY_ISO_8_2_MASK |
+			  CODING_CATEGORY_ISO_LOCK_SHIFT_MASK);
+st->iso2022.flags = 0;
+st->iso2022.high_byte_count = 0;
+st->iso2022.saw_single_shift = 0;
+st->iso2022.initted = 1;
+}
+mask = st->iso2022.mask;
+while (n--)
+{
+c = *src++;
+if (c >= 0xA0)
+	{
+	  mask &= ~CODING_CATEGORY_ISO_7_MASK;
+	  st->iso2022.high_byte_count++;
+	}
+else
+	{
+	  if (st->iso2022.high_byte_count && !st->iso2022.saw_single_shift)
+	    {
+	      if (st->iso2022.high_byte_count & 1)
+		/* odd number of high bytes; assume not iso-8-2 */
+		mask &= ~CODING_CATEGORY_ISO_8_2_MASK;
+	    }
+	  st->iso2022.high_byte_count = 0;
+	  st->iso2022.saw_single_shift = 0;
+	  if (c > 0x80)
+	    mask &= ~CODING_CATEGORY_ISO_7_MASK;
+	}
+if (!(st->iso2022.flags & CODING_STATE_ESCAPE)
+	  && (BYTE_C0_P (c) || BYTE_C1_P (c)))
+	{ /* control chars */
+	  switch (c)
+	    {
+	      /* Allow and ignore control characters that you might
+		 reasonably see in a text file */
+	    case '\r':
+	    case '\n':
+	    case '\t':
+	    case  7: /* bell */
+	    case  8: /* backspace */
+	    case 11: /* vertical tab */
+	    case 12: /* form feed */
+	    case 26: /* MS-DOS C-z junk */
+	      goto label_continue_loop;
+	    default:
+	      break;
+	    }
+	}
+if ((st->iso2022.flags & CODING_STATE_ESCAPE) || BYTE_C0_P (c)
+|| BYTE_C1_P (c))
+	{
+	  if (parse_iso2022_esc (Qnil, &st->iso2022.iso, c,
+				 &st->iso2022.flags, 0))
+	    {
+	      switch (st->iso2022.iso.esc)
+		{
+		case ISO_ESC_DESIGNATE:
+		  mask &= ~CODING_CATEGORY_ISO_8_1_MASK;
+		  mask &= ~CODING_CATEGORY_ISO_8_2_MASK;
+		  break;
+		case ISO_ESC_LOCKING_SHIFT:
+		  mask = CODING_CATEGORY_ISO_LOCK_SHIFT_MASK;
+		  goto ran_out_of_chars;
+		case ISO_ESC_SINGLE_SHIFT:
+		  mask &= ~CODING_CATEGORY_ISO_8_DESIGNATE_MASK;
+		  st->iso2022.saw_single_shift = 1;
+		  break;
+		default:
+		  break;
+		}
+	    }
+	  else
+	    {
+	      mask = 0;
+	      goto ran_out_of_chars;
+	    }
+	}
+label_continue_loop:;
+}
+ran_out_of_chars:
+return mask;
+}
+static int
+postprocess_iso2022_mask (int mask)
+{
+/* #### kind of cheesy */
+/* If seven-bit ISO is allowed, then assume that the encoding is
+entirely seven-bit and turn off the eight-bit ones. */
+if (mask & CODING_CATEGORY_ISO_7_MASK)
+mask &= ~ (CODING_CATEGORY_ISO_8_DESIGNATE_MASK |
+	       CODING_CATEGORY_ISO_8_1_MASK |
+	       CODING_CATEGORY_ISO_8_2_MASK);
+return mask;
+}
+/* If FLAGS is a null pointer or specifies right-to-left motion,
+output a switch-dir-to-left-to-right sequence to DST.
+Also update FLAGS if it is not a null pointer.
+If INTERNAL_P is set, we are outputting in internal format and
+need to handle the CSI differently. */
+static void
+restore_left_to_right_direction (struct Lisp_Coding_System *codesys,
+				 unsigned_char_dynarr *dst,
+				 unsigned int *flags,
+				 int internal_p)
+{
+if (!flags || (*flags & CODING_STATE_R2L))
+{
+if (CODING_SYSTEM_ISO2022_SEVEN (codesys))
+	{
+	  Dynarr_add (dst, ISO_CODE_ESC);
+	  Dynarr_add (dst, '[');
+	}
+else if (internal_p)
+	DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst);
+else
+	Dynarr_add (dst, ISO_CODE_CSI);
+Dynarr_add (dst, '0');
+Dynarr_add (dst, ']');
+if (flags)
+	*flags &= ~CODING_STATE_R2L;
+}
+}
+/* If FLAGS is a null pointer or specifies a direction different from
+DIRECTION (which should be either CHARSET_RIGHT_TO_LEFT or
+CHARSET_LEFT_TO_RIGHT), output the appropriate switch-dir escape
+sequence to DST.  Also update FLAGS if it is not a null pointer.
+If INTERNAL_P is set, we are outputting in internal format and
+need to handle the CSI differently. */
+static void
+ensure_correct_direction (int direction, struct Lisp_Coding_System *codesys,
+			  unsigned_char_dynarr *dst, unsigned int *flags,
+			  int internal_p)
+{
+if ((!flags || (*flags & CODING_STATE_R2L)) &&
+direction == CHARSET_LEFT_TO_RIGHT)
+restore_left_to_right_direction (codesys, dst, flags, internal_p);
+else if (!CODING_SYSTEM_ISO2022_NO_ISO6429 (codesys)
+	   && (!flags || !(*flags & CODING_STATE_R2L)) &&
+	   direction == CHARSET_RIGHT_TO_LEFT)
+{
+if (CODING_SYSTEM_ISO2022_SEVEN (codesys))
+	{
+	  Dynarr_add (dst, ISO_CODE_ESC);
+	  Dynarr_add (dst, '[');
+	}
+else if (internal_p)
+	DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst);
+else
+	Dynarr_add (dst, ISO_CODE_CSI);
+Dynarr_add (dst, '2');
+Dynarr_add (dst, ']');
+if (flags)
+	*flags |= CODING_STATE_R2L;
+}
+}
+/* Convert ISO2022-format data to internal format. */
+static void
+decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
+		       unsigned_char_dynarr *dst, unsigned int n)
+{
+unsigned char c;
+unsigned int flags, ch;
+int eol;
+struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+Lisp_Object coding_system = Qnil;
+unsigned_char_dynarr *real_dst = dst;
+CODING_STREAM_DECOMPOSE (str, flags, ch);
+eol = CODING_SYSTEM_EOL_TYPE (str->codesys);
+XSETCODING_SYSTEM (coding_system, str->codesys);
+if (flags & CODING_STATE_COMPOSITE)
+dst = str->iso2022.composite_chars;
+while (n--)
+{
+c = *src++;
+if (flags & CODING_STATE_ESCAPE)
+	{	/* Within ESC sequence */
+	  int retval;
+	  retval = parse_iso2022_esc (coding_system, &str->iso2022,
+				      c, &flags, 1);
+	  if (retval)
+	    {
+	      switch (str->iso2022.esc)
+		{
+		case ISO_ESC_START_COMPOSITE:
+		  if (str->iso2022.composite_chars)
+		    Dynarr_reset (str->iso2022.composite_chars);
+		  else
+		    str->iso2022.composite_chars = Dynarr_new (unsigned char);
+		  dst = str->iso2022.composite_chars;
+		  break;
+		case ISO_ESC_END_COMPOSITE:
+		  {
+		    Bufbyte comstr[MAX_EMCHAR_LEN];
+		    Bytecount len;
+		    Emchar emch = lookup_composite_char (Dynarr_atp (dst, 0),
+							 Dynarr_length (dst));
+		    dst = real_dst;
+		    len = set_charptr_emchar (comstr, emch);
+		    Dynarr_add_many (dst, comstr, len);
+		    break;
+		  }
+		case ISO_ESC_LITERAL:
+		  DECODE_ADD_BINARY_CHAR (c, dst);
+		  break;
+		default:
+		  /* Everything else handled already */
+		  break;
+		}
+	    }
+	  /* Attempted error recovery. */
+	  if (str->iso2022.output_direction_sequence)
+	    ensure_correct_direction (flags & CODING_STATE_R2L ?
+				      CHARSET_RIGHT_TO_LEFT :
+				      CHARSET_LEFT_TO_RIGHT,
+				      str->codesys, dst, 0, 1);
+	  /* More error recovery. */
+	  if (!retval || str->iso2022.output_literally)
+	    {
+	      /* Output the (possibly invalid) sequence */
+	      int i;
+	      for (i = 0; i < str->iso2022.esc_bytes_index; i++)
+		DECODE_ADD_BINARY_CHAR (str->iso2022.esc_bytes[i], dst);
+	      flags &= CODING_STATE_ISO2022_LOCK;
+	      if (!retval)
+		n++, src--;/* Repeat the loop with the same character. */
+	      else
+		{
+		  /* No sense in reprocessing the final byte of the
+		     escape sequence; it could mess things up anyway.
+		     Just add it now. */
+		  DECODE_ADD_BINARY_CHAR (c, dst);
+		}
+	    }
+	  ch = 0;
+	}
+else if (BYTE_C0_P (c) || BYTE_C1_P (c))
+	{ /* Control characters */
+	  /***** Error-handling *****/
+	  /* If we were in the middle of a character, dump out the
+	     partial character. */
+	  DECODE_OUTPUT_PARTIAL_CHAR (ch);
+	  /* If we just saw a single-shift character, dump it out.
+	     This may dump out the wrong sort of single-shift character,
+	     but least it will give an indication that something went
+	     wrong. */
+	  if (flags & CODING_STATE_SS2)
+	    {
+	      DECODE_ADD_BINARY_CHAR (ISO_CODE_SS2, dst);
+	      flags &= ~CODING_STATE_SS2;
+	    }
+	  if (flags & CODING_STATE_SS3)
+	    {
+	      DECODE_ADD_BINARY_CHAR (ISO_CODE_SS3, dst);
+	      flags &= ~CODING_STATE_SS3;
+	    }
+	  /***** Now handle the control characters. *****/
+	  /* Handle CR/LF */
+	  DECODE_HANDLE_EOL_TYPE (eol, c, flags, dst);
+	  flags &= CODING_STATE_ISO2022_LOCK;
+	  if (!parse_iso2022_esc (coding_system, &str->iso2022, c, &flags, 1))
+	    DECODE_ADD_BINARY_CHAR (c, dst);
+	}
+else
+	{			/* Graphic characters */
+	  Lisp_Object charset;
+	  int lb;
+	  int reg;
+	  DECODE_HANDLE_EOL_TYPE (eol, c, flags, dst);
+	  /* Now determine the charset. */
+	  reg = ((flags & CODING_STATE_SS2) ? 2
+		 : (flags & CODING_STATE_SS3) ? 3
+		 : !BYTE_ASCII_P (c) ? str->iso2022.register_right
+		 : str->iso2022.register_left);
+	  charset = str->iso2022.charset[reg];
+	  /* Error checking: */
+	  if (NILP (charset) || str->iso2022.invalid_designated[reg]
+	      || (((c & 0x7F) == ' ' || (c & 0x7F) == ISO_CODE_DEL)
+		  && XCHARSET_CHARS (charset) == 94))
+	    /* Mrmph.  We are trying to invoke a register that has no
+	       or an invalid charset in it, or trying to add a character
+	       outside the range of the charset.  Insert that char literally
+	       to preserve it for the output. */
+	    {
+	      DECODE_OUTPUT_PARTIAL_CHAR (ch);
+	      DECODE_ADD_BINARY_CHAR (c, dst);
+	    }
+	  else
+	    {
+	      /* Things are probably hunky-dorey. */
+	      /* Fetch reverse charset, maybe. */
+	      if (((flags & CODING_STATE_R2L) &&
+		   XCHARSET_DIRECTION (charset) == CHARSET_LEFT_TO_RIGHT)
+		  ||
+		  (!(flags & CODING_STATE_R2L) &&
+		   XCHARSET_DIRECTION (charset) == CHARSET_RIGHT_TO_LEFT))
+		{
+		  Lisp_Object new_charset =
+		    XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
+		  if (!NILP (new_charset))
+		    charset = new_charset;
+		}
+	      lb = XCHARSET_LEADING_BYTE (charset);
+	      switch (XCHARSET_REP_BYTES (charset))
+		{
+		case 1:	/* ASCII */
+		  DECODE_OUTPUT_PARTIAL_CHAR (ch);
+		  Dynarr_add (dst, c & 0x7F);
+		  break;
+		case 2:	/* one-byte official */
+		  DECODE_OUTPUT_PARTIAL_CHAR (ch);
+		  Dynarr_add (dst, lb);
+		  Dynarr_add (dst, c | 0x80);
+		  break;
+		case 3:	/* one-byte private or two-byte official */
+		  if (XCHARSET_PRIVATE_P (charset))
+		    {
+		      DECODE_OUTPUT_PARTIAL_CHAR (ch);
+		      Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1);
+		      Dynarr_add (dst, lb);
+		      Dynarr_add (dst, c | 0x80);
+		    }
+		  else
+		    {
+		      if (ch)
+			{
+			  Dynarr_add (dst, lb);
+			  Dynarr_add (dst, ch | 0x80);
+			  Dynarr_add (dst, c | 0x80);
+			  ch = 0;
+			}
+		      else
+			ch = c;
+		    }
+		  break;
+		default:	/* two-byte private */
+		  if (ch)
+		    {
+		      Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2);
+		      Dynarr_add (dst, lb);
+		      Dynarr_add (dst, ch | 0x80);
+		      Dynarr_add (dst, c | 0x80);
+		      ch = 0;
+		    }
+		  else
+		    ch = c;
+		}
+	    }
+	  if (!ch)
+	    flags &= CODING_STATE_ISO2022_LOCK;
+	}
+label_continue_loop:;
+}
+if (flags & CODING_STATE_END)
+DECODE_OUTPUT_PARTIAL_CHAR (ch);
+CODING_STREAM_COMPOSE (str, flags, ch);
+}
+/***** ISO2022 encoder *****/
+/* Designate CHARSET into register REG. */
+static void
+iso2022_designate (Lisp_Object charset, unsigned char reg,
+		   struct encoding_stream *str, unsigned_char_dynarr *dst)
+{
+CONST char *inter94 = "()*+", *inter96= ",-./";
+int type;
+unsigned char final;
+Lisp_Object old_charset = str->iso2022.charset[reg];
+str->iso2022.charset[reg] = charset;
+if (!CHARSETP (charset))
+/* charset might be an initial nil or t. */
+return;
+type = XCHARSET_TYPE (charset);
+final = XCHARSET_FINAL (charset);
+if (!str->iso2022.force_charset_on_output[reg] &&
+CHARSETP (old_charset) &&
+XCHARSET_TYPE (old_charset) == type &&
+XCHARSET_FINAL (old_charset) == final)
+return;
+str->iso2022.force_charset_on_output[reg] = 0;
+{
+charset_conversion_spec_dynarr *dyn =
+str->codesys->iso2022.output_conv;
+if (dyn)
+{
+	int i;
+	for (i = 0; i < Dynarr_length (dyn); i++)
+	  {
+	    struct charset_conversion_spec *spec = Dynarr_atp (dyn, i);
+	    if (EQ (charset, spec->from_charset))
+		charset = spec->to_charset;
+	  }
+}
+}
+Dynarr_add (dst, ISO_CODE_ESC);
+switch (type)
+{
+case CHARSET_TYPE_94:
+Dynarr_add (dst, inter94[reg]);
+break;
+case CHARSET_TYPE_96:
+Dynarr_add (dst, inter96[reg]);
+break;
+case CHARSET_TYPE_94X94:
+Dynarr_add (dst, '$');
+if (reg != 0
+	  || !(CODING_SYSTEM_ISO2022_SHORT (str->codesys))
+	  || final < '@'
+	  || final > 'B')
+	Dynarr_add (dst, inter94[reg]);
+break;
+case CHARSET_TYPE_96X96:
+Dynarr_add (dst, '$');
+Dynarr_add (dst, inter96[reg]);
+break;
+}
+Dynarr_add (dst, final);
+}
+static void
+ensure_normal_shift (struct encoding_stream *str, unsigned_char_dynarr *dst)
+{
+if (str->iso2022.register_left != 0)
+{
+Dynarr_add (dst, ISO_CODE_SI);
+str->iso2022.register_left = 0;
+}
+}
+static void
+ensure_shift_out (struct encoding_stream *str, unsigned_char_dynarr *dst)
+{
+if (str->iso2022.register_left != 1)
+{
+Dynarr_add (dst, ISO_CODE_SO);
+str->iso2022.register_left = 1;
+}
+}
+/* Convert internally-formatted data to ISO2022 format. */
+static void
+encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
+		       unsigned_char_dynarr *dst, unsigned int n)
+{
+unsigned char charmask, c;
+unsigned int flags, ch, eol;
+unsigned char char_boundary;
+struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+struct Lisp_Coding_System *codesys = str->codesys;
+int i;
+Lisp_Object charset;
+int half;
+/* flags for handling composite chars.  We do a little switcharoo
+on the source while we're outputting the composite char. */
+unsigned int saved_n;
+CONST unsigned char *saved_src;
+int in_composite = 0;
+CODING_STREAM_DECOMPOSE (str, flags, ch);
+eol = CODING_SYSTEM_EOL_TYPE (str->codesys);
+char_boundary = str->iso2022.current_char_boundary;
+charset = str->iso2022.current_charset;
+half = str->iso2022.current_half;
+back_to_square_n:
+while (n--)
+{
+c = *src++;
+if (BYTE_ASCII_P (c))
+	{		/* Processing ASCII character */
+	  ch = 0;
+	  restore_left_to_right_direction (codesys, dst, &flags, 0);
+	  /* Make sure G0 contains ASCII */
+	  if ((c > ' ' && c < ISO_CODE_DEL) ||
+	      !CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (codesys))
+	    {
+	      ensure_normal_shift (str, dst);
+	      iso2022_designate (Vcharset_ascii, 0, str, dst);
+	    }
+	  /* If necessary, restore everything to the default state
+	     at end-of-line */
+	  if (c == '\n' &&
+	      !(CODING_SYSTEM_ISO2022_NO_ASCII_EOL (codesys)))
+	    {
+	      restore_left_to_right_direction (codesys, dst, &flags, 0);
+	      ensure_normal_shift (str, dst);
+	      for (i = 0; i < 4; i++)
+		{
+		  Lisp_Object initial_charset =
+		    CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i);
+		  iso2022_designate (initial_charset, i, str, dst);
+		}
+	    }
+	  if (c == '\n')
+	    {
+	      if (eol != EOL_LF && eol != EOL_AUTODETECT)
+		Dynarr_add (dst, '\r');
+	      if (eol != EOL_CR)
+		Dynarr_add (dst, c);
+	    }
+	  else
+	    {
+	      if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+		  && fit_to_be_escape_quoted (c))
+		Dynarr_add (dst, ISO_CODE_ESC);
+	      Dynarr_add (dst, c);
+	    }
+	  char_boundary = 1;
+	}
+else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch))
+	{ /* Processing Leading Byte */
+	  ch = 0;
+	  charset = CHARSET_BY_LEADING_BYTE (c);
+	  if (c == PRE_LEADING_BYTE_PRIVATE_1 ||
+	      c == PRE_LEADING_BYTE_PRIVATE_2)
+	    ch = c;
+	  else if (!EQ (charset, Vcharset_control_1)
+		   && !EQ (charset, Vcharset_composite))
+	    {
+	      int reg;
+	      ensure_correct_direction (XCHARSET_DIRECTION (charset),
+					codesys, dst, &flags, 0);
+	      /* Now determine which register to use. */
+	      reg = -1;
+	      for (i = 0; i < 4; i++)
+		{
+		  if (EQ (charset, str->iso2022.charset[i]) ||
+		      EQ (charset,
+			  CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)))
+		    {
+		      reg = i;
+		      break;
+		    }
+		}
+	      if (reg == -1)
+		{
+		  if (XCHARSET_GRAPHIC (charset) != 0)
+		    {
+		      if (!NILP (str->iso2022.charset[1]) &&
+			  (!CODING_SYSTEM_ISO2022_SEVEN (codesys) ||
+			   CODING_SYSTEM_ISO2022_LOCK_SHIFT (codesys)))
+			reg = 1;
+		      else if (!NILP (str->iso2022.charset[2]))
+			reg = 2;
+		      else if (!NILP (str->iso2022.charset[3]))
+			reg = 3;
+		      else
+			reg = 0;
+		    }
+		  else
+		    reg = 0;
+		}
+	      iso2022_designate (charset, reg, str, dst);
+	      /* Now invoke that register. */
+	      switch (reg)
+		{
+		case 0:
+		  ensure_normal_shift (str, dst);
+		  half = 0;
+		  break;
+		case 1:
+		  if (CODING_SYSTEM_ISO2022_SEVEN (codesys))
+		    {
+		      ensure_shift_out (str, dst);
+		      half = 0;
+		    }
+		  else
+		    half = 1;
+		  break;
+		case 2:
+		  if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
+		    {
+		      Dynarr_add (dst, ISO_CODE_ESC);
+		      Dynarr_add (dst, 'N');
+		      half = 0;
+		    }
+		  else
+		    {
+		      Dynarr_add (dst, ISO_CODE_SS2);
+		      half = 1;
+		    }
+		  break;
+		case 3:
+		  if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
+		    {
+		      Dynarr_add (dst, ISO_CODE_ESC);
+		      Dynarr_add (dst, 'O');
+		      half = 0;
+		    }
+		  else
+		    {
+		      Dynarr_add (dst, ISO_CODE_SS3);
+		      half = 1;
+		    }
+		  break;
+		default:
+		  abort ();
+		}
+	    }
+	  char_boundary = 0;
+	}
+else
+	{			/* Processing Non-ASCII character */
+	  charmask = (half == 0 ? 0x7F : 0xFF);
+	  char_boundary = 1;
+	  if (EQ (charset, Vcharset_control_1))
+	    {
+	      if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
+		  && fit_to_be_escape_quoted (c))
+		Dynarr_add (dst, ISO_CODE_ESC);
+	      /* you asked for it ... */
+	      Dynarr_add (dst, c - 0x20);
+	    }
+	  else
+	    {
+	      switch (XCHARSET_REP_BYTES (charset))
+		{
+		case 2:
+		  Dynarr_add (dst, c & charmask);
+		  break;
+		case 3:
+		  if (XCHARSET_PRIVATE_P (charset))
+		    {
+		      Dynarr_add (dst, c & charmask);
+		      ch = 0;
+		    }
+		  else if (ch)
+		    {
+		      if (EQ (charset, Vcharset_composite))
+			{
+			  if (in_composite)
+			    {
+			      /* #### Bother! We don't know how to
+				 handle this yet. */
+			      Dynarr_add (dst, '~');
+			    }
+			  else
+			    {
+			      Emchar emch = MAKE_CHAR (Vcharset_composite,
+						       ch & 0x7F, c & 0x7F);
+			      Lisp_Object lstr = composite_char_string (emch);
+			      saved_n = n;
+			      saved_src = src;
+			      in_composite = 1;
+			      src = XSTRING_DATA   (lstr);
+			      n   = XSTRING_LENGTH (lstr);
+			      Dynarr_add (dst, ISO_CODE_ESC);
+			      Dynarr_add (dst, '0'); /* start composing */
+			    }
+			}
+		      else
+			{
+			  Dynarr_add (dst, ch & charmask);
+			  Dynarr_add (dst, c & charmask);
+			}
+		      ch = 0;
+		    }
+		  else
+		    {
+		      ch = c;
+		      char_boundary = 0;
+		    }
+		  break;
+		case 4:
+		  if (ch)
+		    {
+		      Dynarr_add (dst, ch & charmask);
+		      Dynarr_add (dst, c & charmask);
+		      ch = 0;
+		    }
+		  else
+		    {
+		      ch = c;
+		      char_boundary = 0;
+		    }
+		  break;
+		default:
+		  abort ();
+		}
+	    }
+	}
+}
+if (in_composite)
+{
+n = saved_n;
+src = saved_src;
+in_composite = 0;
+Dynarr_add (dst, ISO_CODE_ESC);
+Dynarr_add (dst, '1'); /* end composing */
+goto back_to_square_n; /* Wheeeeeeeee ..... */
+}
+if (char_boundary && flags & CODING_STATE_END)
+{
+restore_left_to_right_direction (codesys, dst, &flags, 0);
+ensure_normal_shift (str, dst);
+for (i = 0; i < 4; i++)
+	{
+	  Lisp_Object initial_charset =
+	    CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i);
+	  iso2022_designate (initial_charset, i, str, dst);
+	}
+}
+CODING_STREAM_COMPOSE (str, flags, ch);
+str->iso2022.current_char_boundary = char_boundary;
+str->iso2022.current_charset = charset;
+str->iso2022.current_half = half;
+/* Verbum caro factum est! */
+}
+/************************************************************************/
+/*                     No-conversion methods                            */
+/************************************************************************/
+/* This is used when reading in "binary" files -- i.e. files that may
+contain all 256 possible byte values and that are not to be
+interpreted as being in any particular decoding. */
+static void
+decode_coding_no_conversion (Lstream *decoding, CONST unsigned char *src,
+			     unsigned_char_dynarr *dst, unsigned int n)
+{
+unsigned char c;
+unsigned int flags, ch;
+int eol;
+struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
+CODING_STREAM_DECOMPOSE (str, flags, ch);
+eol = str->eol_type;
+while (n--)
+{
+c = *src++;
+DECODE_HANDLE_EOL_TYPE (eol, c, flags, dst);
+DECODE_ADD_BINARY_CHAR (c, dst);
+label_continue_loop:;
+}
+DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);
+CODING_STREAM_COMPOSE (str, flags, ch);
+}
+static void
+encode_coding_no_conversion (Lstream *encoding, CONST unsigned char *src,
+			     unsigned_char_dynarr *dst, unsigned int n)
+{
+unsigned char c;
+struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
+unsigned int flags, ch, eol;
+CODING_STREAM_DECOMPOSE (str, flags, ch);
+eol = CODING_SYSTEM_EOL_TYPE (str->codesys);
+while (n--)
+{
+c = *src++;
+if (c == '\n')
+	{
+	  if (eol != EOL_LF && eol != EOL_AUTODETECT)
+	    Dynarr_add (dst, '\r');
+	  if (eol != EOL_CR)
+	    Dynarr_add (dst, '\n');
+	  ch = 0;
+	}
+else if (BYTE_ASCII_P (c))
+	{
+	  assert (ch == 0);
+	  Dynarr_add (dst, c);
+	}
+else if (BUFBYTE_LEADING_BYTE_P (c))
+	{
+	  assert (ch == 0);
+	  if (c == LEADING_BYTE_LATIN_1 || c == LEADING_BYTE_CONTROL_1)
+	    ch = c;
+	  else
+	    Dynarr_add (dst, '~'); /* untranslatable character */
+	}
+else
+	{
+	  if (ch == LEADING_BYTE_LATIN_1)
+	    Dynarr_add (dst, c);
+	  else if (ch == LEADING_BYTE_CONTROL_1)
+	    {
+	      assert (c < 0xC0);
+	      Dynarr_add (dst, c - 0x20);
+	    }
+	  /* else it should be the second or third byte of an
+	     untranslatable character, so ignore it */
+	  ch = 0;
+	}
+}
+CODING_STREAM_COMPOSE (str, flags, ch);
+}
+/************************************************************************/
+/*                   Simple internal/external functions                 */
+/************************************************************************/
+static extbyte_dynarr *conversion_out_dynarr;
+static bufbyte_dynarr *conversion_in_dynarr;
+/* Determine coding system from coding format */
+#define PATHNAME_CODING_SYSTEM 				\
+((NILP (Vpathname_coding_system) ||			\
+(EQ ((Vpathname_coding_system), Qbinary))) ?		\
+Qnil : Fget_coding_system (Vpathname_coding_system))
+/* #### not correct for all values of `fmt'! */
+#define FMT_CODING_SYSTEM(fmt)					\
+(((fmt) == FORMAT_FILENAME) ? PATHNAME_CODING_SYSTEM      :	\
+((fmt) == FORMAT_CTEXT   ) ? Fget_coding_system (Qctext) :	\
+((fmt) == FORMAT_TERMINAL) ? PATHNAME_CODING_SYSTEM      :	\
+Qnil)
+extern CONST Extbyte *
+convert_to_external_format (CONST Bufbyte *ptr,
+			    Bytecount len,
+			    Extcount *len_out,
+			    enum external_data_format fmt)
+{
+Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt);
+if (!conversion_out_dynarr)
+conversion_out_dynarr = Dynarr_new (Extbyte);
+else
+Dynarr_reset (conversion_out_dynarr);
+if (NILP (coding_system))
+{
+CONST Bufbyte *end = ptr + len;
+for (; ptr < end;)
+{
+Bufbyte c =
+(BYTE_ASCII_P (*ptr))		? *ptr :
+(*ptr == LEADING_BYTE_CONTROL_1)	? (*(ptr+1) - 0x20) :
+(*ptr == LEADING_BYTE_LATIN_1)	? (*(ptr+1)) :
+'~';
+Dynarr_add (conversion_out_dynarr, (Extbyte) c);
+INC_CHARPTR (ptr);
+}
+#ifdef ERROR_CHECK_BUFPOS
+assert (ptr == end);
+#endif
+}
+else
+{
+Lisp_Object instream =
+make_fixed_buffer_input_stream ((unsigned char *) ptr, len);
+Lisp_Object outstream = make_dynarr_output_stream
+((unsigned_char_dynarr *) conversion_out_dynarr);
+struct gcpro gcpro1, gcpro2;
+char tempbuf[1024]; /* some random amount */
+outstream =
+make_encoding_output_stream (XLSTREAM (outstream), coding_system);
+GCPRO2 (instream, outstream); /* Necessary?? */
+while (1)
+{
+int size_in_bytes = Lstream_read (XLSTREAM (instream),
+tempbuf, sizeof (tempbuf));
+if (!size_in_bytes)
+break;
+Lstream_write (XLSTREAM (outstream), tempbuf, size_in_bytes);
+}
+Lstream_close (XLSTREAM (instream));
+Lstream_close (XLSTREAM (outstream));
+UNGCPRO;
+}
+*len_out = Dynarr_length (conversion_out_dynarr);
+Dynarr_add (conversion_out_dynarr, 0); /* remember to zero-terminate! */
+return Dynarr_atp (conversion_out_dynarr, 0);
+}
+extern CONST Bufbyte *
+convert_from_external_format (CONST Extbyte *ptr,
+			      Extcount len,
+			      Bytecount *len_out,
+			      enum external_data_format fmt)
+{
+Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt);
+if (!conversion_in_dynarr)
+conversion_in_dynarr = Dynarr_new (Bufbyte);
+else
+Dynarr_reset (conversion_in_dynarr);
+if (NILP (coding_system))
+{
+CONST Extbyte *end = ptr + len;
+for (; ptr < end; ptr++)
+{
+Extbyte c = *ptr;
+DECODE_ADD_BINARY_CHAR (c, conversion_in_dynarr);
+}
+}
+else
+{
+Lisp_Object instream =
+make_fixed_buffer_input_stream ((unsigned char *) ptr, len);
+Lisp_Object outstream = make_dynarr_output_stream
+((unsigned_char_dynarr *) conversion_in_dynarr);
+struct gcpro gcpro1, gcpro2;
+char tempbuf[1024]; /* some random amount */
+outstream =
+make_decoding_output_stream (XLSTREAM (outstream), coding_system);
+GCPRO2 (instream, outstream); /* Necessary?? */
+while (1)
+{
+int size_in_bytes = Lstream_read (XLSTREAM (instream),
+tempbuf, sizeof (tempbuf));
+if (!size_in_bytes)
+break;
+Lstream_write (XLSTREAM (outstream), tempbuf, size_in_bytes);
+}
+Lstream_close (XLSTREAM (instream));
+Lstream_close (XLSTREAM (outstream));
+UNGCPRO;
+}
+*len_out = Dynarr_length (conversion_in_dynarr);
+Dynarr_add (conversion_in_dynarr, 0); /* remember to zero-terminate! */
+return Dynarr_atp (conversion_in_dynarr, 0);
+}
+/************************************************************************/
+/*                             Initialization                           */
+/************************************************************************/
+void
+syms_of_mule_coding (void)
+{
+defsymbol (&Qfile_coding_system, "file-coding-system");
+deferror (&Qcoding_system_error, "coding-system-error",
+	    "Coding-system error", Qio_error);
+DEFSUBR (Fcoding_system_p);
+DEFSUBR (Ffind_coding_system);
+DEFSUBR (Fget_coding_system);
+DEFSUBR (Fcoding_system_list);
+DEFSUBR (Fcoding_system_name);
+DEFSUBR (Fmake_coding_system);
+DEFSUBR (Fcopy_coding_system);
+DEFSUBR (Fsubsidiary_coding_system);
+DEFSUBR (Fcoding_system_type);
+DEFSUBR (Fcoding_system_doc_string);
+DEFSUBR (Fcoding_system_property);
+DEFSUBR (Fcoding_category_list);
+DEFSUBR (Fset_coding_priority_list);
+DEFSUBR (Fcoding_priority_list);
+DEFSUBR (Fset_coding_category_system);
+DEFSUBR (Fcoding_category_system);
+DEFSUBR (Fdetect_coding_region);
+DEFSUBR (Fdecode_coding_region);
+DEFSUBR (Fencode_coding_region);
+DEFSUBR (Fdecode_shift_jis_char);
+DEFSUBR (Fencode_shift_jis_char);
+DEFSUBR (Fdecode_big5_char);
+DEFSUBR (Fencode_big5_char);
+defsymbol (&Qcoding_system_p, "coding-system-p");
+defsymbol (&Qbig5, "big5");
+defsymbol (&Qshift_jis, "shift-jis");
+defsymbol (&Qno_conversion, "no-conversion");
+defsymbol (&Qccl, "ccl");
+defsymbol (&Qiso2022, "iso2022");
+defsymbol (&Qmnemonic, "mnemonic");
+defsymbol (&Qeol_type, "eol-type");
+defsymbol (&Qpost_read_conversion, "post-read-conversion");
+defsymbol (&Qpre_write_conversion, "pre-write-conversion");
+defsymbol (&Qcr, "cr");
+defsymbol (&Qlf, "lf");
+defsymbol (&Qcrlf, "crlf");
+defsymbol (&Qeol_cr, "eol-cr");
+defsymbol (&Qeol_lf, "eol-lf");
+defsymbol (&Qeol_crlf, "eol-crlf");
+defsymbol (&Qcharset_g0, "charset-g0");
+defsymbol (&Qcharset_g1, "charset-g1");
+defsymbol (&Qcharset_g2, "charset-g2");
+defsymbol (&Qcharset_g3, "charset-g3");
+defsymbol (&Qforce_g0_on_output, "force-g0-on-output");
+defsymbol (&Qforce_g1_on_output, "force-g1-on-output");
+defsymbol (&Qforce_g2_on_output, "force-g2-on-output");
+defsymbol (&Qforce_g3_on_output, "force-g3-on-output");
+defsymbol (&Qshort, "short");
+defsymbol (&Qno_ascii_eol, "no-ascii-eol");
+defsymbol (&Qno_ascii_cntl, "no-ascii-cntl");
+defsymbol (&Qseven, "seven");
+defsymbol (&Qlock_shift, "lock-shift");
+defsymbol (&Qno_iso6429, "no-iso6429");
+defsymbol (&Qescape_quoted, "escape-quoted");
+defsymbol (&Qinput_charset_conversion, "input-charset-conversion");
+defsymbol (&Qoutput_charset_conversion, "output-charset-conversion");
+defsymbol (&Qencode, "encode");
+defsymbol (&Qdecode, "decode");
+defsymbol (&Qctext, "ctext");
+defsymbol (&coding_category_symbol[CODING_CATEGORY_SHIFT_JIS],
+	     "shift-jis");
+defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_7],
+	     "iso-7");
+defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_8_DESIGNATE],
+	     "iso-8-designate");
+defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_8_1],
+	     "iso-8-1");
+defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_8_2],
+	     "iso-8-2");
+defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_LOCK_SHIFT],
+	     "iso-lock-shift");
+defsymbol (&coding_category_symbol[CODING_CATEGORY_BIG5],
+	     "big5");
+defsymbol (&coding_category_symbol[CODING_CATEGORY_NO_CONVERSION],
+	     "no-conversion");
+}
+void
+lstream_type_create_mule_coding (void)
+{
+LSTREAM_HAS_METHOD (decoding, reader);
+LSTREAM_HAS_METHOD (decoding, writer);
+LSTREAM_HAS_METHOD (decoding, rewinder);
+LSTREAM_HAS_METHOD (decoding, seekable_p);
+LSTREAM_HAS_METHOD (decoding, flusher);
+LSTREAM_HAS_METHOD (decoding, closer);
+LSTREAM_HAS_METHOD (decoding, marker);
+LSTREAM_HAS_METHOD (encoding, reader);
+LSTREAM_HAS_METHOD (encoding, writer);
+LSTREAM_HAS_METHOD (encoding, rewinder);
+LSTREAM_HAS_METHOD (encoding, seekable_p);
+LSTREAM_HAS_METHOD (encoding, flusher);
+LSTREAM_HAS_METHOD (encoding, closer);
+LSTREAM_HAS_METHOD (encoding, marker);
+}
+void
+vars_of_mule_coding (void)
+{
+int i;
+/* Initialize to something reasonable ... */
+for (i = 0; i <= CODING_CATEGORY_LAST; i++)
+{
+coding_category_system[i] = Qnil;
+coding_category_by_priority[i] = i;
+}
+DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /*
+Coding system used for TTY keyboard input.
+Not used under a windowing system.
+*/ );
+Vkeyboard_coding_system = Qnil;
+DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /*
+Coding system used for TTY display output.
+Not used under a windowing system.
+*/ );
+Vterminal_coding_system = Qnil;
+DEFVAR_LISP ("process-input-coding-system", &Vprocess_input_coding_system /*
+Default coding system used by C process routines for inputting data.
+This can be changed for a particular process using
+`set-process-input-coding-system'.
+*/ );
+Vprocess_input_coding_system = Qnil;
+DEFVAR_LISP ("process-output-coding-system",
+&Vprocess_output_coding_system /*
+Default coding system used by C process routines for outputting data.
+This can be changed for a particular process using
+`set-process-output-coding-system'.
+*/ );
+Vprocess_output_coding_system = Qnil;
+DEFVAR_LISP ("pathname-coding-system", &Vpathname_coding_system /*
+Coding system used to convert pathnames when accessing files.
+*/ );
+Vpathname_coding_system = Qnil;
+}
+void
+complex_vars_of_mule_coding (void)
+{
+staticpro (&Vcoding_system_hashtable);
+Vcoding_system_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK,
+						  HASHTABLE_EQ);
+the_codesys_prop_dynarr = Dynarr_new (struct codesys_prop);
+#define DEFINE_CODESYS_PROP(Prop_Type, Sym) do	\
+{						\
+struct codesys_prop csp;			\
+csp.sym = (Sym);				\
+csp.prop_type = (Prop_Type);			\
+Dynarr_add (the_codesys_prop_dynarr, csp);	\
+} while (0)
+DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qmnemonic);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qeol_type);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qeol_cr);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qeol_crlf);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qeol_lf);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qpost_read_conversion);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qpre_write_conversion);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qcharset_g0);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qcharset_g1);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qcharset_g2);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qcharset_g3);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qforce_g0_on_output);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qforce_g1_on_output);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qforce_g2_on_output);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qforce_g3_on_output);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qshort);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qno_ascii_eol);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qno_ascii_cntl);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qseven);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qlock_shift);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qno_iso6429);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qescape_quoted);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qinput_charset_conversion);
+DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qoutput_charset_conversion);
+DEFINE_CODESYS_PROP (CODESYS_PROP_CCL,     Qencode);
+DEFINE_CODESYS_PROP (CODESYS_PROP_CCL,     Qdecode);
+/* Need to create this here or we're really screwed. */
+Fmake_coding_system (Qno_conversion, Qno_conversion, build_string ("No conversion"),
+		       list2 (Qmnemonic, build_string ("Noconv")));
+Fcopy_coding_system (Fcoding_system_property (Qno_conversion, Qeol_lf),
+		       Qbinary);
+/* Need this for bootstrapping */
+coding_category_system[CODING_CATEGORY_NO_CONVERSION] =
+Fget_coding_system (Qno_conversion);
+}

Mercurial > hg > xemacs-beta

comparison src/mule-coding.c @ 70:131b0175ea99 r20-0b30