view src/mule-coding.c @ 260:052205f7dd5f

Added tag r20-5b28 for changeset 11cf20601dec
author cvs
date Mon, 13 Aug 2007 10:23:05 +0200
parents 11cf20601dec
children c5d627a313b1
line wrap: on
line source

/* Code conversion functions.
   Copyright (C) 1991, 1995 Free Software Foundation, Inc.
   Copyright (C) 1995 Sun Microsystems, Inc.

This file is part of XEmacs.

XEmacs is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

XEmacs is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with XEmacs; see the file COPYING.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* Synched up with: Mule 2.3.   Not in FSF. */

/* Rewritten by Ben Wing <wing@666.com>. */

#if 0				/* while file-coding not split up */

#include <config.h>
#include "lisp.h"

#include "buffer.h"
#include "elhash.h"
#include "insdel.h"
#include "lstream.h"
#include "mule-ccl.h"
#include "mule-coding.h"

Lisp_Object Qbuffer_file_coding_system, Qcoding_system_error;

Lisp_Object Vkeyboard_coding_system;
Lisp_Object Vterminal_coding_system;
Lisp_Object Vcoding_system_for_read;
Lisp_Object Vcoding_system_for_write;
Lisp_Object Vfile_name_coding_system;

/* Table of symbols identifying each coding category. */
Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST + 1];

/* Coding system currently associated with each coding category. */
Lisp_Object coding_category_system[CODING_CATEGORY_LAST + 1];

/* Table of all coding categories in decreasing order of priority.
   This describes a permutation of the possible coding categories. */
int coding_category_by_priority[CODING_CATEGORY_LAST + 1];

Lisp_Object Qcoding_system_p;

Lisp_Object Qbig5, Qshift_jis, Qno_conversion, Qccl, Qiso2022;
/* Qinternal in general.c */

Lisp_Object Qmnemonic, Qeol_type;
Lisp_Object Qcr, Qcrlf, Qlf;
Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf;
Lisp_Object Qpost_read_conversion;
Lisp_Object Qpre_write_conversion;

Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3;
Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output;
Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output;
Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift;
Lisp_Object Qno_iso6429, Qescape_quoted;
Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion;

Lisp_Object Qencode, Qdecode;

Lisp_Object Qctext;

Lisp_Object Vcoding_system_hashtable;

int enable_multibyte_characters;

/* Additional information used by the ISO2022 decoder and detector. */
struct iso2022_decoder
{
  /* CHARSET holds the character sets currently assigned to the G0
     through G3 variables.  It is initialized from the array
     INITIAL_CHARSET in CODESYS. */
  Lisp_Object charset[4];

  /* Which registers are currently invoked into the left (GL) and
     right (GR) halves of the 8-bit encoding space? */
  int register_left, register_right;

  /* ISO_ESC holds a value indicating part of an escape sequence
     that has already been seen. */
  enum iso_esc_flag esc;

  /* This records the bytes we've seen so far in an escape sequence,
     in case the sequence is invalid (we spit out the bytes unchanged). */
  unsigned char esc_bytes[8];

  /* Index for next byte to store in ISO escape sequence. */
  int esc_bytes_index;

  /* Stuff seen so far when composing a string. */
  unsigned_char_dynarr *composite_chars;

  /* If we saw an invalid designation sequence for a particular
     register, we flag it here and switch to ASCII.  The next time we
     see a valid designation for this register, we turn off the flag
     and do the designation normally, but pretend the sequence was
     invalid.  The effect of all this is that (most of the time) the
     escape sequences for both the switch to the unknown charset, and
     the switch back to the known charset, get inserted literally into
     the buffer and saved out as such.  The hope is that we can
     preserve the escape sequences so that the resulting written out
     file makes sense.  If we don't do any of this, the designation
     to the invalid charset will be preserved but that switch back
     to the known charset will probably get eaten because it was
     the same charset that was already present in the register. */
  unsigned char invalid_designated[4];

  /* We try to do similar things as above for direction-switching
     sequences.  If we encountered a direction switch while an
     invalid designation was present, or an invalid designation
     just after a direction switch (i.e. no valid designation
     encountered yet), we insert the direction-switch escape
     sequence literally into the output stream, and later on
     insert the corresponding direction-restoring escape sequence
     literally also. */
  unsigned int switched_dir_and_no_valid_charset_yet :1;
  unsigned int invalid_switch_dir :1;

  /* Tells the decoder to output the escape sequence literally
     even though it was valid.  Used in the games we play to
     avoid lossage when we encounter invalid designations. */
  unsigned int output_literally :1;
  /* We encountered a direction switch followed by an invalid
     designation.  We didn't output the direction switch
     literally because we didn't know about the invalid designation;
     but we have to do so now. */
  unsigned int output_direction_sequence :1;
};

Lisp_Object Fcopy_coding_system (Lisp_Object old_coding_system,
				 Lisp_Object new_name);
struct detection_state;
static int detect_coding_sjis (struct detection_state *st,
			       CONST unsigned char *src,
			       unsigned int n);
static void decode_coding_sjis (Lstream *decoding,
				CONST unsigned char *src,
				unsigned_char_dynarr *dst,
				unsigned int n);
static void encode_coding_sjis (Lstream *encoding,
				CONST unsigned char *src,
				unsigned_char_dynarr *dst,
				unsigned int n);
static int detect_coding_big5 (struct detection_state *st,
			       CONST unsigned char *src,
			       unsigned int n);
static void decode_coding_big5 (Lstream *decoding,
				CONST unsigned char *src,
				unsigned_char_dynarr *dst, unsigned int n);
static void encode_coding_big5 (Lstream *encoding,
				CONST unsigned char *src,
				unsigned_char_dynarr *dst, unsigned int n);
static int postprocess_iso2022_mask (int mask);
static void reset_iso2022 (Lisp_Object coding_system,
			   struct iso2022_decoder *iso);
static int detect_coding_iso2022 (struct detection_state *st,
				  CONST unsigned char *src,
				  unsigned int n);
static void decode_coding_iso2022 (Lstream *decoding,
				   CONST unsigned char *src,
				   unsigned_char_dynarr *dst, unsigned int n);
static void encode_coding_iso2022 (Lstream *encoding,
				   CONST unsigned char *src,
				   unsigned_char_dynarr *dst, unsigned int n);
static void decode_coding_no_conversion (Lstream *decoding,
					 CONST unsigned char *src,
					 unsigned_char_dynarr *dst,
					 unsigned int n);
static void encode_coding_no_conversion (Lstream *encoding,
					 CONST unsigned char *src,
					 unsigned_char_dynarr *dst,
					 unsigned int n);
static void mule_decode (Lstream *decoding, CONST unsigned char *src,
			 unsigned_char_dynarr *dst, unsigned int n);
static void mule_encode (Lstream *encoding, CONST unsigned char *src,
			 unsigned_char_dynarr *dst, unsigned int n);

typedef struct codesys_prop codesys_prop;
struct codesys_prop
{
  Lisp_Object sym;
  int prop_type;
};

typedef struct
{
  Dynarr_declare (codesys_prop);
} codesys_prop_dynarr;

codesys_prop_dynarr *the_codesys_prop_dynarr;

enum codesys_prop_enum
{
  CODESYS_PROP_ALL_OK,
  CODESYS_PROP_ISO2022,
  CODESYS_PROP_CCL
};


/************************************************************************/
/*                       Coding system functions                        */
/************************************************************************/

static Lisp_Object mark_coding_system (Lisp_Object, void (*) (Lisp_Object));
static void print_coding_system (Lisp_Object, Lisp_Object, int);
static void finalize_coding_system (void *header, int for_disksave);

DEFINE_LRECORD_IMPLEMENTATION ("coding-system", coding_system,
			       mark_coding_system, print_coding_system,
			       finalize_coding_system,
			       0, 0, struct Lisp_Coding_System);

static Lisp_Object
mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
{
  struct Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);

  (markobj) (CODING_SYSTEM_NAME (codesys));
  (markobj) (CODING_SYSTEM_DOC_STRING (codesys));
  (markobj) (CODING_SYSTEM_MNEMONIC (codesys));
  (markobj) (CODING_SYSTEM_EOL_LF (codesys));
  (markobj) (CODING_SYSTEM_EOL_CRLF (codesys));
  (markobj) (CODING_SYSTEM_EOL_CR (codesys));

  switch (CODING_SYSTEM_TYPE (codesys))
    {
      int i;
    case CODESYS_ISO2022:
      for (i = 0; i < 4; i++)
	(markobj) (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i));
      if (codesys->iso2022.input_conv)
	{
	  for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++)
	    {
	      struct charset_conversion_spec *ccs =
		Dynarr_atp (codesys->iso2022.input_conv, i);
	      (markobj) (ccs->from_charset);
	      (markobj) (ccs->to_charset);
	    }
	}
      if (codesys->iso2022.output_conv)
	{
	  for (i = 0; i < Dynarr_length (codesys->iso2022.output_conv); i++)
	    {
	      struct charset_conversion_spec *ccs =
		Dynarr_atp (codesys->iso2022.output_conv, i);
	      (markobj) (ccs->from_charset);
	      (markobj) (ccs->to_charset);
	    }
	}
      break;

    case CODESYS_CCL:
      (markobj) (CODING_SYSTEM_CCL_DECODE (codesys));
      (markobj) (CODING_SYSTEM_CCL_ENCODE (codesys));
      break;
    default:
      break;
    }

  (markobj) (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys));
  return CODING_SYSTEM_POST_READ_CONVERSION (codesys);
}

static void
print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
		     int escapeflag)
{
  struct Lisp_Coding_System *c = XCODING_SYSTEM (obj);
  if (print_readably)
    error ("printing unreadable object #<coding_system 0x%x>",
	   c->header.uid);

  write_c_string ("#<coding_system ", printcharfun);
  print_internal (c->name, printcharfun, 1);
  write_c_string (">", printcharfun);
}

static void
finalize_coding_system (void *header, int for_disksave)
{
  struct Lisp_Coding_System *c = (struct Lisp_Coding_System *) header;
  /* Since coding systems never go away, this function is not
     necessary.  But it would be necessary if we changed things
     so that coding systems could go away. */
  if (!for_disksave) /* see comment in lstream.c */
    {
      switch (CODING_SYSTEM_TYPE (c))
	{
	case CODESYS_ISO2022:
	  if (c->iso2022.input_conv)
	    {
	      Dynarr_free (c->iso2022.input_conv);
	      c->iso2022.input_conv = 0;
	    }
	  if (c->iso2022.output_conv)
	    {
	      Dynarr_free (c->iso2022.output_conv);
	      c->iso2022.output_conv = 0;
	    }
	  break;

	default:
	  break;
	}
    }
}

static enum eol_type
symbol_to_eol_type (Lisp_Object symbol)
{
  CHECK_SYMBOL (symbol);
  if (NILP (symbol))      return EOL_AUTODETECT;
  if (EQ (symbol, Qlf))   return EOL_LF;
  if (EQ (symbol, Qcrlf)) return EOL_CRLF;
  if (EQ (symbol, Qcr))   return EOL_CR;

  signal_simple_error ("Unrecognized eol type", symbol);
  return EOL_AUTODETECT; /* not reached */
}

static Lisp_Object
eol_type_to_symbol (enum eol_type type)
{
  switch (type)
    {
    case EOL_LF:         return Qlf;
    case EOL_CRLF:       return Qcrlf;
    case EOL_CR:         return Qcr;
    case EOL_AUTODETECT: return Qnil;
    default:             abort (); return Qnil; /* not reached */
    }
}

static void
setup_eol_coding_systems (struct Lisp_Coding_System *codesys)
{
  Lisp_Object codesys_obj = Qnil;
  int len = string_length (XSYMBOL (CODING_SYSTEM_NAME (codesys))->name);
  char *codesys_name = (char *) alloca (len + 7);
  Lisp_Object codesys_name_sym, sub_codesys_obj;

  /* kludge */

  XSETCODING_SYSTEM (codesys_obj, codesys);

  memcpy (codesys_name,
	  string_data (XSYMBOL (CODING_SYSTEM_NAME (codesys))->name), len);

#define DEFINE_SUB_CODESYS(op_sys, Type) do {	\
    strcpy (codesys_name + len, "-" op_sys);	\
    codesys_name_sym = intern (codesys_name);	\
    sub_codesys_obj = Fcopy_coding_system (codesys_obj, codesys_name_sym); \
    XCODING_SYSTEM_EOL_TYPE (sub_codesys_obj) = Type; \
    CODING_SYSTEM_##Type (codesys) = sub_codesys_obj; \
} while (0)

  DEFINE_SUB_CODESYS("unix", EOL_LF);
  DEFINE_SUB_CODESYS("dos",  EOL_CRLF);
  DEFINE_SUB_CODESYS("mac",  EOL_CR);
}

DEFUN ("coding-system-p", Fcoding_system_p, 1, 1, 0, /*
T if OBJECT is a coding system.
A coding system is an object that defines how text containing multiple
character sets is encoded into a stream of (typically 8-bit) bytes.
The coding system is used to decode the stream into a series of
characters (which may be from multiple charsets) when the text is read
from a file or process, and is used to encode the text back into the
same format when it is written out to a file or process.

For example, many ISO2022-compliant coding systems (such as Compound
Text, which is used for inter-client data under the X Window System)
use escape sequences to switch between different charsets -- Japanese
Kanji, for example, is invoked with "ESC $ ( B"; ASCII is invoked
with "ESC ( B"; and Cyrillic is invoked with "ESC - L".  See
`make-coding-system' for more information.

Coding systems are normally identified using a symbol, and the
symbol is accepted in place of the actual coding system object whenever
a coding system is called for. (This is similar to how faces work.)
*/
       (object))
{
  return CODING_SYSTEMP (object) ? Qt : Qnil;
}

DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /*
Retrieve the coding system of the given name.

If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply
returned.  Otherwise, CODING-SYSTEM-OR-NAME should be a symbol.
If there is no such coding system, nil is returned.  Otherwise the
associated coding system object is returned.
*/
       (coding_system_or_name))
{
  if (NILP (coding_system_or_name))
    coding_system_or_name = Qbinary;
  if (CODING_SYSTEMP (coding_system_or_name))
    return coding_system_or_name;
  CHECK_SYMBOL (coding_system_or_name);

  return Fgethash (coding_system_or_name, Vcoding_system_hashtable, Qnil);
}

DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
Retrieve the coding system of the given name.
Same as `find-coding-system' except that if there is no such
coding system, an error is signaled instead of returning nil.
*/
       (name))
{
  Lisp_Object coding_system = Ffind_coding_system (name);

  if (NILP (coding_system))
    signal_simple_error ("No such coding system", name);
  return coding_system;
}

/* We store the coding systems in hash tables with the names as the key and the
   actual coding system object as the value.  Occasionally we need to use them
   in a list format.  These routines provide us with that. */
struct coding_system_list_closure
{
  Lisp_Object *coding_system_list;
};

static int
add_coding_system_to_list_mapper (CONST void *hash_key, void *hash_contents,
				  void *coding_system_list_closure)
{
  /* This function can GC */
  Lisp_Object key, contents;
  Lisp_Object *coding_system_list;
  struct coding_system_list_closure *cscl =
    (struct coding_system_list_closure *) coding_system_list_closure;
  CVOID_TO_LISP (key, hash_key);
  VOID_TO_LISP (contents, hash_contents);
  coding_system_list = cscl->coding_system_list;

  *coding_system_list = Fcons (XCODING_SYSTEM (contents)->name,
			       *coding_system_list);
  return 0;
}

DEFUN ("coding-system-list", Fcoding_system_list, 0, 0, 0, /*
Return a list of the names of all defined coding systems.
*/
       ())
{
  Lisp_Object coding_system_list = Qnil;
  struct gcpro gcpro1;
  struct coding_system_list_closure coding_system_list_closure;

  GCPRO1 (coding_system_list);
  coding_system_list_closure.coding_system_list = &coding_system_list;
  elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hashtable,
		 &coding_system_list_closure);
  UNGCPRO;

  return coding_system_list;
}

DEFUN ("coding-system-name", Fcoding_system_name, 1, 1, 0, /*
Return the name of the given coding system.
*/
       (coding_system))
{
  coding_system = Fget_coding_system (coding_system);
  return XCODING_SYSTEM_NAME (coding_system);
}

static struct Lisp_Coding_System *
allocate_coding_system (enum coding_system_type type, Lisp_Object name)
{
  struct Lisp_Coding_System *codesys =
    alloc_lcrecord_type (struct Lisp_Coding_System, lrecord_coding_system);

  zero_lcrecord (codesys);
  CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = Qnil;
  CODING_SYSTEM_POST_READ_CONVERSION (codesys) = Qnil;
  CODING_SYSTEM_EOL_TYPE (codesys) = EOL_AUTODETECT;
  CODING_SYSTEM_EOL_CRLF (codesys) = Qnil;
  CODING_SYSTEM_EOL_CR   (codesys) = Qnil;
  CODING_SYSTEM_EOL_LF   (codesys) = Qnil;
  CODING_SYSTEM_TYPE     (codesys) = type;

  if (type == CODESYS_ISO2022)
    {
      int i;
      for (i = 0; i < 4; i++)
	CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i) = Qnil;
    }
  else if (type == CODESYS_CCL)
    {
      CODING_SYSTEM_CCL_DECODE (codesys) = Qnil;
      CODING_SYSTEM_CCL_ENCODE (codesys) = Qnil;
    }

  CODING_SYSTEM_NAME (codesys) = name;

  return codesys;
}

/* Given a list of charset conversion specs as specified in a Lisp
   program, parse it into STORE_HERE. */

static void
parse_charset_conversion_specs (charset_conversion_spec_dynarr *store_here,
				Lisp_Object spec_list)
{
  Lisp_Object rest;

  EXTERNAL_LIST_LOOP (rest, spec_list)
    {
      Lisp_Object car = XCAR (rest);
      Lisp_Object from, to;
      struct charset_conversion_spec spec;

      if (!CONSP (car) || !CONSP (XCDR (car)) || !NILP (XCDR (XCDR (car))))
	signal_simple_error ("Invalid charset conversion spec", car);
      from = Fget_charset (XCAR (car));
      to = Fget_charset (XCAR (XCDR (car)));
      if (XCHARSET_TYPE (from) != XCHARSET_TYPE (to))
	signal_simple_error_2
	  ("Attempted conversion between different charset types",
	   from, to);
      spec.from_charset = from;
      spec.to_charset = to;

      Dynarr_add (store_here, spec);
    }
}

/* Given a dynarr LOAD_HERE of internally-stored charset conversion
   specs, return the equivalent as the Lisp programmer would see it.

   If LOAD_HERE is 0, return Qnil. */

static Lisp_Object
unparse_charset_conversion_specs (charset_conversion_spec_dynarr *load_here)
{
  int i;
  Lisp_Object result = Qnil;

  if (!load_here)
    return Qnil;
  for (i = 0; i < Dynarr_length (load_here); i++)
    {
      struct charset_conversion_spec *ccs =
	Dynarr_atp (load_here, i);
      result = Fcons (list2 (ccs->from_charset, ccs->to_charset), result);
    }

  return Fnreverse (result);
}

DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /*
Register symbol NAME as a coding system.

TYPE describes the conversion method used and should be one of

nil or 'undecided
     Automatic conversion.  XEmacs attempts to detect the coding system
     used in the file.
'no-conversion
     No conversion.  Use this for binary files and such.  On output,
     graphic characters that are not in ASCII or Latin-1 will be
     replaced by a ?. (For a no-conversion-encoded buffer, these
     characters will only be present if you explicitly insert them.)
'shift-jis
     Shift-JIS (a Japanese encoding commonly used in PC operating systems).
'iso2022
     Any ISO2022-compliant encoding.  Among other things, this includes
     JIS (the Japanese encoding commonly used for e-mail), EUC (the
     standard Unix encoding for Japanese and other languages), and
     Compound Text (the encoding used in X11).  You can specify more
     specific information about the conversion with the FLAGS argument.
'big5
     Big5 (the encoding commonly used for Taiwanese).
'ccl
     The conversion is performed using a user-written pseudo-code
     program.  CCL (Code Conversion Language) is the name of this
     pseudo-code.
'internal
     Write out or read in the raw contents of the memory representing
     the buffer's text.  This is primarily useful for debugging
     purposes, and is only enabled when XEmacs has been compiled with
     DEBUG_XEMACS defined (via the --debug configure option).
     WARNING: Reading in a file using 'internal conversion can result
     in an internal inconsistency in the memory representing a
     buffer's text, which will produce unpredictable results and may
     cause XEmacs to crash.  Under normal circumstances you should
     never use 'internal conversion.

DOC-STRING is a string describing the coding system.

PROPS is a property list, describing the specific nature of the
character set.  Recognized properties are:

'mnemonic
     String to be displayed in the modeline when this coding system is
     active.

'eol-type
     End-of-line conversion to be used.  It should be one of

	nil
		Automatically detect the end-of-line type (LF, CRLF,
		or CR).  Also generate subsidiary coding systems named
		`NAME-unix', `NAME-dos', and `NAME-mac', that are
		identical to this coding system but have an EOL-TYPE
		value of 'lf, 'crlf, and 'cr, respectively.
	'lf
		The end of a line is marked externally using ASCII LF.
		Since this is also the way that XEmacs represents an
		end-of-line internally, specifying this option results
		in no end-of-line conversion.  This is the standard
		format for Unix text files.
	'crlf
		The end of a line is marked externally using ASCII
		CRLF.  This is the standard format for MS-DOS text
		files.
	'cr
		The end of a line is marked externally using ASCII CR.
		This is the standard format for Macintosh text files.
	t
		Automatically detect the end-of-line type but do not
		generate subsidiary coding systems.  (This value is
		converted to nil when stored internally, and
		`coding-system-property' will return nil.)

'post-read-conversion
     Function called after a file has been read in, to perform the
     decoding.  Called with two arguments, BEG and END, denoting
     a region of the current buffer to be decoded.

'pre-write-conversion
     Function called before a file is written out, to perform the
     encoding.  Called with two arguments, BEG and END, denoting
     a region of the current buffer to be encoded.


The following additional properties are recognized if TYPE is 'iso2022:

'charset-g0
'charset-g1
'charset-g2
'charset-g3
     The character set initially designated to the G0 - G3 registers.
     The value should be one of

          -- A charset object (designate that character set)
	  -- nil (do not ever use this register)
	  -- t (no character set is initially designated to
		the register, but may be later on; this automatically
		sets the corresponding `force-g*-on-output' property)

'force-g0-on-output
'force-g1-on-output
'force-g2-on-output
'force-g2-on-output
     If non-nil, send an explicit designation sequence on output before
     using the specified register.

'short
     If non-nil, use the short forms "ESC $ @", "ESC $ A", and
     "ESC $ B" on output in place of the full designation sequences
     "ESC $ ( @", "ESC $ ( A", and "ESC $ ( B".

'no-ascii-eol
     If non-nil, don't designate ASCII to G0 at each end of line on output.
     Setting this to non-nil also suppresses other state-resetting that
     normally happens at the end of a line.

'no-ascii-cntl
     If non-nil, don't designate ASCII to G0 before control chars on output.

'seven
     If non-nil, use 7-bit environment on output.  Otherwise, use 8-bit
     environment.

'lock-shift
     If non-nil, use locking-shift (SO/SI) instead of single-shift
     or designation by escape sequence.

'no-iso6429
     If non-nil, don't use ISO6429's direction specification.

'escape-quoted
     If non-nil, literal control characters that are the same as
     the beginning of a recognized ISO2022 or ISO6429 escape sequence
     (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E),
     SS3 (0x8F), and CSI (0x9B)) are "quoted" with an escape character
     so that they can be properly distinguished from an escape sequence.
     (Note that doing this results in a non-portable encoding.) This
     encoding flag is used for byte-compiled files.  Note that ESC
     is a good choice for a quoting character because there are no
     escape sequences whose second byte is a character from the Control-0
     or Control-1 character sets; this is explicitly disallowed by the
     ISO2022 standard.

'input-charset-conversion
     A list of conversion specifications, specifying conversion of
     characters in one charset to another when decoding is performed.
     Each specification is a list of two elements: the source charset,
     and the destination charset.

'output-charset-conversion
     A list of conversion specifications, specifying conversion of
     characters in one charset to another when encoding is performed.
     The form of each specification is the same as for
     'input-charset-conversion.


The following additional properties are recognized (and required)
if TYPE is 'ccl:

'decode
     CCL program used for decoding (converting to internal format).

'encode
     CCL program used for encoding (converting to external format).
*/
       (name, type, doc_string, props))
{
  struct Lisp_Coding_System *codesys;
  Lisp_Object rest, key, value;
  enum coding_system_type ty;
  int need_to_setup_eol_systems = 1;

  /* Convert type to constant */
  if (NILP (type) || EQ (type, Qundecided))
                                      { ty = CODESYS_AUTODETECT; }
  else if (EQ (type, Qshift_jis))     { ty = CODESYS_SHIFT_JIS; }
  else if (EQ (type, Qiso2022))       { ty = CODESYS_ISO2022; }
  else if (EQ (type, Qbig5))          { ty = CODESYS_BIG5; }
  else if (EQ (type, Qccl))           { ty = CODESYS_CCL; }
  else if (EQ (type, Qno_conversion)) { ty = CODESYS_NO_CONVERSION; }
#ifdef DEBUG_XEMACS
  else if (EQ (type, Qinternal))      { ty = CODESYS_INTERNAL; }
#endif
  else
    signal_simple_error ("Invalid coding system type", type);

  CHECK_SYMBOL (name);

  codesys = allocate_coding_system (ty, name);

  if (NILP (doc_string))
    doc_string = build_string ("");
  else
    CHECK_STRING (doc_string);
  CODING_SYSTEM_DOC_STRING (codesys) = doc_string;

  EXTERNAL_PROPERTY_LIST_LOOP (rest, key, value, props)
    {
      if (EQ (key, Qmnemonic))
	{
          if (!NILP (value))
	    CHECK_STRING (value);
	  CODING_SYSTEM_MNEMONIC (codesys) = value;
	}

      else if (EQ (key, Qeol_type))
	{
	  need_to_setup_eol_systems = NILP (value);
	  if (EQ (value, Qt))
	    value = Qnil;
	  CODING_SYSTEM_EOL_TYPE (codesys) = symbol_to_eol_type (value);
	}

      else if (EQ (key, Qpost_read_conversion)) CODING_SYSTEM_POST_READ_CONVERSION (codesys) = value;
      else if (EQ (key, Qpre_write_conversion)) CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = value;
      else if (ty == CODESYS_ISO2022)
	{
#define FROB_INITIAL_CHARSET(charset_num) \
  CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, charset_num) = \
    ((EQ (value, Qt) || EQ (value, Qnil)) ? value : Fget_charset (value))

	  if      (EQ (key, Qcharset_g0)) FROB_INITIAL_CHARSET (0);
	  else if (EQ (key, Qcharset_g1)) FROB_INITIAL_CHARSET (1);
	  else if (EQ (key, Qcharset_g2)) FROB_INITIAL_CHARSET (2);
	  else if (EQ (key, Qcharset_g3)) FROB_INITIAL_CHARSET (3);

#define FROB_FORCE_CHARSET(charset_num) \
  CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (codesys, charset_num) = !NILP (value)

	  else if (EQ (key, Qforce_g0_on_output)) FROB_FORCE_CHARSET (0);
	  else if (EQ (key, Qforce_g1_on_output)) FROB_FORCE_CHARSET (1);
	  else if (EQ (key, Qforce_g2_on_output)) FROB_FORCE_CHARSET (2);
	  else if (EQ (key, Qforce_g3_on_output)) FROB_FORCE_CHARSET (3);

#define FROB_BOOLEAN_PROPERTY(prop) \
  CODING_SYSTEM_ISO2022_##prop (codesys) = !NILP (value)

	  else if (EQ (key, Qshort))         FROB_BOOLEAN_PROPERTY (SHORT);
	  else if (EQ (key, Qno_ascii_eol))  FROB_BOOLEAN_PROPERTY (NO_ASCII_EOL);
	  else if (EQ (key, Qno_ascii_cntl)) FROB_BOOLEAN_PROPERTY (NO_ASCII_CNTL);
	  else if (EQ (key, Qseven))         FROB_BOOLEAN_PROPERTY (SEVEN);
	  else if (EQ (key, Qlock_shift))    FROB_BOOLEAN_PROPERTY (LOCK_SHIFT);
	  else if (EQ (key, Qno_iso6429))    FROB_BOOLEAN_PROPERTY (NO_ISO6429);
	  else if (EQ (key, Qescape_quoted)) FROB_BOOLEAN_PROPERTY (ESCAPE_QUOTED);

	  else if (EQ (key, Qinput_charset_conversion))
	    {
	      codesys->iso2022.input_conv =
		Dynarr_new (charset_conversion_spec);
	      parse_charset_conversion_specs (codesys->iso2022.input_conv,
					      value);
	    }
	  else if (EQ (key, Qoutput_charset_conversion))
	    {
	      codesys->iso2022.output_conv =
		Dynarr_new (charset_conversion_spec);
	      parse_charset_conversion_specs (codesys->iso2022.output_conv,
					      value);
	    }
	  else
	    signal_simple_error ("Unrecognized property", key);
	}
      else if (EQ (type, Qccl))
	{
	  if (EQ (key, Qdecode))
	    {
	      CHECK_VECTOR (value);
	      CODING_SYSTEM_CCL_DECODE (codesys) = value;
	    }
	  else if (EQ (key, Qencode))
	    {
	      CHECK_VECTOR (value);
	      CODING_SYSTEM_CCL_ENCODE (codesys) = value;
	    }
	  else
	    signal_simple_error ("Unrecognized property", key);
	}
      else
	signal_simple_error ("Unrecognized property", key);
    }

  if (need_to_setup_eol_systems)
    setup_eol_coding_systems (codesys);

  {
    Lisp_Object codesys_obj;
    XSETCODING_SYSTEM (codesys_obj, codesys);
    Fputhash (name, codesys_obj, Vcoding_system_hashtable);
    return codesys_obj;
  }
}

DEFUN ("copy-coding-system", Fcopy_coding_system, 2, 2, 0, /*
Copy OLD-CODING-SYSTEM to NEW-NAME.
If NEW-NAME does not name an existing coding system, a new one will
be created.
*/
       (old_coding_system, new_name))
{
  Lisp_Object new_coding_system;
  old_coding_system = Fget_coding_system (old_coding_system);
  new_coding_system = Ffind_coding_system (new_name);
  if (NILP (new_coding_system))
    {
      XSETCODING_SYSTEM (new_coding_system,
			 allocate_coding_system
			 (XCODING_SYSTEM_TYPE (old_coding_system),
			  new_name));
      Fputhash (new_name, new_coding_system, Vcoding_system_hashtable);
    }

  {
    struct Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system);
    struct Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system);
    memcpy (((char *) to  ) + sizeof (to->header),
	    ((char *) from) + sizeof (from->header),
	    sizeof (*from) - sizeof (from->header));
    to->name = new_name;
  }
  return new_coding_system;
}

static Lisp_Object
subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type)
{
  struct Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
  Lisp_Object new_coding_system;

  if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
    return coding_system;

  switch (type)
    {
    case EOL_AUTODETECT: return coding_system;
    case EOL_LF:   new_coding_system = CODING_SYSTEM_EOL_LF   (cs); break;
    case EOL_CR:   new_coding_system = CODING_SYSTEM_EOL_CR   (cs); break;
    case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break;
    default:       abort ();
    }

  return NILP (new_coding_system) ? coding_system : new_coding_system;
}

DEFUN ("subsidiary-coding-system", Fsubsidiary_coding_system, 2, 2, 0, /*
Return the subsidiary coding system of CODING-SYSTEM with eol type EOL-TYPE.
*/
       (coding_system, eol_type))
{
  coding_system = Fget_coding_system (coding_system);

  return subsidiary_coding_system (coding_system,
				   symbol_to_eol_type (eol_type));
}


/************************************************************************/
/*                         Coding system accessors                      */
/************************************************************************/

DEFUN ("coding-system-doc-string", Fcoding_system_doc_string, 1, 1, 0, /*
Return the doc string for CODING-SYSTEM.
*/
       (coding_system))
{
  coding_system = Fget_coding_system (coding_system);
  return XCODING_SYSTEM_DOC_STRING (coding_system);
}

DEFUN ("coding-system-type", Fcoding_system_type, 1, 1, 0, /*
Return the type of CODING-SYSTEM.
*/
       (coding_system))
{
  switch (XCODING_SYSTEM_TYPE (Fget_coding_system (coding_system)))
    {
    case CODESYS_AUTODETECT:	return Qundecided;
    case CODESYS_SHIFT_JIS:	return Qshift_jis;
    case CODESYS_ISO2022:	return Qiso2022;
    case CODESYS_BIG5:		return Qbig5;
    case CODESYS_CCL:		return Qccl;
    case CODESYS_NO_CONVERSION:	return Qno_conversion;
#ifdef DEBUG_XEMACS
    case CODESYS_INTERNAL:	return Qinternal;
#endif
    default:
      abort ();
    }

  return Qnil; /* not reached */
}

static
Lisp_Object coding_system_charset (Lisp_Object coding_system, int gnum)
{
  Lisp_Object cs
    = XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, gnum);
  
  if (CHARSETP(cs)){
    return XCHARSET_NAME(cs);
  }
  else {
    return Qnil;
  }
}

DEFUN ("coding-system-charset", Fcoding_system_charset, 2, 2, 0, /*
Return initial charset of CODING-SYSTEM designated to GNUM.
GNUM allows 0 .. 3.
*/
       (coding_system, gnum))
{
  coding_system = Fget_coding_system (coding_system);
  CHECK_INT (gnum);

  return coding_system_charset(coding_system, XINT (gnum));
}

DEFUN ("coding-system-property", Fcoding_system_property, 2, 2, 0, /*
Return the PROP property of CODING-SYSTEM.
*/
       (coding_system, prop))
{
  int i, ok = 0;
  enum coding_system_type type;

  coding_system = Fget_coding_system (coding_system);
  CHECK_SYMBOL (prop);
  type = XCODING_SYSTEM_TYPE (coding_system);

  for (i = 0; !ok && i < Dynarr_length (the_codesys_prop_dynarr); i++)
    if (EQ (Dynarr_at (the_codesys_prop_dynarr, i).sym, prop))
      {
	ok = 1;
	switch (Dynarr_at (the_codesys_prop_dynarr, i).prop_type)
	  {
	  case CODESYS_PROP_ALL_OK:
	    break;

	  case CODESYS_PROP_ISO2022:
	    if (type != CODESYS_ISO2022)
	      signal_simple_error
		("Property only valid in ISO2022 coding systems",
		 prop);
	    break;

	  case CODESYS_PROP_CCL:
	    if (type != CODESYS_CCL)
	      signal_simple_error
		("Property only valid in CCL coding systems",
		 prop);
	    break;

	  default:
	    abort ();
	  }
      }

  if (!ok)
    signal_simple_error ("Unrecognized property", prop);

  if (EQ (prop, Qname))
    return XCODING_SYSTEM_NAME (coding_system);
  else if (EQ (prop, Qtype))
    return Fcoding_system_type (coding_system);
  else if (EQ (prop, Qdoc_string))
    return XCODING_SYSTEM_DOC_STRING (coding_system);
  else if (EQ (prop, Qmnemonic))
    return XCODING_SYSTEM_MNEMONIC (coding_system);
  else if (EQ (prop, Qeol_type))
    return eol_type_to_symbol (XCODING_SYSTEM_EOL_TYPE (coding_system));
  else if (EQ (prop, Qeol_lf))
    return XCODING_SYSTEM_EOL_LF (coding_system);
  else if (EQ (prop, Qeol_crlf))
    return XCODING_SYSTEM_EOL_CRLF (coding_system);
  else if (EQ (prop, Qeol_cr))
    return XCODING_SYSTEM_EOL_CR (coding_system);
  else if (EQ (prop, Qpost_read_conversion))
    return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system);
  else if (EQ (prop, Qpre_write_conversion))
    return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system);
  else if (type == CODESYS_ISO2022)
    {
      if (EQ (prop, Qcharset_g0))
	return coding_system_charset (coding_system, 0);
      else if (EQ (prop, Qcharset_g1))
	return coding_system_charset (coding_system, 1);
      else if (EQ (prop, Qcharset_g2))
	return coding_system_charset (coding_system, 2);
      else if (EQ (prop, Qcharset_g3))
	return coding_system_charset (coding_system, 3);

#define FORCE_CHARSET(charset_num) \
  (XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT \
   (coding_system, charset_num) ? Qt : Qnil)

      else if (EQ (prop, Qforce_g0_on_output)) return FORCE_CHARSET (0);
      else if (EQ (prop, Qforce_g1_on_output)) return FORCE_CHARSET (1);
      else if (EQ (prop, Qforce_g2_on_output)) return FORCE_CHARSET (2);
      else if (EQ (prop, Qforce_g3_on_output)) return FORCE_CHARSET (3);

#define LISP_BOOLEAN(prop) \
  (XCODING_SYSTEM_ISO2022_##prop (coding_system) ? Qt : Qnil)

      else if (EQ (prop, Qshort))         return LISP_BOOLEAN (SHORT);
      else if (EQ (prop, Qno_ascii_eol))  return LISP_BOOLEAN (NO_ASCII_EOL);
      else if (EQ (prop, Qno_ascii_cntl)) return LISP_BOOLEAN (NO_ASCII_CNTL);
      else if (EQ (prop, Qseven))         return LISP_BOOLEAN (SEVEN);
      else if (EQ (prop, Qlock_shift))    return LISP_BOOLEAN (LOCK_SHIFT);
      else if (EQ (prop, Qno_iso6429))    return LISP_BOOLEAN (NO_ISO6429);
      else if (EQ (prop, Qescape_quoted)) return LISP_BOOLEAN (ESCAPE_QUOTED);

      else if (EQ (prop, Qinput_charset_conversion))
	return
	  unparse_charset_conversion_specs
	    (XCODING_SYSTEM (coding_system)->iso2022.input_conv);
      else if (EQ (prop, Qoutput_charset_conversion))
	return
	  unparse_charset_conversion_specs
	    (XCODING_SYSTEM (coding_system)->iso2022.output_conv);
      else
	abort ();
    }
  else if (type == CODESYS_CCL)
    {
      if (EQ (prop, Qdecode))
	return XCODING_SYSTEM_CCL_DECODE (coding_system);
      else if (EQ (prop, Qencode))
	return XCODING_SYSTEM_CCL_ENCODE (coding_system);
      else
	abort ();
    }
  else
    abort ();

  return Qnil; /* not reached */
}


/************************************************************************/
/*                       Coding category functions                      */
/************************************************************************/

static int
decode_coding_category (Lisp_Object symbol)
{
  int i;

  CHECK_SYMBOL (symbol);
  for (i = 0; i <= CODING_CATEGORY_LAST; i++)
    if (EQ (coding_category_symbol[i], symbol))
      return i;

  signal_simple_error ("Unrecognized coding category", symbol);
  return 0; /* not reached */
}

DEFUN ("coding-category-list", Fcoding_category_list, 0, 0, 0, /*
Return a list of all recognized coding categories.
*/
       ())
{
  int i;
  Lisp_Object list = Qnil;

  for (i = CODING_CATEGORY_LAST; i >= 0; i--)
    list = Fcons (coding_category_symbol[i], list);
  return list;
}

DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /*
Change the priority order of the coding categories.
LIST should be list of coding categories, in descending order of
priority.  Unspecified coding categories will be lower in priority
than all specified ones, in the same relative order they were in
previously.
*/
       (list))
{
  int category_to_priority[CODING_CATEGORY_LAST + 1];
  int i, j;
  Lisp_Object rest;

  /* First generate a list that maps coding categories to priorities. */

  for (i = 0; i <= CODING_CATEGORY_LAST; i++)
    category_to_priority[i] = -1;

  /* Highest priority comes from the specified list. */
  i = 0;
  EXTERNAL_LIST_LOOP (rest, list)
    {
      int cat = decode_coding_category (XCAR (rest));

      if (category_to_priority[cat] >= 0)
	signal_simple_error ("Duplicate coding category in list", XCAR (rest));
      category_to_priority[cat] = i++;
    }

  /* Now go through the existing categories by priority to retrieve
     the categories not yet specified and preserve their priority
     order. */
  for (j = 0; j <= CODING_CATEGORY_LAST; j++)
    {
      int cat = coding_category_by_priority[j];
      if (category_to_priority[cat] < 0)
	category_to_priority[cat] = i++;
    }

  /* Now we need to construct the inverse of the mapping we just
     constructed. */

  for (i = 0; i <= CODING_CATEGORY_LAST; i++)
    coding_category_by_priority[category_to_priority[i]] = i;

  /* Phew!  That was confusing. */
  return Qnil;
}

DEFUN ("coding-priority-list", Fcoding_priority_list, 0, 0, 0, /*
Return a list of coding categories in descending order of priority.
*/
       ())
{
  int i;
  Lisp_Object list = Qnil;

  for (i = CODING_CATEGORY_LAST; i >= 0; i--)
    list = Fcons (coding_category_symbol[coding_category_by_priority[i]],
		  list);
  return list;
}

DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /*
Change the coding system associated with a coding category.
*/
       (coding_category, coding_system))
{
  int cat = decode_coding_category (coding_category);

  coding_system = Fget_coding_system (coding_system);
  coding_category_system[cat] = coding_system;
  return Qnil;
}

DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /*
Return the coding system associated with a coding category.
*/
       (coding_category))
{
  int cat = decode_coding_category (coding_category);
  Lisp_Object sys = coding_category_system[cat];

  if (!NILP (sys))
    return XCODING_SYSTEM_NAME (sys);
  return Qnil;
}


/************************************************************************/
/*                     Detecting the encoding of data                   */
/************************************************************************/

struct detection_state
{
  enum eol_type eol_type;
  int seen_non_ascii;
  int mask;

  struct
    {
      int mask;
      int in_second_byte;
    }
  big5;

  struct
    {
      int mask;
      int in_second_byte;
    }
  shift_jis;

  struct
    {
      int mask;
      int initted;
      struct iso2022_decoder iso;
      unsigned int flags;
      int high_byte_count;
      unsigned int saw_single_shift:1;
    }
  iso2022;

  struct
    {
      int seen_anything;
      int just_saw_cr;
    }
  eol;
};

static int
acceptable_control_char_p (int c)
{
  switch (c)
    {
      /* Allow and ignore control characters that you might
	 reasonably see in a text file */
    case '\r':
    case '\n':
    case '\t':
    case  7: /* bell */
    case  8: /* backspace */
    case 11: /* vertical tab */
    case 12: /* form feed */
    case 26: /* MS-DOS C-z junk */
    case 31: /* '^_' -- for info */
      return 1;
    default:
      return 0;
    }
}

static int
mask_has_at_most_one_bit_p (int mask)
{
  /* Perhaps the only thing useful you learn from intensive Microsoft
     technical interviews */
  return (mask & (mask - 1)) == 0;
}

static enum eol_type
detect_eol_type (struct detection_state *st, CONST unsigned char *src,
		 unsigned int n)
{
  int c;

  while (n--)
    {
      c = *src++;
      if (c == '\r')
	st->eol.just_saw_cr = 1;
      else
	{
	  if (c == '\n')
	    {
	      if (st->eol.just_saw_cr)
		return EOL_CRLF;
	      else if (st->eol.seen_anything)
		return EOL_LF;
	    }
	  else if (st->eol.just_saw_cr)
	    return EOL_CR;
	  st->eol.just_saw_cr = 0;
	}
      st->eol.seen_anything = 1;
    }

  return EOL_AUTODETECT;
}

/* Attempt to determine the encoding and EOL type of the given text.
   Before calling this function for the first type, you must initialize
   st->eol_type as appropriate and initialize st->mask to ~0.

   st->eol_type holds the determined EOL type, or EOL_AUTODETECT if
   not yet known.

   st->mask holds the determined coding category mask, or ~0 if only
   ASCII has been seen so far.

   Returns:

   0 == st->eol_type is EOL_AUTODETECT and/or more than coding category
        is present in st->mask
   1 == definitive answers are here for both st->eol_type and st->mask
*/

static int
detect_coding_type (struct detection_state *st, CONST unsigned char *src,
		    unsigned int n, int just_do_eol)
{
  int c;

  if (st->eol_type == EOL_AUTODETECT)
    st->eol_type = detect_eol_type (st, src, n);

  if (just_do_eol)
    return st->eol_type != EOL_AUTODETECT;

  if (!st->seen_non_ascii)
    {
      for (; n; n--, src++)
	{
	  c = *src;
	  if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80)
	    {
	      st->seen_non_ascii = 1;
	      st->shift_jis.mask = ~0;
	      st->big5.mask = ~0;
	      st->iso2022.mask = ~0;
	      break;
	    }
	}
    }

  if (!n)
    return 0;

  if (!mask_has_at_most_one_bit_p (st->iso2022.mask))
    st->iso2022.mask = detect_coding_iso2022 (st, src, n);
  if (!mask_has_at_most_one_bit_p (st->shift_jis.mask))
    st->shift_jis.mask = detect_coding_sjis (st, src, n);
  if (!mask_has_at_most_one_bit_p (st->big5.mask))
    st->big5.mask = detect_coding_big5 (st, src, n);

  st->mask = st->iso2022.mask | st->shift_jis.mask | st->big5.mask;

  {
    int retval = mask_has_at_most_one_bit_p (st->mask);
    st->mask |= CODING_CATEGORY_NO_CONVERSION_MASK;
    return retval && st->eol_type != EOL_AUTODETECT;
  }
}

static Lisp_Object
coding_system_from_mask (int mask)
{
  if (mask == ~0)
    {
      /* If the file was entirely or basically ASCII, use the
	 default value of `buffer-file-coding-system'. */
      Lisp_Object retval =
	XBUFFER (Vbuffer_defaults)->buffer_file_coding_system;
      if (!NILP (retval))
	{
	  retval = Ffind_coding_system (retval);
	  if (NILP (retval))
	    {
	      warn_when_safe
		(Qbad_variable, Qwarning,
		 "Invalid `default-buffer-file-coding-system', set to nil");
	      XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil;
	    }
	}
      if (NILP (retval))
	retval = Fget_coding_system (Qno_conversion);
      return retval;
    }
  else
    {
      int i;
      int cat = -1;

      mask = postprocess_iso2022_mask (mask);

      /* Look through the coding categories by priority and find
	 the first one that is allowed. */
      for (i = 0; i <= CODING_CATEGORY_LAST; i++)
	{
	  cat = coding_category_by_priority[i];
	  if ((mask & (1 << cat)) &&
	      !NILP (coding_category_system[cat]))
	    break;
	}
      if (cat >= 0)
	return coding_category_system[cat];
      else
	return Fget_coding_system (Qno_conversion);
    }
}

/* Given a seekable read stream and potential coding system and EOL type
   as specified, do any autodetection that is called for.  If the
   coding system and/or EOL type are not autodetect, they will be left
   alone; but this function will never return an autodetect coding system
   or EOL type.

   This function does not automatically fetch subsidiary coding systems;
   that should be unnecessary with the explicit eol-type argument. */

void
determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
			      enum eol_type *eol_type_in_out)
{
  struct detection_state decst;

  if (*eol_type_in_out == EOL_AUTODETECT)
    *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE (*codesys_in_out);

  memset (&decst, 0, sizeof (decst));
  decst.eol_type = *eol_type_in_out;
  decst.mask = ~0;

  /* If autodetection is called for, do it now. */
  if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT ||
      *eol_type_in_out == EOL_AUTODETECT)
    {

      while (1)
	{
	  unsigned char random_buffer[4096];
	  int nread;

	  nread = Lstream_read (stream, random_buffer, sizeof (random_buffer));
	  if (!nread)
	    break;
	  if (detect_coding_type (&decst, random_buffer, nread,
				  XCODING_SYSTEM_TYPE (*codesys_in_out) !=
				  CODESYS_AUTODETECT))
	    break;
	}

      *eol_type_in_out = decst.eol_type;
      if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT)
	*codesys_in_out = coding_system_from_mask (decst.mask);
    }

  /* If we absolutely can't determine the EOL type, just assume LF. */
  if (*eol_type_in_out == EOL_AUTODETECT)
    *eol_type_in_out = EOL_LF;

  Lstream_rewind (stream);
}

DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /*
Detect coding system of the text in the region between START and END.
Returned a list of possible coding systems ordered by priority.
If only ASCII characters are found, it returns 'undecided or one of
its subsidiary coding systems according to a detected end-of-line
type.  Optional arg BUFFER defaults to the current buffer.
*/
       (start, end, buffer))
{
  Lisp_Object val = Qnil;
  struct buffer *buf = decode_buffer (buffer, 0);
  Bufpos b, e;
  Lisp_Object instream, lb_instream;
  Lstream *istr, *lb_istr;
  struct detection_state decst;
  struct gcpro gcpro1, gcpro2;

  get_buffer_range_char (buf, start, end, &b, &e, 0);
  lb_instream = make_lisp_buffer_input_stream (buf, b, e, 0);
  lb_istr = XLSTREAM (lb_instream);
  instream = make_encoding_input_stream (lb_istr, Fget_coding_system (Qbinary));
  istr = XLSTREAM (instream);
  GCPRO2 (instream, lb_instream);
  memset (&decst, 0, sizeof (decst));
  decst.eol_type = EOL_AUTODETECT;
  decst.mask = ~0;
  while (1)
    {
      unsigned char random_buffer[4096];
      int nread = Lstream_read (istr, random_buffer, sizeof (random_buffer));

      if (!nread)
	break;
      if (detect_coding_type (&decst, random_buffer, nread, 0))
	break;
    }

  if (decst.mask == ~0)
    val = subsidiary_coding_system (Fget_coding_system (Qundecided),
				    decst.eol_type);
  else
    {
      int i;

      val = Qnil;

      decst.mask = postprocess_iso2022_mask (decst.mask);

      for (i = CODING_CATEGORY_LAST; i >= 0; i--)
	{
	  int sys = coding_category_by_priority[i];
	  if (decst.mask & (1 << sys))
	    {
	      Lisp_Object codesys = coding_category_system[sys];
	      if (!NILP (codesys))
		codesys = subsidiary_coding_system (codesys, decst.eol_type);
	      val = Fcons (codesys, val);
	    }
	}
    }
  Lstream_close (istr);
  UNGCPRO;
  Lstream_delete (istr);
  Lstream_delete (lb_istr);
  return val;
}


/************************************************************************/
/*           Converting to internal Mule format ("decoding")            */
/************************************************************************/

/* A decoding stream is a stream used for decoding text (i.e.
   converting from some external format to internal format).
   The decoding-stream object keeps track of the actual coding
   stream, the stream that is at the other end, and data that
   needs to be persistent across the lifetime of the stream. */

/* Handle the EOL stuff related to just-read-in character C.
   EOL_TYPE is the EOL type of the coding stream.
   FLAGS is the current value of FLAGS in the coding stream, and may
   be modified by this macro.  (The macro only looks at the
   CODING_STATE_CR flag.)  DST is the Dynarr to which the decoded
   bytes are to be written.  You need to also define a local goto
   label "label_continue_loop" that is at the end of the main
   character-reading loop.

   If C is a CR character, then this macro handles it entirely and
   jumps to label_continue_loop.  Otherwise, this macro does not add
   anything to DST, and continues normally.  You should continue
   processing C normally after this macro. */

#define DECODE_HANDLE_EOL_TYPE(eol_type, c, flags, dst)		\
do {								\
  if (c == '\r')						\
    {								\
      if (eol_type == EOL_CR)					\
	Dynarr_add (dst, '\n');					\
      else if (eol_type != EOL_CRLF || flags & CODING_STATE_CR)	\
	Dynarr_add (dst, c);					\
      else							\
	flags |= CODING_STATE_CR;				\
      goto label_continue_loop;					\
    }								\
  else if (flags & CODING_STATE_CR)				\
    {	/* eol_type == CODING_SYSTEM_EOL_CRLF */		\
      if (c != '\n')						\
	Dynarr_add (dst, '\r');					\
      flags &= ~CODING_STATE_CR;				\
    }								\
} while (0)

/* C should be a binary character in the range 0 - 255; convert
   to internal format and add to Dynarr DST. */

#define DECODE_ADD_BINARY_CHAR(c, dst)		\
do {						\
  if (BYTE_ASCII_P (c))				\
    Dynarr_add (dst, c);			\
  else if (BYTE_C1_P (c))			\
    {						\
      Dynarr_add (dst, LEADING_BYTE_CONTROL_1);	\
      Dynarr_add (dst, c + 0x20);		\
    }						\
  else						\
    {						\
      Dynarr_add (dst, LEADING_BYTE_LATIN_ISO8859_1); \
      Dynarr_add (dst, c);			\
    }						\
} while (0)

#define DECODE_OUTPUT_PARTIAL_CHAR(ch)	\
do {					\
  if (ch)				\
    {					\
      DECODE_ADD_BINARY_CHAR (ch, dst);	\
      ch = 0;				\
    }					\
} while (0)

#define DECODE_HANDLE_END_OF_CONVERSION(flags, ch, dst)	\
do {					\
  DECODE_OUTPUT_PARTIAL_CHAR (ch);	\
  if ((flags & CODING_STATE_END) &&	\
      (flags & CODING_STATE_CR))	\
    Dynarr_add (dst, '\r');		\
} while (0)

#define DECODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, decoding)

struct decoding_stream
{
  /* Coding system that governs the conversion. */
  struct Lisp_Coding_System *codesys;

  /* Stream that we read the encoded data from or
     write the decoded data to. */
  Lstream *other_end;

  /* If we are reading, then we can return only a fixed amount of
     data, so if the conversion resulted in too much data, we store it
     here for retrieval the next time around. */
  unsigned_char_dynarr *runoff;

  /* FLAGS holds flags indicating the current state of the decoding.
     Some of these flags are dependent on the coding system. */
  unsigned int flags;

  /* CH holds a partially built-up character.  Since we only deal
     with one- and two-byte characters at the moment, we only use
     this to store the first byte of a two-byte character. */
  unsigned int ch;

  /* EOL_TYPE specifies the type of end-of-line conversion that
     currently applies.  We need to keep this separate from the
     EOL type stored in CODESYS because the latter might indicate
     automatic EOL-type detection while the former will always
     indicate a particular EOL type. */
  enum eol_type eol_type;

  /* Additional ISO2022 information.  We define the structure above
     because it's also needed by the detection routines. */
  struct iso2022_decoder iso2022;

  /* Additional information (the state of the running CCL program)
     used by the CCL decoder. */
  struct ccl_program ccl;

  struct detection_state decst;
};

static int decoding_reader     (Lstream *stream,       unsigned char *data, int size);
static int decoding_writer     (Lstream *stream, CONST unsigned char *data, int size);
static int decoding_rewinder   (Lstream *stream);
static int decoding_seekable_p (Lstream *stream);
static int decoding_flusher    (Lstream *stream);
static int decoding_closer     (Lstream *stream);
static Lisp_Object decoding_marker (Lisp_Object stream,
				    void (*markobj) (Lisp_Object));

DEFINE_LSTREAM_IMPLEMENTATION ("decoding", lstream_decoding,
			       sizeof (struct decoding_stream));

static Lisp_Object
decoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
{
  Lstream *str = DECODING_STREAM_DATA (XLSTREAM (stream))->other_end;
  Lisp_Object str_obj;

  /* We do not need to mark the coding systems or charsets stored
     within the stream because they are stored in a global list
     and automatically marked. */

  XSETLSTREAM (str_obj, str);
  (markobj) (str_obj);
  if (str->imp->marker)
    return (str->imp->marker) (str_obj, markobj);
  else
    return Qnil;
}

/* Read SIZE bytes of data and store it into DATA.  We are a decoding stream
   so we read data from the other end, decode it, and store it into DATA. */

static int
decoding_reader (Lstream *stream, unsigned char *data, int size)
{
  struct decoding_stream *str = DECODING_STREAM_DATA (stream);
  unsigned char *orig_data = data;
  int read_size;
  int error_occurred = 0;

  /* We need to interface to mule_decode(), which expects to take some
     amount of data and store the result into a Dynarr.  We have
     mule_decode() store into str->runoff, and take data from there
     as necessary. */

  /* We loop until we have enough data, reading chunks from the other
     end and decoding it. */
  while (1)
    {
      /* Take data from the runoff if we can.  Make sure to take at
	 most SIZE bytes, and delete the data from the runoff. */
      if (Dynarr_length (str->runoff) > 0)
	{
	  int chunk = min (size, Dynarr_length (str->runoff));
	  memcpy (data, Dynarr_atp (str->runoff, 0), chunk);
	  Dynarr_delete_many (str->runoff, 0, chunk);
	  data += chunk;
	  size -= chunk;
	}

      if (size == 0)
	break; /* No more room for data */

      if (str->flags & CODING_STATE_END)
	/* This means that on the previous iteration, we hit the EOF on
	   the other end.  We loop once more so that mule_decode() can
	   output any final stuff it may be holding, or any "go back
	   to a sane state" escape sequences. (This latter makes sense
	   during encoding.) */
	break;

      /* Exhausted the runoff, so get some more.  DATA has at least
	 SIZE bytes left of storage in it, so it's OK to read directly
	 into it.  (We'll be overwriting above, after we've decoded it
	 into the runoff.) */
      read_size = Lstream_read (str->other_end, data, size);
      if (read_size < 0)
	{
	  error_occurred = 1;
	  break;
	}
      if (read_size == 0)
	/* There might be some more end data produced in the translation.
	   See the comment above. */
	str->flags |= CODING_STATE_END;
      mule_decode (stream, data, str->runoff, read_size);
    }

  if (data - orig_data == 0)
    return error_occurred ? -1 : 0;
  else
    return data - orig_data;
}

static int
decoding_writer (Lstream *stream, CONST unsigned char *data, int size)
{
  struct decoding_stream *str = DECODING_STREAM_DATA (stream);
  int retval;

  /* Decode all our data into the runoff, and then attempt to write
     it all out to the other end.  Remove whatever chunk we succeeded
     in writing. */
  mule_decode (stream, data, str->runoff, size);
  retval = Lstream_write (str->other_end, Dynarr_atp (str->runoff, 0),
			  Dynarr_length (str->runoff));
  if (retval > 0)
    Dynarr_delete_many (str->runoff, 0, retval);
  /* Do NOT return retval.  The return value indicates how much
     of the incoming data was written, not how many bytes were
     written. */
  return size;
}

static void
reset_decoding_stream (struct decoding_stream *str)
{
  if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_ISO2022)
    {
      Lisp_Object coding_system = Qnil;
      XSETCODING_SYSTEM (coding_system, str->codesys);
      reset_iso2022 (coding_system, &str->iso2022);
    }
  else if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_CCL)
    {
      setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys));
    }

  str->flags = str->ch = 0;
}

static int
decoding_rewinder (Lstream *stream)
{
  struct decoding_stream *str = DECODING_STREAM_DATA (stream);
  reset_decoding_stream (str);
  Dynarr_reset (str->runoff);
  return Lstream_rewind (str->other_end);
}

static int
decoding_seekable_p (Lstream *stream)
{
  struct decoding_stream *str = DECODING_STREAM_DATA (stream);
  return Lstream_seekable_p (str->other_end);
}

static int
decoding_flusher (Lstream *stream)
{
  struct decoding_stream *str = DECODING_STREAM_DATA (stream);
  return Lstream_flush (str->other_end);
}

static int
decoding_closer (Lstream *stream)
{
  struct decoding_stream *str = DECODING_STREAM_DATA (stream);
  if (stream->flags & LSTREAM_FL_WRITE)
    {
      str->flags |= CODING_STATE_END;
      decoding_writer (stream, 0, 0);
    }
  Dynarr_free (str->runoff);
  if (str->iso2022.composite_chars)
    Dynarr_free (str->iso2022.composite_chars);
  return Lstream_close (str->other_end);
}

Lisp_Object
decoding_stream_coding_system (Lstream *stream)
{
  Lisp_Object coding_system = Qnil;
  struct decoding_stream *str = DECODING_STREAM_DATA (stream);

  XSETCODING_SYSTEM (coding_system, str->codesys);
  return subsidiary_coding_system (coding_system, str->eol_type);
}

void
set_decoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
{
  struct Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
  struct decoding_stream *str = DECODING_STREAM_DATA (lstr);
  str->codesys = cs;
  if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
    str->eol_type = CODING_SYSTEM_EOL_TYPE (cs);
  reset_decoding_stream (str);
}

/* WARNING WARNING WARNING WARNING!!!!!  If you open up a decoding
   stream for writing, no automatic code detection will be performed.
   The reason for this is that automatic code detection requires a
   seekable input.  Things will also fail if you open a decoding
   stream for reading using a non-fully-specified coding system and
   a non-seekable input stream. */

static Lisp_Object
make_decoding_stream_1 (Lstream *stream, Lisp_Object codesys,
			CONST char *mode)
{
  Lstream *lstr = Lstream_new (lstream_decoding, mode);
  struct decoding_stream *str = DECODING_STREAM_DATA (lstr);
  Lisp_Object obj;

  memset (str, 0, sizeof (*str));
  str->other_end = stream;
  str->runoff = (unsigned_char_dynarr *) Dynarr_new (unsigned_char);
  str->eol_type = EOL_AUTODETECT;
  if (!strcmp (mode, "r")
      && Lstream_seekable_p (stream))
    /* We can determine the coding system now. */
    determine_real_coding_system (stream, &codesys, &str->eol_type);
  set_decoding_stream_coding_system (lstr, codesys);
  str->decst.eol_type = str->eol_type;
  str->decst.mask = ~0;
  XSETLSTREAM (obj, lstr);
  return obj;
}

Lisp_Object
make_decoding_input_stream (Lstream *stream, Lisp_Object codesys)
{
  return make_decoding_stream_1 (stream, codesys, "r");
}

Lisp_Object
make_decoding_output_stream (Lstream *stream, Lisp_Object codesys)
{
  return make_decoding_stream_1 (stream, codesys, "w");
}

/* Note: the decode_coding_* functions all take the same
   arguments as mule_decode(), which is to say some SRC data of
   size N, which is to be stored into dynamic array DST.
   DECODING is the stream within which the decoding is
   taking place, but no data is actually read from or
   written to that stream; that is handled in decoding_reader()
   or decoding_writer().  This allows the same functions to
   be used for both reading and writing. */

static void
mule_decode (Lstream *decoding, CONST unsigned char *src,
	     unsigned_char_dynarr *dst, unsigned int n)
{
  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);

  /* If necessary, do encoding-detection now.  We do this when
     we're a writing stream or a non-seekable reading stream,
     meaning that we can't just process the whole input,
     rewind, and start over. */

  if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_AUTODETECT ||
      str->eol_type == EOL_AUTODETECT)
    {
      Lisp_Object codesys = Qnil;

      XSETCODING_SYSTEM (codesys, str->codesys);
      detect_coding_type (&str->decst, src, n,
			  CODING_SYSTEM_TYPE (str->codesys) !=
			  CODESYS_AUTODETECT);
      if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_AUTODETECT &&
	  str->decst.mask != ~0)
	/* #### This is cheesy.  What we really ought to do is
	   buffer up a certain amount of data so as to get a
	   less random result. */
	codesys = coding_system_from_mask (str->decst.mask);
      str->eol_type = str->decst.eol_type;
      if (XCODING_SYSTEM (codesys) != str->codesys)
	{
	  /* Preserve the CODING_STATE_END flag in case it was set.
	     If we erase it, bad things might happen. */
	  int was_end = str->flags & CODING_STATE_END;
          set_decoding_stream_coding_system (decoding, codesys);
	  if (was_end)
	    str->flags |= CODING_STATE_END;
	}
    }

  switch (CODING_SYSTEM_TYPE (str->codesys))
    {
#ifdef DEBUG_XEMACS
    case CODESYS_INTERNAL:
      Dynarr_add_many (dst, src, n);
      break;
#endif
    case CODESYS_AUTODETECT:
      /* If we got this far and still haven't decided on the coding
	 system, then do no conversion. */
    case CODESYS_NO_CONVERSION:
      decode_coding_no_conversion (decoding, src, dst, n);
      break;
    case CODESYS_SHIFT_JIS:
      decode_coding_sjis (decoding, src, dst, n);
      break;
    case CODESYS_BIG5:
      decode_coding_big5 (decoding, src, dst, n);
      break;
    case CODESYS_CCL:
      ccl_driver (&str->ccl, src, dst, n, 0);
      break;
    case CODESYS_ISO2022:
      decode_coding_iso2022 (decoding, src, dst, n);
      break;
    default:
      abort ();
    }
}

DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4, 0, /*
Decode the text between START and END which is encoded in CODING-SYSTEM.
This is useful if you've read in encoded text from a file without decoding
it (e.g. you read in a JIS-formatted file but used the `binary' or
`no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B").
Return length of decoded text.
BUFFER defaults to the current buffer if unspecified.
*/
       (start, end, coding_system, buffer))
{
  Bufpos b, e;
  struct buffer *buf = decode_buffer (buffer, 0);
  Lisp_Object instream, lb_outstream, de_outstream, outstream;
  Lstream *istr, *ostr;
  struct gcpro gcpro1, gcpro2, gcpro3, gcpro4;

  get_buffer_range_char (buf, start, end, &b, &e, 0);

  barf_if_buffer_read_only (buf, b, e);

  coding_system = Fget_coding_system (coding_system);
  instream = make_lisp_buffer_input_stream  (buf, b, e, 0);
  lb_outstream = make_lisp_buffer_output_stream (buf, b, 0);
  de_outstream = make_decoding_output_stream (XLSTREAM (lb_outstream),
					      coding_system);
  outstream = make_encoding_output_stream (XLSTREAM (de_outstream),
					   Fget_coding_system (Qbinary));
  istr = XLSTREAM (instream);
  ostr = XLSTREAM (outstream);
  GCPRO4 (instream, lb_outstream, de_outstream, outstream);

  /* The chain of streams looks like this:

     [BUFFER] <----- send through
                     ------> [ENCODE AS BINARY]
		             ------> [DECODE AS SPECIFIED]
			             ------> [BUFFER]
   */

  while (1)
    {
      char tempbuf[1024]; /* some random amount */
      Bufpos newpos, even_newer_pos;
      Bufpos oldpos = lisp_buffer_stream_startpos (istr);
      int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));

      if (!size_in_bytes)
	break;
      newpos = lisp_buffer_stream_startpos (istr);
      Lstream_write (ostr, tempbuf, size_in_bytes);
      even_newer_pos = lisp_buffer_stream_startpos (istr);
      buffer_delete_range (buf, even_newer_pos - (newpos - oldpos),
			   even_newer_pos, 0);
    }
  Lstream_close (istr);
  Lstream_close (ostr);
  UNGCPRO;
  Lstream_delete (istr);
  Lstream_delete (ostr);
  Lstream_delete (XLSTREAM (de_outstream));
  Lstream_delete (XLSTREAM (lb_outstream));
  return Qnil;
}


/************************************************************************/
/*           Converting to an external encoding ("encoding")            */
/************************************************************************/

/* An encoding stream is an output stream.  When you create the
   stream, you specify the coding system that governs the encoding
   and another stream that the resulting encoded data is to be
   sent to, and then start sending data to it. */

#define ENCODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, encoding)

struct encoding_stream
{
  /* Coding system that governs the conversion. */
  struct Lisp_Coding_System *codesys;

  /* Stream that we read the encoded data from or
     write the decoded data to. */
  Lstream *other_end;

  /* If we are reading, then we can return only a fixed amount of
     data, so if the conversion resulted in too much data, we store it
     here for retrieval the next time around. */
  unsigned_char_dynarr *runoff;

  /* FLAGS holds flags indicating the current state of the encoding.
     Some of these flags are dependent on the coding system. */
  unsigned int flags;

  /* CH holds a partially built-up character.  Since we only deal
     with one- and two-byte characters at the moment, we only use
     this to store the first byte of a two-byte character. */
  unsigned int ch;

  /* Additional information used by the ISO2022 encoder. */
  struct
    {
      /* CHARSET holds the character sets currently assigned to the G0
	 through G3 registers.  It is initialized from the array
	 INITIAL_CHARSET in CODESYS. */
      Lisp_Object charset[4];

      /* Which registers are currently invoked into the left (GL) and
	 right (GR) halves of the 8-bit encoding space? */
      int register_left, register_right;

      /* Whether we need to explicitly designate the charset in the
	 G? register before using it.  It is initialized from the
	 array FORCE_CHARSET_ON_OUTPUT in CODESYS. */
      unsigned char force_charset_on_output[4];

      /* Other state variables that need to be preserved across
	 invocations. */
      Lisp_Object current_charset;
      int current_half;
      int current_char_boundary;
    } iso2022;

  /* Additional information (the state of the running CCL program)
     used by the CCL encoder. */
  struct ccl_program ccl;
};

static int encoding_reader (Lstream *stream, unsigned char *data, int size);
static int encoding_writer (Lstream *stream, CONST unsigned char *data,
			    int size);
static int encoding_rewinder   (Lstream *stream);
static int encoding_seekable_p (Lstream *stream);
static int encoding_flusher    (Lstream *stream);
static int encoding_closer     (Lstream *stream);
static Lisp_Object encoding_marker (Lisp_Object stream,
				    void (*markobj) (Lisp_Object));

DEFINE_LSTREAM_IMPLEMENTATION ("encoding", lstream_encoding,
			       sizeof (struct encoding_stream));

static Lisp_Object
encoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
{
  Lstream *str = ENCODING_STREAM_DATA (XLSTREAM (stream))->other_end;
  Lisp_Object str_obj;

  /* We do not need to mark the coding systems or charsets stored
     within the stream because they are stored in a global list
     and automatically marked. */

  XSETLSTREAM (str_obj, str);
  (markobj) (str_obj);
  if (str->imp->marker)
    return (str->imp->marker) (str_obj, markobj);
  else
    return Qnil;
}

/* Read SIZE bytes of data and store it into DATA.  We are a encoding stream
   so we read data from the other end, encode it, and store it into DATA. */

static int
encoding_reader (Lstream *stream, unsigned char *data, int size)
{
  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
  unsigned char *orig_data = data;
  int read_size;
  int error_occurred = 0;

  /* We need to interface to mule_encode(), which expects to take some
     amount of data and store the result into a Dynarr.  We have
     mule_encode() store into str->runoff, and take data from there
     as necessary. */

  /* We loop until we have enough data, reading chunks from the other
     end and encoding it. */
  while (1)
    {
      /* Take data from the runoff if we can.  Make sure to take at
	 most SIZE bytes, and delete the data from the runoff. */
      if (Dynarr_length (str->runoff) > 0)
	{
	  int chunk = min (size, Dynarr_length (str->runoff));
	  memcpy (data, Dynarr_atp (str->runoff, 0), chunk);
	  Dynarr_delete_many (str->runoff, 0, chunk);
	  data += chunk;
	  size -= chunk;
	}

      if (size == 0)
	break; /* No more room for data */

      if (str->flags & CODING_STATE_END)
	/* This means that on the previous iteration, we hit the EOF on
	   the other end.  We loop once more so that mule_encode() can
	   output any final stuff it may be holding, or any "go back
	   to a sane state" escape sequences. (This latter makes sense
	   during encoding.) */
	break;

      /* Exhausted the runoff, so get some more.  DATA at least SIZE bytes
	 left of storage in it, so it's OK to read directly into it.
	 (We'll be overwriting above, after we've encoded it into the
	 runoff.) */
      read_size = Lstream_read (str->other_end, data, size);
      if (read_size < 0)
	{
	  error_occurred = 1;
	  break;
	}
      if (read_size == 0)
	/* There might be some more end data produced in the translation.
	   See the comment above. */
	str->flags |= CODING_STATE_END;
      mule_encode (stream, data, str->runoff, read_size);
    }

  if (data == orig_data)
    return error_occurred ? -1 : 0;
  else
    return data - orig_data;
}

static int
encoding_writer (Lstream *stream, CONST unsigned char *data, int size)
{
  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
  int retval;

  /* Encode all our data into the runoff, and then attempt to write
     it all out to the other end.  Remove whatever chunk we succeeded
     in writing. */
  mule_encode (stream, data, str->runoff, size);
  retval = Lstream_write (str->other_end, Dynarr_atp (str->runoff, 0),
			  Dynarr_length (str->runoff));
  if (retval > 0)
    Dynarr_delete_many (str->runoff, 0, retval);
  /* Do NOT return retval.  The return value indicates how much
     of the incoming data was written, not how many bytes were
     written. */
  return size;
}

static void
reset_encoding_stream (struct encoding_stream *str)
{
  switch (CODING_SYSTEM_TYPE (str->codesys))
    {
    case CODESYS_ISO2022:
      {
	int i;

	for (i = 0; i < 4; i++)
	  {
	    str->iso2022.charset[i] =
	      CODING_SYSTEM_ISO2022_INITIAL_CHARSET (str->codesys, i);
	    str->iso2022.force_charset_on_output[i] =
	      CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (str->codesys, i);
	  }
	str->iso2022.register_left = 0;
	str->iso2022.register_right = 1;
	str->iso2022.current_charset = Qnil;
	str->iso2022.current_half = 0;
	str->iso2022.current_char_boundary = 1;
	break;
      }
    case CODESYS_CCL:
      setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_ENCODE (str->codesys));
      break;
    default:
      break;
    }

  str->flags = str->ch = 0;
}

static int
encoding_rewinder (Lstream *stream)
{
  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
  reset_encoding_stream (str);
  Dynarr_reset (str->runoff);
  return Lstream_rewind (str->other_end);
}

static int
encoding_seekable_p (Lstream *stream)
{
  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
  return Lstream_seekable_p (str->other_end);
}

static int
encoding_flusher (Lstream *stream)
{
  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
  return Lstream_flush (str->other_end);
}

static int
encoding_closer (Lstream *stream)
{
  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
  if (stream->flags & LSTREAM_FL_WRITE)
    {
      str->flags |= CODING_STATE_END;
      encoding_writer (stream, 0, 0);
    }
  Dynarr_free (str->runoff);
  return Lstream_close (str->other_end);
}

Lisp_Object
encoding_stream_coding_system (Lstream *stream)
{
  Lisp_Object coding_system = Qnil;
  struct encoding_stream *str = ENCODING_STREAM_DATA (stream);

  XSETCODING_SYSTEM (coding_system, str->codesys);
  return coding_system;
}

void
set_encoding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
{
  struct Lisp_Coding_System *cs = XCODING_SYSTEM (codesys);
  struct encoding_stream *str = ENCODING_STREAM_DATA (lstr);
  str->codesys = cs;
  reset_encoding_stream (str);
}

static Lisp_Object
make_encoding_stream_1 (Lstream *stream, Lisp_Object codesys,
			CONST char *mode)
{
  Lstream *lstr = Lstream_new (lstream_encoding, mode);
  struct encoding_stream *str = ENCODING_STREAM_DATA (lstr);
  Lisp_Object obj;

  memset (str, 0, sizeof (*str));
  str->runoff = Dynarr_new (unsigned_char);
  str->other_end = stream;
  set_encoding_stream_coding_system (lstr, codesys);
  XSETLSTREAM (obj, lstr);
  return obj;
}

Lisp_Object
make_encoding_input_stream (Lstream *stream, Lisp_Object codesys)
{
  return make_encoding_stream_1 (stream, codesys, "r");
}

Lisp_Object
make_encoding_output_stream (Lstream *stream, Lisp_Object codesys)
{
  return make_encoding_stream_1 (stream, codesys, "w");
}

/* Convert N bytes of internally-formatted data stored in SRC to an
   external format, according to the encoding stream ENCODING.
   Store the encoded data into DST. */

static void
mule_encode (Lstream *encoding, CONST unsigned char *src,
	     unsigned_char_dynarr *dst, unsigned int n)
{
  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);

  switch (CODING_SYSTEM_TYPE (str->codesys))
    {
#ifdef DEBUG_XEMACS
    case CODESYS_INTERNAL:
      Dynarr_add_many (dst, src, n);
      break;
#endif
    case CODESYS_AUTODETECT:
      /* If we got this far and still haven't decided on the coding
	 system, then do no conversion. */
    case CODESYS_NO_CONVERSION:
      encode_coding_no_conversion (encoding, src, dst, n);
      break;
    case CODESYS_SHIFT_JIS:
      encode_coding_sjis (encoding, src, dst, n);
      break;
    case CODESYS_BIG5:
      encode_coding_big5 (encoding, src, dst, n);
      break;
    case CODESYS_CCL:
      ccl_driver (&str->ccl, src, dst, n, 0);
      break;
    case CODESYS_ISO2022:
      encode_coding_iso2022 (encoding, src, dst, n);
      break;
    default:
      abort ();
    }
}

DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4, 0, /*
Encode the text between START and END using CODING-SYSTEM.
This will, for example, convert Japanese characters into stuff such as
"^[$B!<!+^[(B" if you use the JIS encoding.  Return length of encoded
text.  BUFFER defaults to the current buffer if unspecified.
*/
       (start, end, coding_system, buffer))
{
  Bufpos b, e;
  struct buffer *buf = decode_buffer (buffer, 0);
  Lisp_Object instream, lb_outstream, de_outstream, outstream;
  Lstream *istr, *ostr;
  struct gcpro gcpro1, gcpro2, gcpro3, gcpro4;

  get_buffer_range_char (buf, start, end, &b, &e, 0);

  barf_if_buffer_read_only (buf, b, e);

  coding_system = Fget_coding_system (coding_system);
  instream  = make_lisp_buffer_input_stream  (buf, b, e, 0);
  lb_outstream = make_lisp_buffer_output_stream (buf, b, 0);
  de_outstream = make_decoding_output_stream (XLSTREAM (lb_outstream),
					      Fget_coding_system (Qbinary));
  outstream = make_encoding_output_stream (XLSTREAM (de_outstream),
					   coding_system);
  istr = XLSTREAM (instream);
  ostr = XLSTREAM (outstream);
  GCPRO4 (instream, outstream, de_outstream, lb_outstream);
  /* The chain of streams looks like this:

     [BUFFER] <----- send through
                     ------> [ENCODE AS SPECIFIED]
		             ------> [DECODE AS BINARY]
			             ------> [BUFFER]
   */
  while (1)
    {
      char tempbuf[1024]; /* some random amount */
      Bufpos newpos, even_newer_pos;
      Bufpos oldpos = lisp_buffer_stream_startpos (istr);
      int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));

      if (!size_in_bytes)
	break;
      newpos = lisp_buffer_stream_startpos (istr);
      Lstream_write (ostr, tempbuf, size_in_bytes);
      even_newer_pos = lisp_buffer_stream_startpos (istr);
      buffer_delete_range (buf, even_newer_pos - (newpos - oldpos),
			   even_newer_pos, 0);
    }

  {
    Charcount retlen =
      lisp_buffer_stream_startpos (XLSTREAM (instream)) - b;
    Lstream_close (istr);
    Lstream_close (ostr);
    UNGCPRO;
    Lstream_delete (istr);
    Lstream_delete (ostr);
    Lstream_delete (XLSTREAM (de_outstream));
    Lstream_delete (XLSTREAM (lb_outstream));
    return make_int (retlen);
  }
}


/************************************************************************/
/*                          Shift-JIS methods                           */
/************************************************************************/

/* Shift-JIS is a coding system encoding three character sets: ASCII, right
   half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
   as is.  A character of JISX0201-Kana (TYPE94 character set) is
   encoded by "position-code + 0x80".  A character of JISX0208
   (TYPE94x94 character set) is encoded in 2-byte but two
   position-codes are divided and shifted so that it fit in the range
   below.

   --- CODE RANGE of Shift-JIS ---
   (character set)	(range)
   ASCII		0x00 .. 0x7F
   JISX0201-Kana	0xA0 .. 0xDF
   JISX0208 (1st byte)	0x80 .. 0x9F and 0xE0 .. 0xEF
	    (2nd byte)	0x40 .. 0x7E and 0x80 .. 0xFC
   -------------------------------

*/

/* Is this the first byte of a Shift-JIS two-byte char? */

#define BYTE_SJIS_TWO_BYTE_1_P(c) \
  (((c) >= 0x81 && (c) <= 0x9F) || ((c) >= 0xE0 && (c) <= 0xEF))

/* Is this the second byte of a Shift-JIS two-byte char? */

#define BYTE_SJIS_TWO_BYTE_2_P(c) \
  (((c) >= 0x40 && (c) <= 0x7E) || ((c) >= 0x80 && (c) <= 0xFC))

#define BYTE_SJIS_KATAKANA_P(c)	\
  ((c) >= 0xA1 && (c) <= 0xDF)

static int
detect_coding_sjis (struct detection_state *st, CONST unsigned char *src,
		    unsigned int n)
{
  int c;

  while (n--)
    {
      c = *src++;
      if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
	return 0;
      if (st->shift_jis.in_second_byte)
	{
	  st->shift_jis.in_second_byte = 0;
	  if (c < 0x40)
	    return 0;
	}
      else if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
	st->shift_jis.in_second_byte = 1;
    }
  return CODING_CATEGORY_SHIFT_JIS_MASK;
}

/* Convert Shift-JIS data to internal format. */

static void
decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
		    unsigned_char_dynarr *dst, unsigned int n)
{
  unsigned char c;
  unsigned int flags, ch;
  enum eol_type eol_type;
  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);

  CODING_STREAM_DECOMPOSE (str, flags, ch);
  eol_type = str->eol_type;

  while (n--)
    {
      c = *src++;

      if (ch)
	{
	  /* Previous character was first byte of Shift-JIS Kanji char. */
	  if (BYTE_SJIS_TWO_BYTE_2_P (c))
	    {
	      unsigned char e1, e2;

	      Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
	      DECODE_SJIS (ch, c, e1, e2);
	      Dynarr_add (dst, e1);
	      Dynarr_add (dst, e2);
	    }
	  else
	    {
	      DECODE_ADD_BINARY_CHAR (ch, dst);
	      DECODE_ADD_BINARY_CHAR (c, dst);
	    }
	  ch = 0;
	}
      else
	{
	  DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
	  if (BYTE_SJIS_TWO_BYTE_1_P (c))
	    ch = c;
	  else if (BYTE_SJIS_KATAKANA_P (c))
	    {
	      Dynarr_add (dst, LEADING_BYTE_KATAKANA_JISX0201);
	      Dynarr_add (dst, c);
	    }
	  else
	    DECODE_ADD_BINARY_CHAR (c, dst);
	}
    label_continue_loop:;
    }

  DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);

  CODING_STREAM_COMPOSE (str, flags, ch);
}

/* Convert internally-formatted data to Shift-JIS. */

static void
encode_coding_sjis (Lstream *encoding, CONST unsigned char *src,
		    unsigned_char_dynarr *dst, unsigned int n)
{
  unsigned char c;
  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
  unsigned int flags, ch;
  enum eol_type eol_type;

  CODING_STREAM_DECOMPOSE (str, flags, ch);
  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);

  while (n--)
    {
      c = *src++;
      if (c == '\n')
	{
	  if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
	    Dynarr_add (dst, '\r');
	  if (eol_type != EOL_CR)
	    Dynarr_add (dst, '\n');
	  ch = 0;
	}
      else if (BYTE_ASCII_P (c))
	{
	  Dynarr_add (dst, c);
	  ch = 0;
	}
      else if (BUFBYTE_LEADING_BYTE_P (c))
	ch = (c == LEADING_BYTE_KATAKANA_JISX0201 ||
	      c == LEADING_BYTE_JAPANESE_JISX0208_1978 ||
	      c == LEADING_BYTE_JAPANESE_JISX0208) ? c : 0;
      else if (ch)
	{
	  if (ch == LEADING_BYTE_KATAKANA_JISX0201)
	    {
	      Dynarr_add (dst, c);
	      ch = 0;
	    }
	  else if (ch == LEADING_BYTE_JAPANESE_JISX0208_1978 ||
		   ch == LEADING_BYTE_JAPANESE_JISX0208)
	    ch = c;
	  else
	    {
	      unsigned char j1, j2;
	      ENCODE_SJIS (ch, c, j1, j2);
	      Dynarr_add (dst, j1);
	      Dynarr_add (dst, j2);
	      ch = 0;
	    }
	}
    }

  CODING_STREAM_COMPOSE (str, flags, ch);
}

DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /*
Decode a JISX0208 character of Shift-JIS coding-system.
CODE is the character code in Shift-JIS as a cons of type bytes.
Return the corresponding character.
*/
       (code))
{
  unsigned char c1, c2, s1, s2;

  CHECK_CONS (code);
  CHECK_INT (XCAR (code));
  CHECK_INT (XCDR (code));
  s1 = XINT (XCAR (code));
  s2 = XINT (XCDR (code));
  if (BYTE_SJIS_TWO_BYTE_1_P (s1) &&
      BYTE_SJIS_TWO_BYTE_2_P (s2))
    {
      DECODE_SJIS (s1, s2, c1, c2);
      return make_char (MAKE_CHAR (Vcharset_japanese_jisx0208,
				   c1 & 0x7F, c2 & 0x7F));
    }
  else
    return Qnil;
}

DEFUN ("encode-shift-jis-char", Fencode_shift_jis_char, 1, 1, 0, /*
Encode a JISX0208 character CHAR to SHIFT-JIS coding-system.
Return the corresponding character code in SHIFT-JIS as a cons of two bytes.
*/
       (ch))
{
  Lisp_Object charset;
  int c1, c2, s1, s2;

  CHECK_CHAR_COERCE_INT (ch);
  BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
  if (EQ (charset, Vcharset_japanese_jisx0208))
    {
      ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2);
      return Fcons (make_int (s1), make_int (s2));
    }
  else
    return Qnil;
}


/************************************************************************/
/*                            Big5 methods                              */
/************************************************************************/

/* BIG5 is a coding system encoding two character sets: ASCII and
   Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
   character set and is encoded in two-byte.

   --- CODE RANGE of BIG5 ---
   (character set)	(range)
   ASCII		0x00 .. 0x7F
   Big5 (1st byte)	0xA1 .. 0xFE
	(2nd byte)	0x40 .. 0x7E and 0xA1 .. 0xFE
   --------------------------

   Since the number of characters in Big5 is larger than maximum
   characters in Emacs' charset (96x96), it can't be handled as one
   charset.  So, in Emacs, Big5 is devided into two: `charset-big5-1'
   and `charset-big5-2'.  Both <type>s are TYPE94x94.  The former
   contains frequently used characters and the latter contains less
   frequently used characters.  */

#define BYTE_BIG5_TWO_BYTE_1_P(c) \
  ((c) >= 0xA1 && (c) <= 0xFE)

/* Is this the second byte of a Shift-JIS two-byte char? */

#define BYTE_BIG5_TWO_BYTE_2_P(c) \
  (((c) >= 0x40 && (c) <= 0x7E) || ((c) >= 0xA1 && (c) <= 0xFE))

/* Number of Big5 characters which have the same code in 1st byte.  */

#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Code conversion macros.  These are macros because they are used in
   inner loops during code conversion.

   Note that temporary variables in macros introduce the classic
   dynamic-scoping problems with variable names.  We use capital-
   lettered variables in the assumption that XEmacs does not use
   capital letters in variables except in a very formalized way
   (e.g. Qstring). */

/* Convert Big5 code (b1, b2) into its internal string representation
   (lb, c1, c2). */

/* There is a much simpler way to split the Big5 charset into two.
   For the moment I'm going to leave the algorithm as-is because it
   claims to separate out the most-used characters into a single
   charset, which perhaps will lead to optimizations in various
   places.

   The way the algorithm works is something like this:

   Big5 can be viewed as a 94x157 charset, where the row is
   encoded into the bytes 0xA1 .. 0xFE and the column is encoded
   into the bytes 0x40 .. 0x7E and 0xA1 .. 0xFE.  As for frequency,
   the split between low and high column numbers is apparently
   meaningless; ascending rows produce less and less frequent chars.
   Therefore, we assign the lower half of rows (0xA1 .. 0xC8) to
   the first charset, and the upper half (0xC9 .. 0xFE) to the
   second.  To do the conversion, we convert the character into
   a single number where 0 .. 156 is the first row, 157 .. 313
   is the second, etc.  That way, the characters are ordered by
   decreasing frequency.  Then we just chop the space in two
   and coerce the result into a 94x94 space.
   */

#define DECODE_BIG5(b1, b2, lb, c1, c2) do				\
{									\
  int B1 = b1, B2 = b2;							\
  unsigned int I							\
    = (B1 - 0xA1) * BIG5_SAME_ROW + B2 - (B2 < 0x7F ? 0x40 : 0x62);	\
									\
  if (B1 < 0xC9)							\
    {									\
      lb = LEADING_BYTE_CHINESE_BIG5_1;					\
    }									\
  else									\
    {									\
      lb = LEADING_BYTE_CHINESE_BIG5_2;					\
      I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);				\
    }									\
  c1 = I / (0xFF - 0xA1) + 0xA1;					\
  c2 = I % (0xFF - 0xA1) + 0xA1;					\
} while (0)

/* Convert the internal string representation of a Big5 character
   (lb, c1, c2) into Big5 code (b1, b2). */

#define ENCODE_BIG5(lb, c1, c2, b1, b2) do				\
{									\
  unsigned int I = ((c1) - 0xA1) * (0xFF - 0xA1) + ((c2) - 0xA1);	\
									\
  if (lb == LEADING_BYTE_CHINESE_BIG5_2)				\
    {									\
      I += BIG5_SAME_ROW * (0xC9 - 0xA1);				\
    }									\
  b1 = I / BIG5_SAME_ROW + 0xA1;					\
  b2 = I % BIG5_SAME_ROW;						\
  b2 += b2 < 0x3F ? 0x40 : 0x62;					\
} while (0)

static int
detect_coding_big5 (struct detection_state *st, CONST unsigned char *src,
		    unsigned int n)
{
  int c;

  while (n--)
    {
      c = *src++;
      if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO ||
	  (c >= 0x80 && c <= 0xA0))
	return 0;
      if (st->big5.in_second_byte)
	{
	  st->big5.in_second_byte = 0;
	  if (c < 0x40 || (c >= 0x80 && c <= 0xA0))
	    return 0;
	}
      else if (c >= 0xA1)
	st->big5.in_second_byte = 1;
    }
  return CODING_CATEGORY_BIG5_MASK;
}

/* Convert Big5 data to internal format. */

static void
decode_coding_big5 (Lstream *decoding, CONST unsigned char *src,
		    unsigned_char_dynarr *dst, unsigned int n)
{
  unsigned char c;
  unsigned int flags, ch;
  enum eol_type eol_type;
  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);

  CODING_STREAM_DECOMPOSE (str, flags, ch);
  eol_type = str->eol_type;

  while (n--)
    {
      c = *src++;
      if (ch)
	{
	  /* Previous character was first byte of Big5 char. */
	  if (BYTE_BIG5_TWO_BYTE_2_P (c))
	    {
	      unsigned char b1, b2, b3;
	      DECODE_BIG5 (ch, c, b1, b2, b3);
	      Dynarr_add (dst, b1);
	      Dynarr_add (dst, b2);
	      Dynarr_add (dst, b3);
	    }
	  else
	    {
	      DECODE_ADD_BINARY_CHAR (ch, dst);
	      DECODE_ADD_BINARY_CHAR (c, dst);
	    }
	  ch = 0;
	}
      else
	{
	  DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
	  if (BYTE_BIG5_TWO_BYTE_1_P (c))
	    ch = c;
	  else
	    DECODE_ADD_BINARY_CHAR (c, dst);
	}
    label_continue_loop:;
    }

  DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);

  CODING_STREAM_COMPOSE (str, flags, ch);
}

/* Convert internally-formatted data to Big5. */

static void
encode_coding_big5 (Lstream *encoding, CONST unsigned char *src,
		    unsigned_char_dynarr *dst, unsigned int n)
{
  unsigned char c;
  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
  unsigned int flags, ch;
  enum eol_type eol_type;

  CODING_STREAM_DECOMPOSE (str, flags, ch);
  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);

  while (n--)
    {
      c = *src++;
      if (c == '\n')
	{
	  if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
	    Dynarr_add (dst, '\r');
	  if (eol_type != EOL_CR)
	    Dynarr_add (dst, '\n');
	}
      else if (BYTE_ASCII_P (c))
	{
	  /* ASCII. */
	  Dynarr_add (dst, c);
	}
      else if (BUFBYTE_LEADING_BYTE_P (c))
	{
	  if (c == LEADING_BYTE_CHINESE_BIG5_1 ||
	      c == LEADING_BYTE_CHINESE_BIG5_2)
	    {
	      /* A recognized leading byte. */
	      ch = c;
	      continue; /* not done with this character. */
	    }
	  /* otherwise just ignore this character. */
	}
      else if (ch == LEADING_BYTE_CHINESE_BIG5_1 ||
	       ch == LEADING_BYTE_CHINESE_BIG5_2)
	{
	  /* Previous char was a recognized leading byte. */
	  ch = (ch << 8) | c;
	  continue; /* not done with this character. */
	}
      else if (ch)
	{
	  /* Encountering second byte of a Big5 character. */
	  unsigned char b1, b2;

	  ENCODE_BIG5 (ch >> 8, ch & 0xFF, c, b1, b2);
	  Dynarr_add (dst, b1);
	  Dynarr_add (dst, b2);
	}

      ch = 0;
    }

  CODING_STREAM_COMPOSE (str, flags, ch);
}


DEFUN ("decode-big5-char", Fdecode_big5_char, 1, 1, 0, /*
Decode a Big5 character CODE of BIG5 coding-system.
CODE is the character code in BIG5, a cons of two integers.
Return the corresponding character.
*/
       (code))
{
  unsigned char c1, c2, b1, b2;

  CHECK_CONS (code);
  CHECK_INT (XCAR (code));
  CHECK_INT (XCDR (code));
  b1 = XINT (XCAR (code));
  b2 = XINT (XCDR (code));
  if (BYTE_BIG5_TWO_BYTE_1_P (b1) &&
      BYTE_BIG5_TWO_BYTE_2_P (b2))
    {
      int leading_byte;
      Lisp_Object charset;
      DECODE_BIG5 (b1, b2, leading_byte, c1, c2);
      charset = CHARSET_BY_LEADING_BYTE (leading_byte);
      return make_char (MAKE_CHAR (charset, c1 & 0x7F, c2 & 0x7F));
    }
  else
    return Qnil;
}

DEFUN ("encode-big5-char", Fencode_big5_char, 1, 1, 0, /*
Encode the Big5 character CH to BIG5 coding-system.
Return the corresponding character code in Big5.
*/
       (ch))
{
  Lisp_Object charset;
  int c1, c2, b1, b2;

  CHECK_CHAR_COERCE_INT (ch);
  BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
  if (EQ (charset, Vcharset_chinese_big5_1) ||
      EQ (charset, Vcharset_chinese_big5_2))
    {
      ENCODE_BIG5 (XCHARSET_LEADING_BYTE (charset), c1 | 0x80, c2 | 0x80,
		   b1, b2);
      return Fcons (make_int (b1), make_int (b2));
    }
  else
    return Qnil;
}


/************************************************************************/
/*                           ISO2022 methods                            */
/************************************************************************/

/* The following note describes the coding system ISO2022 briefly.
   Since the intention of this note is to help understanding of the
   programs in this file, some parts are NOT ACCURATE or OVERLY
   SIMPLIFIED.  For thorough understanding, please refer to the
   original document of ISO2022.

   ISO2022 provides many mechanisms to encode several character sets
   in 7-bit and 8-bit environments.  If one chooses 7-bit environment,
   all text is encoded by codes of less than 128.  This may make the
   encoded text a little bit longer, but the text get more stability
   to pass through several gateways (some of them strip off MSB).

   There are two kind of character sets: control character set and
   graphic character set.  The former contains control characters such
   as `newline' and `escape' to provide control functions (control
   functions are provided also by escape sequence).  The latter
   contains graphic characters such as 'A' and '-'.  Emacs recognizes
   two control character sets and many graphic character sets.

   Graphic character sets are classified into one of four types,
   according to the dimension and number of characters in the set:
   TYPE94, TYPE96, TYPE94x94, and TYPE96x96.  In addition, each
   character set is assigned an identification byte, unique for each
   type, called "final character" (denoted as <F> hereafter).  The <F>
   of each character set is decided by ECMA(*) when it is registered
   in ISO.  Code range of <F> is 0x30..0x7F (0x30..0x3F are for
   private use only).

   Note (*): ECMA = European Computer Manufacturers Association

   Here are examples of graphic character set [NAME(<F>)]:
	o TYPE94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
	o TYPE96 -- right-half-of-ISO8859-1('A'), ...
	o TYPE94x94 -- GB2312('A'), JISX0208('B'), ...
	o TYPE96x96 -- none for the moment

   A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
	C0 [0x00..0x1F] -- control character plane 0
	GL [0x20..0x7F] -- graphic character plane 0
	C1 [0x80..0x9F] -- control character plane 1
	GR [0xA0..0xFF] -- graphic character plane 1

   A control character set is directly designated and invoked to C0 or
   C1 by an escape sequence.  The most common case is that:
   - ISO646's  control character set is designated/invoked to C0, and
   - ISO6429's control character set is designated/invoked to C1,
   and usually these designations/invocations are omitted in encoded
   text.  In a 7-bit environment, only C0 can be used, and a control
   character for C1 is encoded by an appropriate escape sequence to
   fit into the environment.  All control characters for C1 are
   defined to have corresponding escape sequences.

   A graphic character set is at first designated to one of four
   graphic registers (G0 through G3), then these graphic registers are
   invoked to GL or GR.  These designations and invocations can be
   done independently.  The most common case is that G0 is invoked to
   GL, G1 is invoked to GR, and ASCII is designated to G0.  Usually
   these invocations and designations are omitted in encoded text.
   In a 7-bit environment, only GL can be used.

   When a graphic character set of TYPE94 or TYPE94x94 is invoked to
   GL, codes 0x20 and 0x7F of the GL area work as control characters
   SPACE and DEL respectively, and code 0xA0 and 0xFF of GR area
   should not be used.

   There are two ways of invocation: locking-shift and single-shift.
   With locking-shift, the invocation lasts until the next different
   invocation, whereas with single-shift, the invocation works only
   for the following character and doesn't affect locking-shift.
   Invocations are done by the following control characters or escape
   sequences.

   ----------------------------------------------------------------------
   abbrev  function	             cntrl escape seq	description
   ----------------------------------------------------------------------
   SI/LS0  (shift-in)		     0x0F  none		invoke G0 into GL
   SO/LS1  (shift-out)		     0x0E  none		invoke G1 into GL
   LS1R    (locking-shift-1 right)   none  ESC '~'      invoke G1 into GR
   LS2     (locking-shift-2)	     none  ESC 'n'	invoke G2 into GL
   LS2R    (locking-shift-2 right)   none  ESC '}'      invoke G2 into GR
   LS3     (locking-shift-3)	     none  ESC 'o'	invoke G3 into GL
   LS3R    (locking-shift 3 right)   none  ESC '|'      invoke G3 into GR
   SS2     (single-shift-2)	     0x8E  ESC 'N'	invoke G2 for one char
   SS3     (single-shift-3)	     0x8F  ESC 'O'	invoke G3 for one char
   ----------------------------------------------------------------------
   The first four are for locking-shift.  Control characters for these
   functions are defined by macros ISO_CODE_XXX in `coding.h'.

   Designations are done by the following escape sequences.
   ----------------------------------------------------------------------
   escape sequence	description
   ----------------------------------------------------------------------
   ESC '(' <F>		designate TYPE94<F> to G0
   ESC ')' <F>		designate TYPE94<F> to G1
   ESC '*' <F>		designate TYPE94<F> to G2
   ESC '+' <F>		designate TYPE94<F> to G3
   ESC ',' <F>		designate TYPE96<F> to G0 (*)
   ESC '-' <F>		designate TYPE96<F> to G1
   ESC '.' <F>		designate TYPE96<F> to G2
   ESC '/' <F>		designate TYPE96<F> to G3
   ESC '$' '(' <F>	designate TYPE94x94<F> to G0 (**)
   ESC '$' ')' <F>	designate TYPE94x94<F> to G1
   ESC '$' '*' <F>	designate TYPE94x94<F> to G2
   ESC '$' '+' <F>	designate TYPE94x94<F> to G3
   ESC '$' ',' <F>	designate TYPE96x96<F> to G0 (*)
   ESC '$' '-' <F>	designate TYPE96x96<F> to G1
   ESC '$' '.' <F>	designate TYPE96x96<F> to G2
   ESC '$' '/' <F>	designate TYPE96x96<F> to G3
   ----------------------------------------------------------------------
   In this list, "TYPE94<F>" means a graphic character set of type TYPE94
   and final character <F>, and etc.

   Note (*): Although these designations are not allowed in ISO2022,
   Emacs accepts them on decoding, and produces them on encoding
   TYPE96 or TYPE96x96 character set in a coding system which is
   characterized as 7-bit environment, non-locking-shift, and
   non-single-shift.

   Note (**): If <F> is '@', 'A', or 'B', the intermediate character
   '(' can be omitted.  We call this as "short-form" here after.

   Now you may notice that there are a lot of ways for encoding the
   same multilingual text in ISO2022.  Actually, there exist many
   coding systems such as Compound Text (used in X's inter client
   communication, ISO-2022-JP (used in Japanese internet), ISO-2022-KR
   (used in Korean internet), EUC (Extended UNIX Code, used in Asian
   localized platforms), and all of these are variants of ISO2022.

   In addition to the above, Emacs handles two more kinds of escape
   sequences: ISO6429's direction specification and Emacs' private
   sequence for specifying character composition.

   ISO6429's direction specification takes the following format:
	o CSI ']'      -- end of the current direction
	o CSI '0' ']'  -- end of the current direction
	o CSI '1' ']'  -- start of left-to-right text
	o CSI '2' ']'  -- start of right-to-left text
   The control character CSI (0x9B: control sequence introducer) is
   abbreviated to the escape sequence ESC '[' in 7-bit environment.

   Character composition specification takes the following format:
	o ESC '0' -- start character composition
	o ESC '1' -- end character composition
   Since these are not standard escape sequences of any ISO, the use
   of them for these meanings is restricted to Emacs only.  */

static void
reset_iso2022 (Lisp_Object coding_system, struct iso2022_decoder *iso)
{
  int i;

  for (i = 0; i < 4; i++)
    {
      if (!NILP (coding_system))
	iso->charset[i] =
	  XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, i);
      else
	iso->charset[i] = Qt;
      iso->invalid_designated[i] = 0;
    }
  iso->esc = ISO_ESC_NOTHING;
  iso->esc_bytes_index = 0;
  iso->register_left = 0;
  iso->register_right = 1;
  iso->switched_dir_and_no_valid_charset_yet = 0;
  iso->invalid_switch_dir = 0;
  iso->output_direction_sequence = 0;
  iso->output_literally = 0;
  if (iso->composite_chars)
    Dynarr_reset (iso->composite_chars);
}

static int
fit_to_be_escape_quoted (unsigned char c)
{
  switch (c)
    {
    case ISO_CODE_ESC:
    case ISO_CODE_CSI:
    case ISO_CODE_SS2:
    case ISO_CODE_SS3:
    case ISO_CODE_SO:
    case ISO_CODE_SI:
      return 1;

    default:
      return 0;
    }
}

/* Parse one byte of an ISO2022 escape sequence.
   If the result is an invalid escape sequence, return 0 and
   do not change anything in STR.  Otherwise, if the result is
   an incomplete escape sequence, update ISO2022.ESC and
   ISO2022.ESC_BYTES and return -1.  Otherwise, update
   all the state variables (but not ISO2022.ESC_BYTES) and
   return 1.

   If CHECK_INVALID_CHARSETS is non-zero, check for designation
   or invocation of an invalid character set and treat that as
   an unrecognized escape sequence. */

static int
parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
		   unsigned char c, unsigned int *flags,
		   int check_invalid_charsets)
{
  /* (1) If we're at the end of a designation sequence, CS is the
     charset being designated and REG is the register to designate
     it to.

     (2) If we're at the end of a locking-shift sequence, REG is
     the register to invoke and HALF (0 == left, 1 == right) is
     the half to invoke it into.

     (3) If we're at the end of a single-shift sequence, REG is
     the register to invoke. */
  Lisp_Object cs = Qnil;
  int reg, half;

  /* NOTE: This code does goto's all over the fucking place.
     The reason for this is that we're basically implementing
     a state machine here, and hierarchical languages like C
     don't really provide a clean way of doing this. */

  if (! (*flags & CODING_STATE_ESCAPE))
    /* At beginning of escape sequence; we need to reset our
       escape-state variables. */
    iso->esc = ISO_ESC_NOTHING;

  iso->output_literally = 0;
  iso->output_direction_sequence = 0;

  switch (iso->esc)
    {
    case ISO_ESC_NOTHING:
      iso->esc_bytes_index = 0;
      switch (c)
	{
	case ISO_CODE_ESC:	/* Start escape sequence */
	  *flags |= CODING_STATE_ESCAPE;
	  iso->esc = ISO_ESC;
	  goto not_done;

	case ISO_CODE_CSI:      /* ISO6429 (specifying directionality) */
	  *flags |= CODING_STATE_ESCAPE;
	  iso->esc = ISO_ESC_5_11;
	  goto not_done;

	case ISO_CODE_SO:	/* locking shift 1 */
	  reg = 1; half = 0;
	  goto locking_shift;
	case ISO_CODE_SI:	/* locking shift 0 */
	  reg = 0; half = 0;
	  goto locking_shift;

	case ISO_CODE_SS2:	/* single shift */
	  reg = 2;
	  goto single_shift;
	case ISO_CODE_SS3:	/* single shift */
	  reg = 3;
	  goto single_shift;

	default:			/* Other control characters */
	  return 0;
	}

    case ISO_ESC:
      switch (c)
	{
	  /**** single shift ****/

	case 'N':	/* single shift 2 */
	  reg = 2;
	  goto single_shift;
	case 'O':	/* single shift 3 */
	  reg = 3;
	  goto single_shift;

	  /**** locking shift ****/

	case '~':	/* locking shift 1 right */
	  reg = 1; half = 1;
	  goto locking_shift;
	case 'n':	/* locking shift 2 */
	  reg = 2; half = 0;
	  goto locking_shift;
	case '}':	/* locking shift 2 right */
	  reg = 2; half = 1;
	  goto locking_shift;
	case 'o':	/* locking shift 3 */
	  reg = 3; half = 0;
	  goto locking_shift;
	case '|':	/* locking shift 3 right */
	  reg = 3; half = 1;
	  goto locking_shift;

	  /**** composite ****/

	case '0':
	  iso->esc = ISO_ESC_START_COMPOSITE;
	  *flags = (*flags & CODING_STATE_ISO2022_LOCK) |
	    CODING_STATE_COMPOSITE;
	  return 1;

	case '1':
	  iso->esc = ISO_ESC_END_COMPOSITE;
	  *flags = (*flags & CODING_STATE_ISO2022_LOCK) &
	    ~CODING_STATE_COMPOSITE;
	  return 1;

	  /**** directionality ****/

	case '[':
	  iso->esc = ISO_ESC_5_11;
	  goto not_done;

	  /**** designation ****/

	case '$':	/* multibyte charset prefix */
	  iso->esc = ISO_ESC_2_4;
	  goto not_done;

	default:
	  if (0x28 <= c && c <= 0x2F)
	    {
	      iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_8);
	      goto not_done;
	    }

	  /* This function is called with CODESYS equal to nil when
	     doing coding-system detection. */
	  if (!NILP (codesys)
	      && XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
	      && fit_to_be_escape_quoted (c))
	    {
	      iso->esc = ISO_ESC_LITERAL;
	      *flags &= CODING_STATE_ISO2022_LOCK;
	      return 1;
	    }

	  /* bzzzt! */
	  return 0;
	}



      /**** directionality ****/

    case ISO_ESC_5_11:		/* ISO6429 direction control */
      if (c == ']')
	{
	  *flags &= (CODING_STATE_ISO2022_LOCK & ~CODING_STATE_R2L);
	  goto directionality;
	}
      if      (c == '0') iso->esc = ISO_ESC_5_11_0;
      else if (c == '1') iso->esc = ISO_ESC_5_11_1;
      else if (c == '2') iso->esc = ISO_ESC_5_11_2;
      else               return 0;
      goto not_done;

    case ISO_ESC_5_11_0:
      if (c == ']')
	{
	  *flags &= (CODING_STATE_ISO2022_LOCK & ~CODING_STATE_R2L);
	  goto directionality;
	}
      return 0;

    case ISO_ESC_5_11_1:
      if (c == ']')
	{
	  *flags = (CODING_STATE_ISO2022_LOCK & ~CODING_STATE_R2L);
	  goto directionality;
	}
      return 0;

    case ISO_ESC_5_11_2:
      if (c == ']')
	{
	  *flags = (*flags & CODING_STATE_ISO2022_LOCK) | CODING_STATE_R2L;
	  goto directionality;
	}
      return 0;

    directionality:
      iso->esc = ISO_ESC_DIRECTIONALITY;
      /* Various junk here to attempt to preserve the direction sequences
	 literally in the text if they would otherwise be swallowed due
	 to invalid designations that don't show up as actual charset
	 changes in the text. */
      if (iso->invalid_switch_dir)
	{
	  /* We already inserted a direction switch literally into the
	     text.  We assume (#### this may not be right) that the
	     next direction switch is the one going the other way,
	     and we need to output that literally as well. */
	  iso->output_literally = 1;
	  iso->invalid_switch_dir = 0;
	}
      else
	{
	  int jj;

	  /* If we are in the thrall of an invalid designation,
	   then stick the directionality sequence literally into the
	   output stream so it ends up in the original text again. */
	  for (jj = 0; jj < 4; jj++)
	    if (iso->invalid_designated[jj])
	      break;
	  if (jj < 4)
	    {
	      iso->output_literally = 1;
	      iso->invalid_switch_dir = 1;
	    }
	  else
	    /* Indicate that we haven't yet seen a valid designation,
	       so that if a switch-dir is directly followed by an
	       invalid designation, both get inserted literally. */
	    iso->switched_dir_and_no_valid_charset_yet = 1;
	}
      return 1;


      /**** designation ****/

    case ISO_ESC_2_4:
      if (0x28 <= c && c <= 0x2F)
	{
	  iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_4_8);
	  goto not_done;
	}
      if (0x40 <= c && c <= 0x42)
	{
	  cs = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94, c,
				      *flags & CODING_STATE_R2L ?
				      CHARSET_RIGHT_TO_LEFT :
				      CHARSET_LEFT_TO_RIGHT);
	  reg = 0;
	  goto designated;
	}
      return 0;

    default:
      {
	int type =-1;

	if (c < '0' || c > '~')
	  return 0; /* bad final byte */

	if (iso->esc >= ISO_ESC_2_8 &&
	    iso->esc <= ISO_ESC_2_15)
	  {
	    type = ((iso->esc >= ISO_ESC_2_12) ?
		    CHARSET_TYPE_96 : CHARSET_TYPE_94);
	    reg = (iso->esc - ISO_ESC_2_8) & 3;
	  }
	else if (iso->esc >= ISO_ESC_2_4_8 &&
		 iso->esc <= ISO_ESC_2_4_15)
	  {
	    type = ((iso->esc >= ISO_ESC_2_4_12) ?
		    CHARSET_TYPE_96X96 : CHARSET_TYPE_94X94);
	    reg = (iso->esc - ISO_ESC_2_4_8) & 3;
	  }
	else
	  {
	    /* Can this ever be reached? -slb */
	    abort();
	  }

	cs = CHARSET_BY_ATTRIBUTES (type, c,
				    *flags & CODING_STATE_R2L ?
				    CHARSET_RIGHT_TO_LEFT :
				    CHARSET_LEFT_TO_RIGHT);
	goto designated;
      }
    }

 not_done:
  iso->esc_bytes[iso->esc_bytes_index++] = (unsigned char) c;
  return -1;

 single_shift:
  if (check_invalid_charsets && !CHARSETP (iso->charset[reg]))
    /* can't invoke something that ain't there. */
    return 0;
  iso->esc = ISO_ESC_SINGLE_SHIFT;
  *flags &= CODING_STATE_ISO2022_LOCK;
  if (reg == 2)
    *flags |= CODING_STATE_SS2;
  else
    *flags |= CODING_STATE_SS3;
  return 1;

 locking_shift:
  if (check_invalid_charsets &&
      !CHARSETP (iso->charset[reg]))
    /* can't invoke something that ain't there. */
    return 0;
  if (half)
    iso->register_right = reg;
  else
    iso->register_left = reg;
  *flags &= CODING_STATE_ISO2022_LOCK;
  iso->esc = ISO_ESC_LOCKING_SHIFT;
  return 1;

 designated:
  if (NILP (cs) && check_invalid_charsets)
    {
      iso->invalid_designated[reg] = 1;
      iso->charset[reg] = Vcharset_ascii;
      iso->esc = ISO_ESC_DESIGNATE;
      *flags &= CODING_STATE_ISO2022_LOCK;
      iso->output_literally = 1;
      if (iso->switched_dir_and_no_valid_charset_yet)
	{
	  /* We encountered a switch-direction followed by an
	     invalid designation.  Ensure that the switch-direction
	     gets outputted; otherwise it will probably get eaten
	     when the text is written out again. */
	  iso->switched_dir_and_no_valid_charset_yet = 0;
	  iso->output_direction_sequence = 1;
	  /* And make sure that the switch-dir going the other
	     way gets outputted, as well. */
	  iso->invalid_switch_dir = 1;
	}
      return 1;
    }
  /* This function is called with CODESYS equal to nil when
     doing coding-system detection. */
  if (!NILP (codesys))
    {
      charset_conversion_spec_dynarr *dyn =
	XCODING_SYSTEM (codesys)->iso2022.input_conv;

      if (dyn)
	{
	  int i;

	  for (i = 0; i < Dynarr_length (dyn); i++)
	    {
	      struct charset_conversion_spec *spec = Dynarr_atp (dyn, i);
	      if (EQ (cs, spec->from_charset))
		cs = spec->to_charset;
	    }
	}
    }

  iso->charset[reg] = cs;
  iso->esc = ISO_ESC_DESIGNATE;
  *flags &= CODING_STATE_ISO2022_LOCK;
  if (iso->invalid_designated[reg])
    {
      iso->invalid_designated[reg] = 0;
      iso->output_literally = 1;
    }
  if (iso->switched_dir_and_no_valid_charset_yet)
    iso->switched_dir_and_no_valid_charset_yet = 0;
  return 1;
}

static int
detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src,
		       unsigned int n)
{
  int c;
  int mask;

  /* #### There are serious deficiencies in the recognition mechanism
     here.  This needs to be much smarter if it's going to cut it. */

  if (!st->iso2022.initted)
    {
      reset_iso2022 (Qnil, &st->iso2022.iso);
      st->iso2022.mask = (CODING_CATEGORY_ISO_7_MASK |
			  CODING_CATEGORY_ISO_8_DESIGNATE_MASK |
			  CODING_CATEGORY_ISO_8_1_MASK |
			  CODING_CATEGORY_ISO_8_2_MASK |
			  CODING_CATEGORY_ISO_LOCK_SHIFT_MASK);
      st->iso2022.flags = 0;
      st->iso2022.high_byte_count = 0;
      st->iso2022.saw_single_shift = 0;
      st->iso2022.initted = 1;
    }

  mask = st->iso2022.mask;

  while (n--)
    {
      c = *src++;
      if (c >= 0xA0)
	{
	  mask &= ~CODING_CATEGORY_ISO_7_MASK;
	  st->iso2022.high_byte_count++;
	}
      else
	{
	  if (st->iso2022.high_byte_count && !st->iso2022.saw_single_shift)
	    {
	      if (st->iso2022.high_byte_count & 1)
		/* odd number of high bytes; assume not iso-8-2 */
		mask &= ~CODING_CATEGORY_ISO_8_2_MASK;
	    }
	  st->iso2022.high_byte_count = 0;
	  st->iso2022.saw_single_shift = 0;
	  if (c > 0x80)
	    mask &= ~CODING_CATEGORY_ISO_7_MASK;
	}
      if (!(st->iso2022.flags & CODING_STATE_ESCAPE)
	  && (BYTE_C0_P (c) || BYTE_C1_P (c)))
	{ /* control chars */
	  switch (c)
	    {
	      /* Allow and ignore control characters that you might
		 reasonably see in a text file */
	    case '\r':
	    case '\n':
	    case '\t':
	    case  7: /* bell */
	    case  8: /* backspace */
	    case 11: /* vertical tab */
	    case 12: /* form feed */
	    case 26: /* MS-DOS C-z junk */
	    case 31: /* '^_' -- for info */
	      goto label_continue_loop;

	    default:
	      break;
	    }
	}

      if ((st->iso2022.flags & CODING_STATE_ESCAPE) || BYTE_C0_P (c)
          || BYTE_C1_P (c))
	{
	  if (parse_iso2022_esc (Qnil, &st->iso2022.iso, c,
				 &st->iso2022.flags, 0))
	    {
	      switch (st->iso2022.iso.esc)
		{
		case ISO_ESC_DESIGNATE:
		  mask &= ~CODING_CATEGORY_ISO_8_1_MASK;
		  mask &= ~CODING_CATEGORY_ISO_8_2_MASK;
		  break;
		case ISO_ESC_LOCKING_SHIFT:
		  mask = CODING_CATEGORY_ISO_LOCK_SHIFT_MASK;
		  goto ran_out_of_chars;
		case ISO_ESC_SINGLE_SHIFT:
		  mask &= ~CODING_CATEGORY_ISO_8_DESIGNATE_MASK;
		  st->iso2022.saw_single_shift = 1;
		  break;
		default:
		  break;
		}
	    }
	  else
	    {
	      mask = 0;
	      goto ran_out_of_chars;
	    }
	}
    label_continue_loop:;
    }

 ran_out_of_chars:

  return mask;
}

static int
postprocess_iso2022_mask (int mask)
{
  /* #### kind of cheesy */
  /* If seven-bit ISO is allowed, then assume that the encoding is
     entirely seven-bit and turn off the eight-bit ones. */
  if (mask & CODING_CATEGORY_ISO_7_MASK)
    mask &= ~ (CODING_CATEGORY_ISO_8_DESIGNATE_MASK |
	       CODING_CATEGORY_ISO_8_1_MASK |
	       CODING_CATEGORY_ISO_8_2_MASK);
  return mask;
}

/* If FLAGS is a null pointer or specifies right-to-left motion,
   output a switch-dir-to-left-to-right sequence to DST.
   Also update FLAGS if it is not a null pointer.
   If INTERNAL_P is set, we are outputting in internal format and
   need to handle the CSI differently. */

static void
restore_left_to_right_direction (struct Lisp_Coding_System *codesys,
				 unsigned_char_dynarr *dst,
				 unsigned int *flags,
				 int internal_p)
{
  if (!flags || (*flags & CODING_STATE_R2L))
    {
      if (CODING_SYSTEM_ISO2022_SEVEN (codesys))
	{
	  Dynarr_add (dst, ISO_CODE_ESC);
	  Dynarr_add (dst, '[');
	}
      else if (internal_p)
	DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst);
      else
	Dynarr_add (dst, ISO_CODE_CSI);
      Dynarr_add (dst, '0');
      Dynarr_add (dst, ']');
      if (flags)
	*flags &= ~CODING_STATE_R2L;
    }
}

/* If FLAGS is a null pointer or specifies a direction different from
   DIRECTION (which should be either CHARSET_RIGHT_TO_LEFT or
   CHARSET_LEFT_TO_RIGHT), output the appropriate switch-dir escape
   sequence to DST.  Also update FLAGS if it is not a null pointer.
   If INTERNAL_P is set, we are outputting in internal format and
   need to handle the CSI differently. */

static void
ensure_correct_direction (int direction, struct Lisp_Coding_System *codesys,
			  unsigned_char_dynarr *dst, unsigned int *flags,
			  int internal_p)
{
  if ((!flags || (*flags & CODING_STATE_R2L)) &&
      direction == CHARSET_LEFT_TO_RIGHT)
    restore_left_to_right_direction (codesys, dst, flags, internal_p);
  else if (!CODING_SYSTEM_ISO2022_NO_ISO6429 (codesys)
	   && (!flags || !(*flags & CODING_STATE_R2L)) &&
	   direction == CHARSET_RIGHT_TO_LEFT)
    {
      if (CODING_SYSTEM_ISO2022_SEVEN (codesys))
	{
	  Dynarr_add (dst, ISO_CODE_ESC);
	  Dynarr_add (dst, '[');
	}
      else if (internal_p)
	DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst);
      else
	Dynarr_add (dst, ISO_CODE_CSI);
      Dynarr_add (dst, '2');
      Dynarr_add (dst, ']');
      if (flags)
	*flags |= CODING_STATE_R2L;
    }
}

/* Convert ISO2022-format data to internal format. */

static void
decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
		       unsigned_char_dynarr *dst, unsigned int n)
{
  unsigned char c;
  unsigned int flags, ch;
  enum eol_type eol_type;
  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
  Lisp_Object coding_system = Qnil;
  unsigned_char_dynarr *real_dst = dst;

  CODING_STREAM_DECOMPOSE (str, flags, ch);
  eol_type = str->eol_type;
  XSETCODING_SYSTEM (coding_system, str->codesys);

  if (flags & CODING_STATE_COMPOSITE)
    dst = str->iso2022.composite_chars;

  while (n--)
    {
      c = *src++;
      if (flags & CODING_STATE_ESCAPE)
	{	/* Within ESC sequence */
	  int retval = parse_iso2022_esc (coding_system, &str->iso2022,
					  c, &flags, 1);

	  if (retval)
	    {
	      switch (str->iso2022.esc)
		{
		case ISO_ESC_START_COMPOSITE:
		  if (str->iso2022.composite_chars)
		    Dynarr_reset (str->iso2022.composite_chars);
		  else
		    str->iso2022.composite_chars = Dynarr_new (unsigned_char);
		  dst = str->iso2022.composite_chars;
		  break;
		case ISO_ESC_END_COMPOSITE:
		  {
		    Bufbyte comstr[MAX_EMCHAR_LEN];
		    Bytecount len;
		    Emchar emch = lookup_composite_char (Dynarr_atp (dst, 0),
							 Dynarr_length (dst));
		    dst = real_dst;
		    len = set_charptr_emchar (comstr, emch);
		    Dynarr_add_many (dst, comstr, len);
		    break;
		  }

		case ISO_ESC_LITERAL:
		  DECODE_ADD_BINARY_CHAR (c, dst);
		  break;

		default:
		  /* Everything else handled already */
		  break;
		}
	    }

	  /* Attempted error recovery. */
	  if (str->iso2022.output_direction_sequence)
	    ensure_correct_direction (flags & CODING_STATE_R2L ?
				      CHARSET_RIGHT_TO_LEFT :
				      CHARSET_LEFT_TO_RIGHT,
				      str->codesys, dst, 0, 1);
	  /* More error recovery. */
	  if (!retval || str->iso2022.output_literally)
	    {
	      /* Output the (possibly invalid) sequence */
	      int i;
	      for (i = 0; i < str->iso2022.esc_bytes_index; i++)
		DECODE_ADD_BINARY_CHAR (str->iso2022.esc_bytes[i], dst);
	      flags &= CODING_STATE_ISO2022_LOCK;
	      if (!retval)
		n++, src--;/* Repeat the loop with the same character. */
	      else
		{
		  /* No sense in reprocessing the final byte of the
		     escape sequence; it could mess things up anyway.
		     Just add it now. */
		  DECODE_ADD_BINARY_CHAR (c, dst);
		}
	    }
	  ch = 0;
	}
      else if (BYTE_C0_P (c) || BYTE_C1_P (c))
	{ /* Control characters */

	  /***** Error-handling *****/

	  /* If we were in the middle of a character, dump out the
	     partial character. */
	  DECODE_OUTPUT_PARTIAL_CHAR (ch);

	  /* If we just saw a single-shift character, dump it out.
	     This may dump out the wrong sort of single-shift character,
	     but least it will give an indication that something went
	     wrong. */
	  if (flags & CODING_STATE_SS2)
	    {
	      DECODE_ADD_BINARY_CHAR (ISO_CODE_SS2, dst);
	      flags &= ~CODING_STATE_SS2;
	    }
	  if (flags & CODING_STATE_SS3)
	    {
	      DECODE_ADD_BINARY_CHAR (ISO_CODE_SS3, dst);
	      flags &= ~CODING_STATE_SS3;
	    }

	  /***** Now handle the control characters. *****/

	  /* Handle CR/LF */
	  DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);

	  flags &= CODING_STATE_ISO2022_LOCK;

	  if (!parse_iso2022_esc (coding_system, &str->iso2022, c, &flags, 1))
	    DECODE_ADD_BINARY_CHAR (c, dst);
	}
      else
	{			/* Graphic characters */
	  Lisp_Object charset;
	  int lb;
	  int reg;

	  DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);

	  /* Now determine the charset. */
	  reg = ((flags & CODING_STATE_SS2) ? 2
		 : (flags & CODING_STATE_SS3) ? 3
		 : !BYTE_ASCII_P (c) ? str->iso2022.register_right
		 : str->iso2022.register_left);
	  charset = str->iso2022.charset[reg];

	  /* Error checking: */
	  if (NILP (charset) || str->iso2022.invalid_designated[reg]
	      || (((c & 0x7F) == ' ' || (c & 0x7F) == ISO_CODE_DEL)
		  && XCHARSET_CHARS (charset) == 94))
	    /* Mrmph.  We are trying to invoke a register that has no
	       or an invalid charset in it, or trying to add a character
	       outside the range of the charset.  Insert that char literally
	       to preserve it for the output. */
	    {
	      DECODE_OUTPUT_PARTIAL_CHAR (ch);
	      DECODE_ADD_BINARY_CHAR (c, dst);
	    }

	  else
	    {
	      /* Things are probably hunky-dorey. */

	      /* Fetch reverse charset, maybe. */
	      if (((flags & CODING_STATE_R2L) &&
		   XCHARSET_DIRECTION (charset) == CHARSET_LEFT_TO_RIGHT)
		  ||
		  (!(flags & CODING_STATE_R2L) &&
		   XCHARSET_DIRECTION (charset) == CHARSET_RIGHT_TO_LEFT))
		{
		  Lisp_Object new_charset =
		    XCHARSET_REVERSE_DIRECTION_CHARSET (charset);
		  if (!NILP (new_charset))
		    charset = new_charset;
		}

	      lb = XCHARSET_LEADING_BYTE (charset);
	      switch (XCHARSET_REP_BYTES (charset))
		{
		case 1:	/* ASCII */
		  DECODE_OUTPUT_PARTIAL_CHAR (ch);
		  Dynarr_add (dst, c & 0x7F);
		  break;

		case 2:	/* one-byte official */
		  DECODE_OUTPUT_PARTIAL_CHAR (ch);
		  Dynarr_add (dst, lb);
		  Dynarr_add (dst, c | 0x80);
		  break;

		case 3:	/* one-byte private or two-byte official */
		  if (XCHARSET_PRIVATE_P (charset))
		    {
		      DECODE_OUTPUT_PARTIAL_CHAR (ch);
		      Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1);
		      Dynarr_add (dst, lb);
		      Dynarr_add (dst, c | 0x80);
		    }
		  else
		    {
		      if (ch)
			{
			  Dynarr_add (dst, lb);
			  Dynarr_add (dst, ch | 0x80);
			  Dynarr_add (dst, c | 0x80);
			  ch = 0;
			}
		      else
			ch = c;
		    }
		  break;

		default:	/* two-byte private */
		  if (ch)
		    {
		      Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2);
		      Dynarr_add (dst, lb);
		      Dynarr_add (dst, ch | 0x80);
		      Dynarr_add (dst, c | 0x80);
		      ch = 0;
		    }
		  else
		    ch = c;
		}
	    }

	  if (!ch)
	    flags &= CODING_STATE_ISO2022_LOCK;
	}

    label_continue_loop:;
    }

  if (flags & CODING_STATE_END)
    DECODE_OUTPUT_PARTIAL_CHAR (ch);

  CODING_STREAM_COMPOSE (str, flags, ch);
}


/***** ISO2022 encoder *****/

/* Designate CHARSET into register REG. */

static void
iso2022_designate (Lisp_Object charset, unsigned char reg,
		   struct encoding_stream *str, unsigned_char_dynarr *dst)
{
  CONST char *inter94 = "()*+", *inter96= ",-./";
  int type;
  unsigned char final;
  Lisp_Object old_charset = str->iso2022.charset[reg];

  str->iso2022.charset[reg] = charset;
  if (!CHARSETP (charset))
    /* charset might be an initial nil or t. */
    return;
  type = XCHARSET_TYPE (charset);
  final = XCHARSET_FINAL (charset);
  if (!str->iso2022.force_charset_on_output[reg] &&
      CHARSETP (old_charset) &&
      XCHARSET_TYPE (old_charset) == type &&
      XCHARSET_FINAL (old_charset) == final)
    return;

  str->iso2022.force_charset_on_output[reg] = 0;

  {
    charset_conversion_spec_dynarr *dyn =
      str->codesys->iso2022.output_conv;

    if (dyn)
      {
	int i;

	for (i = 0; i < Dynarr_length (dyn); i++)
	  {
	    struct charset_conversion_spec *spec = Dynarr_atp (dyn, i);
	    if (EQ (charset, spec->from_charset))
		charset = spec->to_charset;
	  }
      }
  }

  Dynarr_add (dst, ISO_CODE_ESC);
  switch (type)
    {
    case CHARSET_TYPE_94:
      Dynarr_add (dst, inter94[reg]);
      break;
    case CHARSET_TYPE_96:
      Dynarr_add (dst, inter96[reg]);
      break;
    case CHARSET_TYPE_94X94:
      Dynarr_add (dst, '$');
      if (reg != 0
	  || !(CODING_SYSTEM_ISO2022_SHORT (str->codesys))
	  || final < '@'
	  || final > 'B')
	Dynarr_add (dst, inter94[reg]);
      break;
    case CHARSET_TYPE_96X96:
      Dynarr_add (dst, '$');
      Dynarr_add (dst, inter96[reg]);
      break;
    }
  Dynarr_add (dst, final);
}

static void
ensure_normal_shift (struct encoding_stream *str, unsigned_char_dynarr *dst)
{
  if (str->iso2022.register_left != 0)
    {
      Dynarr_add (dst, ISO_CODE_SI);
      str->iso2022.register_left = 0;
    }
}

static void
ensure_shift_out (struct encoding_stream *str, unsigned_char_dynarr *dst)
{
  if (str->iso2022.register_left != 1)
    {
      Dynarr_add (dst, ISO_CODE_SO);
      str->iso2022.register_left = 1;
    }
}

/* Convert internally-formatted data to ISO2022 format. */

static void
encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
		       unsigned_char_dynarr *dst, unsigned int n)
{
  unsigned char charmask, c;
  unsigned int flags, ch;
  enum eol_type eol_type;
  unsigned char char_boundary;
  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
  struct Lisp_Coding_System *codesys = str->codesys;
  int i;
  Lisp_Object charset;
  int half;

  /* flags for handling composite chars.  We do a little switcharoo
     on the source while we're outputting the composite char. */
  unsigned int saved_n = 0;
  CONST unsigned char *saved_src = NULL;
  int in_composite = 0;

  CODING_STREAM_DECOMPOSE (str, flags, ch);
  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
  char_boundary = str->iso2022.current_char_boundary;
  charset = str->iso2022.current_charset;
  half = str->iso2022.current_half;

 back_to_square_n:
  while (n--)
    {
      c = *src++;

      if (BYTE_ASCII_P (c))
	{		/* Processing ASCII character */
	  ch = 0;

	  restore_left_to_right_direction (codesys, dst, &flags, 0);

	  /* Make sure G0 contains ASCII */
	  if ((c > ' ' && c < ISO_CODE_DEL) ||
	      !CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (codesys))
	    {
	      ensure_normal_shift (str, dst);
	      iso2022_designate (Vcharset_ascii, 0, str, dst);
	    }

	  /* If necessary, restore everything to the default state
	     at end-of-line */
	  if (c == '\n' &&
	      !(CODING_SYSTEM_ISO2022_NO_ASCII_EOL (codesys)))
	    {
	      restore_left_to_right_direction (codesys, dst, &flags, 0);

	      ensure_normal_shift (str, dst);

	      for (i = 0; i < 4; i++)
		{
		  Lisp_Object initial_charset =
		    CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i);
		  iso2022_designate (initial_charset, i, str, dst);
		}
	    }
	  if (c == '\n')
	    {
	      if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
		Dynarr_add (dst, '\r');
	      if (eol_type != EOL_CR)
		Dynarr_add (dst, c);
	    }
	  else
	    {
	      if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
		  && fit_to_be_escape_quoted (c))
		Dynarr_add (dst, ISO_CODE_ESC);
	      Dynarr_add (dst, c);
	    }
	  char_boundary = 1;
	}

      else if (BUFBYTE_LEADING_BYTE_P (c) || BUFBYTE_LEADING_BYTE_P (ch))
	{ /* Processing Leading Byte */
	  ch = 0;
	  charset = CHARSET_BY_LEADING_BYTE (c);
	  if (LEADING_BYTE_PREFIX_P(c))
	    ch = c;
	  else if (!EQ (charset, Vcharset_control_1)
		   && !EQ (charset, Vcharset_composite))
	    {
	      int reg;

	      ensure_correct_direction (XCHARSET_DIRECTION (charset),
					codesys, dst, &flags, 0);

	      /* Now determine which register to use. */
	      reg = -1;
	      for (i = 0; i < 4; i++)
		{
		  if (EQ (charset, str->iso2022.charset[i]) ||
		      EQ (charset,
			  CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)))
		    {
		      reg = i;
		      break;
		    }
		}

	      if (reg == -1)
		{
		  if (XCHARSET_GRAPHIC (charset) != 0)
		    {
		      if (!NILP (str->iso2022.charset[1]) &&
			  (!CODING_SYSTEM_ISO2022_SEVEN (codesys) ||
			   CODING_SYSTEM_ISO2022_LOCK_SHIFT (codesys)))
			reg = 1;
		      else if (!NILP (str->iso2022.charset[2]))
			reg = 2;
		      else if (!NILP (str->iso2022.charset[3]))
			reg = 3;
		      else
			reg = 0;
		    }
		  else
		    reg = 0;
		}

	      iso2022_designate (charset, reg, str, dst);

	      /* Now invoke that register. */
	      switch (reg)
		{
		case 0:
		  ensure_normal_shift (str, dst);
		  half = 0;
		  break;

		case 1:
		  if (CODING_SYSTEM_ISO2022_SEVEN (codesys))
		    {
		      ensure_shift_out (str, dst);
		      half = 0;
		    }
		  else
		    half = 1;
		  break;

		case 2:
		  if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
		    {
		      Dynarr_add (dst, ISO_CODE_ESC);
		      Dynarr_add (dst, 'N');
		      half = 0;
		    }
		  else
		    {
		      Dynarr_add (dst, ISO_CODE_SS2);
		      half = 1;
		    }
		  break;

		case 3:
		  if (CODING_SYSTEM_ISO2022_SEVEN (str->codesys))
		    {
		      Dynarr_add (dst, ISO_CODE_ESC);
		      Dynarr_add (dst, 'O');
		      half = 0;
		    }
		  else
		    {
		      Dynarr_add (dst, ISO_CODE_SS3);
		      half = 1;
		    }
		  break;

		default:
		  abort ();
		}
	    }
	  char_boundary = 0;
	}
      else
	{			/* Processing Non-ASCII character */
	  charmask = (half == 0 ? 0x7F : 0xFF);
	  char_boundary = 1;
	  if (EQ (charset, Vcharset_control_1))
	    {
	      if (CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys)
		  && fit_to_be_escape_quoted (c))
		Dynarr_add (dst, ISO_CODE_ESC);
	      /* you asked for it ... */
	      Dynarr_add (dst, c - 0x20);
	    }
	  else
	    {
	      switch (XCHARSET_REP_BYTES (charset))
		{
		case 2:
		  Dynarr_add (dst, c & charmask);
		  break;
		case 3:
		  if (XCHARSET_PRIVATE_P (charset))
		    {
		      Dynarr_add (dst, c & charmask);
		      ch = 0;
		    }
		  else if (ch)
		    {
		      if (EQ (charset, Vcharset_composite))
			{
			  if (in_composite)
			    {
			      /* #### Bother! We don't know how to
				 handle this yet. */
			      Dynarr_add (dst, '~');
			    }
			  else
			    {
			      Emchar emch = MAKE_CHAR (Vcharset_composite,
						       ch & 0x7F, c & 0x7F);
			      Lisp_Object lstr = composite_char_string (emch);
			      saved_n = n;
			      saved_src = src;
			      in_composite = 1;
			      src = XSTRING_DATA   (lstr);
			      n   = XSTRING_LENGTH (lstr);
			      Dynarr_add (dst, ISO_CODE_ESC);
			      Dynarr_add (dst, '0'); /* start composing */
			    }
			}
		      else
			{
			  Dynarr_add (dst, ch & charmask);
			  Dynarr_add (dst, c & charmask);
			}
		      ch = 0;
		    }
		  else
		    {
		      ch = c;
		      char_boundary = 0;
		    }
		  break;
		case 4:
		  if (ch)
		    {
		      Dynarr_add (dst, ch & charmask);
		      Dynarr_add (dst, c & charmask);
		      ch = 0;
		    }
		  else
		    {
		      ch = c;
		      char_boundary = 0;
		    }
		  break;
		default:
		  abort ();
		}
	    }
	}
    }

  if (in_composite)
    {
      n = saved_n;
      src = saved_src;
      in_composite = 0;
      Dynarr_add (dst, ISO_CODE_ESC);
      Dynarr_add (dst, '1'); /* end composing */
      goto back_to_square_n; /* Wheeeeeeeee ..... */
    }

  if (char_boundary && flags & CODING_STATE_END)
    {
      restore_left_to_right_direction (codesys, dst, &flags, 0);
      ensure_normal_shift (str, dst);
      for (i = 0; i < 4; i++)
	{
	  Lisp_Object initial_charset =
	    CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i);
	  iso2022_designate (initial_charset, i, str, dst);
	}
    }

  CODING_STREAM_COMPOSE (str, flags, ch);
  str->iso2022.current_char_boundary = char_boundary;
  str->iso2022.current_charset = charset;
  str->iso2022.current_half = half;

  /* Verbum caro factum est! */
}


/************************************************************************/
/*                     No-conversion methods                            */
/************************************************************************/

/* This is used when reading in "binary" files -- i.e. files that may
   contain all 256 possible byte values and that are not to be
   interpreted as being in any particular decoding. */
static void
decode_coding_no_conversion (Lstream *decoding, CONST unsigned char *src,
			     unsigned_char_dynarr *dst, unsigned int n)
{
  unsigned char c;
  unsigned int flags, ch;
  enum eol_type eol_type;
  struct decoding_stream *str = DECODING_STREAM_DATA (decoding);

  CODING_STREAM_DECOMPOSE (str, flags, ch);
  eol_type = str->eol_type;

  while (n--)
    {
      c = *src++;

      DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
      DECODE_ADD_BINARY_CHAR (c, dst);
    label_continue_loop:;
    }

  DECODE_HANDLE_END_OF_CONVERSION (flags, ch, dst);

  CODING_STREAM_COMPOSE (str, flags, ch);
}

static void
encode_coding_no_conversion (Lstream *encoding, CONST unsigned char *src,
			     unsigned_char_dynarr *dst, unsigned int n)
{
  unsigned char c;
  struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
  unsigned int flags, ch;
  enum eol_type eol_type;

  CODING_STREAM_DECOMPOSE (str, flags, ch);
  eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);

  while (n--)
    {
      c = *src++;
      if (c == '\n')
	{
	  if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
	    Dynarr_add (dst, '\r');
	  if (eol_type != EOL_CR)
	    Dynarr_add (dst, '\n');
	  ch = 0;
	}
      else if (BYTE_ASCII_P (c))
	{
	  assert (ch == 0);
	  Dynarr_add (dst, c);
	}
      else if (BUFBYTE_LEADING_BYTE_P (c))
	{
	  assert (ch == 0);
	  if (c == LEADING_BYTE_LATIN_ISO8859_1 ||
	      c == LEADING_BYTE_CONTROL_1)
	    ch = c;
	  else
	    Dynarr_add (dst, '~'); /* untranslatable character */
	}
      else
	{
	  if (ch == LEADING_BYTE_LATIN_ISO8859_1)
	    Dynarr_add (dst, c);
	  else if (ch == LEADING_BYTE_CONTROL_1)
	    {
	      assert (c < 0xC0);
	      Dynarr_add (dst, c - 0x20);
	    }
	  /* else it should be the second or third byte of an
	     untranslatable character, so ignore it */
	  ch = 0;
	}
    }

  CODING_STREAM_COMPOSE (str, flags, ch);
}


/************************************************************************/
/*                   Simple internal/external functions                 */
/************************************************************************/

static Extbyte_dynarr *conversion_out_dynarr;
static Bufbyte_dynarr *conversion_in_dynarr;

/* Determine coding system from coding format */

#define FILE_NAME_CODING_SYSTEM 			\
 ((NILP (Vfile_name_coding_system) ||			\
   (EQ ((Vfile_name_coding_system), Qbinary))) ?	\
  Qnil : Fget_coding_system (Vfile_name_coding_system))

/* #### not correct for all values of `fmt'! */
#define FMT_CODING_SYSTEM(fmt)					\
 (((fmt) == FORMAT_FILENAME) ? FILE_NAME_CODING_SYSTEM     :	\
  ((fmt) == FORMAT_CTEXT   ) ? Fget_coding_system (Qctext) :	\
  ((fmt) == FORMAT_TERMINAL) ? FILE_NAME_CODING_SYSTEM     :	\
  Qnil)

extern CONST Extbyte *
convert_to_external_format (CONST Bufbyte *ptr,
			    Bytecount len,
			    Extcount *len_out,
			    enum external_data_format fmt)
{
  Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt);

  if (!conversion_out_dynarr)
    conversion_out_dynarr = Dynarr_new (Extbyte);
  else
    Dynarr_reset (conversion_out_dynarr);

  if (NILP (coding_system))
    {
      CONST Bufbyte *end = ptr + len;

      for (; ptr < end;)
        {
          Bufbyte c =
            (BYTE_ASCII_P (*ptr))		   ? *ptr :
            (*ptr == LEADING_BYTE_CONTROL_1)	   ? (*(ptr+1) - 0x20) :
            (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) :
            '~';

          Dynarr_add (conversion_out_dynarr, (Extbyte) c);
          INC_CHARPTR (ptr);
        }

#ifdef ERROR_CHECK_BUFPOS
      assert (ptr == end);
#endif
    }
  else
    {
      Lisp_Object instream, outstream, da_outstream;
      Lstream *istr, *ostr;
      struct gcpro gcpro1, gcpro2, gcpro3;
      char tempbuf[1024]; /* some random amount */

      instream = make_fixed_buffer_input_stream ((unsigned char *) ptr, len);
      da_outstream = make_dynarr_output_stream
        ((unsigned_char_dynarr *) conversion_out_dynarr);
      outstream =
        make_encoding_output_stream (XLSTREAM (da_outstream), coding_system);
      istr = XLSTREAM (instream);
      ostr = XLSTREAM (outstream);
      GCPRO3 (instream, outstream, da_outstream);
      while (1)
        {
          int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
          if (!size_in_bytes)
            break;
          Lstream_write (ostr, tempbuf, size_in_bytes);
        }
      Lstream_close (istr);
      Lstream_close (ostr);
      UNGCPRO;
      Lstream_delete (istr);
      Lstream_delete (ostr);
      Lstream_delete (XLSTREAM (da_outstream));
    }

  *len_out = Dynarr_length (conversion_out_dynarr);
  Dynarr_add (conversion_out_dynarr, 0); /* remember to zero-terminate! */
  return Dynarr_atp (conversion_out_dynarr, 0);
}

extern CONST Bufbyte *
convert_from_external_format (CONST Extbyte *ptr,
			      Extcount len,
			      Bytecount *len_out,
			      enum external_data_format fmt)
{
  Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt);

  if (!conversion_in_dynarr)
    conversion_in_dynarr = Dynarr_new (Bufbyte);
  else
    Dynarr_reset (conversion_in_dynarr);

  if (NILP (coding_system))
    {
      CONST Extbyte *end = ptr + len;
      for (; ptr < end; ptr++)
        {
          Extbyte c = *ptr;
          DECODE_ADD_BINARY_CHAR (c, conversion_in_dynarr);
        }
    }
  else
    {
      Lisp_Object instream, outstream, da_outstream;
      Lstream *istr, *ostr;
      struct gcpro gcpro1, gcpro2, gcpro3;
      char tempbuf[1024]; /* some random amount */

      instream = make_fixed_buffer_input_stream ((unsigned char *) ptr, len);
      da_outstream = make_dynarr_output_stream
        ((unsigned_char_dynarr *) conversion_in_dynarr);
      outstream =
        make_decoding_output_stream (XLSTREAM (da_outstream), coding_system);
      istr = XLSTREAM (instream);
      ostr = XLSTREAM (outstream);
      GCPRO3 (instream, outstream, da_outstream);
      while (1)
        {
          int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
          if (!size_in_bytes)
            break;
          Lstream_write (ostr, tempbuf, size_in_bytes);
        }
      Lstream_close (istr);
      Lstream_close (ostr);
      UNGCPRO;
      Lstream_delete (istr);
      Lstream_delete (ostr);
      Lstream_delete (XLSTREAM (da_outstream));
    }

  *len_out = Dynarr_length (conversion_in_dynarr);
  Dynarr_add (conversion_in_dynarr, 0); /* remember to zero-terminate! */
  return Dynarr_atp (conversion_in_dynarr, 0);
}


/************************************************************************/
/*                             Initialization                           */
/************************************************************************/

void
syms_of_mule_coding (void)
{
  defsymbol (&Qbuffer_file_coding_system, "buffer-file-coding-system");
  deferror (&Qcoding_system_error, "coding-system-error",
	    "Coding-system error", Qio_error);

  DEFSUBR (Fcoding_system_p);
  DEFSUBR (Ffind_coding_system);
  DEFSUBR (Fget_coding_system);
  DEFSUBR (Fcoding_system_list);
  DEFSUBR (Fcoding_system_name);
  DEFSUBR (Fmake_coding_system);
  DEFSUBR (Fcopy_coding_system);
  DEFSUBR (Fsubsidiary_coding_system);

  DEFSUBR (Fcoding_system_type);
  DEFSUBR (Fcoding_system_doc_string);
  DEFSUBR (Fcoding_system_charset);
  DEFSUBR (Fcoding_system_property);

  DEFSUBR (Fcoding_category_list);
  DEFSUBR (Fset_coding_priority_list);
  DEFSUBR (Fcoding_priority_list);
  DEFSUBR (Fset_coding_category_system);
  DEFSUBR (Fcoding_category_system);

  DEFSUBR (Fdetect_coding_region);
  DEFSUBR (Fdecode_coding_region);
  DEFSUBR (Fencode_coding_region);
  DEFSUBR (Fdecode_shift_jis_char);
  DEFSUBR (Fencode_shift_jis_char);
  DEFSUBR (Fdecode_big5_char);
  DEFSUBR (Fencode_big5_char);

  defsymbol (&Qcoding_system_p, "coding-system-p");

  defsymbol (&Qbig5, "big5");
  defsymbol (&Qshift_jis, "shift-jis");
  defsymbol (&Qno_conversion, "no-conversion");
  defsymbol (&Qccl, "ccl");
  defsymbol (&Qiso2022, "iso2022");

  defsymbol (&Qmnemonic, "mnemonic");
  defsymbol (&Qeol_type, "eol-type");
  defsymbol (&Qpost_read_conversion, "post-read-conversion");
  defsymbol (&Qpre_write_conversion, "pre-write-conversion");

  defsymbol (&Qcr, "cr");
  defsymbol (&Qlf, "lf");
  defsymbol (&Qcrlf, "crlf");
  defsymbol (&Qeol_cr, "eol-cr");
  defsymbol (&Qeol_lf, "eol-lf");
  defsymbol (&Qeol_crlf, "eol-crlf");

  defsymbol (&Qcharset_g0, "charset-g0");
  defsymbol (&Qcharset_g1, "charset-g1");
  defsymbol (&Qcharset_g2, "charset-g2");
  defsymbol (&Qcharset_g3, "charset-g3");
  defsymbol (&Qforce_g0_on_output, "force-g0-on-output");
  defsymbol (&Qforce_g1_on_output, "force-g1-on-output");
  defsymbol (&Qforce_g2_on_output, "force-g2-on-output");
  defsymbol (&Qforce_g3_on_output, "force-g3-on-output");
  defsymbol (&Qshort, "short");
  defsymbol (&Qno_ascii_eol, "no-ascii-eol");
  defsymbol (&Qno_ascii_cntl, "no-ascii-cntl");
  defsymbol (&Qseven, "seven");
  defsymbol (&Qlock_shift, "lock-shift");
  defsymbol (&Qno_iso6429, "no-iso6429");
  defsymbol (&Qescape_quoted, "escape-quoted");
  defsymbol (&Qinput_charset_conversion, "input-charset-conversion");
  defsymbol (&Qoutput_charset_conversion, "output-charset-conversion");

  defsymbol (&Qencode, "encode");
  defsymbol (&Qdecode, "decode");

  defsymbol (&Qctext, "ctext");

  defsymbol (&coding_category_symbol[CODING_CATEGORY_SHIFT_JIS],
	     "shift-jis");
  defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_7],
	     "iso-7");
  defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_8_DESIGNATE],
	     "iso-8-designate");
  defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_8_1],
	     "iso-8-1");
  defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_8_2],
	     "iso-8-2");
  defsymbol (&coding_category_symbol[CODING_CATEGORY_ISO_LOCK_SHIFT],
	     "iso-lock-shift");
  defsymbol (&coding_category_symbol[CODING_CATEGORY_BIG5],
	     "big5");
  defsymbol (&coding_category_symbol[CODING_CATEGORY_NO_CONVERSION],
	     "no-conversion");
}

void
lstream_type_create_mule_coding (void)
{
  LSTREAM_HAS_METHOD (decoding, reader);
  LSTREAM_HAS_METHOD (decoding, writer);
  LSTREAM_HAS_METHOD (decoding, rewinder);
  LSTREAM_HAS_METHOD (decoding, seekable_p);
  LSTREAM_HAS_METHOD (decoding, flusher);
  LSTREAM_HAS_METHOD (decoding, closer);
  LSTREAM_HAS_METHOD (decoding, marker);

  LSTREAM_HAS_METHOD (encoding, reader);
  LSTREAM_HAS_METHOD (encoding, writer);
  LSTREAM_HAS_METHOD (encoding, rewinder);
  LSTREAM_HAS_METHOD (encoding, seekable_p);
  LSTREAM_HAS_METHOD (encoding, flusher);
  LSTREAM_HAS_METHOD (encoding, closer);
  LSTREAM_HAS_METHOD (encoding, marker);
}

void
vars_of_mule_coding (void)
{
  int i;

  /* Initialize to something reasonable ... */
  for (i = 0; i <= CODING_CATEGORY_LAST; i++)
    {
      coding_category_system[i] = Qnil;
      coding_category_by_priority[i] = i;
    }

  DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /*
Coding system used for TTY keyboard input.
Not used under a windowing system.
*/ );
  Vkeyboard_coding_system = Qnil;

  DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /*
Coding system used for TTY display output.
Not used under a windowing system.
*/ );
  Vterminal_coding_system = Qnil;

  DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /*
Overriding coding system used when writing a file or process.
You should *bind* this, not set it.  If this is non-nil, it specifies
the coding system that will be used when a file or process is read
in, and overrides `buffer-file-coding-system-for-read',
`insert-file-contents-pre-hook', etc.  Use those variables instead of
this one for permanent changes to the environment.
*/ );
  Vcoding_system_for_read = Qnil;

  DEFVAR_LISP ("coding-system-for-write",
               &Vcoding_system_for_write /*
Overriding coding system used when writing a file or process.
You should *bind* this, not set it.  If this is non-nil, it specifies
the coding system that will be used when a file or process is wrote
in, and overrides `buffer-file-coding-system',
`write-region-pre-hook', etc.  Use those variables instead of this one
for permanent changes to the environment.
*/ );
  Vcoding_system_for_write = Qnil;

  DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /*
Coding system used to convert pathnames when accessing files.
*/ );
  Vfile_name_coding_system = Qnil;

  DEFVAR_BOOL ("enable-multibyte-characters", &enable_multibyte_characters /*
Non-nil means the buffer contents are regarded as multi-byte form
of characters, not a binary code.  This affects the display, file I/O,
and behaviors of various editing commands.

Setting this to nil does not do anything.
*/ );
  enable_multibyte_characters = 1;
}

void
complex_vars_of_mule_coding (void)
{
  staticpro (&Vcoding_system_hashtable);
  Vcoding_system_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK,
						  HASHTABLE_EQ);

  the_codesys_prop_dynarr = Dynarr_new (codesys_prop);

#define DEFINE_CODESYS_PROP(Prop_Type, Sym) do	\
{						\
  struct codesys_prop csp;			\
  csp.sym = (Sym);				\
  csp.prop_type = (Prop_Type);			\
  Dynarr_add (the_codesys_prop_dynarr, csp);	\
} while (0)

  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qmnemonic);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qeol_type);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qeol_cr);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qeol_crlf);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qeol_lf);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qpost_read_conversion);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ALL_OK,  Qpre_write_conversion);

  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qcharset_g0);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qcharset_g1);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qcharset_g2);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qcharset_g3);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qforce_g0_on_output);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qforce_g1_on_output);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qforce_g2_on_output);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qforce_g3_on_output);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qshort);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qno_ascii_eol);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qno_ascii_cntl);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qseven);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qlock_shift);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qno_iso6429);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qescape_quoted);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qinput_charset_conversion);
  DEFINE_CODESYS_PROP (CODESYS_PROP_ISO2022, Qoutput_charset_conversion);

  DEFINE_CODESYS_PROP (CODESYS_PROP_CCL,     Qencode);
  DEFINE_CODESYS_PROP (CODESYS_PROP_CCL,     Qdecode);

  /* Need to create this here or we're really screwed. */
  Fmake_coding_system (Qno_conversion, Qno_conversion, build_string ("No conversion"),
		       list2 (Qmnemonic, build_string ("Noconv")));

  Fcopy_coding_system (Fcoding_system_property (Qno_conversion, Qeol_lf),
		       Qbinary);

  /* Need this for bootstrapping */
  coding_category_system[CODING_CATEGORY_NO_CONVERSION] =
    Fget_coding_system (Qno_conversion);
}

#endif