diff src/text.h @ 2367:ecf1ebac70d8

[xemacs-hg @ 2004-11-04 23:05:23 by ben] commit mega-patch configure.in: Turn off -Winline and -Wchar-subscripts. Use the right set of cflags when compiling modules. Rewrite ldap configuration to separate the inclusion of lber (needed in recent Cygwin) from the basic checks for the needed libraries. add a function for MAKE_JUNK_C; initially code was added to generate xemacs.def using this, but it will need to be rewritten. add an rm -f for junk.c to avoid weird Cygwin bug with cp -f onto an existing file. Sort list of auto-detected functions and eliminate unused checks for stpcpy, setlocale and getwd. Add autodetection of Cygwin scanf problems BETA: Rewrite section on configure to indicate what flags are important and what not. digest-doc.c, make-dump-id.c, profile.c, sorted-doc.c: Add proper decls for main(). make-msgfile.c: Document that this is old junk. Move proposal to text.c. make-msgfile.lex: Move proposal to text.c. make-mswin-unicode.pl: Convert error-generating code so that the entire message will be seen as a single unrecognized token. mule/mule-ccl.el: Update docs. lispref/mule.texi: Update CCL docs. ldap/eldap.c: Mule-ize. Use EXTERNAL_LIST_LOOP_2 instead of deleted EXTERNAL_LIST_LOOP. * XEmacs 21.5.18 "chestnut" is released. --------------------------------------------------------------- MULE-RELATED WORK: --------------------------------------------------------------- --------------------------- byte-char conversion --------------------------- buffer.c, buffer.h, insdel.c, text.c: Port FSF algorithm for byte-char conversion, replacing broken previous version. Track the char position of the gap. Add functions to do char-byte conversion downwards as well as upwards. Move comments about algorithm workings to internals manual. 
--------------------------- work on types --------------------------- alloc.c, console-x-impl.h, dump-data.c, dump-data.h, dumper.c, dialog-msw.c, dired-msw.c, doc.c, editfns.c, esd.c, event-gtk.h, event-msw.c, events.c, file-coding.c, file-coding.h, fns.c, glyphs-eimage.c, glyphs-gtk.c, glyphs-msw.c, glyphs-shared.c, glyphs-x.c, glyphs.c, glyphs.h, gui.c, hpplay.c, imgproc.c, intl-win32.c, lrecord.h, lstream.c, keymap.c, lisp.h, libsst.c, linuxplay.c, miscplay.c, miscplay.h, mule-coding.c, nas.c, nt.c, ntheap.c, ntplay.c, objects-msw.c, objects-tty.c, objects-x.c, print.c, process-nt.c, process.c, redisplay.h, select-common.h, select-gtk.c, select-x.c, sgiplay.c, sound.c, sound.h, sunplay.c, sysfile.h, sysdep.c, syswindows.h, text.c, unexnt.c, win32.c, xgccache.c: Further work on types. This creates a full set of types for all the basic semantics of `char' that I have so far identified, so that its semantics can always be identified for the purposes of proper Mule-safe code, and the raw use of `char' always avoided. (1) More type renaming, for consistency of naming. Char_ASCII -> Ascbyte UChar_ASCII -> UAscbyte Char_Binary -> CBinbyte UChar_Binary -> Binbyte SChar_Binary -> SBinbyte (2) Introduce Rawbyte, CRawbyte, Boolbyte, Chbyte, UChbyte, and Bitbyte and use them. (3) New types Itext, Wexttext and Textcount for separating out the concepts of bytes and textual units (different under UTF-16 and UTF-32, which are potential internal encodings). (4) qxestr*_c -> qxestr*_ascii. lisp.h: New; goes with other qxe() functions. #### Maybe goes in a different section. lisp.h: Group generic int-type defs together with EMACS_INT defs. lisp.h: * lisp.h (WEXTTEXT_IS_WIDE) New defns. lisp.h: New type to replace places where int occurs as a boolean. It's signed because occasionally people may want to use -1 as an error value, and because unsigned ints are viral -- see comments in the internals manual against using them. dynarr.c: int -> Bytecount. 
--------------------------- Mule-izing --------------------------- device-x.c: Partially Mule-ize. dumper.c, dumper.h: Mule-ize. Use Rawbyte. Use stderr_out not printf. Use wext_*(). sysdep.c, syswindows.h, text.c: New Wexttext API for manipulation of external text that may be Unicode (e.g. startup code under Windows). emacs.c: Mule-ize. Properly deal with argv in external encoding. Use wext_*() and Wexttext. Use Rawbyte. #if 0 some old junk on SCO that is unlikely to be correct. Rewrite allocation code in run-temacs. emacs.c, symsinit.h, win32.c: Rename win32 init function and call it even earlier, to initialize mswindows_9x_p even earlier, for use in startup code (XEUNICODE_P). process.c: Use _wenviron not environ under Windows, to get Unicode environment variables. event-Xt.c: Mule-ize drag-n-drop related stuff. dragdrop.c, dragdrop.h, frame-x.c: Mule-ize. text.h: Add some more stand-in defines for particular kinds of conversion; use in Mule-ization work in frame-x.c etc. --------------------------- Freshening --------------------------- intl-auto-encap-win32.c, intl-auto-encap-win32.h: Regenerate. --------------------------- Unicode-work --------------------------- intl-win32.c, syswindows.h: Factor out common options to MultiByteToWideChar and WideCharToMultiByte. Add convert_unicode_to_multibyte_malloc() and convert_unicode_to_multibyte_dynarr() and use. Add stuff for alloca() conversion of multibyte/unicode. alloc.c: Use dfc_external_data_len() in case of unicode coding system. alloc.c, mule-charset.c: Don't zero out and reinit charset Unicode tables. This fucks up dump-time loading. Anyway, either we load them at dump time or run time, never both. unicode.c: Dump the blank tables as well. 
--------------------------------------------------------------- DOCUMENTATION, MOSTLY MULE-RELATED: --------------------------------------------------------------- EmacsFrame.c, emodules.c, event-Xt.c, fileio.c, input-method-xlib.c, mule-wnnfns.c, redisplay-gtk.c, redisplay-tty.c, redisplay-x.c, regex.c, sysdep.c: Add comment about Mule work needed. text.h: Add more documentation describing why DFC routines were not written to return their value. Add some other DFC documentation. console-msw.c, console-msw.h: Add pointer to docs in win32.c. emacs.c: Add comments on sources of doc info. text.c, charset.h, unicode.c, intl-win32.c, intl-encap-win32.c, text.h, file-coding.c, mule-coding.c: Collect background comments and related to text matters and internationalization, and proposals for work to be done, in text.c or Internals manual, stuff related to specific textual API's in text.h, and stuff related to internal implementation of Unicode conversion in unicode.c. Put lots of pointers to the comments to make them easier to find. s/mingw32.h, s/win32-common.h, s/win32-native.h, s/windowsnt.h, win32.c: Add bunches of new documentation on the different kinds of builds and environments under Windows and how they work. Collect this info in win32.c. Add pointers to these docs in the relevant s/* files. emacs.c: Document places with long comments. Remove comment about exiting, move to internals manual, put in pointer. event-stream.c: Move docs about event queues and focus to internals manual, put in pointer. events.h: Move docs about event stream callbacks to internals manual, put in pointer. profile.c, redisplay.c, signal.c: Move documentation to the Internals manual. process-nt.c: Add pointer to comment in win32-native.el. lisp.h: Add comments about some comment conventions. lisp.h: Add comment about the second argument. device-msw.c, redisplay-msw.c: @@#### comments are out-of-date. 
--------------------------------------------------------------- PDUMP WORK (MOTIVATED BY UNICODE CHANGES) --------------------------------------------------------------- alloc.c, buffer.c, bytecode.c, console-impl.h, console.c, device.c, dumper.c, lrecord.h, elhash.c, emodules.h, events.c, extents.c, frame.c, glyphs.c, glyphs.h, mule-charset.c, mule-coding.c, objects.c, profile.c, rangetab.c, redisplay.c, specifier.c, specifier.h, window.c, lstream.c, file-coding.h, file-coding.c: PDUMP: Properly implement dump_add_root_block(), which never worked before, and is necessary for dumping Unicode tables. Pdump name changes for accuracy: XD_STRUCT_PTR -> XD_BLOCK_PTR. XD_STRUCT_ARRAY -> XD_BLOCK_ARRAY. XD_C_STRING -> XD_ASCII_STRING. *_structure_* -> *_block_*. lrecord.h: some comments added about dump_add_root_block() vs dump_add_root_block_ptr(). extents.c: remove incorrect comment about pdump problems with gap array. --------------------------------------------------------------- ALLOCATION --------------------------------------------------------------- abbrev.c, alloc.c, bytecode.c, casefiddle.c, device-msw.c, device-x.c, dired-msw.c, doc.c, doprnt.c, dragdrop.c, editfns.c, emodules.c, file-coding.c, fileio.c, filelock.c, fns.c, glyphs-eimage.c, glyphs-gtk.c, glyphs-msw.c, glyphs-x.c, gui-msw.c, gui-x.c, imgproc.c, intl-win32.c, lread.c, menubar-gtk.c, menubar.c, nt.c, objects-msw.c, objects-x.c, print.c, process-nt.c, process-unix.c, process.c, realpath.c, redisplay.c, search.c, select-common.c, symbols.c, sysdep.c, syswindows.h, text.c, text.h, ui-byhand.c: New macros {alloca,xnew}_{itext,{i,ext,raw,bin,asc}bytes} for more convenient allocation of these commonly requested items. Modify functions to use alloca_ibytes, alloca_array, alloca_extbytes, xnew_ibytes, etc. also XREALLOC_ARRAY, xnew. alloc.c: Rewrite the allocation functions to factor out repeated code. Add assertions for freeing dumped data. lisp.h: Moved down and consolidated with other allocation stuff. 
lisp.h, dynarr.c: New functions for allocation that's very efficient when mostly in LIFO order. lisp.h, text.c, text.h: Factor out some stuff for general use by alloca()-conversion funs. text.h, lisp.h: Fill out convenience routines for allocating various kinds of bytes and put them in lisp.h. Use them in place of xmalloc(), ALLOCA(). text.h: Fill out the convenience functions so the _MALLOC() kinds match the alloca() kinds. --------------------------------------------------------------- ERROR-CHECKING --------------------------------------------------------------- text.h: Create ASSERT_ASCTEXT_ASCII() and ASSERT_ASCTEXT_ASCII_LEN() from similar Eistring checkers and change the Eistring checkers to use them instead. --------------------------------------------------------------- MACROS IN LISP.H --------------------------------------------------------------- lisp.h: Redo GCPRO declarations. Create a "base" set of functions that can be used to generate any kind of gcpro sets -- regular, ngcpro, nngcpro, private ones used in GC_EXTERNAL_LIST_LOOP_2. buffer.c, callint.c, chartab.c, console-msw.c, device-x.c, dialog-msw.c, dired.c, extents.c, ui-gtk.c, rangetab.c, nt.c, mule-coding.c, minibuf.c, menubar-msw.c, menubar.c, menubar-gtk.c, lread.c, lisp.h, gutter.c, glyphs.c, glyphs-widget.c, fns.c, fileio.c, file-coding.c, specifier.c: Eliminate EXTERNAL_LIST_LOOP, which does not check for circularities. Use EXTERNAL_LIST_LOOP_2 instead or EXTERNAL_LIST_LOOP_3 or EXTERNAL_PROPERTY_LIST_LOOP_3 or GC_EXTERNAL_LIST_LOOP_2 (new macro). Removed/redid comments on EXTERNAL_LIST_LOOP. --------------------------------------------------------------- SPACING FIXES --------------------------------------------------------------- callint.c, hftctl.c, number-gmp.c, process-unix.c: Spacing fixes. 
--------------------------------------------------------------- FIX FOR GEOMETRY PROBLEM IN FIRST FRAME --------------------------------------------------------------- unicode.c: Add workaround for newlib bug in sscanf() [should be fixed by release 1.5.12 of Cygwin]. toolbar.c: bug fix for problem of initial frame being 77 chars wide on Windows. will be overridden by my other ws. --------------------------------------------------------------- FIX FOR LEAKING PROCESS HANDLES: --------------------------------------------------------------- process-nt.c: Fixes for leaking handles. Inspired by work done by Adrian Aichner <adrian@xemacs.org>. --------------------------------------------------------------- FIX FOR CYGWIN BUG (Unicode-related): --------------------------------------------------------------- unicode.c: Add workaround for newlib bug in sscanf() [should be fixed by release 1.5.12 of Cygwin]. --------------------------------------------------------------- WARNING FIXES: --------------------------------------------------------------- console-stream.c: `reinit' is unused. compiler.h, event-msw.c, frame-msw.c, intl-encap-win32.c, text.h: Add stuff to deal with ANSI-aliasing warnings I got. regex.c: Gather includes together to avoid warning. --------------------------------------------------------------- CHANGES TO INITIALIZATION ROUTINES: --------------------------------------------------------------- buffer.c, emacs.c, console.c, debug.c, device-x.c, device.c, dragdrop.c, emodules.c, eval.c, event-Xt.c, event-gtk.c, event-msw.c, event-stream.c, event-tty.c, events.c, extents.c, faces.c, file-coding.c, fileio.c, font-lock.c, frame-msw.c, glyphs-widget.c, glyphs.c, gui-x.c, insdel.c, lread.c, lstream.c, menubar-gtk.c, menubar-x.c, minibuf.c, mule-wnnfns.c, objects-msw.c, objects.c, print.c, scrollbar-x.c, search.c, select-x.c, text.c, undo.c, unicode.c, window.c, symsinit.h: Call reinit_*() functions directly from emacs.c, for clarity. 
Factor out some redundant init code. Move disallowed stuff that had crept into vars_of_glyphs() into complex_vars_of_glyphs(). Call init_eval_semi_early() from eval.c not in the middle of vars_of_() in emacs.c since there should be no order dependency in the latter calls. --------------------------------------------------------------- ARMAGEDDON: --------------------------------------------------------------- alloc.c, emacs.c, lisp.h, print.c: Rename inhibit_non_essential_printing_operations to inhibit_non_essential_conversion_operations. text.c: Assert on !inhibit_non_essential_conversion_operations. console-msw.c, print.c: Don't do conversion in SetConsoleTitle or FindWindow to avoid problems during armageddon. Put #errors for NON_ASCII_INTERNAL_FORMAT in places where problems would arise. --------------------------------------------------------------- CHANGES TO THE BUILD PROCEDURE: --------------------------------------------------------------- config.h.in, s/cxux.h, s/usg5-4-2.h, m/powerpc.h: Add comment about correct ordering of this file. Rearrange everything to follow this -- put all #undefs together and before the s&m files. Add undefs for HAVE_ALLOCA, C_ALLOCA, BROKEN_ALLOCA_IN_FUNCTION_CALLS, STACK_DIRECTION. Remove unused HAVE_STPCPY, HAVE_GETWD, HAVE_SETLOCALE. m/gec63.h: Deleted; totally broken, not used at all, not in FSF. 
m/7300.h, m/acorn.h, m/alliant-2800.h, m/alliant.h, m/altos.h, m/amdahl.h, m/apollo.h, m/att3b.h, m/aviion.h, m/celerity.h, m/clipper.h, m/cnvrgnt.h, m/convex.h, m/cydra5.h, m/delta.h, m/delta88k.h, m/dpx2.h, m/elxsi.h, m/ews4800r.h, m/gould.h, m/hp300bsd.h, m/hp800.h, m/hp9000s300.h, m/i860.h, m/ibmps2-aix.h, m/ibmrs6000.h, m/ibmrt-aix.h, m/ibmrt.h, m/intel386.h, m/iris4d.h, m/iris5d.h, m/iris6d.h, m/irist.h, m/isi-ov.h, m/luna88k.h, m/m68k.h, m/masscomp.h, m/mg1.h, m/mips-nec.h, m/mips-siemens.h, m/mips.h, m/news.h, m/nh3000.h, m/nh4000.h, m/ns32000.h, m/orion105.h, m/pfa50.h, m/plexus.h, m/pmax.h, m/powerpc.h, m/pyrmips.h, m/sequent-ptx.h, m/sequent.h, m/sgi-challenge.h, m/symmetry.h, m/tad68k.h, m/tahoe.h, m/targon31.h, m/tekxd88.h, m/template.h, m/tower32.h, m/tower32v3.h, m/ustation.h, m/vax.h, m/wicat.h, m/xps100.h: Delete C_ALLOCA, HAVE_ALLOCA, STACK_DIRECTION, BROKEN_ALLOCA_IN_FUNCTION_CALLS. All of this is auto-detected. When in doubt, I followed recent FSF sources, which also have these things deleted.
author ben
date Thu, 04 Nov 2004 23:08:28 +0000
parents ba4677f54a05
children ac4ffbd57062
line wrap: on
line diff
--- a/src/text.h	Thu Nov 04 22:51:31 2004 +0000
+++ b/src/text.h	Thu Nov 04 23:08:28 2004 +0000
@@ -1,7 +1,7 @@
 /* Header file for text manipulation primitives and macros.
    Copyright (C) 1985-1995 Free Software Foundation, Inc.
    Copyright (C) 1995 Sun Microsystems, Inc.
-   Copyright (C) 2000, 2001, 2002, 2003 Ben Wing.
+   Copyright (C) 2000, 2001, 2002, 2003, 2004 Ben Wing.
 
 This file is part of XEmacs.
 
@@ -166,73 +166,8 @@
 
 #endif /* not MULE */
 
-/* ---------------- Handling non-default formats ----------------- */
-
-/* We support, at least to some extent, formats other than the default
-   variable-width format, for speed; all of these alternative formats are
-   fixed-width.  Currently we only handle these non-default formats in
-   buffers, because access to their text is strictly controlled and thus
-   the details of the format mostly compartmentalized.  The only really
-   tricky part is the search code -- the regex, Boyer-Moore, and
-   simple-search algorithms in search.c and regex.c.  All other code that
-   knows directly about the buffer representation is the basic code to
-   modify or retrieve the buffer text.
-
-   Supporting fixed-width formats in Lisp strings is harder, but possible
-   -- FSF currently does this, for example.  In this case, however,
-   probably only 8-bit-fixed is reasonable for Lisp strings -- getting
-   non-ASCII-compatible fixed-width formats to work is much, much harder
-   because a lot of code assumes that strings are ASCII-compatible
-   (i.e. ASCII + other characters represented exclusively using high-bit
-   bytes) and a lot of code mixes Lisp strings and non-Lisp strings freely.
-
-   The different possible fixed-width formats are 8-bit fixed, 16-bit
-   fixed, and 32-bit fixed.  The latter can represent all possible
-   characters, but at a substantial memory penalty.  The other two can
-   represent only a subset of the possible characters.  How these subsets
-   are defined can be simple or very tricky.
-
-   Currently we support only the default format and the 8-bit fixed format,
-   and in the latter, we only allow these to be the first 256 characters in
-   an Ichar (ASCII and Latin 1).
-   
-   One reasonable approach for 8-bit fixed is to allow the upper half to
-   represent any 1-byte charset, which is specified on a per-buffer basis.
-   This should work fairly well in practice since most documents are in
-   only one foreign language (possibly with some English mixed in).  I
-   think FSF does something like this; or at least, they have something
-   called nonascii-translation-table and use it when converting from
-   8-bit-fixed text ("unibyte text") to default text ("multibyte text").
-   With 16-bit fixed, you could do something like assign chunks of the 64K
-   worth of characters to charsets as they're encountered in documents.
-   This should work well with most Asian documents.
-
-   If/when we switch to using Unicode internally, we might have formats more
-   like this:
-
-   -- UTF-8 or some extension as the default format.  Perl uses an
-   extension that handles 64-bit chars and requires as much as 13 bytes per
-   char, vs. the standard of 31-bit chars and 6 bytes max.  UTF-8 has the
-   same basic properties as our own variable-width format (see text.c,
-   Internal String Encoding) and so most code would not need to be changed.
-
-   -- UTF-16 as a "pseudo-fixed" format (i.e. 16-bit fixed plus surrogates
-   for representing characters not in the BMP, aka >= 65536).  The vast
-   majority of documents will have no surrogates in them so byte/char
-   conversion will be very fast.
-
-   -- an 8-bit fixed format, like currently.
-   
-   -- possibly, UCS-4 as a 32-bit fixed format.
-
-   The fixed-width formats essentially treat the buffer as an array of
-   8-bit, 16-bit or 32-bit integers.  This means that how they are stored
-   in memory (in particular, big-endian or little-endian) depends on the
-   native format of the machine's processor.  It also means we have to
-   worry a bit about alignment (basically, we just need to keep the gap an
-   integral size of the character size, and get things aligned properly
-   when converting the buffer between formats).
-   */
+/* For more discussion, see text.c, "handling non-default formats" */
+
 typedef enum internal_format
 {
   FORMAT_DEFAULT,
@@ -603,6 +538,26 @@
 
 #endif /* MULE */
 
+#ifdef ERROR_CHECK_TEXT
+/* Assert that text is pure ASCII (every byte in 0x00-0x7F): the first LEN
+   bytes at PTR for the _LEN form, the whole NUL-terminated string otherwise. */
+#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len)			\
+do {								\
+  const Ascbyte *aia2ptr = (ptr);				\
+  int aia2, aia2len = (len);					\
+  for (aia2 = 0; aia2 < aia2len; aia2++)			\
+    assert ((unsigned char) aia2ptr[aia2] <= 0x7F);		\
+} while (0)
+#define ASSERT_ASCTEXT_ASCII(ptr)			\
+do {							\
+  const Ascbyte *aiaz2 = (ptr);				\
+  ASSERT_ASCTEXT_ASCII_LEN (aiaz2, strlen (aiaz2));	\
+} while (0)
+#else
+#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len)
+#define ASSERT_ASCTEXT_ASCII(ptr)
+#endif
+
 /* -------------------------------------------------------------- */
 /*      Working with the length (in bytes and characters) of a    */
 /*               section of internally-formatted text 	          */
@@ -672,6 +627,68 @@
     return charcount_to_bytecount_fun (ptr, len);
 }
 
+MODULE_API Bytecount
+charcount_to_bytecount_down_fun (const Ibyte *ptr, Charcount len);
+
+/* Return the number of characters contained in the LEN bytes of text that
+   end at PTR, i.e. in the stretch [PTR - LEN, PTR). */
+
+DECLARE_INLINE_HEADER (
+Charcount
+bytecount_to_charcount_down (const Ibyte *ptr, Bytecount len)
+)
+{
+  const Ibyte *stretch_start = ptr - len;
+  return bytecount_to_charcount (stretch_start, len);
+}
+
+/* Given a pointer to a text string and a length in characters, return the
+   equivalent length in bytes of the stretch of LEN characters that ends
+   just BEFORE the pointer.  LEN must be non-negative.  When the build
+   defines SLEDGEHAMMER_CHECK_TEXT, the result is computed both inline and
+   by charcount_to_bytecount_down_fun() and the two are compared. */
+
+DECLARE_INLINE_HEADER (
+Bytecount
+charcount_to_bytecount_down (const Ibyte *ptr, Charcount len)
+)
+{
+#ifdef SLEDGEHAMMER_CHECK_TEXT
+  Charcount len1 = len;
+  Bytecount ret1, ret2;
+
+  /* To test the correctness of the function version, always do the
+     calculation both ways and check that the values are the same. */
+  text_checking_assert (len >= 0);
+  {
+    const Ibyte *newptr = ptr;
+    while (len1 > 0)
+      {
+	DEC_IBYTEPTR (newptr);
+	len1--;
+      }
+    ret1 = ptr - newptr;
+  }
+  ret2 = charcount_to_bytecount_down_fun (ptr, len);
+  text_checking_assert (ret1 == ret2);
+  return ret1;
+#else
+  text_checking_assert (len >= 0);
+  if (len < 20) /* See above */
+    {
+      const Ibyte *newptr = ptr;
+      while (len > 0)
+	{
+	  DEC_IBYTEPTR (newptr);
+	  len--;
+	}
+      return ptr - newptr;
+    }
+  else
+    return charcount_to_bytecount_down_fun (ptr, len);
+#endif /* SLEDGEHAMMER_CHECK_TEXT */
+}
+
 /* Given a pointer to a text string in the specified format and a length in
    bytes, return the equivalent length in characters.
 */
@@ -991,7 +1008,7 @@
 
 
 /* ---------------------------- */
-/*     Working with Ichars     */
+/*      Working with Ichars     */
 /* ---------------------------- */
 
 /* NOTE: There are other functions/macros for working with Ichars in
@@ -1100,24 +1117,10 @@
 do {									\
   Ibyte **_lta_ = (Ibyte **) &(lval);					\
   Lisp_Object _lta_2 = (s);						\
-  *_lta_ = alloca_array (Ibyte, 1 + XSTRING_LENGTH (_lta_2));		\
+  *_lta_ = alloca_ibytes (1 + XSTRING_LENGTH (_lta_2));		\
   memcpy (*_lta_, XSTRING_DATA (_lta_2), 1 + XSTRING_LENGTH (_lta_2));	\
 } while (0)
 
-/* Make an alloca'd copy of a Ibyte * */
-#define IBYTE_STRING_TO_ALLOCA(p, lval)		\
-do {						\
-  Ibyte **_bsta_ = (Ibyte **) &(lval);		\
-  const Ibyte *_bsta_2 = (p);			\
-  Bytecount _bsta_3 = qxestrlen (_bsta_2);	\
-  *_bsta_ = alloca_array (Ibyte, 1 + _bsta_3);	\
-  memcpy (*_bsta_, _bsta_2, 1 + _bsta_3);	\
-} while (0)
-
-
-#define alloca_ibytes(num) alloca_array (Ibyte, num)
-#define alloca_extbytes(num) alloca_array (Extbyte, num)
-
 void resize_string (Lisp_Object s, Bytecount pos, Bytecount delta);
 
 /* Convert a byte index into a string into a char index. */
@@ -1335,19 +1338,20 @@
    variable section:
 
    DECLARE_EISTRING (name);
-        Declare a new Eistring.  This is a standard local variable declaration
-        and can go anywhere in the variable declaration section.  NAME itself
-        is declared as an Eistring *, and its storage declared on the stack.
+        Declare a new Eistring and initialize it to the empty string.  This
+        is a standard local variable declaration and can go anywhere in the
+        variable declaration section.  NAME itself is declared as an
+        Eistring *, and its storage declared on the stack.
 
    DECLARE_EISTRING_MALLOC (name);
-        Declare a new Eistring, which uses malloc()ed instead of ALLOCA()ed
-        data.  This is a standard local variable declaration and can go
-        anywhere in the variable declaration section.  Once you initialize
-	the Eistring, you will have to free it using eifree() to avoid
-	memory leaks.  You will need to use this form if you are passing
-	an Eistring to any function that modifies it (otherwise, the
-	modified data may be in stack space and get overwritten when the
-	function returns).
+        Declare and initialize a new Eistring, which uses malloc()ed
+        instead of ALLOCA()ed data.  This is a standard local variable
+        declaration and can go anywhere in the variable declaration
+        section.  Once you initialize the Eistring, you will have to free
+        it using eifree() to avoid memory leaks.  You will need to use this
+        form if you are passing an Eistring to any function that modifies
+        it (otherwise, the modified data may be in stack space and get
+        overwritten when the function returns).
 
    or use
 
@@ -1416,10 +1420,10 @@
         ... from raw internal-format data in the specified format that is
         "null-terminated" (the meaning of this depends on the nature of
         the specific format).
-   void eicpy_c (Eistring *eistr, const Char_ASCII *c_string);
+   void eicpy_c (Eistring *eistr, const Ascbyte *c_string);
         ... from an ASCII null-terminated string.  Non-ASCII characters in
 	the string are *ILLEGAL* (read abort() with error-checking defined).
-   void eicpy_c_len (Eistring *eistr, const Char_ASCII *c_string, len);
+   void eicpy_c_len (Eistring *eistr, const Ascbyte *c_string, len);
         ... from an ASCII string, with length specified.  Non-ASCII characters
 	in the string are *ILLEGAL* (read abort() with error-checking defined).
    void eicpy_ext (Eistring *eistr, const Extbyte *extdata,
@@ -1559,7 +1563,7 @@
 
    void eicat_ei (Eistring *eistr, Eistring *eistr2);
         ... from another Eistring.
-   void eicat_c (Eistring *eistr, Char_ASCII *c_string);
+   void eicat_c (Eistring *eistr, Ascbyte *c_string);
         ... from an ASCII null-terminated string.  Non-ASCII characters in
 	the string are *ILLEGAL* (read abort() with error-checking defined).
    void eicat_raw (ei, const Ibyte *data, Bytecount len);
@@ -1589,7 +1593,7 @@
 		  Bytecount len, Charcount charlen, Eistring *eistr2);
         ... with another Eistring.
    void eisub_c (Eistring *eistr, Bytecount off, Charcount charoff,
-		 Bytecount len, Charcount charlen, Char_ASCII *c_string);
+		 Bytecount len, Charcount charlen, Ascbyte *c_string);
         ... with an ASCII null-terminated string.  Non-ASCII characters in
 	the string are *ILLEGAL* (read abort() with error-checking defined).
    void eisub_ch (Eistring *eistr, Bytecount off, Charcount charoff,
@@ -1656,17 +1660,17 @@
    Charcount eirstr_ei_off_char (Eistring *eistr, Eistring *eistr2,
 				 Bytecount off, Charcount charoff);
 
-   Bytecount eistr_c (Eistring *eistr, Char_ASCII *c_string);
-   Charcount eistr_c_char (Eistring *eistr, Char_ASCII *c_string);
-   Bytecount eistr_c_off (Eistring *eistr, Char_ASCII *c_string, Bytecount off,
+   Bytecount eistr_c (Eistring *eistr, Ascbyte *c_string);
+   Charcount eistr_c_char (Eistring *eistr, Ascbyte *c_string);
+   Bytecount eistr_c_off (Eistring *eistr, Ascbyte *c_string, Bytecount off,
 			   Charcount charoff);
-   Charcount eistr_c_off_char (Eistring *eistr, Char_ASCII *c_string,
+   Charcount eistr_c_off_char (Eistring *eistr, Ascbyte *c_string,
 			       Bytecount off, Charcount charoff);
-   Bytecount eirstr_c (Eistring *eistr, Char_ASCII *c_string);
-   Charcount eirstr_c_char (Eistring *eistr, Char_ASCII *c_string);
-   Bytecount eirstr_c_off (Eistring *eistr, Char_ASCII *c_string,
+   Bytecount eirstr_c (Eistring *eistr, Ascbyte *c_string);
+   Charcount eirstr_c_char (Eistring *eistr, Ascbyte *c_string);
+   Bytecount eirstr_c_off (Eistring *eistr, Ascbyte *c_string,
 			   Bytecount off, Charcount charoff);
-   Charcount eirstr_c_off_char (Eistring *eistr, Char_ASCII *c_string,
+   Charcount eirstr_c_off_char (Eistring *eistr, Ascbyte *c_string,
 				Bytecount off, Charcount charoff);
 
 
@@ -1707,17 +1711,17 @@
 			      Charcount charoff, Bytecount len,
 			      Charcount charlen, Eistring *eistr2);
 
-   int eicmp_c (Eistring *eistr, Char_ASCII *c_string);
+   int eicmp_c (Eistring *eistr, Ascbyte *c_string);
    int eicmp_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
-                    Bytecount len, Charcount charlen, Char_ASCII *c_string);
-   int eicasecmp_c (Eistring *eistr, Char_ASCII *c_string);
+                    Bytecount len, Charcount charlen, Ascbyte *c_string);
+   int eicasecmp_c (Eistring *eistr, Ascbyte *c_string);
    int eicasecmp_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
                         Bytecount len, Charcount charlen,
-                        Char_ASCII *c_string);
-   int eicasecmp_i18n_c (Eistring *eistr, Char_ASCII *c_string);
+                        Ascbyte *c_string);
+   int eicasecmp_i18n_c (Eistring *eistr, Ascbyte *c_string);
    int eicasecmp_i18n_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
                              Bytecount len, Charcount charlen,
-                             Char_ASCII *c_string);
+                             Ascbyte *c_string);
 
 
     ********************************************** 
@@ -1899,7 +1903,7 @@
 	      /* We don't have realloc, so ALLOCA() more space and copy the   \
 		 data into it. */					      \
 	      Ibyte *ei1oldeidata = (ei)->data_;			      \
-	      (ei)->data_ = (Ibyte *) ALLOCA (ei1newsize);		      \
+	      (ei)->data_ = alloca_ibytes (ei1newsize);		      \
               if (ei1oldeidata)						      \
 	        memcpy ((ei)->data_, ei1oldeidata, ei1oldeibytelen + 1);      \
 	    }								      \
@@ -1916,27 +1920,6 @@
   memcpy ((ei)->data_, data, (ei)->bytelen_);		\
 } while (0)
 
-#ifdef ERROR_CHECK_TEXT
-#define EI_ASSERT_ASCII(ptr, len)			\
-do {							\
-  int ei5;						\
-  const Char_ASCII *ei5ptr = (ptr);			\
-  int ei5len = (len);					\
-							\
-  for (ei5 = 0; ei5 < ei5len; ei5++)			\
-    assert (ei5ptr[ei5] >= 0x00 && ei5ptr[ei5] < 0x7F);	\
-} while (0)
-#define EI_ASSERT_ASCIIZ(ptr)			\
-do {						\
-  const Char_ASCII *ei5p1 = (ptr);		\
-  EI_ASSERT_ASCII (ei5p1, strlen (ei5p1));	\
-} while (0)
-#else
-#define EI_ASSERT_ASCII(ptr, len)
-#define EI_ASSERT_ASCIIZ(ptr)
-#endif
-
-
 /*   ----- Initialization -----   */
 
 #define eicpy_ei(ei, eicpy)						\
@@ -2001,18 +1984,18 @@
 
 #define eicpy_c(ei, c_string)			\
 do {						\
-  const Char_ASCII *ei4 = (c_string);		\
+  const Ascbyte *ei4 = (c_string);		\
 						\
-  EI_ASSERT_ASCIIZ (ei4);			\
+  ASSERT_ASCTEXT_ASCII (ei4);			\
   eicpy_ext (ei, ei4, Qbinary);			\
 } while (0)
 
 #define eicpy_c_len(ei, c_string, c_len)	\
 do {						\
-  const Char_ASCII *ei6 = (c_string);		\
+  const Ascbyte *ei6 = (c_string);		\
   int ei6len = (c_len);				\
 						\
-  EI_ASSERT_ASCII (ei6, ei6len);		\
+  ASSERT_ASCTEXT_ASCII_LEN (ei6, ei6len);	\
   eicpy_ext_len (ei, ei6, ei6len, Qbinary);	\
 } while (0)
 
@@ -2078,7 +2061,7 @@
   assert (ei23fmt == FORMAT_DEFAULT);				\
 								\
   *ei23lenout = (eistr)->bytelen_;				\
-  *ei23ptrout = alloca_array (Ibyte, (eistr)->bytelen_ + 1);	\
+  *ei23ptrout = alloca_ibytes ((eistr)->bytelen_ + 1);	\
   memcpy (*ei23ptrout, (eistr)->data_, (eistr)->bytelen_ + 1);	\
 } while (0)
 
@@ -2114,7 +2097,7 @@
 									\
       (ei)->max_size_allocated_ =					\
 	eifind_large_enough_buffer (0, (ei)->bytelen_ + 1);		\
-      ei13newdata = (Ibyte *) ALLOCA ((ei)->max_size_allocated_);	\
+      ei13newdata = alloca_ibytes ((ei)->max_size_allocated_);		\
       memcpy (ei13newdata, (ei)->data_, (ei)->bytelen_ + 1);		\
       xfree ((ei)->data_, Ibyte *);					\
       (ei)->data_ = ei13newdata;					\
@@ -2122,7 +2105,7 @@
 									\
   if ((ei)->extdata_)							\
     {									\
-      Extbyte *ei13newdata = (Extbyte *) ALLOCA ((ei)->extlen_ + 2);	\
+      Extbyte *ei13newdata = alloca_extbytes ((ei)->extlen_ + 2);	\
 									\
       memcpy (ei13newdata, (ei)->extdata_, (ei)->extlen_);		\
       /* Double null-terminate in case of Unicode data */		\
@@ -2226,10 +2209,10 @@
 
 #define eicat_c(ei, c_string)					\
 do {								\
-  const Char_ASCII *ei15 = (c_string);				\
+  const Ascbyte *ei15 = (c_string);				\
   int ei15len = strlen (ei15);					\
 								\
-  EI_ASSERT_ASCII (ei15, ei15len);				\
+  ASSERT_ASCTEXT_ASCII_LEN (ei15, ei15len);			\
   eicat_1 (ei, ei15, ei15len,					\
            bytecount_to_charcount ((Ibyte *) ei15, ei15len));	\
 } while (0)
@@ -2305,9 +2288,9 @@
 
 #define eisub_c(ei, off, charoff, len, charlen, c_string)	\
 do {								\
-  const Char_ASCII *ei20 = (c_string);				\
+  const Ascbyte *ei20 = (c_string);				\
   int ei20len = strlen (ei20);					\
-  EI_ASSERT_ASCII (ei20, ei20len);				\
+  ASSERT_ASCTEXT_ASCII_LEN (ei20, ei20len);			\
   eisub_1 (ei, off, charoff, len, charlen, ei20, ei20len, -1);	\
 } while (0)
 
@@ -2446,7 +2429,7 @@
 do {									\
   int ei11new_allocmax = (ei)->charlen_ * MAX_ICHAR_LEN + 1;		\
   Ibyte *ei11storage =							\
-     (Ibyte *) alloca_array (Ibyte, ei11new_allocmax);			\
+     (Ibyte *) alloca_ibytes (ei11new_allocmax);			\
   int ei11newlen = eistr_casefiddle_1 ((ei)->data_, (ei)->bytelen_,	\
 				       ei11storage, downp);		\
 									\
@@ -2525,9 +2508,27 @@
 
   Typical use is
 
-  TO_EXTERNAL_FORMAT (DATA, (ptr, len),
-                      LISP_BUFFER, buffer,
-		      Qfile_name);
+     TO_EXTERNAL_FORMAT (LISP_STRING, str, C_STRING_MALLOC, ptr, Qfile_name);
+
+  which means that the contents of the lisp string `str' are written
+  to a malloc'ed memory area which will be pointed to by `ptr', after the
+  function returns.  The conversion will be done using the `file-name'
+  coding system (which will be controlled by the user indirectly by
+  setting or binding the variable `file-name-coding-system').
+
+  Some sources and sinks require two C variables to specify.  We use
+  some preprocessor magic to allow different source and sink types, and
+  even different numbers of arguments to specify different types of
+  sources and sinks.
+
+  So we can have a call that looks like
+
+     TO_INTERNAL_FORMAT (DATA, (ptr, len),
+                         MALLOC, (ptr, len),
+                         coding_system);
+
+  The parenthesized argument pairs are required to make the
+  preprocessor magic work.
 
   NOTE: GC is inhibited during the entire operation of these macros.  This
   is because frequently the data to be converted comes from strings but
@@ -2552,6 +2553,12 @@
   When specifying the sink, use lvalues, since the macro will assign to them,
   except when the sink is an lstream or a lisp buffer.
 
+  For the sink types `ALLOCA' and `C_STRING_ALLOCA', the resulting text is
+  stored in a stack-allocated buffer, which is automatically freed on
+  returning from the function.  However, the sink types `MALLOC' and
+  `C_STRING_MALLOC' return `xmalloc()'ed memory.  The caller is responsible
+  for freeing this memory using `xfree()'.
+
   The macros accept the kinds of sources and sinks appropriate for
   internal and external data representation.  See the type_checking_assert
   macros below for the actual allowed types.
@@ -2607,7 +2614,45 @@
   behavior may change in the future, and you cannot rely on this --
   the most you can rely on is that sink data in Unicode format will
   have two terminating nulls, which combine to form one Unicode null
-  character.  */
+  character.
+
+  NOTE: You might ask, why are these not written as functions that
+  *RETURN* the converted string, since that would allow them to be used
+  much more conveniently, without having to constantly declare temporary
+  variables?  The answer is that in fact I originally did write the
+  routines that way, but that required either
+
+  (a) calling alloca() inside of a function call, or
+  (b) using expressions separated by commas and a global temporary variable, or
+  (c) using the GCC extension ({ ... }).
+
+  Turned out that all of the above had bugs, all caused by GCC (hence the
+  comments about "those GCC wankers" and "ream gcc up the ass").  As for
+  (a), some versions of GCC (especially on Intel platforms) had
+  buggy implementations of alloca() that couldn't handle being called
+  inside of a function call -- they just decremented the stack right in the
+  middle of pushing args.  Oops, crash with stack trashing, very bad.  (b)
+  was an attempt to fix (a), and that led to further GCC crashes, esp. when
+  you had two such calls in a single subexpression, because GCC couldn't be
+  counted upon to follow even a minimally reasonable order of execution.
+  True, you can't count on one argument being evaluated before another, but
+  GCC would actually interleave them so that the temp var got stomped on by
+  one while the other was accessing it.  So I tried (c), which was
+  problematic because that GCC extension has more bugs in it than a
+  termite's nest.
+
+  So reluctantly I converted to the current way.  Now, that was a while ago
+  (c. 1994), and it appears that the bug involving alloca in function calls
+  has long since been fixed.  More recently, I defined the new-dfc routines
+  down below, which DO allow exactly such convenience of returning your
+  args rather than store them in temp variables, and I also wrote a
+  configure check to see whether alloca() causes crashes inside of function
+  calls, and if so use the portable alloca() implementation in alloca.c.
+  If you define TEST_NEW_DFC, the old routines get written in terms of the
+  new ones, and I've had a beta put out with this on and it appeared to
+  cause no problems -- so we should consider
+  switching, and feel no compunctions about writing further such function-
+  like alloca() routines in lieu of statement-like ones. --ben */
 
 #define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, codesys)  \
 do {									   \
@@ -2802,24 +2847,24 @@
 #define DFC_ALLOCA_USE_CONVERTED_DATA(sink) do {			\
   void * dfc_sink_ret = ALLOCA (dfc_sink.data.len + 2);			\
   memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2);	\
-  ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret;	\
+  VOIDP_CAST (DFC_CPP_CAR sink) = dfc_sink_ret;				\
   (DFC_CPP_CDR sink) = dfc_sink.data.len;				\
 } while (0)
 #define DFC_MALLOC_USE_CONVERTED_DATA(sink) do {			\
   void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2);		\
   memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2);	\
-  ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret;	\
+  VOIDP_CAST (DFC_CPP_CAR sink) = dfc_sink_ret;				\
   (DFC_CPP_CDR sink) = dfc_sink.data.len;				\
 } while (0)
 #define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) do {		\
   void * dfc_sink_ret = ALLOCA (dfc_sink.data.len + 2);			\
   memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2);	\
-  ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret;			\
+  VOIDP_CAST (sink) = dfc_sink_ret;					\
 } while (0)
 #define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) do {		\
   void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2);		\
   memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2);	\
-  ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret;			\
+  VOIDP_CAST (sink) = dfc_sink_ret;					\
 } while (0)
 #define DFC_LISP_STRING_USE_CONVERTED_DATA(sink) \
   sink = make_string ((Ibyte *) dfc_sink.data.ptr, dfc_sink.data.len)
@@ -2879,21 +2924,46 @@
 #define C_STRING_TO_EXTERNAL_MALLOC(in, out, codesys)			\
   do { * (Extbyte **) &(out) =						\
        NEW_C_STRING_TO_EXTERNAL_MALLOC (in, codesys); } while (0)
+#define SIZED_C_STRING_TO_EXTERNAL_MALLOC(in, inlen, out, codesys)	\
+  do { * (Extbyte **) &(out) =						\
+       NEW_SIZED_C_STRING_TO_EXTERNAL_MALLOC (in, inlen, codesys); }	\
+  while (0)
 #define EXTERNAL_TO_C_STRING_MALLOC(in, out, codesys)			\
   do { * (Ibyte **) &(out) =						\
        NEW_EXTERNAL_TO_C_STRING_MALLOC (in, codesys); } while (0)
+#define SIZED_EXTERNAL_TO_C_STRING_MALLOC(in, inlen, out, codesys)	\
+  do { * (Ibyte **) &(out) =						\
+       NEW_SIZED_EXTERNAL_TO_C_STRING_MALLOC (in, inlen, codesys); }	\
+  while (0)
 #define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, codesys)		\
   do { * (Extbyte **) &(out) =						\
        NEW_LISP_STRING_TO_EXTERNAL_MALLOC (in, codesys); } while (0)
 #else
 #define C_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \
   TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, codesys)
+#define SIZED_C_STRING_TO_EXTERNAL_MALLOC(in, inlen, out, codesys) \
+  TO_EXTERNAL_FORMAT (DATA, (in, inlen), C_STRING_MALLOC, out, codesys)
 #define EXTERNAL_TO_C_STRING_MALLOC(in, out, codesys) \
   TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, codesys)
+#define SIZED_EXTERNAL_TO_C_STRING_MALLOC(in, inlen, out, codesys) \
+  TO_INTERNAL_FORMAT (DATA, (in, inlen), C_STRING_MALLOC, out, codesys)
 #define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \
   TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_MALLOC, out, codesys)
 #endif /* TEST_NEW_DFC */
 
+#define C_STRING_TO_SIZED_EXTERNAL_MALLOC(in, out, outlen, codesys) \
+  TO_EXTERNAL_FORMAT (C_STRING, in, MALLOC, (out, outlen), codesys)
+#define SIZED_C_STRING_TO_SIZED_EXTERNAL_MALLOC(in, inlen, out, outlen, \
+						codesys)		\
+  TO_EXTERNAL_FORMAT (DATA, (in, inlen), MALLOC, (out, outlen), codesys)
+#define EXTERNAL_TO_SIZED_C_STRING_MALLOC(in, out, outlen, codesys) \
+  TO_INTERNAL_FORMAT (C_STRING, in, MALLOC, (out, outlen), codesys)
+#define SIZED_EXTERNAL_TO_SIZED_C_STRING_MALLOC(in, inlen, out, outlen, \
+						codesys)		\
+  TO_INTERNAL_FORMAT (DATA, (in, inlen), MALLOC, (out, outlen), codesys)
+#define LISP_STRING_TO_SIZED_EXTERNAL_MALLOC(in, out, outlen, codesys) \
+  TO_EXTERNAL_FORMAT (LISP_STRING, in, MALLOC, (out, outlen), codesys)
+
 enum new_dfc_src_type
 {
   DFC_EXTERNAL,
@@ -2906,11 +2976,13 @@
 MODULE_API void *new_dfc_convert_malloc (const void *src, Bytecount src_size,
 					 enum new_dfc_src_type type,
 					 Lisp_Object codesys);
-MODULE_API void *new_dfc_convert_alloca (const char *srctext, void *alloca_data);
-MODULE_API Bytecount new_dfc_convert_size (const char *srctext, const void *src,
+MODULE_API Bytecount new_dfc_convert_size (const char *srctext,
+					   const void *src,
 					   Bytecount src_size,
 					   enum new_dfc_src_type type,
 					   Lisp_Object codesys);
+MODULE_API void *new_dfc_convert_copy_data (const char *srctext,
+					    void *alloca_data);
 
 END_C_DECLS
 
@@ -2932,7 +3004,7 @@
    could be inside of a function call. */
 
 #define NEW_DFC_CONVERT_1_ALLOCA(src, src_size, type, codesys)		\
-  new_dfc_convert_alloca						\
+  new_dfc_convert_copy_data						\
    (#src, ALLOCA_FUNCALL_OK (new_dfc_convert_size (#src, src, src_size,	\
 						   type, codesys)))
 
@@ -2959,15 +3031,76 @@
   (Extbyte *) new_dfc_convert_malloc (LISP_TO_VOID (src), -1,	\
 				      DFC_LISP_STRING, codesys)
 
-/* Standins for various encodings, until we know them better */
+/* Standins for various encodings. */
+#ifdef WEXTTEXT_IS_WIDE
+#define Qcommand_argument_encoding Qmswindows_unicode
+#define Qenvironment_variable_encoding Qmswindows_unicode
+#else
 #define Qcommand_argument_encoding Qnative
 #define Qenvironment_variable_encoding Qnative
+#endif
 #define Qunix_host_name_encoding Qnative
 #define Qunix_service_name_encoding Qnative
 #define Qmswindows_host_name_encoding Qmswindows_multibyte
 #define Qmswindows_service_name_encoding Qmswindows_multibyte
 
-/* Standins for various X encodings, until we know them better.
+/* Wexttext functions.  The type of Wexttext is selected at compile time
+   and will sometimes be wchar_t, sometimes char. */
+
+int wcscmp_ascii (const wchar_t *s1, const Ascbyte *s2);
+int wcsncmp_ascii (const wchar_t *s1, const Ascbyte *s2, Charcount len);
+
+#ifdef WEXTTEXT_IS_WIDE /* defined under MS Windows i.e. WIN32_NATIVE */
+#define WEXTTEXT_ZTERM_SIZE sizeof (wchar_t)
+/* Extra indirection needed in case of manifest constant as arg */
+#define WEXTSTRING_1(arg) L##arg
+#define WEXTSTRING(arg) WEXTSTRING_1(arg)
+#define wext_strlen wcslen
+#define wext_strcmp wcscmp
+#define wext_strncmp wcsncmp
+#define wext_strcmp_ascii wcscmp_ascii
+#define wext_strncmp_ascii wcsncmp_ascii
+#define wext_strcpy wcscpy
+#define wext_strncpy wcsncpy
+#define wext_strchr wcschr
+#define wext_strrchr wcsrchr
+#define wext_strdup wcsdup
+#define wext_atol(str) wcstol (str, 0, 10)
+#define wext_sprintf wsprintfW /* Huh?  both wsprintfA and wsprintfW? */
+#define wext_getenv _wgetenv
+#define build_wext_string(str, cs) build_ext_string ((Extbyte *) str, cs)
+#define WEXTTEXT_TO_8_BIT(arg) WEXTTEXT_TO_MULTIBYTE(arg)
+#ifdef WIN32_NATIVE
+int XCDECL wext_retry_open (const Wexttext *path, int oflag, ...);
+#else
+#error Cannot handle Wexttext yet on this system
+#endif
+#define wext_access _waccess
+#define wext_stat _wstat
+#else
+#define WEXTTEXT_ZTERM_SIZE sizeof (char)
+#define WEXTSTRING(arg) arg
+#define wext_strlen strlen
+#define wext_strcmp strcmp
+#define wext_strncmp strncmp
+#define wext_strcmp_ascii strcmp
+#define wext_strncmp_ascii strncmp
+#define wext_strcpy strcpy
+#define wext_strncpy strncpy
+#define wext_strchr strchr
+#define wext_strrchr strrchr
+#define wext_strdup xstrdup
+#define wext_atol(str) atol (str)
+#define wext_sprintf sprintf
+#define wext_getenv getenv
+#define build_wext_string build_ext_string
+#define wext_retry_open retry_open
+#define wext_access access
+#define wext_stat stat
+#define WEXTTEXT_TO_8_BIT(arg) ((Extbyte *) arg)
+#endif
+
+/* Standins for various X encodings.
 
    About encodings in X:
 
@@ -3042,6 +3175,11 @@
 #define Qx_display_name_encoding Qx_hpc_encoding
 #define Qx_xpm_data_encoding Qx_hpc_encoding
 
+/* !!#### Verify these! */
+#define Qxt_widget_arg_encoding Qnative
+#define Qdt_dnd_encoding Qnative
+#define Qoffix_dnd_encoding Qnative
+
 /* RedHat 6.2 contains a locale called "Francais" with the C-cedilla
    encoded in ISO2022! */
 #define Qlocale_name_encoding Qctext