xemacs-beta: src/text.c comparison

comparison src/text.c @ 826:6728e641994e

[xemacs-hg @ 2002-05-05 11:30:15 by ben] syntax cache, 8-bit-format, lots of code cleanup README.packages: Update info about --package-path. i.c: Create an inheritable event and pass it on to XEmacs, so that ^C can be handled properly. Intercept ^C and signal the event. "Stop Build" in VC++ now works. bytecomp-runtime.el: Doc string changes. compat.el: Some attempts to redo this to make it truly useful and fix the "multiple versions interacting with each other" problem. Not yet done. Currently doesn't work. files.el: Use with-obsolete-variable to avoid warnings in new revert-buffer code. xemacs.mak: Split up CFLAGS into a version without flags specifying the C library. The problem seems to be that minitar depends on zlib, which depends specifically on libc.lib, not on any of the other C libraries. Unless you compile with libc.lib, you get errors -- specifically, no _errno in the other libraries, which must make it something other than an int. (#### But this doesn't seem to obtain in XEmacs, which also uses zlib, and can be linked with any of the C libraries. Maybe zlib is used differently and doesn't need errno, or maybe XEmacs provides an int errno; ... I don't understand. Makefile.in.in: Fix so that packages are around when testing. 
abbrev.c, alloc.c, buffer.c, buffer.h, bytecode.c, callint.c, casefiddle.c, casetab.c, casetab.h, charset.h, chartab.c, chartab.h, cmds.c, console-msw.h, console-stream.c, console-x.c, console.c, console.h, data.c, device-msw.c, device.c, device.h, dialog-msw.c, dialog-x.c, dired-msw.c, dired.c, doc.c, doprnt.c, dumper.c, editfns.c, elhash.c, emacs.c, eval.c, event-Xt.c, event-gtk.c, event-msw.c, event-stream.c, events.c, events.h, extents.c, extents.h, faces.c, file-coding.c, file-coding.h, fileio.c, fns.c, font-lock.c, frame-gtk.c, frame-msw.c, frame-x.c, frame.c, frame.h, glade.c, glyphs-gtk.c, glyphs-msw.c, glyphs-msw.h, glyphs-x.c, glyphs.c, glyphs.h, gui-msw.c, gui-x.c, gui.h, gutter.h, hash.h, indent.c, insdel.c, intl-win32.c, intl.c, keymap.c, lisp-disunion.h, lisp-union.h, lisp.h, lread.c, lrecord.h, lstream.c, lstream.h, marker.c, menubar-gtk.c, menubar-msw.c, menubar-x.c, menubar.c, minibuf.c, mule-ccl.c, mule-charset.c, mule-coding.c, mule-wnnfns.c, nas.c, objects-msw.c, objects-x.c, opaque.c, postgresql.c, print.c, process-nt.c, process-unix.c, process.c, process.h, profile.c, rangetab.c, redisplay-gtk.c, redisplay-msw.c, redisplay-output.c, redisplay-x.c, redisplay.c, redisplay.h, regex.c, regex.h, scrollbar-msw.c, search.c, select-x.c, specifier.c, specifier.h, symbols.c, symsinit.h, syntax.c, syntax.h, syswindows.h, tests.c, text.c, text.h, tooltalk.c, ui-byhand.c, ui-gtk.c, unicode.c, win32.c, window.c: Another big Ben patch. -- FUNCTIONALITY CHANGES: add partial support for 8-bit-fixed, 16-bit-fixed, and 32-bit-fixed formats. not quite done yet. (in particular, needs functions to actually convert the buffer.) NOTE: lots of changes to regex.c here. also, many new *_fmt() inline funs that take an Internal_Format argument. redo syntax cache code. make the cache per-buffer; keep the cache valid across calls to functions that use it. also keep it valid across insertions/deletions and extent changes, as much as is possible. 
eliminate the junky regex-reentrancy code by passing in the relevant lisp info to the regex routines as local vars. add general mechanism in extents code for signalling extent changes. fix numerous problems with the case-table implementation; yoshiki never properly transferred many algorithms from old-style to new-style case tables. redo char tables to support a default argument, so that mapping only occurs over changed args. change many chartab functions to accept Lisp_Object instead of Lisp_Char_Table *. comment out the code in font-lock.c by default, because font-lock.el no longer uses it. we should consider eliminating it entirely. Don't output bell as ^G in console-stream when not a TTY. add -mswindows-termination-handle to interface with i.c, so we can properly kill a build. add more error-checking to buffer/string macros. add some additional buffer_or_string_() funs. -- INTERFACE CHANGES AFFECTING MORE CODE: switch the arguments of write_c_string and friends to be consistent with write_fmt_string, which must have printcharfun first. change BI_* macros to BYTE_* for increased clarity; similarly for bi_* local vars. change VOID_TO_LISP to be a one-argument function. eliminate no-longer-needed CVOID_TO_LISP. -- char/string macro changes: rename MAKE_CHAR() to make_emchar() for slightly less confusion with make_char(). (The former generates an Emchar, the latter a Lisp object. Conceivably we should rename make_char() -> wrap_char() and similarly for make_int(), make_float().) Similar changes for other *CHAR* macros -- we now consistently use names with `emchar' whenever we are working with Emchars. Any remaining name with just `char' always refers to a Lisp object. rename macros with XSTRING_* to string_* except for those that reference actual fields in the Lisp_String object, following conventions used elsewhere. 
rename set_string_{data,length} macros (the only ones to work with a Lisp_String_* instead of a Lisp_Object) to set_lispstringp_* to make the difference clear. try to be consistent about caps vs. lowercase in macro/inline-fun names for chars and such, which wasn't the case before. we now reserve caps either for XFOO_ macros that reference object fields (e.g. XSTRING_DATA) or for things that have non-function semantics, e.g. directly modifying an arg (BREAKUP_EMCHAR) or evaluating an arg (any arg) more than once. otherwise, use lowercase. here is a summary of most of the macros/inline funs changed by all of the above changes: BYTE_*_P -> byte_*_p XSTRING_BYTE -> string_byte set_string_data/length -> set_lispstringp_data/length XSTRING_CHAR_LENGTH -> string_char_length XSTRING_CHAR -> string_emchar INTBYTE_FIRST_BYTE_P -> intbyte_first_byte_p INTBYTE_LEADING_BYTE_P -> intbyte_leading_byte_p charptr_copy_char -> charptr_copy_emchar LEADING_BYTE_* -> leading_byte_* CHAR_* -> EMCHAR_* *_CHAR_* -> *_EMCHAR_* *_CHAR -> *_EMCHAR CHARSET_BY_ -> charset_by_* BYTE_SHIFT_JIS* -> byte_shift_jis* BYTE_BIG5* -> byte_big5* REP_BYTES_BY_FIRST_BYTE -> rep_bytes_by_first_byte char_to_unicode -> emchar_to_unicode valid_char_p -> valid_emchar_p Change intbyte_strcmp -> qxestrcmp_c (duplicated functionality). -- INTERFACE CHANGES AFFECTING LESS CODE: use DECLARE_INLINE_HEADER in various places. remove '#ifdef emacs' from XEmacs-only files. eliminate CHAR_TABLE_VALUE(), which duplicated the functionality of get_char_table(). add BUFFER_TEXT_LOOP to simplify iterations over buffer text. define typedefs for signed and unsigned types of fixed sizes (INT_32_BIT, UINT_32_BIT, etc.). create ALIGN_FOR_TYPE as a higher-level interface onto ALIGN_SIZE; fix code to use it. add charptr_emchar_len to return the text length of the character pointed to by a ptr; use it in place of charcount_to_bytecount(..., 1). add emchar_len to return the text length of a given character. 
add types Bytexpos and Charxpos to generalize Bytebpos/Bytecount and Charbpos/Charcount, in code (particularly, the extents code and redisplay code) that works with either kind of index. rename redisplay struct params with names such as `charbpos' to e.g. `charpos' when they are e.g. a Charxpos, not a Charbpos. eliminate xxDEFUN in place of DEFUN; no longer necessary with changes awhile back to doc.c. split up big ugly combined list of EXFUNs in lisp.h on a file-by-file basis, since other prototypes are similarly split. rewrite some "*_UNSAFE" macros as inline funs and eliminate the _UNSAFE suffix. move most string code from lisp.h to text.h; the string code and text.h code is now intertwined in such a fashion that they need to be in the same place and partially interleaved. (you can't create forward references for inline funs) automated/lisp-tests.el, automated/symbol-tests.el, automated/test-harness.el: Fix test harness to output FAIL messages to stderr when in batch mode. Fix up some problems in lisp-tests/symbol-tests that were causing spurious failures.

author	ben
date	Sun, 05 May 2002 11:33:57 +0000
parents	a634e3b7acc8
children	44478bd99873

comparison

equal deleted inserted replaced

-:eb3bc15a6e0f
+:6728e641994e
 /************************************************************************/
 /*                            long comments                             */
 /************************************************************************/
 /*
+==========================================================================
+1. Character Sets
+==========================================================================
+A character set (or "charset") is an ordered set of characters.
+A character (which is, BTW, a surprisingly complex concept) is, in a
+written representation of text, the most basic written unit that has a
+meaning of its own.  It's comparable to a phoneme when analyzing words
+in spoken speech.  Just like with a phoneme (which is an abstract
+concept, and is represented in actual spoken speech by one or more
+allophones), a character is actually an abstract
+concept.  &&#### finish this.
+A particular character in a charset is indexed using one or
+more "position codes", which are non-negative integers.
+The number of position codes needed to identify a particular
+character in a charset is called the "dimension" of the
+charset.  In XEmacs/Mule, all charsets have 1 or 2 dimensions,
+and the size of all charsets (except for a few special cases)
+is either 94, 96, 94 by 94, or 96 by 96.  The range of
+position codes used to index characters from any of these
+types of character sets is as follows:
+Charset type		Position code 1		Position code 2
+------------------------------------------------------------
+94			33 - 126		N/A
+96			32 - 127		N/A
+94x94		33 - 126		33 - 126
+96x96		32 - 127		32 - 127
+Note that in the above cases position codes do not start at
+an expected value such as 0 or 1.  The reason for this will
+become clear later.
+For example, Latin-1 is a 96-character charset, and JISX0208
+(the Japanese national character set) is a 94x94-character
+charset.
+[Note that, although the ranges above define the *valid*
+position codes for a charset, some of the slots in a particular
+charset may in fact be empty.  This is the case for JISX0208,
+for example, where (e.g.) all the slots whose first
+position code is in the range 118 - 127 are empty.]
+There are three charsets that do not follow the above rules.
+All of them have one dimension, and have ranges of position
+codes as follows:
+Charset name		Position code 1
+------------------------------------
+ASCII		0 - 127
+Control-1		0 - 31
+Composite		0 - some large number
+(The upper bound of the position code for composite characters
+has not yet been determined, but it will probably be at
+least 16,383).
+ASCII is the union of two subsidiary character sets:
+Printing-ASCII (the printing ASCII character set,
+consisting of position codes 33 - 126, like for a standard
+94-character charset) and Control-ASCII (the non-printing
+characters that would appear in a binary file with codes 0
+- 32 and 127).
+Control-1 contains the non-printing characters that would
+appear in a binary file with codes 128 - 159.
+Composite contains characters that are generated by
+overstriking one or more characters from other charsets.
+Note that some characters in ASCII, and all characters
+in Control-1, are "control" (non-printing) characters.
+These have no printed representation but instead control
+some other function of the printing (e.g. TAB, code 9, moves
+the current character position to the next tab stop).
+All other characters in all charsets are "graphic"
+(printing) characters.
+When a binary file is read in, the bytes in the file are
+assigned to character sets as follows:
+Bytes		Character set		Range
+--------------------------------------------------
+0 - 127		ASCII			0 - 127
+128 - 159		Control-1		0 - 31
+160 - 255		Latin-1			32 - 127
+This is a bit ad-hoc but gets the job done.
+==========================================================================
+2. Encodings
+==========================================================================
+An "encoding" is a way of numerically representing
+characters from one or more character sets.  If an encoding
+only encompasses one character set, then the position codes
+for the characters in that character set could be used
+directly.  This is not possible, however, if more than one
+character set is to be used in the encoding.
+For example, the conversion detailed above between bytes in
+a binary file and characters is effectively an encoding
+that encompasses the three character sets ASCII, Control-1,
+and Latin-1 in a stream of 8-bit bytes.
+Thus, an encoding can be viewed as a way of encoding
+characters from a specified group of character sets using a
+stream of bytes, each of which contains a fixed number of
+bits (but not necessarily 8, as in the common usage of
+"byte").
+Here are descriptions of a couple of common
+encodings:
+A. Japanese EUC (Extended Unix Code)
+This encompasses the character sets:
+- Printing-ASCII,
+- Katakana-JISX0201 (half-width katakana, the right half of JISX0201).
+- Japanese-JISX0208
+- Japanese-JISX0212
+It uses 8-bit bytes.
+Note that Printing-ASCII and Katakana-JISX0201 are 94-character
+charsets, while Japanese-JISX0208 is a 94x94-character charset.
+The encoding is as follows:
+Character set	Representation  (PC == position-code)
+-------------	--------------
+Printing-ASCII	PC1
+Japanese-JISX0208	PC1 + 0x80 | PC2 + 0x80
+Katakana-JISX0201	0x8E       | PC1 + 0x80
+Japanese-JISX0212	0x8F       | PC1 + 0x80 | PC2 + 0x80
+B. JIS7
+This encompasses the character sets:
+- Printing-ASCII
+- Latin-JISX0201 (the left half of JISX0201; this character set is
+very similar to Printing-ASCII and is a 94-character charset)
+- Japanese-JISX0208
+- Katakana-JISX0201
+It uses 7-bit bytes.
+Unlike Japanese EUC, this is a "modal" encoding, which
+means that there are multiple states that the encoding can
+be in, which affect how the bytes are to be interpreted.
+Special sequences of bytes (called "escape sequences")
+are used to change states.
+The encoding is as follows:
+Character set	Representation
+-------------	--------------
+Printing-ASCII	PC1
+Latin-JISX0201	PC1
+Katakana-JISX0201	PC1
+Japanese-JISX0208	PC1 | PC2
+Escape sequence	ASCII equivalent  Meaning
+---------------	----------------  -------
+0x1B 0x28 0x42	ESC ( B		  invoke Printing-ASCII
+0x1B 0x28 0x4A	ESC ( J		  invoke Latin-JISX0201
+0x1B 0x28 0x49	ESC ( I		  invoke Katakana-JISX0201
+0x1B 0x24 0x42	ESC $ B		  invoke Japanese-JISX0208
+Initially, Printing-ASCII is invoked.
+==========================================================================
+3. Internal Mule Encodings
+==========================================================================
+In XEmacs/Mule, each character set is assigned a unique number,
+called a "leading byte".  This is used in the encodings of a
+character.  Leading bytes are in the range 0x80 - 0xFF
+(except for ASCII, which has a leading byte of 0), although
+some leading bytes are reserved.
+Charsets whose leading byte is in the range 0x80 - 0x9F are
+called "official" and are used for built-in charsets.
+Other charsets are called "private" and have leading bytes
+in the range 0xA0 - 0xFF; these are user-defined charsets.
+More specifically:
+Character set		Leading byte
+-------------		------------
+ASCII			0 (0x7F in arrays indexed by leading byte)
+Composite			0x8D
+Dimension-1 Official		0x80 - 0x8C/0x8D
+				  (0x8E is free)
+Control-1			0x8F
+Dimension-2 Official		0x90 - 0x99
+				  (0x9A - 0x9D are free)
+Dimension-1 Private Marker   0x9E
+Dimension-2 Private Marker   0x9F
+Dimension-1 Private		0xA0 - 0xEF
+Dimension-2 Private		0xF0 - 0xFF
+There are two internal encodings for characters in XEmacs/Mule.
+One is called "string encoding" and is an 8-bit encoding that
+is used for representing characters in a buffer or string.
+It uses 1 to 4 bytes per character.  The other is called
+"character encoding" and is a 19-bit encoding that is used
+for representing characters individually in a variable.
+(In the following descriptions, we'll ignore composite
+characters for the moment.  We also give a general (structural)
+overview first, followed later by the exact details.)
+A. Internal String Encoding
+ASCII characters are encoded using their position code directly.
+Other characters are encoded using their leading byte followed
+by their position code(s) with the high bit set.  Characters
+in private character sets have their leading byte prefixed with
+a "leading byte prefix", which is either 0x9E or 0x9F. (No
+character sets are ever assigned these leading bytes.) Specifically:
+Character set		Encoding (PC == position-code)
+-------------		-------- (LB == leading-byte)
+ASCII			PC1  |
+Control-1			LB   | PC1 + 0xA0
+Dimension-1 official		LB   | PC1 + 0x80
+Dimension-1 private		0x9E | LB         | PC1 + 0x80
+Dimension-2 official		LB   | PC1 + 0x80 | PC2 + 0x80
+Dimension-2 private		0x9F | LB         | PC1 + 0x80 | PC2 + 0x80
+The basic characteristic of this encoding is that the first byte
+of all characters is in the range 0x00 - 0x9F, and the second and
+following bytes of all characters are in the range 0xA0 - 0xFF.
+This means that it is impossible to get out of sync, or more
+specifically:
+1. Given any byte position, the beginning of the character it is
+within can be determined in constant time.
+2. Given any byte position at the beginning of a character, the
+beginning of the next character can be determined in constant
+time.
+3. Given any byte position at the beginning of a character, the
+beginning of the previous character can be determined in constant
+time.
+4. Textual searches can simply treat encoded strings as if they
+were encoded in a one-byte-per-character fashion rather than
+the actual multi-byte encoding.
+None of the standard non-modal encodings meet all of these
+conditions.  For example, EUC satisfies only (2) and (3), while
+Shift-JIS and Big5 (not yet described) satisfy only (2). (All
+non-modal encodings must satisfy (2), in order to be unambiguous.)
+B. Internal Character Encoding
+One 19-bit word represents a single character.  The word is
+separated into three fields:
+Bit number:	18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+		<------------> <------------------> <------------------>
+Field:	      1		         2		      3
+Note that fields 2 and 3 hold 7 bits each, while field 1 holds 5 bits.
+Character set		Field 1		Field 2		Field 3
+-------------		-------		-------		-------
+ASCII			   0		   0              PC1
+range:                                                   (00 - 7F)
+Control-1			   0		   1              PC1
+range:                                                   (00 - 1F)
+Dimension-1 official            0            LB - 0x7F         PC1
+range:                                    (01 - 0D)      (20 - 7F)
+Dimension-1 private             0            LB - 0x80         PC1
+range:                                    (20 - 6F)      (20 - 7F)
+Dimension-2 official		LB - 0x8F          PC1            PC2
+range:                    (01 - 0A)       (20 - 7F)      (20 - 7F)
+Dimension-2 private          LB - 0xE1          PC1            PC2
+range:                    (0F - 1E)       (20 - 7F)      (20 - 7F)
+Composite			  0x1F              ?              ?
+Note that character codes 0 - 255 are the same as the "binary encoding"
+described above.
+Most of the code in XEmacs knows nothing of the representation of a
+character other than that values 0 - 255 represent ASCII, Control 1,
+and Latin 1.
+WARNING WARNING WARNING: The Boyer-Moore code in search.c, and the
+code in search_buffer() that determines whether that code can be used,
+knows that "field 3" in a character always corresponds to the last
+byte in the textual representation of the character. (This is important
+because the Boyer-Moore algorithm works by looking at the last byte
+of the search string and &&#### finish this.)
+==========================================================================
+4. Buffer Positions and Other Typedefs
+==========================================================================
+A. Buffer Positions
 There are three possible ways to specify positions in a buffer.  All
 of these are one-based: the beginning of the buffer is position or
 index 1, and 0 is not a valid position.
 As a "buffer position" (typedef Charbpos):
 buffer_start_address + memory_index(position) - 1
 except in the case of characters at the gap position.
-Other typedefs:
+B. Other Typedefs
-===============
 Emchar:
 -------
 This typedef represents a single Emacs character, which can be
 	ASCII, ISO-8859, or some extended character, as would typically
 ----------
 Similar to a Charcount but represents a count of bytes.
 	The difference between two Bytebpos's is a Bytecount.
-Usage of the various representations:
+C. Usage of the Various Representations
-=====================================
 Memory indices are used in low-level functions in insdel.c and for
 extent endpoints and marker positions.  The reason for this is that
 this way, the extents and markers don't need to be updated for most
 insertions, which merely shrink the gap and don't move any
 Strings are always passed around internally using internal format.
 Conversions to and from external format are performed at the time
 that the data goes in or out of Emacs.
-Working with the various representations:
+D. Working With the Various Representations
-========================================= */
+We write things this way because it's very important that
-/* We write things this way because it's very important the
 MAX_BYTEBPOS_GAP_SIZE_3 is a multiple of 3. (As it happens,
 65535 is a multiple of 3, but this may not always be the
-case.) */
+case.) #### unfinished
+==========================================================================
-/*
+5. Miscellaneous
-1. Character Sets
+==========================================================================
-=================
+A. Unicode Support
-A character set (or "charset") is an ordered set of characters.
-A particular character in a charset is indexed using one or
-more "position codes", which are non-negative integers.
-The number of position codes needed to identify a particular
-character in a charset is called the "dimension" of the
-charset.  In XEmacs/Mule, all charsets have 1 or 2 dimensions,
-and the size of all charsets (except for a few special cases)
-is either 94, 96, 94 by 94, or 96 by 96.  The range of
-position codes used to index characters from any of these
-types of character sets is as follows:
-Charset type		Position code 1		Position code 2
-------------------------------------------------------------
-94			33 - 126		N/A
-96			32 - 127		N/A
-94x94		33 - 126		33 - 126
-96x96		32 - 127		32 - 127
-Note that in the above cases position codes do not start at
-an expected value such as 0 or 1.  The reason for this will
-become clear later.
-For example, Latin-1 is a 96-character charset, and JISX0208
-(the Japanese national character set) is a 94x94-character
-charset.
-[Note that, although the ranges above define the *valid*
-position codes for a charset, some of the slots in a particular
-charset may in fact be empty.  This is the case for JISX0208,
-for example, where (e.g.) all the slots whose first
-position code is in the range 118 - 127 are empty.]
-There are three charsets that do not follow the above rules.
-All of them have one dimension, and have ranges of position
-codes as follows:
-Charset name		Position code 1
-------------------------------------
-ASCII		0 - 127
-Control-1		0 - 31
-Composite		0 - some large number
-(The upper bound of the position code for composite characters
-has not yet been determined, but it will probably be at
-least 16,383).
-ASCII is the union of two subsidiary character sets:
-Printing-ASCII (the printing ASCII character set,
-consisting of position codes 33 - 126, like for a standard
-94-character charset) and Control-ASCII (the non-printing
-characters that would appear in a binary file with codes 0
-- 32 and 127).
-Control-1 contains the non-printing characters that would
-appear in a binary file with codes 128 - 159.
-Composite contains characters that are generated by
-overstriking one or more characters from other charsets.
-Note that some characters in ASCII, and all characters
-in Control-1, are "control" (non-printing) characters.
-These have no printed representation but instead control
-some other function of the printing (e.g. TAB or 8 moves
-the current character position to the next tab stop).
-All other characters in all charsets are "graphic"
-(printing) characters.
-When a binary file is read in, the bytes in the file are
-assigned to character sets as follows:
-Bytes		Character set		Range
---------------------------------------------------
-0 - 127		ASCII			0 - 127
-128 - 159		Control-1		0 - 31
-160 - 255		Latin-1			32 - 127
-This is a bit ad-hoc but gets the job done.
-2. Encodings
-============
-An "encoding" is a way of numerically representing
-characters from one or more character sets.  If an encoding
-only encompasses one character set, then the position codes
-for the characters in that character set could be used
-directly.  This is not possible, however, if more than one
-character set is to be used in the encoding.
-For example, the conversion detailed above between bytes in
-a binary file and characters is effectively an encoding
-that encompasses the three character sets ASCII, Control-1,
-and Latin-1 in a stream of 8-bit bytes.
-Thus, an encoding can be viewed as a way of encoding
-characters from a specified group of character sets using a
-stream of bytes, each of which contains a fixed number of
-bits (but not necessarily 8, as in the common usage of
-"byte").
-Here are descriptions of a couple of common
-encodings:
-A. Japanese EUC (Extended Unix Code)
-This encompasses the character sets:
-- Printing-ASCII,
-- Katakana-JISX0201 (half-width katakana, the right half of JISX0201).
-- Japanese-JISX0208
-- Japanese-JISX0212
-It uses 8-bit bytes.
-Note that Printing-ASCII and Katakana-JISX0201 are 94-character
-charsets, while Japanese-JISX0208 is a 94x94-character charset.
-The encoding is as follows:
-Character set	Representation  (PC == position-code)
--------------	--------------
-Printing-ASCII	PC1
-Japanese-JISX0208	PC1 + 0x80 | PC2 + 0x80
-Katakana-JISX0201	0x8E       | PC1 + 0x80
-B. JIS7
-This encompasses the character sets:
-- Printing-ASCII
-- Latin-JISX0201 (the left half of JISX0201; this character set is
-very similar to Printing-ASCII and is a 94-character charset)
-- Japanese-JISX0208
-- Katakana-JISX0201
-It uses 7-bit bytes.
-Unlike Japanese EUC, this is a "modal" encoding, which
-means that there are multiple states that the encoding can
-be in, which affect how the bytes are to be interpreted.
-Special sequences of bytes (called "escape sequences")
-are used to change states.
-The encoding is as follows:
-Character set	Representation
--------------	--------------
-Printing-ASCII	PC1
-Latin-JISX0201	PC1
-Katakana-JISX0201	PC1
-Japanese-JISX0208	PC1 | PC2
-Escape sequence	ASCII equivalent  Meaning
----------------	----------------  -------
-0x1B 0x28 0x42	ESC ( B		  invoke Printing-ASCII
-0x1B 0x28 0x4A	ESC ( J		  invoke Latin-JISX0201
-0x1B 0x28 0x49	ESC ( I		  invoke Katakana-JISX0201
-0x1B 0x24 0x42	ESC $ B		  invoke Japanese-JISX0208
-Initially, Printing-ASCII is invoked.
-3. Internal Mule Encodings
-==========================
-In XEmacs/Mule, each character set is assigned a unique number,
-called a "leading byte".  This is used in the encodings of a
-character.  Leading bytes are in the range 0x80 - 0xFF
-(except for ASCII, which has a leading byte of 0), although
-some leading bytes are reserved.
-Charsets whose leading byte is in the range 0x80 - 0x9F are
-called "official" and are used for built-in charsets.
-Other charsets are called "private" and have leading bytes
-in the range 0xA0 - 0xFF; these are user-defined charsets.
-More specifically:
-Character set		Leading byte
--------------		------------
-ASCII			0 (0x7F in arrays indexed by leading byte)
-Composite			0x8D
-Dimension-1 Official		0x80 - 0x8C/0x8D
-				  (0x8E is free)
-Control			0x8F
-Dimension-2 Official		0x90 - 0x99
-				  (0x9A - 0x9D are free)
-Dimension-1 Private Marker   0x9E
-Dimension-2 Private Marker   0x9F
-Dimension-1 Private		0xA0 - 0xEF
-Dimension-2 Private		0xF0 - 0xFF
-There are two internal encodings for characters in XEmacs/Mule.
-One is called "string encoding" and is an 8-bit encoding that
-is used for representing characters in a buffer or string.
-It uses 1 to 4 bytes per character.  The other is called
-"character encoding" and is a 19-bit encoding that is used
-for representing characters individually in a variable.
-(In the following descriptions, we'll ignore composite
-characters for the moment.  We also give a general (structural)
-overview first, followed later by the exact details.)
-A. Internal String Encoding
-ASCII characters are encoded using their position code directly.
-Other characters are encoded using their leading byte followed
-by their position code(s) with the high bit set.  Characters
-in private character sets have their leading byte prefixed with
-a "leading byte prefix", which is either 0x9E or 0x9F. (No
-character sets are ever assigned these leading bytes.) Specifically:
-Character set		Encoding (PC == position-code)
--------------		-------- (LB == leading-byte)
-ASCII			PC1  |
-Control-1			LB   | PC1 + 0xA0
-Dimension-1 official		LB   | PC1 + 0x80
-Dimension-1 private		0x9E | LB         | PC1 + 0x80
-Dimension-2 official		LB   | PC1        | PC2 + 0x80
-Dimension-2 private		0x9F | LB         | PC1 + 0x80 | PC2 + 0x80
-The basic characteristic of this encoding is that the first byte
-of all characters is in the range 0x00 - 0x9F, and the second and
-following bytes of all characters is in the range 0xA0 - 0xFF.
-This means that it is impossible to get out of sync, or more
-specifically:
-1. Given any byte position, the beginning of the character it is
-within can be determined in constant time.
-2. Given any byte position at the beginning of a character, the
-beginning of the next character can be determined in constant
-time.
-3. Given any byte position at the beginning of a character, the
-beginning of the previous character can be determined in constant
-time.
-4. Textual searches can simply treat encoded strings as if they
-were encoded in a one-byte-per-character fashion rather than
-the actual multi-byte encoding.
-None of the standard non-modal encodings meet all of these
-conditions.  For example, EUC satisfies only (2) and (3), while
-Shift-JIS and Big5 (not yet described) satisfy only (2). (All
-non-modal encodings must satisfy (2), in order to be unambiguous.)
-B. Internal Character Encoding
-One 19-bit word represents a single character.  The word is
-separated into three fields:
-Bit number:	18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
-		<------------> <------------------> <------------------>
-Field:	      1		         2		      3
-Note that fields 2 and 3 hold 7 bits each, while field 1 holds 5 bits.
-Character set		Field 1		Field 2		Field 3
--------------		-------		-------		-------
-ASCII			   0		   0              PC1
-range:                                                   (00 - 7F)
-Control-1			   0		   1              PC1
-range:                                                   (00 - 1F)
-Dimension-1 official            0            LB - 0x7F         PC1
-range:                                    (01 - 0D)      (20 - 7F)
-Dimension-1 private             0            LB - 0x80         PC1
-range:                                    (20 - 6F)      (20 - 7F)
-Dimension-2 official		LB - 0x8F          PC1            PC2
-range:                    (01 - 0A)       (20 - 7F)      (20 - 7F)
-Dimension-2 private          LB - 0xE1          PC1            PC2
-range:                    (0F - 1E)       (20 - 7F)      (20 - 7F)
-Composite			  0x1F              ?              ?
-Note that character codes 0 - 255 are the same as the "binary encoding"
-described above.
-*/
-/*
-About Unicode support:
 Adding Unicode support is very desirable.  Unicode will likely be a
 very common representation in the future, and thus we should
 represent Unicode characters using three bytes instead of four.
 This means we need to find leading bytes for Unicode.  Given that
 with a little change to the functions that assume that 0x80 is the
 lowest leading byte.) This means we still need to dump three
 leading bytes and move them into private space.  The CNS charsets
 are good candidates since they are rarely used, and
 JAPANESE_JISX0208_1978 is becoming less and less used and could
-also be dumped. */
+also be dumped.
+B. Composite Characters
-/* Composite characters are characters constructed by overstriking two
+Composite characters are characters constructed by overstriking two
 or more regular characters.
 1) The old Mule implementation involves storing composite characters
 in a buffer as a tag followed by all of the actual characters
 used to make up the composite character.  I think this is a bad
 where each C[1-3] is in the range 0xA0 - 0xFF.  This allows
 for slightly under 2^20 (one million) composite characters
 over the XEmacs process lifetime, and you only need to
 increase the size of a Mule character from 19 to 21 bits.
 Or you could use 0x8D C1 C2 C3 C4, allowing for about
-85 million (slightly over 2^26) composite characters. */
+85 million (slightly over 2^26) composite characters.
+*/
 /************************************************************************/
 /*                              declarations                            */
 /************************************************************************/
 indexed by the first byte of that representation.
 rep_bytes_by_first_byte(c) is more efficient than the equivalent
 canonical computation:
-XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
+XCHARSET_REP_BYTES (charset_by_leading_byte (c)) */
 const Bytecount rep_bytes_by_first_byte[0xA0] =
 { /* 0x00 - 0x7f are for straight ASCII */
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 malloc()ed string.  Note that the actual number of Intbytes allocated
 is one more than this: the returned string is zero-terminated. */
 Intbyte *
 convert_emchar_string_into_malloced_string (Emchar *arr, int nels,
-					   Bytecount *len_out)
+					    Bytecount *len_out)
 {
 /* Damn zero-termination. */
 Intbyte *str = (Intbyte *) alloca (nels * MAX_EMCHAR_LEN + 1);
 Intbyte *strorig = str;
 Bytecount len;
 if (len_out)
 *len_out = len;
 return str;
 }
+#define COPY_TEXT_BETWEEN_FORMATS(srcfmt, dstfmt)			 \
+do /* complete conversion body; both branches below end in return */	 \
+{ /* uses src, srclen, srcobj, dst, dstlen, dstobj, src_used in scope */ \
+if (dst) /* copy mode: convert into dst until the text or room ends */	 \
+{									 \
+Intbyte *dstend = dst + dstlen;					 \
+Intbyte *dstp = dst;						 \
+const Intbyte *srcend = src + srclen;				 \
+const Intbyte *srcp = src;					 \
+									 \
+while (srcp < srcend)						 \
+	{								 \
+	  Emchar ch = charptr_emchar_fmt (srcp, srcfmt, srcobj);	 \
+	  Bytecount len = emchar_len_fmt (ch, dstfmt);			 \
+									 \
+	    if (dstp + len <= dstend)					 \
+	      {								 \
+		set_charptr_emchar_fmt (dstp, ch, dstfmt, dstobj);	 \
+		dstp += len;						 \
+	      }								 \
+	    else /* character would not fit whole: stop before it */	 \
+	      break;							 \
+	  INC_CHARPTR_FMT (srcp, srcfmt);				 \
+	}								 \
+text_checking_assert (srcp <= srcend); /* may have stopped early */	 \
+if (src_used)							 \
+	*src_used = srcp - src; /* source bytes actually converted */	 \
+return dstp - dst; /* bytes stored into dst */			 \
+}									 \
+else /* sizing mode: store nothing, total the converted length */	 \
+{									 \
+const Intbyte *srcend = src + srclen;				 \
+const Intbyte *srcp = src;					 \
+Bytecount total = 0;						 \
+									 \
+while (srcp < srcend)						 \
+	{								 \
+	  total += emchar_len_fmt (charptr_emchar_fmt (srcp, srcfmt,	 \
+						       srcobj), dstfmt); \
+	  INC_CHARPTR_FMT (srcp, srcfmt);				 \
+	}								 \
+text_checking_assert (srcp == srcend); /* must end on a char boundary */ \
+if (src_used)							 \
+	*src_used = srcp - src; /* the whole source was scanned */	 \
+return total;							 \
+}									 \
+}									 \
+while (0)
+/* Copy as much text from SRC/SRCLEN to DST/DSTLEN as will fit, converting
+from SRCFMT/SRCOBJ to DSTFMT/DSTOBJ.  The return value is the number of
+bytes stored into DST; the number of bytes consumed from SRC is stored
+through SRC_USED (if not NULL).  If DST is NULL, nothing is stored: the
+return value is the size needed to hold all of the converted text, and
+SRC_USED (if not NULL) receives the number of source bytes examined.
+Will not copy partial characters into DST. */
+Bytecount
+copy_text_between_formats (const Intbyte *src, Bytecount srclen,
+			   Internal_Format srcfmt,
+			   Lisp_Object srcobj,
+			   Intbyte *dst, Bytecount dstlen,
+			   Internal_Format dstfmt,
+			   Lisp_Object dstobj,
+			   Bytecount *src_used)
+{
+if (srcfmt == dstfmt &&
+objects_have_same_internal_representation (srcobj, dstobj))
+{
+/* Identical representation on both sides: no per-character conversion
+   is needed, so the text can be moved as raw bytes. */
+if (dst)
+	{
+	  srclen = min (srclen, dstlen);
+	  /* NOTE(review): presumably backs SRCLEN up so that it does not
+	     end inside a character -- confirm against the helper. */
+	  srclen = validate_intbyte_string_backward (src, srclen);
+	  memcpy (dst, src, srclen);
+	}
+/* Report the consumed length in the sizing (DST == NULL) case as well.
+   The conversion paths below always set *SRC_USED, and callers such as
+   copy_buffer_text_out() read it unconditionally; leaving it unset here
+   handed them an indeterminate value. */
+if (src_used)
+	*src_used = srclen;
+return srclen;
+}
+/* Everything before the final else statement is an optimization.
+The inner loops inside COPY_TEXT_BETWEEN_FORMATS() have a number
+of calls to *_fmt(), each of which has a switch statement in it.
+By using constants as the FMT argument, these switch statements
+will be optimized out of existence. */
+#define ELSE_FORMATS(fmt1, fmt2)		\
+else if (srcfmt == fmt1 && dstfmt == fmt2)	\
+COPY_TEXT_BETWEEN_FORMATS (fmt1, fmt2)
+ELSE_FORMATS (FORMAT_DEFAULT, FORMAT_8_BIT_FIXED);
+ELSE_FORMATS (FORMAT_8_BIT_FIXED, FORMAT_DEFAULT);
+ELSE_FORMATS (FORMAT_DEFAULT, FORMAT_32_BIT_FIXED);
+ELSE_FORMATS (FORMAT_32_BIT_FIXED, FORMAT_DEFAULT);
+else
+COPY_TEXT_BETWEEN_FORMATS (srcfmt, dstfmt);
+#undef ELSE_FORMATS
+}
+/* Copy as much buffer text in BUF, starting at POS, of length LEN, as will
+fit into DST/DSTLEN, converting to DSTFMT.  Return number of bytes
+stored into DST as return value, and number of bytes copied from BUF
+through SRC_USED (if not NULL).  If DST is NULL, don't actually store
+anything and just return the size needed to store all the text.
+
+The text is handed to copy_text_between_formats() one piece at a time
+(RUNPTR/RUNLEN, as produced by BUFFER_TEXT_LOOP).  Copying stops at the
+first piece that cannot be stored in full, so what ends up in DST is
+always a contiguous prefix of the requested text. */
+Bytecount
+copy_buffer_text_out (struct buffer *buf, Bytebpos pos,
+		      Bytecount len, Intbyte *dst, Bytecount dstlen,
+		      Internal_Format dstfmt, Lisp_Object dstobj,
+		      Bytecount *src_used)
+{
+Bytecount dst_used = 0;
+if (src_used)
+*src_used = 0;
+{
+BUFFER_TEXT_LOOP (buf, pos, len, runptr, runlen)
+{
+	/* Start from a defined value so that an indeterminate count can
+	   never be folded into *SRC_USED. */
+	Bytecount the_src_used = 0, the_dst_used;
+	the_dst_used = copy_text_between_formats (runptr, runlen,
+						  BUF_FORMAT (buf),
+						  wrap_buffer (buf),
+						  dst, dstlen, dstfmt,
+						  dstobj, &the_src_used);
+	dst_used += the_dst_used;
+	if (src_used)
+	  *src_used += the_src_used;
+	if (dst)
+	  {
+	    dst += the_dst_used;
+	    dstlen -= the_dst_used;
+	    /* Stop when DST is full, and also when this piece was only
+	       partly consumed: the next character did not fit, and
+	       moving on to a later piece would skip over it and store
+	       text that does not directly follow what is already in
+	       DST. */
+	    if (!dstlen || the_src_used < runlen)
+	      break;
+	  }
+}
+}
+return dst_used;
+}
 /************************************************************************/
 /*                    charset properties of strings                     */
 /************************************************************************/
 return;
 }
 while (str < strend)
 {
-charsets[CHAR_LEADING_BYTE (charptr_emchar (str)) - MIN_LEADING_BYTE] =
+charsets[emchar_leading_byte (charptr_emchar (str)) - MIN_LEADING_BYTE] =
 	1;
 INC_CHARPTR (str);
 }
 #endif
 }
 return;
 }
 for (i = 0; i < len; i++)
 {
-charsets[CHAR_LEADING_BYTE (str[i]) - MIN_LEADING_BYTE] = 1;
+charsets[emchar_leading_byte (str[i]) - MIN_LEADING_BYTE] = 1;
 }
 #endif
 }
 int
 while (str < end)
 {
 #ifdef MULE
 Emchar ch = charptr_emchar (str);
-cols += XCHARSET_COLUMNS (CHAR_CHARSET (ch));
+cols += XCHARSET_COLUMNS (emchar_charset (ch));
 #else
 cols++;
 #endif
 INC_CHARPTR (str);
 }
 #ifdef MULE
 int cols = 0;
 int i;
 for (i = 0; i < len; i++)
-cols += XCHARSET_COLUMNS (CHAR_CHARSET (str[i]));
+cols += XCHARSET_COLUMNS (emchar_charset (str[i]));
 return cols;
 #else  /* not MULE */
 return len;
 #endif
 const Intbyte *end = str + len;
 Charcount retval = 0;
 while (str < end)
 {
-if (!BYTE_ASCII_P (*str))
+if (!byte_ascii_p (*str))
 	retval++;
 INC_CHARPTR (str);
 }
 return retval;
 	    qxetextcasecmp (src, len, dst, dstlen));
 }
 }
 Intbyte *
-eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out, Internal_Format fmt)
+eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out, Internal_Format fmt,
+		     Lisp_Object object)
 {
 Intbyte *ptr;
 assert (fmt == FORMAT_DEFAULT);
 ptr = xnew_array (Intbyte, eistr->bytelen_ + 1);
 /* Optimization.  Do it.  Live it.  Love it.  */
 #ifdef MULE
-/* We include the basic functions here that require no specific
+/* Skip as many ASCII bytes as possible in the memory block [PTR, END).
-knowledge of how data is Mule-encoded into a buffer other
+Return pointer to the first non-ASCII byte.  optimized for long
-than the basic (00 - 7F), (80 - 9F), (A0 - FF) scheme.
+stretches of ASCII. */
-Anything that requires more specific knowledge goes into
+inline static const Intbyte *
-mule-charset.c. */
+skip_ascii (const Intbyte *ptr, const Intbyte *end)
+{
-/* Given a pointer to a text string and a length in bytes, return
+#ifdef EFFICIENT_INT_128_BIT
-the equivalent length in characters. */
+# define STRIDE_TYPE INT_128_BIT
+# define HIGH_BIT_MASK \
-Charcount
+MAKE_128_BIT_UNSIGNED_CONSTANT (0x80808080808080808080808080808080)
-bytecount_to_charcount (const Intbyte *ptr, Bytecount len)
+#elif defined (EFFICIENT_INT_64_BIT)
-{
+# define STRIDE_TYPE INT_64_BIT
-Charcount count = 0;
+# define HIGH_BIT_MASK MAKE_64_BIT_UNSIGNED_CONSTANT (0x8080808080808080)
-const Intbyte *end = ptr + len;
-#if SIZEOF_LONG == 8
-# define STRIDE_TYPE long
-# define HIGH_BIT_MASK 0x8080808080808080UL
-#elif SIZEOF_LONG_LONG == 8 && !(defined (i386) || defined (__i386__))
-# define STRIDE_TYPE long long
-# define HIGH_BIT_MASK 0x8080808080808080ULL
-#elif SIZEOF_LONG == 4
-# define STRIDE_TYPE long
-# define HIGH_BIT_MASK 0x80808080UL
 #else
-# error Add support for 128-bit systems here
+# define STRIDE_TYPE INT_32_BIT
+# define HIGH_BIT_MASK MAKE_32_BIT_UNSIGNED_CONSTANT (0x80808080)
 #endif
 #define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1))
 #define ALIGN_MASK (~ ALIGN_BITS)
 #define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0)
 #define STRIDE sizeof (STRIDE_TYPE)
-while (ptr < end)
+const unsigned STRIDE_TYPE *ascii_end;
-{
-if (BYTE_ASCII_P (*ptr))
+/* Need to do in 3 sections -- before alignment start, aligned chunk,
-	{
+after alignment end. */
-	  /* optimize for long stretches of ASCII */
+while (!ALIGNED (ptr))
-	  if (! ALIGNED (ptr))
+{
-	    ptr++, count++;
+if (ptr == end || !byte_ascii_p (*ptr))
-	  else
+	return ptr;
-	    {
+ptr++;
-	      const unsigned STRIDE_TYPE *ascii_end =
+}
-		(const unsigned STRIDE_TYPE *) ptr;
+ascii_end = (const unsigned STRIDE_TYPE *) ptr;
-	      /* This loop screams, because we can detect ASCII
+/* This loop screams, because we can detect ASCII
-		 characters 4 or 8 at a time. */
+characters 4 or 8 at a time. */
-	      while ((const Intbyte *) ascii_end + STRIDE <= end
+while ((const Intbyte *) ascii_end + STRIDE <= end
-		     && !(*ascii_end & HIGH_BIT_MASK))
+	 && !(*ascii_end & HIGH_BIT_MASK))
-		ascii_end++;
+ascii_end++;
-	      if ((Intbyte *) ascii_end == ptr)
+ptr = (Intbyte *) ascii_end;
-		ptr++, count++;
+while (ptr < end && byte_ascii_p (*ptr))
-	      else
+ptr++;
-		{
+return ptr;
-		  count += (Intbyte *) ascii_end - ptr;
+}
-		  ptr = (Intbyte *) ascii_end;
-		}
+/* Function equivalents of bytecount_to_charcount/charcount_to_bytecount.
-	    }
+These work on strings of all sizes but are more efficient than a simple
-	}
+loop on large strings and probably less efficient on sufficiently small
-else
+strings. */
-	{
-	  /* optimize for successive characters from the same charset */
+Charcount
-	  Intbyte leading_byte = *ptr;
+bytecount_to_charcount_fun (const Intbyte *ptr, Bytecount len)
-	  int bytes = REP_BYTES_BY_FIRST_BYTE (leading_byte);
+{
-	  while ((ptr < end) && (*ptr == leading_byte))
+Charcount count = 0;
-	    ptr += bytes, count++;
+const Intbyte *end = ptr + len;
-	}
+while (1)
+{
+const Intbyte *newptr = skip_ascii (ptr, end);
+count += newptr - ptr;
+ptr = newptr;
+if (ptr == end)
+	break;
+{
+	/* Optimize for successive characters from the same charset */
+	Intbyte leading_byte = *ptr;
+	int bytes = rep_bytes_by_first_byte (leading_byte);
+	while (ptr < end && *ptr == leading_byte)
+	  ptr += bytes, count++;
+}
 }
 /* Bomb out if the specified substring ends in the middle
 of a character.  Note that we might have already gotten
 a core dump above from an invalid reference, but at least
 text_checking_assert (ptr == end);
 return count;
 }
-/* Given a pointer to a text string and a length in characters, return
-the equivalent length in bytes. */
 Bytecount
-charcount_to_bytecount (const Intbyte *ptr, Charcount len)
+charcount_to_bytecount_fun (const Intbyte *ptr, Charcount len)
 {
 const Intbyte *newptr = ptr;
+while (1)
-text_checking_assert (len >= 0);
+{
-while (len > 0)
+const Intbyte *newnewptr = skip_ascii (newptr, newptr + len);
-{
+len -= newnewptr - newptr;
-INC_CHARPTR (newptr);
+newptr = newnewptr;
-len--;
+if (!len)
+	break;
+{
+	/* Optimize for successive characters from the same charset */
+	Intbyte leading_byte = *newptr;
+	int bytes = rep_bytes_by_first_byte (leading_byte);
+	while (len > 0 && *newptr == leading_byte)
+	  newptr += bytes, len--;
+}
 }
 return newptr - ptr;
-}
-inline static void
-update_entirely_ascii_p_flag (struct buffer *buf)
-{
-buf->text->entirely_ascii_p = buf->text->z == buf->text->bufz;
 }
 /* The next two functions are the actual meat behind the
 charbpos-to-bytebpos and bytebpos-to-charbpos conversions.  Currently
 the method they use is fairly unsophisticated; see buffer.h.
 int diff_so_far;
 int add_to_cache = 0;
 /* Check for some cached positions, for speed. */
 if (x == BUF_PT (buf))
-return BI_BUF_PT (buf);
+return BYTE_BUF_PT (buf);
 if (x == BUF_ZV (buf))
-return BI_BUF_ZV (buf);
+return BYTE_BUF_ZV (buf);
 if (x == BUF_BEGV (buf))
-return BI_BUF_BEGV (buf);
+return BYTE_BUF_BEGV (buf);
 bufmin = buf->text->mule_bufmin;
 bufmax = buf->text->mule_bufmax;
 bytmin = buf->text->mule_bytmin;
 bytmax = buf->text->mule_bytmax;
 diffzv += heuristic_hack;
 diffpt += heuristic_hack;
 if (diffpt < diffmax && diffpt <= diffzv)
 	{
 	  bufmax = bufmin = BUF_PT (buf);
-	  bytmax = bytmin = BI_BUF_PT (buf);
+	  bytmax = bytmin = BYTE_BUF_PT (buf);
 	  /* We set the size to 1 even though it doesn't really
 	     matter because the new known region contains no
 	     characters.  We do this because this is the most
 	     likely size of the characters around the new known
 	     region, and we avoid potential yuckiness that is
 	  size = 1;
 	}
 if (diffzv < diffmax)
 	{
 	  bufmax = bufmin = BUF_ZV (buf);
-	  bytmax = bytmin = BI_BUF_ZV (buf);
+	  bytmax = bytmin = BYTE_BUF_ZV (buf);
 	  size = 1;
 	}
 }
 #ifdef ERROR_CHECK_TEXT
 else if (x >= bufmin)
 diffpt += heuristic_hack;
 if (diffpt < diffmin && diffpt <= diffbegv)
 	{
 	  bufmax = bufmin = BUF_PT (buf);
-	  bytmax = bytmin = BI_BUF_PT (buf);
+	  bytmax = bytmin = BYTE_BUF_PT (buf);
 	  /* We set the size to 1 even though it doesn't really
 	     matter because the new known region contains no
 	     characters.  We do this because this is the most
 	     likely size of the characters around the new known
 	     region, and we avoid potential yuckiness that is
 	  size = 1;
 	}
 if (diffbegv < diffmin)
 	{
 	  bufmax = bufmin = BUF_BEGV (buf);
-	  bytmax = bytmin = BI_BUF_BEGV (buf);
+	  bytmax = bytmin = BYTE_BUF_BEGV (buf);
 	  size = 1;
 	}
 }
 diff_so_far = x > bufmax ? x - bufmax : bufmin - x;
 Charbpos retval;
 int diff_so_far;
 int add_to_cache = 0;
 /* Check for some cached positions, for speed. */
-if (x == BI_BUF_PT (buf))
+if (x == BYTE_BUF_PT (buf))
 return BUF_PT (buf);
-if (x == BI_BUF_ZV (buf))
+if (x == BYTE_BUF_ZV (buf))
 return BUF_ZV (buf);
-if (x == BI_BUF_BEGV (buf))
+if (x == BYTE_BUF_BEGV (buf))
 return BUF_BEGV (buf);
 bufmin = buf->text->mule_bufmin;
 bufmax = buf->text->mule_bufmax;
 bytmin = buf->text->mule_bytmin;
 the upper bound of the known region up one character at a time,
 and moving the lower bound of the known region up as necessary
 when the size of the character just seen changes.
 We optimize this, however, by first shifting the known region to
-one of the cached points if it's close by. (We don't check BI_BEG or
+one of the cached points if it's close by. (We don't check BYTE_BEG or
-BI_Z, even though they're cached; most of the time these will be the
+BYTE_Z, even though they're cached; most of the time these will be the
-same as BI_BEGV and BI_ZV, and when they're not, they're not likely
+same as BYTE_BEGV and BYTE_ZV, and when they're not, they're not likely
 to be used.) */
 if (x > bytmax)
 {
 Bytebpos diffmax = x - bytmax;
-Bytebpos diffpt = x - BI_BUF_PT (buf);
+Bytebpos diffpt = x - BYTE_BUF_PT (buf);
-Bytebpos diffzv = BI_BUF_ZV (buf) - x;
+Bytebpos diffzv = BYTE_BUF_ZV (buf) - x;
 /* #### This value could stand some more exploration. */
 Bytecount heuristic_hack = (bytmax - bytmin) >> 2;
 /* Check if the position is closer to PT or ZV than to the
 	 end of the known region. */
 	diffpt = -diffpt;
 if (diffzv < 0)
 	diffzv = -diffzv;
 /* But also implement a heuristic that favors the known region
-	 over BI_PT or BI_ZV.  The reason for this is that switching to
+	 over BYTE_PT or BYTE_ZV.  The reason for this is that switching to
-	 BI_PT or BI_ZV will wipe out the knowledge in the known region,
+	 BYTE_PT or BYTE_ZV will wipe out the knowledge in the known region,
 	 which might be annoying if the known region is large and
-	 BI_PT or BI_ZV is not that much closer than the end of the known
+	 BYTE_PT or BYTE_ZV is not that much closer than the end of the known
 	 region. */
 diffzv += heuristic_hack;
 diffpt += heuristic_hack;
 if (diffpt < diffmax && diffpt <= diffzv)
 	{
 	  bufmax = bufmin = BUF_PT (buf);
-	  bytmax = bytmin = BI_BUF_PT (buf);
+	  bytmax = bytmin = BYTE_BUF_PT (buf);
 	  /* We set the size to 1 even though it doesn't really
 	     matter because the new known region contains no
 	     characters.  We do this because this is the most
 	     likely size of the characters around the new known
 	     region, and we avoid potential yuckiness that is
 	  size = 1;
 	}
 if (diffzv < diffmax)
 	{
 	  bufmax = bufmin = BUF_ZV (buf);
-	  bytmax = bytmin = BI_BUF_ZV (buf);
+	  bytmax = bytmin = BYTE_BUF_ZV (buf);
 	  size = 1;
 	}
 }
 #ifdef ERROR_CHECK_TEXT
 else if (x >= bytmin)
 abort ();
 #endif
 else
 {
 Bytebpos diffmin = bytmin - x;
-Bytebpos diffpt = BI_BUF_PT (buf) - x;
+Bytebpos diffpt = BYTE_BUF_PT (buf) - x;
-Bytebpos diffbegv = x - BI_BUF_BEGV (buf);
+Bytebpos diffbegv = x - BYTE_BUF_BEGV (buf);
 /* #### This value could stand some more exploration. */
 Bytecount heuristic_hack = (bytmax - bytmin) >> 2;
 if (diffpt < 0)
 	diffpt = -diffpt;
 diffpt += heuristic_hack;
 if (diffpt < diffmin && diffpt <= diffbegv)
 	{
 	  bufmax = bufmin = BUF_PT (buf);
-	  bytmax = bytmin = BI_BUF_PT (buf);
+	  bytmax = bytmin = BYTE_BUF_PT (buf);
 	  /* We set the size to 1 even though it doesn't really
 	     matter because the new known region contains no
 	     characters.  We do this because this is the most
 	     likely size of the characters around the new known
 	     region, and we avoid potential yuckiness that is
 	  size = 1;
 	}
 if (diffbegv < diffmin)
 	{
 	  bufmax = bufmin = BUF_BEGV (buf);
-	  bytmax = bytmin = BI_BUF_BEGV (buf);
+	  bytmax = bytmin = BYTE_BUF_BEGV (buf);
 	  size = 1;
 	}
 }
 diff_so_far = x > bytmax ? x - bytmax : bytmin - x;
 	  buf->text->mule_bytebpos_cache[i] += bytelength;
 	}
 }
 if (start >= buf->text->mule_bufmax)
-goto done;
+return;
 /* The insertion is either before the known region, in which case
 it shoves it forward; or within the known region, in which case
 it shoves the end forward. (But it may make the known region
 inconsistent, so we may have to shorten it.) */
 	      buf->text->mule_bufmin = end;
 	      buf->text->mule_bytmin = byteend;
 	    }
 	}
 }
-done:
+}
-update_entirely_ascii_p_flag (buf);
-}
+/* Text from START to END (equivalent in Bytebpos's: from BYTE_START to
+BYTE_END) was deleted. */
-/* Text from START to END (equivalent in Bytebposs: from BI_START to
-BI_END) was deleted. */
 void
 buffer_mule_signal_deleted_region (struct buffer *buf, Charbpos start,
-				   Charbpos end, Bytebpos bi_start,
+				   Charbpos end, Bytebpos byte_start,
-				   Bytebpos bi_end)
+				   Bytebpos byte_end)
 {
 int i;
 /* Adjust the cache of known positions. */
 for (i = 0; i < 16; i++)
 {
 /* After the end; gets shoved backward */
 if (buf->text->mule_charbpos_cache[i] > end)
 	{
 	  buf->text->mule_charbpos_cache[i] -= end - start;
-	  buf->text->mule_bytebpos_cache[i] -= bi_end - bi_start;
+	  buf->text->mule_bytebpos_cache[i] -= byte_end - byte_start;
 	}
 /* In the range; moves to start of range */
 else if (buf->text->mule_charbpos_cache[i] > start)
 	{
 	  buf->text->mule_charbpos_cache[i] = start;
-	  buf->text->mule_bytebpos_cache[i] = bi_start;
+	  buf->text->mule_bytebpos_cache[i] = byte_start;
 	}
 }
 /* We don't care about any text after the end of the known region. */
 end = min (end, buf->text->mule_bufmax);
-bi_end = min (bi_end, buf->text->mule_bytmax);
+byte_end = min (byte_end, buf->text->mule_bytmax);
 if (start >= end)
-goto done;
+return;
 /* The end of the known region offsets by the total amount of deletion,
 since it's all before it. */
 buf->text->mule_bufmax -= end - start;
-buf->text->mule_bytmax -= bi_end - bi_start;
+buf->text->mule_bytmax -= byte_end - byte_start;
 /* Now we don't care about any text after the start of the known region. */
 end = min (end, buf->text->mule_bufmin);
-bi_end = min (bi_end, buf->text->mule_bytmin);
+byte_end = min (byte_end, buf->text->mule_bytmin);
 if (start < end)
 {
 buf->text->mule_bufmin -= end - start;
-buf->text->mule_bytmin -= bi_end - bi_start;
+buf->text->mule_bytmin -= byte_end - byte_start;
 }
-done:
-update_entirely_ascii_p_flag (buf);
 }
 #endif /* MULE */
-#ifdef ERROR_CHECK_TEXT
-Bytebpos
-charbpos_to_bytebpos (struct buffer *buf, Charbpos x)
-{
-Bytebpos retval = real_charbpos_to_bytebpos (buf, x);
-ASSERT_VALID_BYTEBPOS_UNSAFE (buf, retval);
-return retval;
-}
-Charbpos
-bytebpos_to_charbpos (struct buffer *buf, Bytebpos x)
-{
-ASSERT_VALID_BYTEBPOS_UNSAFE (buf, x);
-return real_bytebpos_to_charbpos (buf, x);
-}
-#endif /* ERROR_CHECK_TEXT */
 /************************************************************************/
 /*                verifying buffer and string positions                 */
 /************************************************************************/
 and to signal an error if the positions are out of range.
 */
 void
 get_buffer_range_char (struct buffer *b, Lisp_Object from, Lisp_Object to,
-		       Charbpos *from_out, Charbpos *to_out, unsigned int flags)
+		       Charbpos *from_out, Charbpos *to_out,
+		       unsigned int flags)
 {
 /* Does not GC */
 Charbpos min_allowed, max_allowed;
 min_allowed = (flags & GB_ALLOW_PAST_ACCESSIBLE) ?
 }
 }
 void
 get_buffer_range_byte (struct buffer *b, Lisp_Object from, Lisp_Object to,
-		       Bytebpos *from_out, Bytebpos *to_out, unsigned int flags)
+		       Bytebpos *from_out, Bytebpos *to_out,
+		       unsigned int flags)
 {
 Charbpos s, e;
 get_buffer_range_char (b, from, to, &s, &e, flags);
 if (s >= 0)
 Charcount
 get_string_pos_char (Lisp_Object string, Lisp_Object pos, unsigned int flags)
 {
 return get_string_pos_char_1 (string, pos, flags,
-				XSTRING_CHAR_LENGTH (string));
+				string_char_length (string));
 }
 Bytecount
 get_string_pos_byte (Lisp_Object string, Lisp_Object pos, unsigned int flags)
 {
 get_string_range_char (Lisp_Object string, Lisp_Object from, Lisp_Object to,
 		       Charcount *from_out, Charcount *to_out,
 		       unsigned int flags)
 {
 Charcount min_allowed = 0;
-Charcount max_allowed = XSTRING_CHAR_LENGTH (string);
+Charcount max_allowed = string_char_length (string);
 if (NILP (from) && (flags & GB_ALLOW_NIL))
 *from_out = min_allowed;
 else
 *from_out = get_string_pos_char_1 (string, from,
 else
 *to_out = -1;
 }
-Charbpos
+Charxpos
 get_buffer_or_string_pos_char (Lisp_Object object, Lisp_Object pos,
 			       unsigned int flags)
 {
 return STRINGP (object) ?
 get_string_pos_char (object, pos, flags) :
 get_buffer_pos_char (XBUFFER (object), pos, flags);
 }
-Bytebpos
+Bytexpos
 get_buffer_or_string_pos_byte (Lisp_Object object, Lisp_Object pos,
 			       unsigned int flags)
 {
 return STRINGP (object) ?
 get_string_pos_byte (object, pos, flags) :
 get_buffer_pos_byte (XBUFFER (object), pos, flags);
 }
 void
 get_buffer_or_string_range_char (Lisp_Object object, Lisp_Object from,
-				 Lisp_Object to, Charbpos *from_out,
+				 Lisp_Object to, Charxpos *from_out,
-				 Charbpos *to_out, unsigned int flags)
+				 Charxpos *to_out, unsigned int flags)
 {
 if (STRINGP (object))
 get_string_range_char (object, from, to, from_out, to_out, flags);
 else
-get_buffer_range_char (XBUFFER (object), from, to, from_out, to_out, flags);
+get_buffer_range_char (XBUFFER (object), from, to, from_out, to_out,
+			   flags);
 }
 void
 get_buffer_or_string_range_byte (Lisp_Object object, Lisp_Object from,
-				 Lisp_Object to, Bytebpos *from_out,
+				 Lisp_Object to, Bytexpos *from_out,
-				 Bytebpos *to_out, unsigned int flags)
+				 Bytexpos *to_out, unsigned int flags)
 {
 if (STRINGP (object))
 get_string_range_byte (object, from, to, from_out, to_out, flags);
 else
-get_buffer_range_byte (XBUFFER (object), from, to, from_out, to_out, flags);
+get_buffer_range_byte (XBUFFER (object), from, to, from_out, to_out,
+			   flags);
+}
+/* Start of the accessible text of OBJECT as a char position: 0 when
+   OBJECT is a string, otherwise BUF_BEGV of the buffer. */
+Charxpos
+buffer_or_string_accessible_begin_char (Lisp_Object object)
+{
+  if (STRINGP (object))
+    return 0;
+  return BUF_BEGV (XBUFFER (object));
+}
+/* End of the accessible text of OBJECT as a char position: the length
+   in characters when OBJECT is a string, otherwise BUF_ZV of the
+   buffer. */
+Charxpos
+buffer_or_string_accessible_end_char (Lisp_Object object)
+{
+  if (STRINGP (object))
+    return string_char_length (object);
+  return BUF_ZV (XBUFFER (object));
+}
+/* Start of the accessible text of OBJECT as a byte position: 0 when
+   OBJECT is a string, otherwise BYTE_BUF_BEGV of the buffer. */
+Bytexpos
+buffer_or_string_accessible_begin_byte (Lisp_Object object)
+{
+  if (STRINGP (object))
+    return 0;
+  return BYTE_BUF_BEGV (XBUFFER (object));
+}
+/* End of the accessible text of OBJECT as a byte position:
+   XSTRING_LENGTH when OBJECT is a string, otherwise BYTE_BUF_ZV of the
+   buffer. */
+Bytexpos
+buffer_or_string_accessible_end_byte (Lisp_Object object)
+{
+  if (STRINGP (object))
+    return XSTRING_LENGTH (object);
+  return BYTE_BUF_ZV (XBUFFER (object));
+}
+/* Absolute start of OBJECT as a char position: 0 when OBJECT is a
+   string, otherwise BUF_BEG of the buffer. */
+Charxpos
+buffer_or_string_absolute_begin_char (Lisp_Object object)
+{
+  if (STRINGP (object))
+    return 0;
+  return BUF_BEG (XBUFFER (object));
+}
+/* Absolute end of OBJECT as a char position: the length in characters
+   when OBJECT is a string, otherwise BUF_Z of the buffer. */
+Charxpos
+buffer_or_string_absolute_end_char (Lisp_Object object)
+{
+  if (STRINGP (object))
+    return string_char_length (object);
+  return BUF_Z (XBUFFER (object));
+}
+/* Absolute start of OBJECT as a byte position: 0 when OBJECT is a
+   string, otherwise BYTE_BUF_BEG of the buffer. */
+Bytexpos
+buffer_or_string_absolute_begin_byte (Lisp_Object object)
+{
+  if (STRINGP (object))
+    return 0;
+  return BYTE_BUF_BEG (XBUFFER (object));
+}
+/* Absolute end of OBJECT as a byte position: XSTRING_LENGTH when
+   OBJECT is a string, otherwise BYTE_BUF_Z of the buffer. */
+Bytexpos
+buffer_or_string_absolute_end_byte (Lisp_Object object)
+{
+  if (STRINGP (object))
+    return XSTRING_LENGTH (object);
+  return BYTE_BUF_Z (XBUFFER (object));
 }
 Charbpos
-buffer_or_string_accessible_begin_char (Lisp_Object object)
+charbpos_clip_to_bounds (Charbpos lower, Charbpos num, Charbpos upper)
 {
-return STRINGP (object) ? 0 : BUF_BEGV (XBUFFER (object));
+return (num < lower ? lower :
-}
+	  num > upper ? upper :
+	  num);
-Charbpos
-buffer_or_string_accessible_end_char (Lisp_Object object)
-{
-return STRINGP (object) ?
-XSTRING_CHAR_LENGTH (object) : BUF_ZV (XBUFFER (object));
 }
 Bytebpos
-buffer_or_string_accessible_begin_byte (Lisp_Object object)
+bytebpos_clip_to_bounds (Bytebpos lower, Bytebpos num, Bytebpos upper)
 {
-return STRINGP (object) ? 0 : BI_BUF_BEGV (XBUFFER (object));
+return (num < lower ? lower :
-}
+	  num > upper ? upper :
+	  num);
-Bytebpos
+}
-buffer_or_string_accessible_end_byte (Lisp_Object object)
-{
+Charxpos
-return STRINGP (object) ?
+charxpos_clip_to_bounds (Charxpos lower, Charxpos num, Charxpos upper)
-XSTRING_LENGTH (object) : BI_BUF_ZV (XBUFFER (object));
+{
-}
+return (num < lower ? lower :
+	  num > upper ? upper :
-Charbpos
+	  num);
-buffer_or_string_absolute_begin_char (Lisp_Object object)
+}
-{
-return STRINGP (object) ? 0 : BUF_BEG (XBUFFER (object));
+Bytexpos
-}
+bytexpos_clip_to_bounds (Bytexpos lower, Bytexpos num, Bytexpos upper)
+{
-Charbpos
+return (num < lower ? lower :
-buffer_or_string_absolute_end_char (Lisp_Object object)
+	  num > upper ? upper :
-{
+	  num);
-return STRINGP (object) ?
+}
-XSTRING_CHAR_LENGTH (object) : BUF_Z (XBUFFER (object));
-}
+/* These could be implemented in terms of the get_buffer_or_string()
+functions above, but those are complicated and handle lots of weird
-Bytebpos
+cases stemming from uncertain external input. */
-buffer_or_string_absolute_begin_byte (Lisp_Object object)
-{
+/* Return POS forced into the accessible char range of OBJECT, i.e.
+   between buffer_or_string_accessible_begin_char (OBJECT) and
+   buffer_or_string_accessible_end_char (OBJECT).
+   charxpos_clip_to_bounds() takes (LOWER, NUM, UPPER), so POS has to be
+   the middle argument; passing it first made POS the lower bound and
+   let positions past the end through unclipped. */
+Charxpos
-return STRINGP (object) ? 0 : BI_BUF_BEG (XBUFFER (object));
+buffer_or_string_clip_to_accessible_char (Lisp_Object object, Charxpos pos)
-}
+{
+return (charxpos_clip_to_bounds
-Bytebpos
+	  (buffer_or_string_accessible_begin_char (object), pos,
-buffer_or_string_absolute_end_byte (Lisp_Object object)
+	   buffer_or_string_accessible_end_char (object)));
-{
+}
-return STRINGP (object) ?
-XSTRING_LENGTH (object) : BI_BUF_Z (XBUFFER (object));
+/* Return POS forced into the accessible byte range of OBJECT, i.e.
+   between buffer_or_string_accessible_begin_byte (OBJECT) and
+   buffer_or_string_accessible_end_byte (OBJECT).
+   bytexpos_clip_to_bounds() takes (LOWER, NUM, UPPER), so POS has to be
+   the middle argument; passing it first made POS the lower bound and
+   let positions past the end through unclipped. */
+Bytexpos
+buffer_or_string_clip_to_accessible_byte (Lisp_Object object, Bytexpos pos)
+{
+return (bytexpos_clip_to_bounds
+	  (buffer_or_string_accessible_begin_byte (object), pos,
+	   buffer_or_string_accessible_end_byte (object)));
+}
+/* Return POS forced into the absolute char range of OBJECT, i.e.
+   between buffer_or_string_absolute_begin_char (OBJECT) and
+   buffer_or_string_absolute_end_char (OBJECT).
+   charxpos_clip_to_bounds() takes (LOWER, NUM, UPPER), so POS has to be
+   the middle argument; passing it first made POS the lower bound and
+   let positions past the end through unclipped. */
+Charxpos
+buffer_or_string_clip_to_absolute_char (Lisp_Object object, Charxpos pos)
+{
+return (charxpos_clip_to_bounds
+	  (buffer_or_string_absolute_begin_char (object), pos,
+	   buffer_or_string_absolute_end_char (object)));
+}
+/* Return POS forced into the absolute byte range of OBJECT, i.e.
+   between buffer_or_string_absolute_begin_byte (OBJECT) and
+   buffer_or_string_absolute_end_byte (OBJECT).
+   bytexpos_clip_to_bounds() takes (LOWER, NUM, UPPER), so POS has to be
+   the middle argument; passing it first made POS the lower bound and
+   let positions past the end through unclipped. */
+Bytexpos
+buffer_or_string_clip_to_absolute_byte (Lisp_Object object, Bytexpos pos)
+{
+return (bytexpos_clip_to_bounds
+	  (buffer_or_string_absolute_begin_byte (object), pos,
+	   buffer_or_string_absolute_end_byte (object)));
 }
 /************************************************************************/
 /*           Implement TO_EXTERNAL_FORMAT, TO_INTERNAL_FORMAT           */
 {
 	const Intbyte *end;
 	for (end = ptr + len; ptr < end;)
 	  {
 	    Intbyte c =
-	      (BYTE_ASCII_P (*ptr))		   ? *ptr :
+	      (byte_ascii_p (*ptr))		   ? *ptr :
 	      (*ptr == LEADING_BYTE_CONTROL_1)	   ? (*(ptr+1) - 0x20) :
 	      (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) :
 	      '~';
 	    Dynarr_add (conversion_out_dynarr, (Extbyte) c);
 	}
 end = ptr + len;
 for (p = ptr; p < end; p++)
 	{
-	  if (!BYTE_ASCII_P (*p))
+	  if (!byte_ascii_p (*p))
 	    goto the_hard_way;
 	}
 for (p = ptr; p < end; p++)
 	{
 for (; ptr < end; ptr++)
 {
 Intbyte c = *ptr;
-	  if (BYTE_ASCII_P (c))
+	  if (byte_ascii_p (c))
 	    Dynarr_add (conversion_in_dynarr, c);
-	  else if (BYTE_C1_P (c))
+	  else if (byte_c1_p (c))
 	    {
 	      Dynarr_add (conversion_in_dynarr, LEADING_BYTE_CONTROL_1);
 	      Dynarr_add (conversion_in_dynarr, c + 0x20);
 	    }
 	  else
 for (; ptr < end; ptr += 2)
 	{
 Intbyte c = *ptr;
-	  if (BYTE_ASCII_P (c))
+	  if (byte_ascii_p (c))
 	    Dynarr_add (conversion_in_dynarr, c);
 #ifdef MULE
-	  else if (BYTE_C1_P (c))
+	  else if (byte_c1_p (c))
 	    {
 	      Dynarr_add (conversion_in_dynarr, LEADING_BYTE_CONTROL_1);
 	      Dynarr_add (conversion_in_dynarr, c + 0x20);
 	    }
 	  else
 Intbyte lb;
 int c1, c2;
 Lisp_Object charset;
 p = str;
-BREAKUP_CHAR (c, charset, c1, c2);
+BREAKUP_EMCHAR (c, charset, c1, c2);
-lb = CHAR_LEADING_BYTE (c);
+lb = emchar_leading_byte (c);
-if (LEADING_BYTE_PRIVATE_P (lb))
+if (leading_byte_private_p (lb))
-*p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
+*p++ = private_leading_byte_prefix (lb);
 *p++ = lb;
 if (EQ (charset, Vcharset_control_1))
 c1 += 0x20;
 *p++ = c1 | 0x80;
 if (c2)
 Lisp_Object charset;
 if (i0 == LEADING_BYTE_CONTROL_1)
 return (Emchar) (*++str - 0x20);
-if (LEADING_BYTE_PREFIX_P (i0))
+if (leading_byte_prefix_p (i0))
 i0 = *++str;
 i1 = *++str & 0x7F;
-charset = CHARSET_BY_LEADING_BYTE (i0);
+charset = charset_by_leading_byte (i0);
 if (XCHARSET_DIMENSION (charset) == 2)
 i2 = *++str & 0x7F;
-return MAKE_CHAR (charset, i1, i2);
+return make_emchar (charset, i1, i2);
 }
 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
-Do not call this directly.  Use the macro valid_char_p() instead. */
+Do not call this directly.  Use the macro valid_emchar_p() instead. */
 int
-non_ascii_valid_char_p (Emchar ch)
+non_ascii_valid_emchar_p (Emchar ch)
 {
 int f1, f2, f3;
 /* Must have only lowest 19 bits set */
 /* 0x7FFFF is the mask of those 19 bits; any bit above it rejects CH. */
 if (ch & ~0x7FFFF)
 return 0;
 /* Split CH into its three fields.  The tests below treat f1 == 0 as
    the dimension-1 case; the second block is labelled dimension-2. */
-f1 = CHAR_FIELD1 (ch);
+f1 = emchar_field1 (ch);
-f2 = CHAR_FIELD2 (ch);
+f2 = emchar_field2 (ch);
-f3 = CHAR_FIELD3 (ch);
+f3 = emchar_field3 (ch);
 if (f1 == 0)
 {
 /* dimension-1 char */
 Lisp_Object charset;
 /* leading byte must be correct */
 /* i.e. f2 has to fall inside either the official or the private
    FIELD2 range; the gap between the two ranges is rejected too. */
-if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
+if (f2 < MIN_EMCHAR_FIELD2_OFFICIAL ||
-	  (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
+	  (f2 > MAX_EMCHAR_FIELD2_OFFICIAL && f2 < MIN_EMCHAR_FIELD2_PRIVATE) ||
-	   f2 > MAX_CHAR_FIELD2_PRIVATE)
+	   f2 > MAX_EMCHAR_FIELD2_PRIVATE)
 	return 0;
 /* octet not out of range */
 if (f3 < 0x20)
 	return 0;
 /* charset exists */
 /*
 	 NOTE: This takes advantage of the fact that
 	 FIELD2_TO_OFFICIAL_LEADING_BYTE and
 	 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
 	 */
-charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
+charset = charset_by_leading_byte (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
 /* A nil lookup result is treated as "no such charset". */
 if (EQ (charset, Qnil))
 	return 0;
 /* check range as per size (94 or 96) of charset */
 /* An octet outside 0x21..0x7E is accepted only when the charset
    reports 96 characters. */
 return ((f3 > 0x20 && f3 < 0x7f) || XCHARSET_CHARS (charset) == 96);
 }
 {
 /* dimension-2 char */
 Lisp_Object charset;
 /* leading byte must be correct */
 /* Same shape as the dimension-1 test, but applied to f1 and the
    FIELD1 ranges. */
-if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
+if (f1 < MIN_EMCHAR_FIELD1_OFFICIAL ||
-	  (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
+	  (f1 > MAX_EMCHAR_FIELD1_OFFICIAL && f1 < MIN_EMCHAR_FIELD1_PRIVATE) ||
-	  f1 > MAX_CHAR_FIELD1_PRIVATE)
+	  f1 > MAX_EMCHAR_FIELD1_PRIVATE)
 	return 0;
 /* octets not out of range */
 if (f2 < 0x20 || f3 < 0x20)
 	return 0;
 	  return 1;
 	}
 #endif /* ENABLE_COMPOSITE_CHARS */
 /* charset exists */
 /* Official and private leading bytes use different offsets from f1,
    so the offset is chosen by which side of MAX_..._FIELD1_OFFICIAL
    f1 falls on. */
-if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
+if (f1 <= MAX_EMCHAR_FIELD1_OFFICIAL)
 	charset =
-	  CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
+	  charset_by_leading_byte (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
 else
 	charset =
-	  CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
+	  charset_by_leading_byte (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
 if (EQ (charset, Qnil))
 	return 0;
 /* check range as per size (94x94 or 96x96) of charset */
 /* If either octet is 0x20 or 0x7F, the character is valid only when
    the charset reports 96 characters. */
 return ((f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F) ||
 	      XCHARSET_CHARS (charset) == 96);
 }
 }
 /* Copy the character pointed to by SRC into DST.  Do not call this
-directly.  Use the macro charptr_copy_char() instead.
+directly.  Use the macro charptr_copy_emchar() instead.
 Return the number of bytes copied.  */
 Bytecount
-non_ascii_charptr_copy_char (const Intbyte *src, Intbyte *dst)
+non_ascii_charptr_copy_emchar (const Intbyte *src, Intbyte *dst)
 {
 /* The length of the character is derived from its first byte alone. */
-Bytecount bytes = REP_BYTES_BY_FIRST_BYTE (*src);
+Bytecount bytes = rep_bytes_by_first_byte (*src);
 Bytecount i;
 /* Copy one byte per iteration, counting i down from that length to
    zero while both pointers advance. */
 for (i = bytes; i; i--, dst++, src++)
 *dst = *src;
 /* `bytes' is never modified by the loop, so the value returned is the
    length computed at the top. */
 return bytes;
 }
 Intbyte *strptr = str;
 Bytecount bytes;
 str[0] = (Intbyte) ch;
-for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
+for (bytes = rep_bytes_by_first_byte (ch) - 1; bytes; bytes--)
 {
 int c = Lstream_getc (stream);
 text_checking_assert (c >= 0);
 *++strptr = (Intbyte) c;
 }
 if (CHARSET_DIMENSION (cs) == 1)
 {
 if (!NILP (arg2))
 invalid_argument
 ("Charset is of dimension one; second octet must be nil", arg2);
-return make_char (MAKE_CHAR (charset, a1, 0));
+return make_char (make_emchar (charset, a1, 0));
 }
 CHECK_INT (arg2);
 a2 = XINT (arg2) & 0x7f;
 if (a2 < lowlim || a2 > highlim)
 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
-return make_char (MAKE_CHAR (charset, a1, a2));
+return make_char (make_emchar (charset, a1, a2));
 #else
 int a1;
 int lowlim, highlim;
 if      (EQ (charset, Qascii))     lowlim =  0, highlim = 127;
 */
 (ch))
 {
 CHECK_CHAR_COERCE_INT (ch);
-return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
+return XCHARSET_NAME (charset_by_leading_byte
-			(CHAR_LEADING_BYTE (XCHAR (ch))));
+			(emchar_leading_byte (XCHAR (ch))));
 }
 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
 Return the octet numbered N (should be 0 or 1) of char CH.
 N defaults to 0 if omitted.
 Lisp_Object charset;
 int octet0, octet1;
 CHECK_CHAR_COERCE_INT (ch);
-BREAKUP_CHAR (XCHAR (ch), charset, octet0, octet1);
+BREAKUP_EMCHAR (XCHAR (ch), charset, octet0, octet1);
 if (NILP (n) || EQ (n, Qzero))
 return make_int (octet0);
 else if (EQ (n, make_int (1)))
 return make_int (octet1);
 int c1, c2;
 GCPRO2 (charset, rc);
 CHECK_CHAR_COERCE_INT (character);
-BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
+BREAKUP_EMCHAR (XCHAR (character), charset, c1, c2);
 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
 {
 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
 }
 if (UNBOUNDP (ch))
 {
 if (composite_char_row_next >= 128)
 	invalid_operation ("No more composite chars available", lispstr);
-emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
+emch = make_emchar (Vcharset_composite, composite_char_row_next,
 			composite_char_col_next);
 Fputhash (make_char (emch), lispstr,
 	        Vcomposite_char_char2string_hash_table);
 Fputhash (lispstr, make_char (emch),
 		Vcomposite_char_string2char_hash_table);
 			      Qunbound);
 assert (!UNBOUNDP (str));
 return str;
 }
-xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
+DEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
 Convert a string into a single composite character.
 The character is the result of overstriking all the characters in
 the string.
 */
 (string))
 /* STRING is checked with CHECK_STRING before its data and length are
    read. */
 CHECK_STRING (string);
 /* The string's data pointer and length are handed to
    lookup_composite_char, and its result is passed through make_char
    to form the return value. */
 return make_char (lookup_composite_char (XSTRING_DATA (string),
 					   XSTRING_LENGTH (string)));
 }
-xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
+DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
 Return a string of the characters comprising a composite character.
 */
 (ch))
 {
 Emchar emch;
 /* CH is checked with CHECK_CHAR before XCHAR extracts the Emchar
    from it. */
 CHECK_CHAR (ch);
 emch = XCHAR (ch);
 /* Only a character whose leading byte is LEADING_BYTE_COMPOSITE is
    accepted; anything else is reported through invalid_argument with
    CH as the offending value. */
-if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
+if (emchar_leading_byte (emch) != LEADING_BYTE_COMPOSITE)
 invalid_argument ("Must be composite char", ch);
 return composite_char_string (emch);
 }
 #endif /* ENABLE_COMPOSITE_CHARS */

Mercurial > hg > xemacs-beta

comparison src/text.c @ 826:6728e641994e