diff src/buffer.h @ 70:131b0175ea99 r20-0b30

Import from CVS: tag r20-0b30
author cvs
date Mon, 13 Aug 2007 09:02:59 +0200
parents 0293115a14e9
children 6a378aca36af
line wrap: on
line diff
--- a/src/buffer.h	Mon Aug 13 09:00:04 2007 +0200
+++ b/src/buffer.h	Mon Aug 13 09:02:59 2007 +0200
@@ -32,6 +32,9 @@
 #ifndef _XEMACS_BUFFER_H_
 #define _XEMACS_BUFFER_H_
 
+#ifdef MULE
+#include "mule-charset.h"
+#endif
 
 /************************************************************************/
 /*                                                                      */
@@ -85,6 +88,18 @@
     int save_modiff;		/* Previous value of modiff, as of last
 				   time buffer visited or saved a file.  */
 
+#ifdef MULE
+    /* We keep track of a "known" region for very fast access.
+       This information is text-only so it goes here. */
+    Bufpos mule_bufmin, mule_bufmax;
+    Bytind mule_bytmin, mule_bytmax;
+    int mule_shifter, mule_three_p;
+
+    /* And we also cache 16 positions for fairly fast access near those
+       positions. */
+    Bufpos mule_bufpos_cache[16];
+    Bytind mule_bytind_cache[16];
+#endif
 
     /* Change data that goes with the text. */
     struct buffer_text_change_data *changes;
@@ -354,6 +369,10 @@
    (D) For working with Emchars:
    -----------------------------
 
+   [Note that there are other functions/macros for working with Emchars
+    in mule-charset.h, for retrieving the charset of an Emchar
+    and such.  These are only valid when MULE is defined.]
+
    valid_char_p(ch):
 	Return whether the given Emchar is valid.
 
@@ -384,7 +403,11 @@
 /* (A) For working with charptr's (pointers to internally-formatted text) */
 /* ---------------------------------------------------------------------- */
 
+#ifdef MULE
+# define VALID_CHARPTR_P(ptr) BUFBYTE_FIRST_BYTE_P (* (unsigned char *) ptr)
+#else
 # define VALID_CHARPTR_P(ptr) 1
+#endif
 
 #ifdef ERROR_CHECK_BUFPOS
 # define ASSERT_VALID_CHARPTR(ptr) assert (VALID_CHARPTR_P (ptr))
@@ -438,8 +461,31 @@
 #define DEC_CHARPTR(ptr) REAL_DEC_CHARPTR (ptr)
 #endif
 
+#ifdef MULE
+
+#define VALIDATE_CHARPTR_BACKWARD(ptr) do	\
+{						\
+  while (!VALID_CHARPTR_P (ptr)) ptr--;		\
+} while (0)
+
+/* This needs to be trickier to avoid the possibility of running off
+   the end of the string. */
+
+#define VALIDATE_CHARPTR_FORWARD(ptr) do	\
+{						\
+  Bufbyte *__vcfptr__ = (ptr);			\
+  VALIDATE_CHARPTR_BACKWARD (__vcfptr__);	\
+  if (__vcfptr__ != (ptr))			\
+    {						\
+      (ptr) = __vcfptr__;			\
+      INC_CHARPTR (ptr);			\
+    }						\
+} while (0)
+
+#else /* not MULE */
 #define VALIDATE_CHARPTR_BACKWARD(ptr)
 #define VALIDATE_CHARPTR_FORWARD(ptr)
+#endif /* not MULE */
 
 /* -------------------------------------------------------------- */
 /* (B) For working with the length (in bytes and characters) of a */
@@ -469,10 +515,52 @@
 #define simple_set_charptr_emchar(ptr, x)	((ptr)[0] = (Bufbyte) (x), 1)
 #define simple_charptr_copy_char(ptr, ptr2)	((ptr2)[0] = *(ptr), 1)
 
+#ifdef MULE
+
+Emchar non_ascii_charptr_emchar (CONST Bufbyte *ptr);
+Bytecount non_ascii_set_charptr_emchar (Bufbyte *ptr, Emchar c);
+Bytecount non_ascii_charptr_copy_char (CONST Bufbyte *ptr,
+				       Bufbyte *ptr2);
+
+INLINE Emchar charptr_emchar (CONST Bufbyte *ptr);
+INLINE Emchar
+charptr_emchar (CONST Bufbyte *ptr)
+{
+  if (BYTE_ASCII_P (*ptr))
+    return simple_charptr_emchar (ptr);
+  else
+    return non_ascii_charptr_emchar (ptr);
+}
+
+INLINE Bytecount set_charptr_emchar (Bufbyte *ptr, Emchar x);
+INLINE Bytecount
+set_charptr_emchar (Bufbyte *ptr, Emchar x)
+{
+  if (!CHAR_MULTIBYTE_P (x))
+    return simple_set_charptr_emchar (ptr, x);
+  else
+    return non_ascii_set_charptr_emchar (ptr, x);
+}
+
+INLINE Bytecount charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *ptr2);
+INLINE Bytecount
+charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *ptr2)
+{
+  if (BYTE_ASCII_P (*ptr))
+    return simple_charptr_copy_char (ptr, ptr2);
+  else
+    return non_ascii_charptr_copy_char (ptr, ptr2);
+}
+
+#else /* not MULE */
+
 # define charptr_emchar(ptr)		simple_charptr_emchar (ptr)
 # define set_charptr_emchar(ptr, x)	simple_set_charptr_emchar (ptr, x)
 # define charptr_copy_char(ptr, ptr2)	simple_charptr_copy_char (ptr, ptr2)
 
+#endif /* not MULE */
+
+
 #define charptr_emchar_n(ptr, offset) \
   charptr_emchar (charptr_n_addr (ptr, offset))
 
@@ -481,8 +569,26 @@
 /* (D) For working with Emchars */
 /* ---------------------------- */
 
+#ifdef MULE
+
+int non_ascii_valid_char_p (Emchar ch);
+
+INLINE int valid_char_p (Emchar ch);
+INLINE int
+valid_char_p (Emchar ch)
+{
+  if (ch >= 0 && ch < 0400)
+    return 1;
+  else
+    return non_ascii_valid_char_p (ch);
+}
+
+#else /* not MULE */
+
 #define valid_char_p(ch) ((unsigned int) (ch) < 0400)
 
+#endif /* not MULE */
+
 #define CHAR_INTP(x) (INTP (x) && valid_char_p (XINT (x)))
 
 #define CHAR_OR_CHAR_INTP(x) (CHARP (x) || CHAR_INTP (x))
@@ -511,7 +617,11 @@
        else								\
          x = wrong_type_argument (Qcharacterp, x); } while (0)
 
+#ifdef MULE
+# define MAX_EMCHAR_LEN 4
+#else
 # define MAX_EMCHAR_LEN 1
+#endif
 
 
 /*----------------------------------------------------------------------*/
@@ -762,7 +872,12 @@
 /*                       working with byte indices                      */
 /*----------------------------------------------------------------------*/
 
+#ifdef MULE
+# define VALID_BYTIND_P(buf, x) \
+  BUFBYTE_FIRST_BYTE_P (*BI_BUF_BYTE_ADDRESS (buf, x))
+#else
 # define VALID_BYTIND_P(buf, x) 1
+#endif
 
 #ifdef ERROR_CHECK_BUFPOS
 
@@ -792,9 +907,37 @@
 
 #endif /* not ERROR_CHECK_BUFPOS */
 
+/* Note that, although the Mule version will work fine for non-Mule
+   as well (it should reduce down to nothing), we provide a separate
+   version to avoid compilation warnings and possible non-optimal
+   results with stupid compilers. */
+
+#ifdef MULE
+# define VALIDATE_BYTIND_BACKWARD(buf, x) do		\
+{							\
+  Bufbyte *__ibptr = BI_BUF_BYTE_ADDRESS (buf, x);	\
+  while (!BUFBYTE_FIRST_BYTE_P (*__ibptr))		\
+    __ibptr--, (x)--;					\
+} while (0)
+#else
 # define VALIDATE_BYTIND_BACKWARD(buf, x)
+#endif
 
+/* Note that, although the Mule version will work fine for non-Mule
+   as well (it should reduce down to nothing), we provide a separate
+   version to avoid compilation warnings and possible non-optimal
+   results with stupid compilers. */
+
+#ifdef MULE
+# define VALIDATE_BYTIND_FORWARD(buf, x) do		\
+{							\
+  Bufbyte *__ibptr = BI_BUF_BYTE_ADDRESS (buf, x);	\
+  while (!BUFBYTE_FIRST_BYTE_P (*__ibptr))		\
+    __ibptr++, (x)++;					\
+} while (0)
+#else
 # define VALIDATE_BYTIND_FORWARD(buf, x)
+#endif
 
 /* Note that in the simplest case (no MULE, no ERROR_CHECK_BUFPOS),
    this crap reduces down to simply (x)++. */
@@ -846,9 +989,87 @@
 /*         Converting between buffer positions and byte indices         */
 /*----------------------------------------------------------------------*/
 
+#ifdef MULE
+
+Bytind bufpos_to_bytind_func (struct buffer *buf, Bufpos x);
+Bufpos bytind_to_bufpos_func (struct buffer *buf, Bytind x);
+
+/* The basic algorithm we use is to keep track of a known region of
+   characters in each buffer, all of which are of the same width.  We
+   keep track of the boundaries of the region in both Bufpos and
+   Bytind coordinates and also keep track of the char width, which
+   is 1 - 4 bytes.  If the position we're translating is not in
+   the known region, then we invoke a function to update the known
+   region to surround the position in question.  This assumes
+   locality of reference, which is usually the case.
+
+   Note that the function to update the known region can be simple
+   or complicated depending on how much information we cache.
+   For the moment, we don't cache any information, and just move
+   linearly forward or back from the known region, with a few
+   shortcuts to catch all-ASCII buffers. (Note that this will
+   thrash with bad locality of reference.) A smarter method would
+   be to keep some sort of pseudo-extent layer over the buffer;
+   maybe keep track of the bufpos/bytind correspondence at the
+   beginning of each line, which would allow us to do a binary
+   search over the pseudo-extents to narrow things down to the
+   correct line, at which point you could use a linear movement
+   method.  This would also mesh well with efficiently
+   implementing a line-numbering scheme.
+
+   Note also that we have to multiply or divide by the char width
+   in order to convert the positions.  We do some tricks to avoid
+   ever actually having to do a multiply or divide, because that
+   is typically an expensive operation (esp. divide).  Multiplying
+   or dividing by 1, 2, or 4 can be implemented simply as a
+   shift left or shift right, and we keep track of a shifter value
+   (0, 1, or 2) indicating how much to shift.  Multiplying by 3
+   can be implemented by doubling and then adding the original
+   value.  Dividing by 3, alas, cannot be implemented in any
+   simple shift/subtract method, as far as I know; so we just
+   do a table lookup.  For simplicity, we use a table of size
+   128K, which indexes the "divide-by-3" values for the first
+   64K non-negative numbers. (Note that we can increase the
+   size up to 384K, i.e. indexing the first 192K non-negative
+   numbers, while still using shorts in the array.) This also
+   means that the size of the known region can be at most
+   64K for width-three characters.
+   */
+   
+extern short three_to_one_table[];
+
+INLINE int real_bufpos_to_bytind (struct buffer *buf, Bufpos x);
+INLINE int
+real_bufpos_to_bytind (struct buffer *buf, Bufpos x)
+{
+  if (x >= buf->text->mule_bufmin && x <= buf->text->mule_bufmax)
+    return (buf->text->mule_bytmin +
+	    ((x - buf->text->mule_bufmin) << buf->text->mule_shifter) +
+	    (buf->text->mule_three_p ? (x - buf->text->mule_bufmin) : 0));
+  else
+    return bufpos_to_bytind_func (buf, x);
+}
+
+INLINE int real_bytind_to_bufpos (struct buffer *buf, Bytind x);
+INLINE int
+real_bytind_to_bufpos (struct buffer *buf, Bytind x)
+{
+  if (x >= buf->text->mule_bytmin && x <= buf->text->mule_bytmax)
+    return (buf->text->mule_bufmin +
+	    ((buf->text->mule_three_p
+	      ? three_to_one_table[x - buf->text->mule_bytmin]
+	      : (x - buf->text->mule_bytmin) >> buf->text->mule_shifter)));
+  else
+    return bytind_to_bufpos_func (buf, x);
+}
+
+#else /* not MULE */
+
 # define real_bufpos_to_bytind(buf, x)	((Bytind) x)
 # define real_bytind_to_bufpos(buf, x)	((Bufpos) x)
 
+#endif /* not MULE */
+
 #ifdef ERROR_CHECK_BUFPOS
 
 Bytind bufpos_to_bytind (struct buffer *buf, Bufpos x);
@@ -901,11 +1122,30 @@
    format strings back from a library function.
 */
 
+#ifdef MULE
+
+/* WARNING: These use a static buffer.  This can lead to disaster if
+   these functions are not used *very* carefully.  Under normal
+   circumstances, do not call these functions; call the front ends
+   below. */
+
+CONST Extbyte *convert_to_external_format (CONST Bufbyte *ptr,
+					   Bytecount len,
+					   Extcount *len_out,
+					   enum external_data_format fmt);
+CONST Bufbyte *convert_from_external_format (CONST Extbyte *ptr,
+					     Extcount len,
+					     Bytecount *len_out,
+					     enum external_data_format fmt);
+
+#else /* ! MULE */
+
 #define convert_to_external_format(ptr, len, len_out, fmt) \
      (*(len_out) = (int) (len), (CONST Extbyte *) (ptr))
 #define convert_from_external_format(ptr, len, len_out, fmt) \
      (*(len_out) = (Bytecount) (len), (CONST Bufbyte *) (ptr))
 
+#endif /* ! MULE */
 
 /* In all of the following macros we use the following general principles:
 
@@ -958,6 +1198,41 @@
    middle of the arguments to the function call and you are unbelievably
    hosed.) */
      
+#ifdef MULE
+
+#define GET_CHARPTR_EXT_DATA_ALLOCA(ptr, len, fmt, stick_value_here, stick_len_here) \
+do									\
+{									\
+  Bytecount __gceda_len_in__ = (len);					\
+  Extcount  __gceda_len_out__;						\
+  CONST Bufbyte *__gceda_ptr_in__ = (ptr);				\
+  CONST Extbyte *__gceda_ptr_out__;					\
+									\
+  __gceda_ptr_out__ =							\
+     convert_to_external_format (__gceda_ptr_in__, __gceda_len_in__,	\
+				&__gceda_len_out__, fmt);		\
+  /* If the new string is identical to the old (will be the case most	\
+     of the time), just return the same string back.  This saves	\
+     on alloca()ing, which can be useful on C alloca() machines and	\
+     on stack-space-challenged environments. */				\
+     									\
+  if (__gceda_len_in__ == __gceda_len_out__ &&				\
+      !memcmp (__gceda_ptr_in__, __gceda_ptr_out__, __gceda_len_out__))	\
+    {									\
+      (stick_value_here) = (CONST Extbyte *) __gceda_ptr_in__;		\
+      (stick_len_here) = (Extcount) __gceda_len_in__;			\
+    }									\
+  else									\
+    {									\
+      (stick_value_here) = (CONST Extbyte *) alloca(1 + __gceda_len_out__);\
+      memcpy ((Extbyte *) stick_value_here, __gceda_ptr_out__,		\
+	      1 + __gceda_len_out__);					\
+      (stick_len_here) = (Extcount) __gceda_len_out__;			\
+    }									\
+} while (0)
+
+#else /* ! MULE */
+
 #define GET_CHARPTR_EXT_DATA_ALLOCA(ptr, len, fmt, stick_value_here, stick_len_here)\
 do								\
 {								\
@@ -965,6 +1240,8 @@
   (stick_len_here) = (Extcount) (len);				\
 } while (0)
 
+#endif /* ! MULE */
+
 #define GET_C_CHARPTR_EXT_DATA_ALLOCA(ptr, fmt, stick_value_here)	\
 do									\
 {									\
@@ -1010,13 +1287,50 @@
    middle of the arguments to the function call and you are unbelievably
    hosed.) */
      
+#ifdef MULE
+
 #define GET_CHARPTR_INT_DATA_ALLOCA(ptr, len, fmt, stick_value_here, stick_len_here)\
-do								\
-{								\
-  (stick_value_here) = (CONST Bufbyte *) (ptr);			\
-  (stick_len_here) = (Bytecount) (len);				\
+do									\
+{									\
+  Extcount __gcida_len_in__ = (len);					\
+  Bytecount __gcida_len_out__;						\
+  CONST Extbyte *__gcida_ptr_in__ = (ptr);				\
+  CONST Bufbyte *__gcida_ptr_out__;					\
+									\
+  __gcida_ptr_out__ =							\
+     convert_from_external_format (__gcida_ptr_in__, __gcida_len_in__,	\
+				  &__gcida_len_out__, fmt);		\
+  /* If the new string is identical to the old (will be the case most	\
+     of the time), just return the same string back.  This saves	\
+     on alloca()ing, which can be useful on C alloca() machines and	\
+     on stack-space-challenged environments. */				\
+     									\
+  if (__gcida_len_in__ == __gcida_len_out__ &&				\
+      !memcmp (__gcida_ptr_in__, __gcida_ptr_out__, __gcida_len_out__))	\
+    {									\
+      (stick_value_here) = (CONST Bufbyte *) __gcida_ptr_in__;		\
+      (stick_len_here) = (Bytecount) __gcida_len_in__;			\
+    }									\
+  else									\
+    {									\
+      (stick_value_here) = alloca (1 + __gcida_len_out__);		\
+      memcpy ((Bufbyte *) stick_value_here, __gcida_ptr_out__,		\
+	      1 + __gcida_len_out__); 					\
+      (stick_len_here) = __gcida_len_out__;				\
+    }									\
 } while (0)
 
+#else /* ! MULE */
+
+#define GET_CHARPTR_INT_DATA_ALLOCA(ptr, len, fmt, stick_value_here, stick_len_here)\
+do						\
+{						\
+  (stick_value_here) = (CONST Bufbyte *) (ptr);	\
+  (stick_len_here) = (Bytecount) (len);		\
+} while (0)
+
+#endif /* ! MULE */
+
 #define GET_C_CHARPTR_INT_DATA_ALLOCA(ptr, fmt, stick_value_here)	\
 do									\
 {									\
@@ -1073,7 +1387,7 @@
   __gseda_ptr__ = convert_to_external_format (string_data (__gseda_s__),   \
 					      string_length (__gseda_s__), \
 					      &__gseda_len__, fmt);	   \
-  (stick_value_here) = (CONST Extbyte *) alloca (1 + __gseda_len__);			   \
+  (stick_value_here) = alloca (1 + __gseda_len__);			   \
   memcpy ((Extbyte *) stick_value_here, __gseda_ptr__, 1 + __gseda_len__); \
   (stick_len_here) = __gseda_len__;					   \
 } while (0)
@@ -1122,6 +1436,11 @@
 /*                                                                      */
 /************************************************************************/
 
+/* used when MULE is not defined, so that Charset-type stuff can still
+   be done */
+
+#ifndef MULE
+
 #define Vcharset_ascii Qnil
 
 #define CHAR_CHARSET(ch) Vcharset_ascii
@@ -1145,6 +1464,8 @@
 } while (0)
 #define BYTE_ASCII_P(byte) 1
 
+#endif /* ! MULE */
+
 
 /************************************************************************/
 /*                                                                      */
@@ -1459,8 +1780,27 @@
 # define SET_TRT_TABLE_CHAR_1(table, ch1, ch2) \
   set_string_char (XSTRING (table), (Charcount) ch1, ch2)
 
+#ifdef MULE
+# define MAKE_MIRROR_TRT_TABLE() make_opaque (256, 0)
+# define MIRROR_TRT_TABLE_AS_STRING(table) ((Bufbyte *) XOPAQUE_DATA (table))
+# define MIRROR_TRT_TABLE_CHAR_1(table, ch) \
+  ((Emchar) (MIRROR_TRT_TABLE_AS_STRING (table)[ch]))
+# define SET_MIRROR_TRT_TABLE_CHAR_1(table, ch1, ch2) \
+  (MIRROR_TRT_TABLE_AS_STRING (table)[ch1] = (Bufbyte) (ch2))
+#endif
+
 # define IN_TRT_TABLE_DOMAIN(c) (((unsigned EMACS_INT) (c)) < 0400)
 
+#ifdef MULE
+#define MIRROR_DOWNCASE_TABLE_AS_STRING(buf) \
+  MIRROR_TRT_TABLE_AS_STRING (buf->mirror_downcase_table)
+#define MIRROR_UPCASE_TABLE_AS_STRING(buf) \
+  MIRROR_TRT_TABLE_AS_STRING (buf->mirror_upcase_table)
+#define MIRROR_CANON_TABLE_AS_STRING(buf) \
+  MIRROR_TRT_TABLE_AS_STRING (buf->mirror_case_canon_table)
+#define MIRROR_EQV_TABLE_AS_STRING(buf) \
+  MIRROR_TRT_TABLE_AS_STRING (buf->mirror_case_eqv_table)
+#else
 #define MIRROR_DOWNCASE_TABLE_AS_STRING(buf) \
   TRT_TABLE_AS_STRING (buf->downcase_table)
 #define MIRROR_UPCASE_TABLE_AS_STRING(buf) \
@@ -1469,6 +1809,7 @@
   TRT_TABLE_AS_STRING (buf->case_canon_table)
 #define MIRROR_EQV_TABLE_AS_STRING(buf) \
   TRT_TABLE_AS_STRING (buf->case_eqv_table)
+#endif
 
 INLINE Emchar TRT_TABLE_OF (Lisp_Object trt, Emchar c);
 INLINE Emchar