diff src/regex.c @ 446:1ccc32a20af4 r21-2-38

Import from CVS: tag r21-2-38
author cvs
date Mon, 13 Aug 2007 11:37:21 +0200
parents 576fb035e263
children 98528da0b7fc
line wrap: on
line diff
--- a/src/regex.c	Mon Aug 13 11:36:20 2007 +0200
+++ b/src/regex.c	Mon Aug 13 11:37:21 2007 +0200
@@ -104,7 +104,10 @@
 {
 }
 
-#endif /* not MULE */
+#endif /* MULE */
+
+#define RE_TRANSLATE(ch) TRT_TABLE_OF (translate, (Emchar) ch)
+#define TRANSLATE_P(tr) (!NILP (tr))
 
 #else  /* not emacs */
 
@@ -173,11 +176,14 @@
     }
 }
 
-#endif /* not SYNTAX_TABLE */
+#endif /* SYNTAX_TABLE */
 
 #define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c]
 
-#endif /* not emacs */
+#define RE_TRANSLATE(c) translate[(unsigned char) (c)]
+#define TRANSLATE_P(tr) tr
+
+#endif /* emacs */
 
 /* Under XEmacs, this is needed because we don't define it elsewhere. */
 #ifdef SWITCH_ENUM_BUG
@@ -288,8 +294,8 @@
 #ifndef _AIX /* Already did AIX, up at the top.  */
 void *alloca ();
 #endif /* not _AIX */
-#endif /* not HAVE_ALLOCA_H */
-#endif /* not __GNUC__ */
+#endif /* HAVE_ALLOCA_H */
+#endif /* __GNUC__ */
 
 #endif /* not alloca */
 
@@ -304,7 +310,7 @@
 /* No need to do anything to free, after alloca.  */
 #define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
 
-#endif /* not REGEX_MALLOC */
+#endif /* REGEX_MALLOC */
 
 /* Define how to allocate the failure stack.  */
 
@@ -333,8 +339,8 @@
 /* No need to explicitly free anything.  */
 #define REGEX_FREE_STACK(arg)
 
-#endif /* not REGEX_MALLOC */
-#endif /* not REL_ALLOC */
+#endif /* REGEX_MALLOC */
+#endif /* REL_ALLOC */
 
 
 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
@@ -359,6 +365,9 @@
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 
+/* Type of source-pattern and string chars.  */
+typedef const unsigned char re_char;
+
 typedef char boolean;
 #define false 0
 #define true 1
@@ -554,7 +563,7 @@
 
 #ifdef DEBUG
 static void
-extract_number (int *dest, unsigned char *source)
+extract_number (int *dest, re_char *source)
 {
   int temp = SIGN_EXTEND_CHAR (*(source + 1));
   *dest = *source & 0377;
@@ -657,11 +666,11 @@
    the START pointer into it and ending just before the pointer END.  */
 
 static void
-print_partial_compiled_pattern (unsigned char *start, unsigned char *end)
+print_partial_compiled_pattern (re_char *start, re_char *end)
 {
   int mcnt, mcnt2;
-  unsigned char *p = start;
-  unsigned char *pend = end;
+  unsigned char *p = (unsigned char *) start;
+  re_char *pend = end;
 
   if (start == NULL)
     {
@@ -939,7 +948,7 @@
 static void
 print_compiled_pattern (struct re_pattern_buffer *bufp)
 {
-  unsigned char *buffer = bufp->buffer;
+  re_char *buffer = bufp->buffer;
 
   print_partial_compiled_pattern (buffer, buffer + bufp->used);
   printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used,
@@ -965,8 +974,8 @@
 
 
 static void
-print_double_string (const char *where, const char *string1, int size1,
-		     const char *string2, int size2)
+print_double_string (re_char *where, re_char *string1, int size1,
+		     re_char *string2, int size2)
 {
   if (where == NULL)
     printf ("(null)");
@@ -1000,7 +1009,7 @@
 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
 
-#endif /* not DEBUG */
+#endif /* DEBUG */
 
 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
    also be assigned to arbitrarily: each pattern buffer stores its own
@@ -1123,7 +1132,7 @@
 
 union fail_stack_elt
 {
-  unsigned char *pointer;
+  re_char *pointer;
   int integer;
 };
 
@@ -1132,8 +1141,8 @@
 typedef struct
 {
   fail_stack_elt_t *stack;
-  unsigned size;
-  unsigned avail;			/* Offset of next open position.  */
+  size_t size;
+  size_t avail;			/* Offset of next open position.  */
 } fail_stack_type;
 
 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
@@ -1309,7 +1318,7 @@
     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
     PUSH_FAILURE_INT (highest_active_reg);				\
 									\
-    DEBUG_PRINT2 ("  Pushing pattern 0x%lx: ", (long) pattern_place);	\
+    DEBUG_PRINT2 ("  Pushing pattern 0x%lx: \n", (long) pattern_place);	\
     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
     PUSH_FAILURE_POINTER (pattern_place);				\
 									\
@@ -1385,7 +1394,7 @@
      saved NULL, thus retaining our current position in the string.  */	\
   string_temp = POP_FAILURE_POINTER ();					\
   if (string_temp != NULL)						\
-    str = (const char *) string_temp;					\
+    str = string_temp;							\
 									\
   DEBUG_PRINT2 ("  Popping string 0x%lx: `",  (long) str);		\
   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
@@ -1410,10 +1419,10 @@
       DEBUG_PRINT2 ("      info: 0x%lx\n",				\
 		    * (long *) &reg_info[this_reg]);			\
 									\
-      regend[this_reg] = (const char *) POP_FAILURE_POINTER ();		\
+      regend[this_reg] = POP_FAILURE_POINTER ();			\
       DEBUG_PRINT2 ("      end: 0x%lx\n", (long) regend[this_reg]);	\
 									\
-      regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
+      regstart[this_reg] = POP_FAILURE_POINTER ();			\
       DEBUG_PRINT2 ("      start: 0x%lx\n", (long) regstart[this_reg]);	\
     }									\
 									\
@@ -1476,7 +1485,7 @@
   while (0)
 
 /* Registers are set to a sentinel when they haven't yet matched.  */
-static char reg_unset_dummy;
+static unsigned char reg_unset_dummy;
 #define REG_UNSET_VALUE (&reg_unset_dummy)
 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
 
@@ -1487,10 +1496,9 @@
    string passed to us by the user to an unsigned char that we can use
    as an array index (in, e.g., `translate').  */
 #define PATFETCH(c)							\
-  do {if (p == pend) return REG_EEND;					\
-    assert (p < pend);							\
-    c = (unsigned char) *p++;						\
-    if (translate) c = (unsigned char) translate[c]; 			\
+  do {									\
+    PATFETCH_RAW (c);							\
+    c = TRANSLATE (c);							\
   } while (0)
 
 /* Fetch the next character in the uncompiled pattern, with no
@@ -1498,11 +1506,12 @@
 #define PATFETCH_RAW(c)							\
   do {if (p == pend) return REG_EEND;					\
     assert (p < pend);							\
-    c = (unsigned char) *p++; 						\
+    c = charptr_emchar (p); 						\
+    INC_CHARPTR (p);							\
   } while (0)
 
 /* Go backwards one character in the pattern.  */
-#define PATUNFETCH p--
+#define PATUNFETCH DEC_CHARPTR (p)
 
 #ifdef MULE
 
@@ -1511,8 +1520,8 @@
     assert (p < pend);							\
     emch = charptr_emchar ((const Bufbyte *) p);			\
     INC_CHARPTR (p);							\
-    if (translate && emch < 0x80)					\
-      emch = (Emchar) (unsigned char) translate[emch];			\
+    if (TRANSLATE_P (translate) && emch < 0x80)				\
+      emch = (Emchar) (unsigned char) RE_TRANSLATE (emch);		\
   } while (0)
 
 #define PATFETCH_RAW_EXTENDED(emch)					\
@@ -1554,18 +1563,18 @@
 #define PATFETCH_RAW_EITHER(emch) PATFETCH_RAW (emch)
 #define PATUNFETCH_EITHER PATUNFETCH
 
-#endif /* not MULE */
+#endif /* MULE */
 
 /* If `translate' is non-null, return translate[D], else just D.  We
    cast the subscript to translate because some data is declared as
    `char *', to avoid warnings when a string constant is passed.  But
    when we use a character as a subscript we must make it unsigned.  */
-#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+#define TRANSLATE(d) (TRANSLATE_P (translate) ? RE_TRANSLATE (d) : (d))
 
 #ifdef MULE
 
 #define TRANSLATE_EXTENDED_UNSAFE(emch) \
-  (translate && emch < 0x80 ? translate[emch] : (emch))
+  (TRANSLATE_P (translate) && emch < 0x80 ? RE_TRANSLATE (emch) : (emch))
 
 #endif
 
@@ -1576,14 +1585,14 @@
 
 /* Make sure we have at least N more bytes of space in buffer.  */
 #define GET_BUFFER_SPACE(n)						\
-    while (b - bufp->buffer + (n) > bufp->allocated)			\
+    while (buf_end - bufp->buffer + (n) > bufp->allocated)		\
       EXTEND_BUFFER ()
 
 /* Make sure we have one more byte of buffer space and then add C to it.  */
 #define BUF_PUSH(c)							\
   do {									\
     GET_BUFFER_SPACE (1);						\
-    *b++ = (unsigned char) (c);						\
+    *buf_end++ = (unsigned char) (c);					\
   } while (0)
 
 
@@ -1591,8 +1600,8 @@
 #define BUF_PUSH_2(c1, c2)						\
   do {									\
     GET_BUFFER_SPACE (2);						\
-    *b++ = (unsigned char) (c1);					\
-    *b++ = (unsigned char) (c2);					\
+    *buf_end++ = (unsigned char) (c1);					\
+    *buf_end++ = (unsigned char) (c2);					\
   } while (0)
 
 
@@ -1600,9 +1609,9 @@
 #define BUF_PUSH_3(c1, c2, c3)						\
   do {									\
     GET_BUFFER_SPACE (3);						\
-    *b++ = (unsigned char) (c1);					\
-    *b++ = (unsigned char) (c2);					\
-    *b++ = (unsigned char) (c3);					\
+    *buf_end++ = (unsigned char) (c1);					\
+    *buf_end++ = (unsigned char) (c2);					\
+    *buf_end++ = (unsigned char) (c3);					\
   } while (0)
 
 
@@ -1615,13 +1624,15 @@
 #define STORE_JUMP2(op, loc, to, arg) \
   store_op2 (op, loc, (to) - (loc) - 3, arg)
 
-/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
+/* Like `STORE_JUMP', but for inserting.  Assume `buf_end' is the
+   buffer end.  */
 #define INSERT_JUMP(op, loc, to) \
-  insert_op1 (op, loc, (to) - (loc) - 3, b)
-
-/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
+  insert_op1 (op, loc, (to) - (loc) - 3, buf_end)
+
+/* Like `STORE_JUMP2', but for inserting.  Assume `buf_end' is the
+   buffer end.  */
 #define INSERT_JUMP2(op, loc, to, arg) \
-  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+  insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end)
 
 
 /* This is not an arbitrary limit: the arguments which represent offsets
@@ -1636,7 +1647,7 @@
    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
 #define EXTEND_BUFFER()							\
   do { 									\
-    unsigned char *old_buffer = bufp->buffer;				\
+    re_char *old_buffer = bufp->buffer;				\
     if (bufp->allocated == MAX_BUF_SIZE) 				\
       return REG_ESIZE;							\
     bufp->allocated <<= 1;						\
@@ -1648,7 +1659,7 @@
     /* If the buffer moved, move all the pointers into it.  */		\
     if (old_buffer != bufp->buffer)					\
       {									\
-        b = (b - old_buffer) + bufp->buffer;				\
+        buf_end = (buf_end - old_buffer) + bufp->buffer;		\
         begalt = (begalt - old_buffer) + bufp->buffer;			\
         if (fixup_alt_jump)						\
           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
@@ -1705,7 +1716,7 @@
 
 /* Set the bit for character C in a bit vector.  */
 #define SET_LIST_BIT(c)				\
-  (b[((unsigned char) (c)) / BYTEWIDTH]		\
+  (buf_end[((unsigned char) (c)) / BYTEWIDTH]	\
    |= 1 << (((unsigned char) c) % BYTEWIDTH))
 
 #ifdef MULE
@@ -1762,18 +1773,19 @@
 			unsigned char *end);
 static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
 			unsigned char *end);
-static boolean at_begline_loc_p (const char *pattern, const char *p,
+static boolean at_begline_loc_p (re_char *pattern, re_char *p,
 				 reg_syntax_t syntax);
-static boolean at_endline_loc_p (const char *p, const char *pend, int syntax);
+static boolean at_endline_loc_p (re_char *p, re_char *pend, int syntax);
 static boolean group_in_compile_stack (compile_stack_type compile_stack,
 				       regnum_t regnum);
-static reg_errcode_t compile_range (const char **p_ptr, const char *pend,
-				    char *translate, reg_syntax_t syntax,
+static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend,
+				    RE_TRANSLATE_TYPE translate,
+				    reg_syntax_t syntax,
 				    unsigned char *b);
 #ifdef MULE
-static reg_errcode_t compile_extended_range (const char **p_ptr,
-					     const char *pend,
-					     char *translate,
+static reg_errcode_t compile_extended_range (re_char **p_ptr,
+					     re_char *pend,
+					     RE_TRANSLATE_TYPE translate,
 					     reg_syntax_t syntax,
 					     Lisp_Object rtab);
 #endif /* MULE */
@@ -1786,10 +1798,10 @@
 					      unsigned char *end,
 					      register_info_type *reg_info);
 static int bcmp_translate (const unsigned char *s1, const unsigned char *s2,
-			   REGISTER int len, char *translate);
+			   REGISTER int len, RE_TRANSLATE_TYPE translate);
 static int re_match_2_internal (struct re_pattern_buffer *bufp,
-				const char *string1, int size1,
-				const char *string2, int size2, int pos,
+				re_char *string1, int size1,
+				re_char *string2, int size2, int pos,
 				struct re_registers *regs, int stop);
 
 #ifndef MATCH_MAY_ALLOCATE
@@ -1808,11 +1820,11 @@
    but never make them smaller.  */
 static int regs_allocated_size;
 
-static const char **     regstart, **     regend;
-static const char ** old_regstart, ** old_regend;
-static const char **best_regstart, **best_regend;
+static re_char **     regstart, **     regend;
+static re_char ** old_regstart, ** old_regend;
+static re_char **best_regstart, **best_regend;
 static register_info_type *reg_info;
-static const char **reg_dummy;
+static re_char **reg_dummy;
 static register_info_type *reg_info_dummy;
 
 /* Make the register vectors big enough for NUM_REGS registers,
@@ -1823,14 +1835,14 @@
 {
   if (num_regs > regs_allocated_size)
     {
-      RETALLOC_IF (regstart,	   num_regs, const char *);
-      RETALLOC_IF (regend,	   num_regs, const char *);
-      RETALLOC_IF (old_regstart,   num_regs, const char *);
-      RETALLOC_IF (old_regend,	   num_regs, const char *);
-      RETALLOC_IF (best_regstart,  num_regs, const char *);
-      RETALLOC_IF (best_regend,	   num_regs, const char *);
+      RETALLOC_IF (regstart,	   num_regs, re_char *);
+      RETALLOC_IF (regend,	   num_regs, re_char *);
+      RETALLOC_IF (old_regstart,   num_regs, re_char *);
+      RETALLOC_IF (old_regend,	   num_regs, re_char *);
+      RETALLOC_IF (best_regstart,  num_regs, re_char *);
+      RETALLOC_IF (best_regend,	   num_regs, re_char *);
       RETALLOC_IF (reg_info,	   num_regs, register_info_type);
-      RETALLOC_IF (reg_dummy,	   num_regs, const char *);
+      RETALLOC_IF (reg_dummy,	   num_regs, re_char *);
       RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
 
       regs_allocated_size = num_regs;
@@ -1862,7 +1874,7 @@
   return (free (compile_stack.stack), value)
 
 static reg_errcode_t
-regex_compile (const char *pattern, int size, reg_syntax_t syntax,
+regex_compile (re_char *pattern, int size, reg_syntax_t syntax,
 	       struct re_pattern_buffer *bufp)
 {
   /* We fetch characters from PATTERN here.  We declare these as int
@@ -1874,20 +1886,20 @@
   REGISTER EMACS_INT c, c1;
 
   /* A random temporary spot in PATTERN.  */
-  const char *p1;
+  re_char *p1;
 
   /* Points to the end of the buffer, where we should append.  */
-  REGISTER unsigned char *b;
+  REGISTER unsigned char *buf_end;
 
   /* Keeps track of unclosed groups.  */
   compile_stack_type compile_stack;
 
   /* Points to the current (ending) position in the pattern.  */
-  const char *p = pattern;
-  const char *pend = pattern + size;
+  re_char *p = pattern;
+  re_char *pend = pattern + size;
 
   /* How to translate the characters in the pattern.  */
-  char *translate = bufp->translate;
+  RE_TRANSLATE_TYPE translate = bufp->translate;
 
   /* Address of the count-byte of the most recently inserted `exactn'
      command.  This makes it possible to tell if a new exact-match
@@ -1905,7 +1917,7 @@
 
   /* Place in the uncompiled pattern (i.e., the {) to
      which to go back if the interval is invalid.  */
-  const char *beg_interval;
+  re_char *beg_interval;
 
   /* Address of the place where a forward jump should go to the end of
      the containing expression.  Each alternative of an `or' -- except the
@@ -1972,7 +1984,7 @@
       bufp->allocated = INIT_BUF_SIZE;
     }
 
-  begalt = b = bufp->buffer;
+  begalt = buf_end = bufp->buffer;
 
   /* Loop through the uncompiled pattern until we're at the end.  */
   while (p != pend)
@@ -2103,10 +2115,10 @@
                        9: end of pattern.
                      */
                     GET_BUFFER_SPACE (6);
-                    INSERT_JUMP (jump, laststart, b + 3);
-                    b += 3;
+                    INSERT_JUMP (jump, laststart, buf_end + 3);
+                    buf_end += 3;
                     INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
-                    b += 3;
+                    buf_end += 3;
                   }
                 else if (zero_times_ok)
                   {
@@ -2117,10 +2129,10 @@
                        9: end of pattern.
                      */
                     GET_BUFFER_SPACE (6);
-                    INSERT_JUMP (jump, laststart, b + 3);
-                    b += 3;
-                    STORE_JUMP (on_failure_jump, b, laststart + 3);
-                    b += 3;
+                    INSERT_JUMP (jump, laststart, buf_end + 3);
+                    buf_end += 3;
+                    STORE_JUMP (on_failure_jump, buf_end, laststart + 3);
+                    buf_end += 3;
                   }
                 else
                   {
@@ -2130,8 +2142,8 @@
                        6: end of pattern.
                      */
                     GET_BUFFER_SPACE (3);
-                    STORE_JUMP (on_failure_jump, b, laststart);
-                    b += 3;
+                    STORE_JUMP (on_failure_jump, buf_end, laststart);
+                    buf_end += 3;
                   }
               }
             else
@@ -2140,10 +2152,11 @@
                 boolean keep_string_p = false;
 
                 if (many_times_ok)
-                  { /* More than one repetition is allowed, so put in at the
-                       end a backward relative jump from `b' to before the next
-                       jump we're going to put in below (which jumps from
-                       laststart to after this jump).
+                  { /* More than one repetition is allowed, so put in
+                       at the end a backward relative jump from
+                       `buf_end' to before the next jump we're going
+                       to put in below (which jumps from laststart to
+                       after this jump).
 
                        But if we are at the `*' in the exact sequence `.*\n',
                        insert an unconditional jump backwards to the .,
@@ -2161,29 +2174,30 @@
                        character after the `*'.  Do we have to do something
                        analogous here for null bytes, because of
                        RE_DOT_NOT_NULL? */
-                    if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+                    if (*(p - 2) == '.'
                         && zero_times_ok
-                        && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+                        && p < pend && *p == '\n'
                         && !(syntax & RE_DOT_NEWLINE))
                       { /* We have .*\n.  */
-                        STORE_JUMP (jump, b, laststart);
+                        STORE_JUMP (jump, buf_end, laststart);
                         keep_string_p = true;
                       }
                     else
                       /* Anything else.  */
-                      STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+                      STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3);
 
                     /* We've added more stuff to the buffer.  */
-                    b += 3;
+                    buf_end += 3;
                   }
 
-                /* On failure, jump from laststart to b + 3, which will be the
-                   end of the buffer after this jump is inserted.  */
+                /* On failure, jump from laststart to buf_end + 3,
+                   which will be the end of the buffer after this jump
+                   is inserted.  */
                 GET_BUFFER_SPACE (3);
                 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
 					   : on_failure_jump,
-                             laststart, b + 3);
-                b += 3;
+                             laststart, buf_end + 3);
+                buf_end += 3;
 
                 if (!zero_times_ok)
                   {
@@ -2194,7 +2208,7 @@
                        we hit that loop.  */
                     GET_BUFFER_SPACE (3);
                     INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
-                    b += 3;
+                    buf_end += 3;
                   }
               }
             pending_exact = 0;
@@ -2203,7 +2217,7 @@
 
 
 	case '.':
-          laststart = b;
+          laststart = buf_end;
           BUF_PUSH (anychar);
           break;
 
@@ -2223,7 +2237,7 @@
                opcode, the length count, and the bitset; 34 bytes in all.  */
 	    GET_BUFFER_SPACE (34);
 
-            laststart = b;
+            laststart = buf_end;
 
             /* We test `*p == '^' twice, instead of using an if
                statement, so we only need one BUF_PUSH.  */
@@ -2238,10 +2252,10 @@
             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
 
             /* Clear the whole map.  */
-            memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
+            memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
 
             /* charset_not matches newline according to a syntax bit.  */
-            if ((re_opcode_t) b[-2] == charset_not
+            if ((re_opcode_t) buf_end[-2] == charset_not
                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
               SET_LIST_BIT ('\n');
 
@@ -2251,18 +2265,18 @@
 	      {
 		/* There are extended chars here, which means we need to start
 		   over and shift to unified range-table format. */
-		if (b[-2] == charset)
-		  b[-2] = charset_mule;
+		if (buf_end[-2] == charset)
+		  buf_end[-2] = charset_mule;
 		else
-		  b[-2] = charset_mule_not;
-		b--;
+		  buf_end[-2] = charset_mule_not;
+		buf_end--;
 		p = p1; /* go back to the beginning of the charset, after
 			   a possible ^. */
 		rtab = Vthe_lisp_rangetab;
 		Fclear_range_table (rtab);
 
 		/* charset_not matches newline according to a syntax bit.  */
-		if ((re_opcode_t) b[-1] == charset_mule_not
+		if ((re_opcode_t) buf_end[-1] == charset_mule_not
 		    && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
 		  SET_EITHER_BIT ('\n');
 	      }
@@ -2273,7 +2287,7 @@
               {
                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 
-                PATFETCH_EITHER (c);
+                PATFETCH (c);
 
 #ifdef MULE
 		if (c >= 0x80 && !has_extended_chars)
@@ -2292,7 +2306,7 @@
                   {
                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
 
-                    PATFETCH_EITHER (c1);
+                    PATFETCH (c1);
 #ifdef MULE
 		    if (c1 >= 0x80 && !has_extended_chars)
 		      {
@@ -2321,7 +2335,7 @@
                    operator.  */
                 if (c == '-'
                     && !(p - 2 >= pattern && p[-2] == '[')
-                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+		    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
                     && *p != ']')
                   {
                     reg_errcode_t ret;
@@ -2337,7 +2351,7 @@
 						    syntax, rtab);
 		    else
 #endif /* MULE */
-		      ret = compile_range (&p, pend, translate, syntax, b);
+		      ret = compile_range (&p, pend, translate, syntax, buf_end);
                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
                   }
 
@@ -2359,7 +2373,7 @@
 						    syntax, rtab);
 		    else
 #endif /* MULE */
-		      ret = compile_range (&p, pend, translate, syntax, b);
+		      ret = compile_range (&p, pend, translate, syntax, buf_end);
                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
                   }
 
@@ -2378,12 +2392,9 @@
 
                     for (;;)
                       {
-			/* Do not do PATFETCH_EITHER() here.  We want
-			   to just see if the bytes match particular
-			   strings, and we put them all back if not.
-
-			   #### May need to be changed once trt tables
-			   are working. */
+			/* #### This code is unused.
+			   Correctness is not checked after TRT
+			   table change.  */
                         PATFETCH (c);
                         if (c == ':' || c == ']' || p == pend
                             || c1 == CHAR_CLASS_MAX_LENGTH)
@@ -2392,7 +2403,7 @@
                       }
                     str[c1] = '\0';
 
-                    /* If isn't a word bracketed by `[:' and:`]':
+                    /* If isn't a word bracketed by `[:' and `:]':
                        undo the ending character, the letters, and leave
                        the leading `:' and `[' (but set bits for them).  */
                     if (c == ':' && *p == ']')
@@ -2466,16 +2477,16 @@
 		int bytes_needed =
 		  unified_range_table_bytes_needed (rtab);
 		GET_BUFFER_SPACE (bytes_needed);
-		unified_range_table_copy_data (rtab, b);
-		b += unified_range_table_bytes_used (b);
+		unified_range_table_copy_data (rtab, buf_end);
+		buf_end += unified_range_table_bytes_used (buf_end);
 		break;
 	      }
 #endif /* MULE */
             /* Discard any (non)matching list bytes that are all 0 at the
                end of the map.  Decrease the map-length byte too.  */
-            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
-              b[-1]--;
-            b += b[-1];
+            while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0)
+              buf_end[-1]--;
+            buf_end += buf_end[-1];
 	  }
 	  break;
 
@@ -2535,10 +2546,10 @@
 
                 if (!(syntax & RE_NO_SHY_GROUPS)
                     && p != pend
-                    && TRANSLATE(*p) == TRANSLATE('?'))
+                    && *p == '?')
                   {
                     p++;
-                    PATFETCH(c);
+                    PATFETCH (c);
                     switch (c)
                       {
                       case ':': /* shy groups */
@@ -2572,7 +2583,7 @@
                 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
                 COMPILE_STACK_TOP.fixup_alt_jump
                   = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
-                COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
+                COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer;
                 COMPILE_STACK_TOP.regnum = r;
 
                 /* We will eventually replace the 0 with the number of
@@ -2582,7 +2593,7 @@
                 if (r <= MAX_REGNUM)
                   {
                     COMPILE_STACK_TOP.inner_group_offset
-                      = b - bufp->buffer + 2;
+                      = buf_end - bufp->buffer + 2;
                     BUF_PUSH_3 (start_memory, r, 0);
                   }
 
@@ -2590,7 +2601,7 @@
 
                 fixup_alt_jump = 0;
                 laststart = 0;
-                begalt = b;
+                begalt = buf_end;
                 /* If we've reached MAX_REGNUM groups, then this open
                    won't actually generate any code, so we'll have to
                    clear pending_exact explicitly.  */
@@ -2619,7 +2630,7 @@
 
                   /* We allocated space for this jump when we assigned
                      to `fixup_alt_jump', in the `handle_alt' case below.  */
-                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+                  STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1);
                 }
 
               /* See similar code for backslashed left paren above.  */
@@ -2677,9 +2688,9 @@
               /* Insert before the previous alternative a jump which
                  jumps to this alternative if the former fails.  */
               GET_BUFFER_SPACE (3);
-              INSERT_JUMP (on_failure_jump, begalt, b + 6);
+              INSERT_JUMP (on_failure_jump, begalt, buf_end + 6);
               pending_exact = 0;
-              b += 3;
+              buf_end += 3;
 
               /* The alternative before this one has a jump after it
                  which gets executed if it gets matched.  Adjust that
@@ -2698,17 +2709,17 @@
                  bytes which we'll fill in when we get to after `c'.  */
 
               if (fixup_alt_jump)
-                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+                STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end);
 
               /* Mark and leave space for a jump after this alternative,
                  to be filled in later either by next alternative or
                  when know we're at the end of a series of alternatives.  */
-              fixup_alt_jump = b;
+              fixup_alt_jump = buf_end;
               GET_BUFFER_SPACE (3);
-              b += 3;
+              buf_end += 3;
 
               laststart = 0;
-              begalt = b;
+              begalt = buf_end;
               break;
 
 
@@ -2781,7 +2792,7 @@
                     if (syntax & RE_CONTEXT_INVALID_OPS)
                       FREE_STACK_RETURN (REG_BADRPT);
                     else if (syntax & RE_CONTEXT_INDEP_OPS)
-                      laststart = b;
+                      laststart = buf_end;
                     else
                       goto unfetch_interval;
                   }
@@ -2792,8 +2803,8 @@
                  if (upper_bound == 0)
                    {
                      GET_BUFFER_SPACE (3);
-                     INSERT_JUMP (jump, laststart, b + 3);
-                     b += 3;
+                     INSERT_JUMP (jump, laststart, buf_end + 3);
+                     buf_end += 3;
                    }
 
                  /* Otherwise, we have a nontrivial interval.  When
@@ -2818,16 +2829,16 @@
                         because `re_compile_fastmap' needs to know.
                         Jump to the `jump_n' we might insert below.  */
                      INSERT_JUMP2 (succeed_n, laststart,
-                                   b + 5 + (upper_bound > 1) * 5,
+                                   buf_end + 5 + (upper_bound > 1) * 5,
                                    lower_bound);
-                     b += 5;
+                     buf_end += 5;
 
                      /* Code to initialize the lower bound.  Insert
                         before the `succeed_n'.  The `5' is the last two
                         bytes of this `set_number_at', plus 3 bytes of
                         the following `succeed_n'.  */
-                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
-                     b += 5;
+                     insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end);
+                     buf_end += 5;
 
                      if (upper_bound > 1)
                        { /* More than one repetition is allowed, so
@@ -2837,9 +2848,9 @@
                             When we've reached this during matching,
                             we'll have matched the interval once, so
                             jump back only `upper_bound - 1' times.  */
-                         STORE_JUMP2 (jump_n, b, laststart + 5,
+                         STORE_JUMP2 (jump_n, buf_end, laststart + 5,
                                       upper_bound - 1);
-                         b += 5;
+                         buf_end += 5;
 
                          /* The location we want to set is the second
                             parameter of the `jump_n'; that is `b-2' as
@@ -2855,9 +2866,10 @@
                             We insert this at the beginning of the loop
                             so that if we fail during matching, we'll
                             reinitialize the bounds.  */
-                         insert_op2 (set_number_at, laststart, b - laststart,
-                                     upper_bound - 1, b);
-                         b += 5;
+                         insert_op2 (set_number_at, laststart,
+				     buf_end - laststart,
+                                     upper_bound - 1, buf_end);
+                         buf_end += 5;
                        }
                    }
                 pending_exact = 0;
@@ -2889,7 +2901,7 @@
               break;
 
             case 's':
-              laststart = b;
+              laststart = buf_end;
               PATFETCH (c);
 	      /* XEmacs addition */
 	      if (c >= 0x80 || syntax_spec_code[c] == 0377)
@@ -2898,7 +2910,7 @@
               break;
 
             case 'S':
-              laststart = b;
+              laststart = buf_end;
               PATFETCH (c);
 	      /* XEmacs addition */
 	      if (c >= 0x80 || syntax_spec_code[c] == 0377)
@@ -2909,7 +2921,7 @@
 #ifdef MULE
 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */
 	    case 'c':
-	      laststart = b;
+	      laststart = buf_end;
 	      PATFETCH_RAW (c);
 	      if (c < 32 || c > 127)
 		FREE_STACK_RETURN (REG_ECATEGORY);
@@ -2917,7 +2929,7 @@
 	      break;
 
 	    case 'C':
-	      laststart = b;
+	      laststart = buf_end;
 	      PATFETCH_RAW (c);
 	      if (c < 32 || c > 127)
 		FREE_STACK_RETURN (REG_ECATEGORY);
@@ -2929,13 +2941,13 @@
 
 
             case 'w':
-              laststart = b;
+              laststart = buf_end;
               BUF_PUSH (wordchar);
               break;
 
 
             case 'W':
-              laststart = b;
+              laststart = buf_end;
               BUF_PUSH (notwordchar);
               break;
 
@@ -2966,20 +2978,23 @@
 
             case '1': case '2': case '3': case '4': case '5':
             case '6': case '7': case '8': case '9':
-              if (syntax & RE_NO_BK_REFS)
-                goto normal_char;
-
-              c1 = c - '0';
-
-              if (c1 > regnum)
-                FREE_STACK_RETURN (REG_ESUBREG);
-
-              /* Can't back reference to a subexpression if inside of it.  */
-              if (group_in_compile_stack (compile_stack, c1))
-                goto normal_char;
-
-              laststart = b;
-              BUF_PUSH_2 (duplicate, c1);
+	      {
+		regnum_t reg;
+		if (syntax & RE_NO_BK_REFS)
+		  goto normal_char;
+
+		reg = c - '0';
+
+		if (reg > regnum)
+		  FREE_STACK_RETURN (REG_ESUBREG);
+
+		/* Can't back reference to a subexpression if inside of it.  */
+		if (group_in_compile_stack (compile_stack, reg))
+		  goto normal_char;
+
+		laststart = buf_end;
+		BUF_PUSH_2 (duplicate, reg);
+	      }
               break;
 
 
@@ -3008,14 +3023,13 @@
 	  {
 	    /* XEmacs: modifications here for Mule. */
 	    /* `q' points to the beginning of the next char. */
-	    const char *q = p - 1;
-	    INC_CHARPTR (q);
+	    re_char *q = p;
 
 	    /* If no exactn currently being built.  */
 	    if (!pending_exact
 
 		/* If last exactn not at current position.  */
-		|| pending_exact + *pending_exact + 1 != b
+		|| pending_exact + *pending_exact + 1 != buf_end
 
 		/* We have only one byte following the exactn for the count. */
 		|| ((unsigned int) (*pending_exact + (q - p)) >=
@@ -3033,21 +3047,30 @@
 	      {
 		/* Start building a new exactn.  */
 
-		laststart = b;
+		laststart = buf_end;
 
 		BUF_PUSH_2 (exactn, 0);
-		pending_exact = b - 1;
+		pending_exact = buf_end - 1;
 	      }
 
+#ifndef MULE
 	    BUF_PUSH (c);
 	    (*pending_exact)++;
-
-	    while (p < q)
-	      {
-		PATFETCH (c);
-		BUF_PUSH (c);
-		(*pending_exact)++;
-	      }
+#else
+	    {
+	      Bytecount bt_count;
+	      Bufbyte tmp_buf[MAX_EMCHAR_LEN];
+	      int i;
+
+	      bt_count = set_charptr_emchar (tmp_buf, c);
+
+	      for (i = 0; i < bt_count; i++)
+		{
+		  BUF_PUSH (tmp_buf[i]);
+		  (*pending_exact)++;
+		}
+	    }
+#endif
 	    break;
 	  }
         } /* switch (c) */
@@ -3057,7 +3080,7 @@
   /* Through the pattern now.  */
 
   if (fixup_alt_jump)
-    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+    STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end);
 
   if (!COMPILE_STACK_EMPTY)
     FREE_STACK_RETURN (REG_EPAREN);
@@ -3070,7 +3093,7 @@
   free (compile_stack.stack);
 
   /* We have succeeded; set the length of the buffer.  */
-  bufp->used = b - bufp->buffer;
+  bufp->used = buf_end - bufp->buffer;
 
 #ifdef DEBUG
   if (debug)
@@ -3114,7 +3137,7 @@
 	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
 					    (fail_stack.size
 					     * sizeof (fail_stack_elt_t)));
-#endif /* not emacs */
+#endif /* emacs */
       }
 
     regex_grow_registers (num_regs);
@@ -3184,9 +3207,9 @@
    least one character before the ^.  */
 
 static boolean
-at_begline_loc_p (const char *pattern, const char *p, reg_syntax_t syntax)
+at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax)
 {
-  const char *prev = p - 2;
+  re_char *prev = p - 2;
   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
 
   return
@@ -3201,11 +3224,11 @@
    at least one character after the $, i.e., `P < PEND'.  */
 
 static boolean
-at_endline_loc_p (const char *p, const char *pend, int syntax)
+at_endline_loc_p (re_char *p, re_char *pend, int syntax)
 {
-  const char *next = p;
+  re_char *next = p;
   boolean next_backslash = *next == '\\';
-  const char *next_next = p + 1 < pend ? p + 1 : 0;
+  re_char *next_next = p + 1 < pend ? p + 1 : 0;
 
   return
        /* Before a subexpression?  */
@@ -3247,12 +3270,12 @@
    `regex_compile' itself.  */
 
 static reg_errcode_t
-compile_range (const char **p_ptr, const char *pend, char *translate,
-	       reg_syntax_t syntax, unsigned char *b)
+compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate,
+	       reg_syntax_t syntax, unsigned char *buf_end)
 {
   unsigned this_char;
 
-  const char *p = *p_ptr;
+  re_char *p = *p_ptr;
   int range_start, range_end;
 
   if (p == pend)
@@ -3292,7 +3315,8 @@
 #ifdef MULE
 
 static reg_errcode_t
-compile_extended_range (const char **p_ptr, const char *pend, char *translate,
+compile_extended_range (re_char **p_ptr, re_char *pend,
+			RE_TRANSLATE_TYPE translate,
 			reg_syntax_t syntax, Lisp_Object rtab)
 {
   Emchar this_char, range_start, range_end;
@@ -3414,7 +3438,7 @@
 	      /* Reset for next path.  */
 	      path_can_be_null = true;
 
-	      p = fail_stack.stack[--fail_stack.avail].pointer;
+	      p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer;
 
 	      continue;
 	    }
@@ -3463,10 +3487,10 @@
 	  /* And all extended characters must be allowed, too. */
 	  for (j = 0x80; j < 0xA0; j++)
 	    fastmap[j] = 1;
-#else /* ! MULE */
+#else /* not MULE */
 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
             fastmap[j] = 1;
-#endif /* ! MULE */
+#endif /* MULE */
 
 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
@@ -3623,14 +3647,14 @@
 		    }
 		}
 	    }
-#else /* ! MULE */
+#else /* not MULE */
 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
 	    if (SYNTAX_UNSAFE
 		(XCHAR_TABLE
 		 (regex_emacs_buffer->mirror_syntax_table), j) ==
 		(enum syntaxcode) k)
 	      fastmap[j] = 1;
-#endif /* ! MULE */
+#endif /* MULE */
 	  break;
 
 
@@ -3664,14 +3688,14 @@
 		    }
 		}
 	    }
-#else /* ! MULE */
+#else /* not MULE */
 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
 	    if (SYNTAX_UNSAFE
 		(XCHAR_TABLE
 		 (regex_emacs_buffer->mirror_syntax_table), j) !=
 		(enum syntaxcode) k)
 	      fastmap[j] = 1;
-#endif /* ! MULE */
+#endif /* MULE */
 	  break;
 
 #ifdef MULE
@@ -3901,19 +3925,21 @@
    stack overflow).  */
 
 int
-re_search_2 (struct re_pattern_buffer *bufp, const char *string1,
-	     int size1, const char *string2, int size2, int startpos,
+re_search_2 (struct re_pattern_buffer *bufp, const char *str1,
+	     int size1, const char *str2, int size2, int startpos,
 	     int range, struct re_registers *regs, int stop)
 {
   int val;
+  re_char *string1 = (re_char *) str1;
+  re_char *string2 = (re_char *) str2;
   REGISTER char *fastmap = bufp->fastmap;
-  REGISTER char *translate = bufp->translate;
+  REGISTER RE_TRANSLATE_TYPE translate = bufp->translate;
   int total_size = size1 + size2;
   int endpos = startpos + range;
 #ifdef REGEX_BEGLINE_CHECK
   int anchored_at_begline = 0;
 #endif
-  const unsigned char *d;
+  re_char *d;
   Charcount d_size;
 
   /* Check for out-of-range STARTPOS.  */
@@ -3985,12 +4011,8 @@
 	  DEC_CHARPTR(d);	/* Ok, since startpos != size1. */
 	  d_size = charcount_to_bytecount (d, 1);
 
-	  if (translate)
-#ifdef MULE
-	    while (range > lim && (*d >= 0x80 || translate[*d] != '\n'))
-#else
-	    while (range > lim && translate[*d] != '\n')
-#endif
+	  if (TRANSLATE_P (translate))
+	    while (range > lim && *d != '\n')
 	      {
 		d += d_size;	/* Speedier INC_CHARPTR(d) */
 		d_size = charcount_to_bytecount (d, 1);
@@ -4027,13 +4049,20 @@
 
               /* Written out as an if-else to avoid testing `translate'
                  inside the loop.  */
-	      if (translate)
-                while (range > lim &&
+	      if (TRANSLATE_P (translate))
+                while (range > lim)
+		  {
 #ifdef MULE
-		       *d < 0x80 &&
-#endif
-		       !fastmap[(unsigned char)translate[*d]])
-		  {
+		    Emchar buf_ch;
+
+		    buf_ch = charptr_emchar (d);
+		    buf_ch = RE_TRANSLATE (buf_ch);
+		    if (buf_ch >= 0200 || fastmap[(unsigned char) buf_ch])
+		      break;
+#else
+		    if (fastmap[(unsigned char)RE_TRANSLATE (*d)])
+		      break;
+#endif /* MULE */
 		    d_size = charcount_to_bytecount (d, 1);
 		    range -= d_size;
 		    d += d_size; /* Speedier INC_CHARPTR(d) */
@@ -4050,15 +4079,17 @@
 	    }
 	  else				/* Searching backwards.  */
 	    {
-	      unsigned char c = (size1 == 0 || startpos >= size1
-				 ? string2[startpos - size1]
-				 : string1[startpos]);
+	      Emchar c = (size1 == 0 || startpos >= size1
+			  ? charptr_emchar (string2 + startpos - size1)
+			  : charptr_emchar (string1 + startpos));
+	      c = TRANSLATE (c);
 #ifdef MULE
-	      if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)])
+	      if (!(c >= 0200 || fastmap[(unsigned char) c]))
+		goto advance;
 #else
-	      if (!fastmap[(unsigned char) TRANSLATE (c)])
+	      if (!fastmap[(unsigned char) c])
+		goto advance;
 #endif
-		goto advance;
 	    }
 	}
 
@@ -4171,9 +4202,9 @@
     FREE_VAR (reg_dummy);						\
     FREE_VAR (reg_info_dummy);						\
   } while (0)
-#else
+#else /* not MATCH_MAY_ALLOCATE */
 #define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
-#endif /* not MATCH_MAY_ALLOCATE */
+#endif /* MATCH_MAY_ALLOCATE */
 
 /* These values must meet several constraints.  They must not be valid
    register values; since we have a limit of 255 registers (because
@@ -4194,7 +4225,7 @@
 re_match (struct re_pattern_buffer *bufp, const char *string, int size,
 	  int pos, struct re_registers *regs)
 {
-  int result = re_match_2_internal (bufp, NULL, 0, string, size,
+  int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size,
 				    pos, regs, size);
   alloca (0);
   return result;
@@ -4220,7 +4251,8 @@
 	    int size1, const char *string2, int size2, int pos,
 	    struct re_registers *regs, int stop)
 {
-  int result = re_match_2_internal (bufp, string1, size1, string2, size2,
+  int result = re_match_2_internal (bufp, (re_char *) string1, size1,
+				    (re_char *) string2, size2,
 				    pos, regs, stop);
   alloca (0);
   return result;
@@ -4229,8 +4261,8 @@
 /* This is a separate function so that we can force an alloca cleanup
    afterwards.  */
 static int
-re_match_2_internal (struct re_pattern_buffer *bufp, const char *string1,
-		     int size1, const char *string2, int size2, int pos,
+re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
+		     int size1, re_char *string2, int size2, int pos,
 		     struct re_registers *regs, int stop)
 {
   /* General temporaries.  */
@@ -4239,14 +4271,14 @@
   int should_succeed; /* XEmacs change */
 
   /* Just past the end of the corresponding string.  */
-  const char *end1, *end2;
+  re_char *end1, *end2;
 
   /* Pointers into string1 and string2, just past the last characters in
      each to consider matching.  */
-  const char *end_match_1, *end_match_2;
+  re_char *end_match_1, *end_match_2;
 
   /* Where we are in the data, and the end of the current string.  */
-  const char *d, *dend;
+  re_char *d, *dend;
 
   /* Where we are in the pattern, and the end of the pattern.  */
   unsigned char *p = bufp->buffer;
@@ -4254,10 +4286,10 @@
 
   /* Mark the opcode just after a start_memory, so we can test for an
      empty subpattern when we get to the stop_memory.  */
-  unsigned char *just_past_start_mem = 0;
+  re_char *just_past_start_mem = 0;
 
   /* We use this to map every character in the string.  */
-  char *translate = bufp->translate;
+  RE_TRANSLATE_TYPE translate = bufp->translate;
 
   /* Failure point stack.  Each place that can handle a failure further
      down the line pushes a failure point on this stack.  It consists of
@@ -4299,7 +4331,7 @@
      stopped matching the regnum-th subexpression.  (The zeroth register
      keeps track of what the whole pattern matches.)  */
 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
-  const char **regstart, **regend;
+  re_char **regstart, **regend;
 #endif
 
   /* If a group that's operated upon by a repetition operator fails to
@@ -4308,7 +4340,7 @@
      are when we last see its open-group operator.  Similarly for a
      register's end.  */
 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
-  const char **old_regstart, **old_regend;
+  re_char **old_regstart, **old_regend;
 #endif
 
   /* The is_active field of reg_info helps us keep track of which (possibly
@@ -4327,7 +4359,7 @@
      turn happens only if we have not yet matched the entire string. */
   unsigned best_regs_set = false;
 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
-  const char **best_regstart, **best_regend;
+  re_char **best_regstart, **best_regend;
 #endif
 
   /* Logically, this is `best_regend[0]'.  But we don't want to have to
@@ -4338,14 +4370,14 @@
      the end of the best match so far in a separate variable.  We
      initialize this to NULL so that when we backtrack the first time
      and need to test it, it's not garbage.  */
-  const char *match_end = NULL;
+  re_char *match_end = NULL;
 
   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
   int set_regs_matched_done = 0;
 
   /* Used when we pop values we don't care about.  */
 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
-  const char **reg_dummy;
+  re_char **reg_dummy;
   register_info_type *reg_info_dummy;
 #endif
 
@@ -4373,14 +4405,14 @@
      array indexing.  We should fix this.  */
   if (bufp->re_nsub)
     {
-      regstart       = REGEX_TALLOC (num_regs, const char *);
-      regend         = REGEX_TALLOC (num_regs, const char *);
-      old_regstart   = REGEX_TALLOC (num_regs, const char *);
-      old_regend     = REGEX_TALLOC (num_regs, const char *);
-      best_regstart  = REGEX_TALLOC (num_regs, const char *);
-      best_regend    = REGEX_TALLOC (num_regs, const char *);
+      regstart       = REGEX_TALLOC (num_regs, re_char *);
+      regend         = REGEX_TALLOC (num_regs, re_char *);
+      old_regstart   = REGEX_TALLOC (num_regs, re_char *);
+      old_regend     = REGEX_TALLOC (num_regs, re_char *);
+      best_regstart  = REGEX_TALLOC (num_regs, re_char *);
+      best_regend    = REGEX_TALLOC (num_regs, re_char *);
       reg_info       = REGEX_TALLOC (num_regs, register_info_type);
-      reg_dummy      = REGEX_TALLOC (num_regs, const char *);
+      reg_dummy      = REGEX_TALLOC (num_regs, re_char *);
       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
 
       if (!(regstart && regend && old_regstart && old_regend && reg_info
@@ -4420,7 +4452,6 @@
       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
     }
-
   /* We move `string1' into `string2' if the latter's empty -- but not if
      `string1' is null.  */
   if (size2 == 0 && string1 != NULL)
@@ -4462,7 +4493,7 @@
       dend = end_match_2;
     }
 
-  DEBUG_PRINT1 ("The compiled pattern is: ");
+  DEBUG_PRINT1 ("The compiled pattern is: \n");
   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
   DEBUG_PRINT1 ("The string to match is: `");
   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
@@ -4660,22 +4691,40 @@
 
           /* This is written out as an if-else so we don't waste time
              testing `translate' inside the loop.  */
-          if (translate)
+          if (TRANSLATE_P (translate))
 	    {
 	      do
 		{
+#ifdef MULE
+		  Emchar pat_ch, buf_ch;
+		  Bytecount pat_len;
+
 		  PREFETCH ();
-		  if (translate[(unsigned char) *d++] != (char) *p++)
+		  pat_ch = charptr_emchar (p);
+		  buf_ch = charptr_emchar (d);
+		  if (RE_TRANSLATE (buf_ch) != pat_ch)
                     goto fail;
+
+		  pat_len = charcount_to_bytecount (p, 1);
+		  p += pat_len;
+		  INC_CHARPTR (d);
+		  
+		  mcnt -= pat_len;
+#else /* not MULE */
+		  PREFETCH ();
+		  if ((unsigned char) RE_TRANSLATE (*d++) != *p++)
+                    goto fail;
+		  mcnt--;
+#endif
 		}
-	      while (--mcnt);
+	      while (mcnt > 0);
 	    }
 	  else
 	    {
 	      do
 		{
 		  PREFETCH ();
-		  if (*d++ != (char) *p++) goto fail;
+		  if (*d++ != *p++) goto fail;
 		}
 	      while (--mcnt);
 	    }
@@ -4950,7 +4999,7 @@
            followed by the numeric value of <digit> as the register number.  */
         case duplicate:
 	  {
-	    REGISTER const char *d2, *dend2;
+	    REGISTER re_char *d2, *dend2;
 	    int regno = *p++;   /* Get which register to match against.  */
 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
 
@@ -4998,7 +5047,7 @@
 
 		/* Compare that many; failure if mismatch, else move
                    past them.  */
-		if (translate
+		if (TRANSLATE_P (translate)
                     ? bcmp_translate ((unsigned char *) d,
 				      (unsigned char *) d2, mcnt, translate)
                     : memcmp (d, d2, mcnt))
@@ -5086,7 +5135,7 @@
           EXTRACT_NUMBER_AND_INCR (mcnt, p);
           DEBUG_PRINT3 (" %d (to 0x%lx):\n", mcnt, (long) (p + mcnt));
 
-          PUSH_FAILURE_POINT (p + mcnt, (char *) 0, -2);
+          PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2);
           break;
 
 
@@ -5306,7 +5355,7 @@
                `pop_failure_point'.  */
             unsigned dummy_low_reg, dummy_high_reg;
             unsigned char *pdummy;
-            const char *sdummy = NULL;
+            re_char *sdummy = NULL;
 
             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
             POP_FAILURE_POINT (sdummy, pdummy,
@@ -5342,7 +5391,7 @@
           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
           /* It doesn't matter what we push for the string here.  What
              the code at `fail' tests is the value for the pattern.  */
-          PUSH_FAILURE_POINT ((unsigned char *) 0, (char *) 0, -2);
+          PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2);
           goto unconditional_jump;
 
 
@@ -5355,7 +5404,7 @@
           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
           /* See comments just above at `dummy_failure_jump' about the
              two zeroes.  */
-          PUSH_FAILURE_POINT ((unsigned char *) 0, (char *) 0, -2);
+          PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2);
           break;
 
         /* Have to succeed matching what follows at least n times.
@@ -5611,7 +5660,7 @@
           SET_REGS_MATCHED ();
           d++;
 	  break;
-#endif /* not emacs */
+#endif /* emacs */
 
         default:
           abort ();
@@ -5921,15 +5970,34 @@
    bytes; nonzero otherwise.  */
 
 static int
-bcmp_translate (const unsigned char *s1, const unsigned char *s2,
-		REGISTER int len, char *translate)
+bcmp_translate (re_char *s1, re_char *s2,
+		REGISTER int len, RE_TRANSLATE_TYPE translate)
 {
   REGISTER const unsigned char *p1 = s1, *p2 = s2;
+#ifdef MULE
+  const unsigned char *p1_end = s1 + len;
+  const unsigned char *p2_end = s2 + len;
+
+  while (p1 != p1_end && p2 != p2_end)
+    {
+      Emchar p1_ch, p2_ch;
+
+      p1_ch = charptr_emchar (p1);
+      p2_ch = charptr_emchar (p2);
+
+      if (RE_TRANSLATE (p1_ch)
+	  != RE_TRANSLATE (p2_ch))
+	return 1;
+      INC_CHARPTR (p1);
+      INC_CHARPTR (p2);
+    }
+#else /* not MULE */
   while (len)
     {
-      if (translate[*p1++] != translate[*p2++]) return 1;
+      if (RE_TRANSLATE (*p1++) != RE_TRANSLATE (*p2++)) return 1;
       len--;
     }
+#endif /* MULE */
   return 0;
 }
 
@@ -5962,7 +6030,7 @@
   /* Match anchors at newline.  */
   bufp->newline_anchor = 1;
 
-  ret = regex_compile (pattern, length, re_syntax_options, bufp);
+  ret = regex_compile ((unsigned char *) pattern, length, re_syntax_options, bufp);
 
   if (!ret)
     return NULL;
@@ -6007,7 +6075,7 @@
   /* Match anchors at newlines.  */
   re_comp_buf.newline_anchor = 1;
 
-  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
+  ret = regex_compile ((unsigned char *)s, strlen (s), re_syntax_options, &re_comp_buf);
 
   if (!ret)
     return NULL;
@@ -6113,7 +6181,7 @@
 
   /* POSIX says a null character in the pattern terminates it, so we
      can use strlen here in compiling the pattern.  */
-  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
+  ret = regex_compile ((unsigned char *) pattern, strlen (pattern), syntax, preg);
 
   /* POSIX doesn't distinguish between an unmatched open-group and an
      unmatched close-group: both are REG_EPAREN.  */