diff src/regex.c @ 102:a145efe76779 r20-1b3

Import from CVS: tag r20-1b3
author cvs
date Mon, 13 Aug 2007 09:15:49 +0200
parents 6a378aca36af
children cf808b4c4290
line wrap: on
line diff
--- a/src/regex.c	Mon Aug 13 09:15:13 2007 +0200
+++ b/src/regex.c	Mon Aug 13 09:15:49 2007 +0200
@@ -3771,16 +3771,29 @@
 		      regs, size);
 }
 
+#ifndef emacs
+/* Trivial stand-ins for names from src/lisp.h, needed for compiling [ce]tags. */
+# define bytecount_to_charcount(ptr, len) (len)
+# define charcount_to_bytecount(ptr, len) (len)
+typedef int Charcount;
+#endif
 
 /* Using the compiled pattern in BUFP->buffer, first tries to match the
    virtual concatenation of STRING1 and STRING2, starting first at index
    STARTPOS, then at STARTPOS + 1, and so on.
+
+   With MULE, STARTPOS is a byte position, not a char position, and
+   each retry moves STARTPOS by the byte width of a whole character
+   rather than by a single byte.
    
    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
    
    RANGE is how far to scan while trying to match.  RANGE = 0 means try
    only at STARTPOS; in general, the last start tried is STARTPOS +
    RANGE.
+
+   With MULE, RANGE is a byte count, not a char count.  The last start
+   tried is the last character that begins at or before STARTPOS + RANGE.
    
    In REGS, return the indices of the virtual concatenation of STRING1
    and STRING2 that matched the entire BUFP->buffer and its contained
@@ -3813,8 +3826,13 @@
     
   /* Fix up RANGE if it might eventually take us outside
      the virtual concatenation of STRING1 and STRING2.  */
+#if 0
   if (endpos < -1)
     range = -1 - startpos;
+#else
+  if (endpos < 0)
+    range = 0 - startpos;
+#endif
   else if (endpos > total_size)
     range = total_size - startpos;
 
@@ -3862,25 +3880,33 @@
 	  /* whose stupid idea was it anyway to make this
 	     function take two strings to match?? */
 	  int lim = 0;
-	  unsigned char *p;
+	  register CONST unsigned char *d;
 	  int irange = range;
+
 	  if (startpos < size1 && startpos + range >= size1)
 	    lim = range - (size1 - startpos);
 
-	  p = ((unsigned char *)
-	       &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
-	  p--;
+	  d = ((CONST unsigned char *)
+	       (startpos >= size1 ? string2 - size1 : string1) + startpos);
+	  DEC_CHARPTR(d);
 
 	  if (translate)
-	    {
-	      while (range > lim && translate[*p++] != '\n')
-		range--;
-	    }
+#ifdef MULE
+	    while (range > lim && (*d >= 0x80 || translate[*d] != '\n'))
+#else
+	    while (range > lim && translate[*d] != '\n')
+#endif
+	      {
+		INC_CHARPTR(d);
+		range -= charcount_to_bytecount (d, 1);
+	      }
 	  else
-	    {
-	      while (range > lim && *p++ != '\n')
-		range--;
-	    }
+	    while (range > lim && *d != '\n')
+	      {
+		INC_CHARPTR(d);
+		range -= charcount_to_bytecount (d, 1);
+	      }
+
 	  startpos += irange - range;
 	}
 #endif /* REGEX_BEGLINE_CHECK */
@@ -3893,35 +3919,47 @@
 	{
 	  if (range > 0)	/* Searching forwards.  */
 	    {
-	      register CONST char *d;
+	      register CONST unsigned char *d;
 	      register int lim = 0;
 	      int irange = range;
 
               if (startpos < size1 && startpos + range >= size1)
                 lim = range - (size1 - startpos);
 
-	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
+	      d = ((CONST unsigned char *)
+		   (startpos >= size1 ? string2 - size1 : string1) + startpos);
    
               /* Written out as an if-else to avoid testing `translate'
                  inside the loop.  */
 	      if (translate)
-                while (range > lim
-                       && !fastmap[(unsigned char)
-				   translate[(unsigned char) *d++]])
-                  range--;
+#ifdef MULE
+                while (range > lim && *d < 0x80 && !fastmap[translate[*d]])
+#else
+                while (range > lim && !fastmap[translate[*d]])
+#endif
+		  {
+		    range -= charcount_to_bytecount (d, 1);
+		    INC_CHARPTR(d);
+		  }
 	      else
-                while (range > lim && !fastmap[(unsigned char) *d++])
-                  range--;
+                while (range > lim && !fastmap[*d])
+		  {
+		    range -= charcount_to_bytecount (d, 1);
+		    INC_CHARPTR(d);
+		  }
 
 	      startpos += irange - range;
 	    }
 	  else				/* Searching backwards.  */
 	    {
-	      register char c = (size1 == 0 || startpos >= size1
-                                 ? string2[startpos - size1] 
-                                 : string1[startpos]);
-
+	      register unsigned char c = (size1 == 0 || startpos >= size1
+					  ? string2[startpos - size1] 
+					  : string1[startpos]);
+#ifdef MULE
+	      if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)])
+#else
 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
+#endif
 		goto advance;
 	    }
 	}
@@ -3951,17 +3989,28 @@
 
     advance:
       if (!range) 
-        break;
-      else if (range > 0) 
-        {
-          range--; 
-          startpos++;
-        }
-      else
-        {
-          range++; 
-          startpos--;
-        }
+	break;
+      else {
+	register CONST unsigned char *d;
+	Charcount d_size;
+
+	d = ((CONST unsigned char *)
+	     (startpos >= size1 ? string2 - size1 : string1) + startpos);
+
+	if (range > 0) 
+	  {
+	    d_size = charcount_to_bytecount (d, 1);
+	    range -= d_size;
+	    startpos += d_size;
+	  }
+	else
+	  {
+	    DEC_CHARPTR(d);
+	    d_size = charcount_to_bytecount (d, 1);
+	    range += d_size;
+	    startpos -= d_size;
+	  }
+      }
     }
   return -1;
 } /* re_search_2 */
@@ -5075,10 +5124,19 @@
                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
 #endif
 
+#if 1
+                /* dmoore@ucsd.edu - emacs 19.34 uses this: */
+
                 if ((re_opcode_t) p1[3] == exactn
-		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
-			  && (p2[1 + p1[4] / BYTEWIDTH]
-			      & (1 << (p1[4] % BYTEWIDTH)))))
+                    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
+                          && (p2[2 + p1[5] / BYTEWIDTH]
+                              & (1 << (p1[5] % BYTEWIDTH)))))
+#else
+                if ((re_opcode_t) p1[3] == exactn
+                    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
+                          && (p2[1 + p1[4] / BYTEWIDTH]
+                              & (1 << (p1[4] % BYTEWIDTH)))))
+#endif
                   {
   		    p[-3] = (unsigned char) pop_failure_jump;
                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",