diff src/search.c @ 4897:91a023144e72

fix longstanding search bug involving searching for Control-1 chars

-------------------- ChangeLog entries follow: --------------------

src/ChangeLog addition:

2010-01-29 Ben Wing <ben@xemacs.org>

* search.c (boyer_moore): Fix longstanding bug involving searching for Control-1 chars; code was trying to directly extract the last byte in the textual representation of a char from an Ichar (and doing it in a buggy fashion) rather than just converting the Ichar to text and looking at the last byte.

tests/ChangeLog addition:

2010-01-29 Ben Wing <ben@xemacs.org>

* automated/search-tests.el: New file.
* automated/search-tests.el:
* automated/case-tests.el:
* automated/case-tests.el (pristine-case-table): Removed.
* automated/case-tests.el (uni-mappings):
* automated/lisp-tests.el:
* automated/regexp-tests.el: Extract some search-related code from case-tests and regexp-tests and move to search-tests. Move some regexp-related code from lisp-tests to regexp-tests. Write a comment trying to express the proper division of labor between case-tests, search-tests and regexp-tests. Add a new test for the Control-1 search bug. Fix a buggy test in the Unicode torture-test section of case-tests.el.
author Ben Wing <ben@xemacs.org>
date Fri, 29 Jan 2010 20:57:42 -0600
parents 69b803c646cd
children 7504864a986c
line wrap: on
line diff
--- a/src/search.c	Fri Jan 29 20:49:50 2010 -0600
+++ b/src/search.c	Fri Jan 29 20:57:42 2010 -0600
@@ -1779,7 +1779,8 @@
       if (!NILP (trt))
 	{
 #ifdef MULE
-	  Ichar ch, untranslated;
+	  Ichar ch = -1, untranslated;
+	  Ibyte byte;
 	  int this_translated = 1;
 
 	  /* Is *PTR the last byte of a character?  */
@@ -1829,16 +1830,23 @@
                      for charset_base.) */
                   assert (1 == count || starting_ch != ch);
 		}
+	      {
+		Ibyte tmp[MAX_ICHAR_LEN];
+		Bytecount chlen;
+
+		chlen = set_itext_ichar (tmp, ch);
+		byte = tmp[chlen - 1];
+	      }
 	    }
 	  else
 	    {
-	      ch = *ptr;
+	      byte = *ptr;
 	      this_translated = 0;
+	      ch = -1;
 	    }
-	  if (ch > 0400)
-	    j = ((unsigned char) ch | 0200);
-	  else
-	    j = (unsigned char) ch;
+
+	  /* BYTE = last byte of character CH when represented as text */
+	  j = byte;
 	      
 	  if (i == infinity)
 	    stride_for_teases = BM_tab[j];
@@ -1849,6 +1857,8 @@
 	    {
 	      Ichar starting_ch = ch;
 	      EMACS_INT starting_j = j;
+
+	      text_checking_assert (valid_ichar_p (ch));
 	      do
 		{
 		  ch = TRANSLATE (inverse_trt, ch);
@@ -1859,20 +1869,27 @@
                   if (ch > 0xFF && buffer_nothing_greater_than_0xff)
                     continue;
 
-                  if (ch > 0400)
-                    j = ((unsigned char) ch | 0200);
-                  else
-                    j = (unsigned char) ch;
-
+
+		  /* Retrieve last byte of character CH when represented as
+		     text */
+		  {
+		    Ibyte tmp[MAX_ICHAR_LEN];
+		    Bytecount chlen;
+
+		    chlen = set_itext_ichar (tmp, ch);
+		    j = tmp[chlen - 1];
+		  }
+	      
                   /* For all the characters that map into CH, set up
                      simple_translate to map the last byte into
                      STARTING_J.  */
                   simple_translate[j] = (Ibyte) starting_j;
                   BM_tab[j] = dirlen - i;
 
-		} while (ch != starting_ch);
+		}
+	      while (ch != starting_ch);
 	    }
-#else
+#else /* not MULE */
 	  EMACS_INT k;
 	  j = *ptr;
 	  k = (j = TRANSLATE (trt, j));
@@ -1886,7 +1903,7 @@
 	      simple_translate[j] = (Ibyte) k;
 	      BM_tab[j] = dirlen - i;
 	    }
-#endif
+#endif /* (not) MULE */
 	}
       else
 	{