changeset 4904:e91e3e353805

Don't compare the same octet with itself when checking for boyer_moore_ok. -- src/ChangeLog addition: 2010-01-31 Aidan Kehoe <kehoea@parhasard.net>: * search.c (search_buffer): When checking the octets of c for identity, don't compare the same octet with itself. Thank you Ben Wing! -- tests/ChangeLog addition: 2010-01-31 Aidan Kehoe <kehoea@parhasard.net>: * automated/search-tests.el: Check that multidimensional characters with repeated octets and case information force simple_search(), since boyer_moore() doesn't understand them when constructing its stride table. In passing; correct my spelling. * automated/case-tests.el (uni-mappings): In passing; delete a couple of redundant tests, correct the logic of another.
author Aidan Kehoe <kehoea@parhasard.net>
date Sun, 31 Jan 2010 18:09:57 +0000
parents 70089046adef
children 755ae5b97edb 8431b52e43b1
files src/ChangeLog src/search.c tests/ChangeLog tests/automated/case-tests.el tests/automated/search-tests.el
diffstat 5 files changed, 51 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Sat Jan 30 20:34:23 2010 -0600
+++ b/src/ChangeLog	Sun Jan 31 18:09:57 2010 +0000
@@ -1,3 +1,9 @@
+2010-01-31  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* search.c (search_buffer): 
+	When checking the octets of c for identity, don't compare the
+	same octet with itself. Thank you Ben Wing!
+
 2010-01-30  Ben Wing  <ben@xemacs.org>
 
 	* intl-auto-encap-win32.c:
--- a/src/search.c	Sat Jan 30 20:34:23 2010 -0600
+++ b/src/search.c	Sun Jan 31 18:09:57 2010 +0000
@@ -1441,7 +1441,7 @@
 		      int i, j;
 		      for (i = 0; i < len && boyer_moore_ok; ++i)
 			{
-			  for (j = 0; i < len && boyer_moore_ok; ++j)
+			  for (j = i + 1; j < len && boyer_moore_ok; ++j)
 			    {
 			      if (encoded[i] == encoded[j])
 				{
--- a/tests/ChangeLog	Sat Jan 30 20:34:23 2010 -0600
+++ b/tests/ChangeLog	Sun Jan 31 18:09:57 2010 +0000
@@ -1,3 +1,14 @@
+2010-01-31  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* automated/search-tests.el:
+	Check that multidimensional characters with repeated octets and
+	case information force simple_search(), since boyer_moore()
+	doesn't understand them when constructing its stride table.
+	In passing; correct my spelling.
+	* automated/case-tests.el (uni-mappings):
+	In passing; delete a couple of redundant tests, correct the logic
+	of another.
+
 2010-01-30  Ben Wing  <ben@xemacs.org>
 
 	* automated/search-tests.el:
--- a/tests/automated/case-tests.el	Sat Jan 30 20:34:23 2010 -0600
+++ b/tests/automated/case-tests.el	Sun Jan 31 18:09:57 2010 +0000
@@ -1466,9 +1466,7 @@
     (Assert-equalp lower upper)
     (Assert-equalp lowerupper upperlower)
     (Assert-equal lower (downcase upper))
-    (Assert-equal upper (downcase lower))
-    (Assert-equal lower (downcase upper))
-    (Assert-equal upper (downcase lower))
+    (Assert-equal upper (upcase lower))
     (Assert-equal (downcase lower) (downcase (downcase lower)))
     (Assert-equal (upcase lowerupper) (upcase upperlower))
     (Assert-equal (downcase lowerupper) (downcase upperlower))
--- a/tests/automated/search-tests.el	Sat Jan 30 20:34:23 2010 -0600
+++ b/tests/automated/search-tests.el	Sun Jan 31 18:09:57 2010 +0000
@@ -192,22 +192,23 @@
  (boundp 'debug-xemacs-searches) ; normal when we have DEBUG_XEMACS
  "not a DEBUG_XEMACS build"
  "checks that the algorithm chosen by #'search-forward is relatively sane"
- (let ((debug-xemacs-searches 1))
+ (let ((debug-xemacs-searches 1)
+       newcase)
    (with-temp-buffer
      ;;#### Ben thinks this is unnecessary.  with-temp-buffer creates
      ;;a new buffer, which automatically inherits the standard case table.
      ;;(set-case-table pristine-case-table)
-     (insert "\n\nDer beruhmte deutsche Fleiss\n\n")
+     (insert "\n\nDer beruehmte deutsche Fleiss\n\n")
      (goto-char (point-min))
      (Assert (search-forward "Fleiss"))
      (delete-region (point-min) (point-max))
-     (insert "\n\nDer beruhmte deutsche Flei\xdf\n\n")
+     (insert "\n\nDer ber\xfchmte deutsche Flei\xdf\n\n")
      (goto-char (point-min))
      (Assert (search-forward "Flei\xdf"))
      (Assert-eq 'boyer-moore search-algorithm-used)
      (delete-region (point-min) (point-max))
      (when (featurep 'mule)
-       (insert "\n\nDer beruhmte deutsche Flei\xdf\n\n")
+       (insert "\n\nDer ber\xfchmte deutsche Flei\xdf\n\n")
        (goto-char (point-min))
        (Assert 
         (search-forward (format "Fle%c\xdf"
@@ -220,8 +221,33 @@
        (goto-char (point-min))
        (Assert (search-forward (format "Fle%c\xdf"
                                        (make-char 'latin-iso8859-9 #xfd))))
-       (Assert-eq 'simple-search search-algorithm-used)))))
-
+       (Assert-eq 'simple-search search-algorithm-used)
+       (setq newcase (copy-case-table (standard-case-table)))
+       (put-case-table-pair (make-char 'ethiopic #x23 #x23)
+			    (make-char 'ethiopic #x23 #x25)
+			    newcase)
+       (with-case-table
+	 ;; Check that when a multidimensional character has case and two
+	 ;; repeating octets, searches involving it in the search pattern
+	 ;; use simple-search; otherwise boyer_moore() gets confused in the
+	 ;; construction of the stride table.
+	 newcase
+	 (delete-region (point-min) (point-max))
+	 (insert ?0)
+	 (insert (make-char 'ethiopic #x23 #x23))
+	 (insert ?1)
+	 (goto-char (point-min))
+	 (Assert-eql (search-forward
+		      (string (make-char 'ethiopic #x23 #x25))
+		      nil t)
+		     3)
+	 (Assert-eq 'simple-search search-algorithm-used)
+	 (goto-char (point-min))
+	 (Assert-eql (search-forward
+		      (string (make-char 'ethiopic #x23 #x27))
+		      nil t)
+		     nil)
+	 (Assert-eq 'boyer-moore search-algorithm-used))))))
 
 ;; XEmacs bug of long standing.