diff src/search.c @ 4901:7504864a986c

Don't use Boyer-Moore if repeated octets & case-insensitive search. 2010-01-30 Aidan Kehoe <kehoea@parhasard.net> * search.c (search_buffer): Don't use Boyer-Moore for case-insensitive search if the search pattern contains repeated Ibytes and the corresponding character has case information (or, equivalently, if one of its case equivalents would contain repeated Ibytes).
author Aidan Kehoe <kehoea@parhasard.net>
date Sat, 30 Jan 2010 22:25:39 +0000
parents 91a023144e72
children e91e3e353805
line wrap: on
line diff
--- a/src/search.c	Sat Jan 30 19:23:43 2010 +0000
+++ b/src/search.c	Sat Jan 30 22:25:39 2010 +0000
@@ -1425,6 +1425,37 @@
                           break;
                         }
                     }
+
+		  if (ichar_len (c) > 2)
+		    {
+		      /* Case-equivalence plus repeated octets throws off
+			 the construction of the stride table; avoid this.
+
+		         It should be possible to correct boyer_moore to
+		         behave correctly even in this case--it doesn't have
+		         problems with repeated octets when case conversion
+		         is not involved--but this is not a critical
+		         issue. */
+		      Ibyte encoded[MAX_ICHAR_LEN];
+		      Bytecount len = set_itext_ichar (encoded, c);
+		      int i, j;
+		      for (i = 0; i < len && boyer_moore_ok; ++i)
+			{
+			  for (j = 0; i < len && boyer_moore_ok; ++j)
+			    {
+			      if (encoded[i] == encoded[j])
+				{
+				  boyer_moore_ok = 0;
+				}
+			    }
+			}
+
+		      if (0 == boyer_moore_ok)
+			{
+			  break;
+			}
+		    }
+			  
                 } while (c != starting_c);
 
               if (!checked)