# HG changeset patch
# User Aidan Kehoe
# Date 1264890339 0
# Node ID 7504864a986c1d7863e35f33c9b70b28f872305d
# Parent  0eccfd4850d60d5cf6405ee8158dd72c26820af5
Don't use Boyer-Moore if repeated octets & case-insensitive search.

2010-01-30  Aidan Kehoe

	* search.c (search_buffer):
	Don't use Boyer-Moore for case-insensitive search if the search
	pattern contains repeated Ibytes and the corresponding character
	has case information (or, equivalently, if one of its case
	equivalents would contain repeated Ibytes).

diff -r 0eccfd4850d6 -r 7504864a986c src/ChangeLog
--- a/src/ChangeLog	Sat Jan 30 19:23:43 2010 +0000
+++ b/src/ChangeLog	Sat Jan 30 22:25:39 2010 +0000
@@ -1,3 +1,11 @@
+2010-01-30  Aidan Kehoe
+
+	* search.c (search_buffer):
+	Don't use Boyer-Moore for case-insensitive search if the search
+	pattern contains repeated Ibytes and the corresponding character
+	has case information (or, equivalently, if one of its case
+	equivalents would contain repeated Ibytes).
+
 2010-01-28  Jerry James
 
 	* Makefile.in.in: Remove internationalization rules, since the
diff -r 0eccfd4850d6 -r 7504864a986c src/search.c
--- a/src/search.c	Sat Jan 30 19:23:43 2010 +0000
+++ b/src/search.c	Sat Jan 30 22:25:39 2010 +0000
@@ -1425,6 +1425,40 @@
                           break;
                         }
                     }
+
+                  if (ichar_len (c) > 2)
+                    {
+                      /* Case-equivalence plus repeated octets throws off
+                         the construction of the stride table; avoid this.
+
+                         It should be possible to correct boyer_moore to
+                         behave correctly even in this case--it doesn't have
+                         problems with repeated octets when case conversion
+                         is not involved--but this is not a critical
+                         issue. */
+                      Ibyte encoded[MAX_ICHAR_LEN];
+                      Bytecount len = set_itext_ichar (encoded, c);
+                      int i, j;
+                      /* Look for an octet that occurs more than once in
+                         the encoding of C; each octet is compared only
+                         with the octets that follow it. */
+                      for (i = 0; i < len && boyer_moore_ok; ++i)
+                        {
+                          for (j = i + 1; j < len && boyer_moore_ok; ++j)
+                            {
+                              if (encoded[i] == encoded[j])
+                                {
+                                  boyer_moore_ok = 0;
+                                }
+                            }
+                        }
+
+                      if (0 == boyer_moore_ok)
+                        {
+                          break;
+                        }
+                    }
+
                 } while (c != starting_c);
 
               if (!checked)