Mercurial > hg > xemacs-beta
diff src/search.c @ 4962:e813cf16c015
merge
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Mon, 01 Feb 2010 05:29:05 -0600 |
parents | 304aebb79cd3 e91e3e353805 |
children | 2ade80e8c640 |
line wrap: on
line diff
--- a/src/search.c Sun Jan 31 21:11:44 2010 -0600 +++ b/src/search.c Mon Feb 01 05:29:05 2010 -0600 @@ -1425,6 +1425,37 @@ break; } } + + if (ichar_len (c) > 2) + { + /* Case-equivalence plus repeated octets throws off + the construction of the stride table; avoid this. + + It should be possible to correct boyer_moore to + behave correctly even in this case--it doesn't have + problems with repeated octets when case conversion + is not involved--but this is not a critical + issue. */ + Ibyte encoded[MAX_ICHAR_LEN]; + Bytecount len = set_itext_ichar (encoded, c); + int i, j; + for (i = 0; i < len && boyer_moore_ok; ++i) + { + for (j = i + 1; j < len && boyer_moore_ok; ++j) + { + if (encoded[i] == encoded[j]) + { + boyer_moore_ok = 0; + } + } + } + + if (0 == boyer_moore_ok) + { + break; + } + } + } while (c != starting_c); if (!checked) @@ -1779,7 +1810,8 @@ if (!NILP (trt)) { #ifdef MULE - Ichar ch, untranslated; + Ichar ch = -1, untranslated; + Ibyte byte; int this_translated = 1; /* Is *PTR the last byte of a character? */ @@ -1829,16 +1861,23 @@ for charset_base.) */ assert (1 == count || starting_ch != ch); } + { + Ibyte tmp[MAX_ICHAR_LEN]; + Bytecount chlen; + + chlen = set_itext_ichar (tmp, ch); + byte = tmp[chlen - 1]; + } } else { - ch = *ptr; + byte = *ptr; this_translated = 0; + ch = -1; } - if (ch > 0400) - j = ((unsigned char) ch | 0200); - else - j = (unsigned char) ch; + + /* BYTE = last byte of character CH when represented as text */ + j = byte; if (i == infinity) stride_for_teases = BM_tab[j]; @@ -1849,6 +1888,8 @@ { Ichar starting_ch = ch; EMACS_INT starting_j = j; + + text_checking_assert (valid_ichar_p (ch)); do { ch = TRANSLATE (inverse_trt, ch); @@ -1859,20 +1900,27 @@ if (ch > 0xFF && buffer_nothing_greater_than_0xff) continue; - if (ch > 0400) - j = ((unsigned char) ch | 0200); - else - j = (unsigned char) ch; - + + /* Retrieve last byte of character CH when represented as + text */ + { + Ibyte tmp[MAX_ICHAR_LEN]; + Bytecount chlen; + + chlen = set_itext_ichar (tmp, ch); + j = tmp[chlen - 1]; + } + /* For all the characters that map into CH, set up simple_translate to map the last byte into STARTING_J. */ simple_translate[j] = (Ibyte) starting_j; BM_tab[j] = dirlen - i; - } while (ch != starting_ch); + } + while (ch != starting_ch); } -#else +#else /* not MULE */ EMACS_INT k; j = *ptr; k = (j = TRANSLATE (trt, j)); @@ -1886,7 +1934,7 @@ simple_translate[j] = (Ibyte) k; BM_tab[j] = dirlen - i; } -#endif +#endif /* (not) MULE */ } else {