Mercurial > hg > xemacs-beta
comparison src/search.c @ 4897:91a023144e72
fix longstanding search bug involving searching for Control-1 chars
-------------------- ChangeLog entries follow: --------------------
src/ChangeLog addition:
2010-01-29 Ben Wing <ben@xemacs.org>
* search.c (boyer_moore): Fix longstanding bug involving
searching for Control-1 chars. The code was trying to extract the
last byte of a char's textual representation directly from an
Ichar (and doing it in a buggy fashion). It now simply converts
the Ichar to text and looks at the last byte.
tests/ChangeLog addition:
2010-01-29 Ben Wing <ben@xemacs.org>
* automated/search-tests.el:
New file.
* automated/search-tests.el:
* automated/case-tests.el:
* automated/case-tests.el (pristine-case-table): Removed.
* automated/case-tests.el (uni-mappings):
* automated/lisp-tests.el:
* automated/regexp-tests.el:
Extract some search-related code from case-tests and regexp-tests
and move it to search-tests. Move some regexp-related code from
lisp-tests to regexp-tests.
Write a comment trying to express the proper division of labor
between case-tests, search-tests and regexp-tests.
Add a new test for the Control-1 search bug.
Fix a buggy test in the Unicode torture-test section of case-tests.el.
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Fri, 29 Jan 2010 20:57:42 -0600 |
parents | 69b803c646cd |
children | 7504864a986c |
comparison
Legend: equal, deleted, inserted, replaced
4896:a7ab1d6ff301 | 4897:91a023144e72 |
---|---|
1777 if (i == dirlen) | 1777 if (i == dirlen) |
1778 i = infinity; | 1778 i = infinity; |
1779 if (!NILP (trt)) | 1779 if (!NILP (trt)) |
1780 { | 1780 { |
1781 #ifdef MULE | 1781 #ifdef MULE |
1782 Ichar ch, untranslated; | 1782 Ichar ch = -1, untranslated; |
1783 Ibyte byte; | |
1783 int this_translated = 1; | 1784 int this_translated = 1; |
1784 | 1785 |
1785 /* Is *PTR the last byte of a character? */ | 1786 /* Is *PTR the last byte of a character? */ |
1786 if (pat_end - ptr == 1 || ibyte_first_byte_p (ptr[1])) | 1787 if (pat_end - ptr == 1 || ibyte_first_byte_p (ptr[1])) |
1787 { | 1788 { |
1827 know from the canon table that untranslated maps to | 1828 know from the canon table that untranslated maps to |
1828 starting_ch and that untranslated has the correct value | 1829 starting_ch and that untranslated has the correct value |
1829 for charset_base.) */ | 1830 for charset_base.) */ |
1830 assert (1 == count || starting_ch != ch); | 1831 assert (1 == count || starting_ch != ch); |
1831 } | 1832 } |
1833 { | |
1834 Ibyte tmp[MAX_ICHAR_LEN]; | |
1835 Bytecount chlen; | |
1836 | |
1837 chlen = set_itext_ichar (tmp, ch); | |
1838 byte = tmp[chlen - 1]; | |
1839 } | |
1832 } | 1840 } |
1833 else | 1841 else |
1834 { | 1842 { |
1835 ch = *ptr; | 1843 byte = *ptr; |
1836 this_translated = 0; | 1844 this_translated = 0; |
1845 ch = -1; | |
1837 } | 1846 } |
1838 if (ch > 0400) | 1847 |
1839 j = ((unsigned char) ch | 0200); | 1848 /* BYTE = last byte of character CH when represented as text */ |
1840 else | 1849 j = byte; |
1841 j = (unsigned char) ch; | |
1842 | 1850 |
1843 if (i == infinity) | 1851 if (i == infinity) |
1844 stride_for_teases = BM_tab[j]; | 1852 stride_for_teases = BM_tab[j]; |
1845 BM_tab[j] = dirlen - i; | 1853 BM_tab[j] = dirlen - i; |
1846 /* A translation table is accompanied by its inverse -- see | 1854 /* A translation table is accompanied by its inverse -- see |
1847 comment in casetab.c. */ | 1855 comment in casetab.c. */ |
1848 if (this_translated) | 1856 if (this_translated) |
1849 { | 1857 { |
1850 Ichar starting_ch = ch; | 1858 Ichar starting_ch = ch; |
1851 EMACS_INT starting_j = j; | 1859 EMACS_INT starting_j = j; |
1860 | |
1861 text_checking_assert (valid_ichar_p (ch)); | |
1852 do | 1862 do |
1853 { | 1863 { |
1854 ch = TRANSLATE (inverse_trt, ch); | 1864 ch = TRANSLATE (inverse_trt, ch); |
1855 | 1865 |
1856 if (ch > 0x7F && buffer_entirely_one_byte_p) | 1866 if (ch > 0x7F && buffer_entirely_one_byte_p) |
1857 continue; | 1867 continue; |
1858 | 1868 |
1859 if (ch > 0xFF && buffer_nothing_greater_than_0xff) | 1869 if (ch > 0xFF && buffer_nothing_greater_than_0xff) |
1860 continue; | 1870 continue; |
1861 | 1871 |
1862 if (ch > 0400) | 1872 |
1863 j = ((unsigned char) ch | 0200); | 1873 /* Retrieve last byte of character CH when represented as |
1864 else | 1874 text */ |
1865 j = (unsigned char) ch; | 1875 { |
1866 | 1876 Ibyte tmp[MAX_ICHAR_LEN]; |
1877 Bytecount chlen; | |
1878 | |
1879 chlen = set_itext_ichar (tmp, ch); | |
1880 j = tmp[chlen - 1]; | |
1881 } | |
1882 | |
1867 /* For all the characters that map into CH, set up | 1883 /* For all the characters that map into CH, set up |
1868 simple_translate to map the last byte into | 1884 simple_translate to map the last byte into |
1869 STARTING_J. */ | 1885 STARTING_J. */ |
1870 simple_translate[j] = (Ibyte) starting_j; | 1886 simple_translate[j] = (Ibyte) starting_j; |
1871 BM_tab[j] = dirlen - i; | 1887 BM_tab[j] = dirlen - i; |
1872 | 1888 |
1873 } while (ch != starting_ch); | 1889 } |
1890 while (ch != starting_ch); | |
1874 } | 1891 } |
1875 #else | 1892 #else /* not MULE */ |
1876 EMACS_INT k; | 1893 EMACS_INT k; |
1877 j = *ptr; | 1894 j = *ptr; |
1878 k = (j = TRANSLATE (trt, j)); | 1895 k = (j = TRANSLATE (trt, j)); |
1879 if (i == infinity) | 1896 if (i == infinity) |
1880 stride_for_teases = BM_tab[j]; | 1897 stride_for_teases = BM_tab[j]; |
1884 while ((j = TRANSLATE (inverse_trt, j)) != k) | 1901 while ((j = TRANSLATE (inverse_trt, j)) != k) |
1885 { | 1902 { |
1886 simple_translate[j] = (Ibyte) k; | 1903 simple_translate[j] = (Ibyte) k; |
1887 BM_tab[j] = dirlen - i; | 1904 BM_tab[j] = dirlen - i; |
1888 } | 1905 } |
1889 #endif | 1906 #endif /* (not) MULE */ |
1890 } | 1907 } |
1891 else | 1908 else |
1892 { | 1909 { |
1893 j = *ptr; | 1910 j = *ptr; |
1894 | 1911 |