comparison src/search.c @ 4962:e813cf16c015

merge
author Ben Wing <ben@xemacs.org>
date Mon, 01 Feb 2010 05:29:05 -0600
parents 304aebb79cd3 e91e3e353805
children 2ade80e8c640
comparison
equal deleted inserted replaced
4961:b90f8cf474e0 4962:e813cf16c015
1423 boyer_moore(). */ 1423 boyer_moore(). */
1424 boyer_moore_ok = 0; 1424 boyer_moore_ok = 0;
1425 break; 1425 break;
1426 } 1426 }
1427 } 1427 }
1428
1429 if (ichar_len (c) > 2)
1430 {
1431 /* Case-equivalence plus repeated octets throws off
1432 the construction of the stride table; avoid this.
1433
1434 It should be possible to correct boyer_moore to
1435 behave correctly even in this case--it doesn't have
1436 problems with repeated octets when case conversion
1437 is not involved--but this is not a critical
1438 issue. */
1439 Ibyte encoded[MAX_ICHAR_LEN];
1440 Bytecount len = set_itext_ichar (encoded, c);
1441 int i, j;
1442 for (i = 0; i < len && boyer_moore_ok; ++i)
1443 {
1444 for (j = i + 1; j < len && boyer_moore_ok; ++j)
1445 {
1446 if (encoded[i] == encoded[j])
1447 {
1448 boyer_moore_ok = 0;
1449 }
1450 }
1451 }
1452
1453 if (0 == boyer_moore_ok)
1454 {
1455 break;
1456 }
1457 }
1458
1428 } while (c != starting_c); 1459 } while (c != starting_c);
1429 1460
1430 if (!checked) 1461 if (!checked)
1431 { 1462 {
1432 #ifdef DEBUG_XEMACS 1463 #ifdef DEBUG_XEMACS
1777 if (i == dirlen) 1808 if (i == dirlen)
1778 i = infinity; 1809 i = infinity;
1779 if (!NILP (trt)) 1810 if (!NILP (trt))
1780 { 1811 {
1781 #ifdef MULE 1812 #ifdef MULE
1782 Ichar ch, untranslated; 1813 Ichar ch = -1, untranslated;
1814 Ibyte byte;
1783 int this_translated = 1; 1815 int this_translated = 1;
1784 1816
1785 /* Is *PTR the last byte of a character? */ 1817 /* Is *PTR the last byte of a character? */
1786 if (pat_end - ptr == 1 || ibyte_first_byte_p (ptr[1])) 1818 if (pat_end - ptr == 1 || ibyte_first_byte_p (ptr[1]))
1787 { 1819 {
1827 know from the canon table that untranslated maps to 1859 know from the canon table that untranslated maps to
1828 starting_ch and that untranslated has the correct value 1860 starting_ch and that untranslated has the correct value
1829 for charset_base.) */ 1861 for charset_base.) */
1830 assert (1 == count || starting_ch != ch); 1862 assert (1 == count || starting_ch != ch);
1831 } 1863 }
1864 {
1865 Ibyte tmp[MAX_ICHAR_LEN];
1866 Bytecount chlen;
1867
1868 chlen = set_itext_ichar (tmp, ch);
1869 byte = tmp[chlen - 1];
1870 }
1832 } 1871 }
1833 else 1872 else
1834 { 1873 {
1835 ch = *ptr; 1874 byte = *ptr;
1836 this_translated = 0; 1875 this_translated = 0;
1876 ch = -1;
1837 } 1877 }
1838 if (ch > 0400) 1878
1839 j = ((unsigned char) ch | 0200); 1879 /* BYTE = last byte of character CH when represented as text */
1840 else 1880 j = byte;
1841 j = (unsigned char) ch;
1842 1881
1843 if (i == infinity) 1882 if (i == infinity)
1844 stride_for_teases = BM_tab[j]; 1883 stride_for_teases = BM_tab[j];
1845 BM_tab[j] = dirlen - i; 1884 BM_tab[j] = dirlen - i;
1846 /* A translation table is accompanied by its inverse -- see 1885 /* A translation table is accompanied by its inverse -- see
1847 comment in casetab.c. */ 1886 comment in casetab.c. */
1848 if (this_translated) 1887 if (this_translated)
1849 { 1888 {
1850 Ichar starting_ch = ch; 1889 Ichar starting_ch = ch;
1851 EMACS_INT starting_j = j; 1890 EMACS_INT starting_j = j;
1891
1892 text_checking_assert (valid_ichar_p (ch));
1852 do 1893 do
1853 { 1894 {
1854 ch = TRANSLATE (inverse_trt, ch); 1895 ch = TRANSLATE (inverse_trt, ch);
1855 1896
1856 if (ch > 0x7F && buffer_entirely_one_byte_p) 1897 if (ch > 0x7F && buffer_entirely_one_byte_p)
1857 continue; 1898 continue;
1858 1899
1859 if (ch > 0xFF && buffer_nothing_greater_than_0xff) 1900 if (ch > 0xFF && buffer_nothing_greater_than_0xff)
1860 continue; 1901 continue;
1861 1902
1862 if (ch > 0400) 1903
1863 j = ((unsigned char) ch | 0200); 1904 /* Retrieve last byte of character CH when represented as
1864 else 1905 text */
1865 j = (unsigned char) ch; 1906 {
1866 1907 Ibyte tmp[MAX_ICHAR_LEN];
1908 Bytecount chlen;
1909
1910 chlen = set_itext_ichar (tmp, ch);
1911 j = tmp[chlen - 1];
1912 }
1913
1867 /* For all the characters that map into CH, set up 1914 /* For all the characters that map into CH, set up
1868 simple_translate to map the last byte into 1915 simple_translate to map the last byte into
1869 STARTING_J. */ 1916 STARTING_J. */
1870 simple_translate[j] = (Ibyte) starting_j; 1917 simple_translate[j] = (Ibyte) starting_j;
1871 BM_tab[j] = dirlen - i; 1918 BM_tab[j] = dirlen - i;
1872 1919
1873 } while (ch != starting_ch); 1920 }
1921 while (ch != starting_ch);
1874 } 1922 }
1875 #else 1923 #else /* not MULE */
1876 EMACS_INT k; 1924 EMACS_INT k;
1877 j = *ptr; 1925 j = *ptr;
1878 k = (j = TRANSLATE (trt, j)); 1926 k = (j = TRANSLATE (trt, j));
1879 if (i == infinity) 1927 if (i == infinity)
1880 stride_for_teases = BM_tab[j]; 1928 stride_for_teases = BM_tab[j];
1884 while ((j = TRANSLATE (inverse_trt, j)) != k) 1932 while ((j = TRANSLATE (inverse_trt, j)) != k)
1885 { 1933 {
1886 simple_translate[j] = (Ibyte) k; 1934 simple_translate[j] = (Ibyte) k;
1887 BM_tab[j] = dirlen - i; 1935 BM_tab[j] = dirlen - i;
1888 } 1936 }
1889 #endif 1937 #endif /* (not) MULE */
1890 } 1938 }
1891 else 1939 else
1892 { 1940 {
1893 j = *ptr; 1941 j = *ptr;
1894 1942