Mercurial > hg > xemacs-beta
annotate src/search.c @ 5557:53c066311921
If XLookupKeysym() returned an XFree86 "special key", ignore it. Fixes Sh-F11.
src/ChangeLog addition:
2011-08-27 Aidan Kehoe <kehoea@parhasard.net>
* config.h.in: Make HAVE_X11_XF86KEYSYM_H available here.
* event-Xt.c: #include X11/XF86keysym.h if available.
* event-Xt.c (x_event_to_emacs_event):
If XLookupKeysym () returned one of the XFree86 "special action
keys" for the shifted keysym, treat that as NoSymbol, fixing a
long-standing bug with shifted function keys under X.org.
Details of why in:
http://mid.gmane.org/16960.15685.26911.644835@parhasard.net
ChangeLog addition:
2011-08-27 Aidan Kehoe <kehoea@parhasard.net>
* configure.ac: Check whether X11/XF86keysym.h is available, to
allow us to avoid a bug in the interaction of XKB and XLookupKeysym.
* configure: Regenerate.
| author | Aidan Kehoe <kehoea@parhasard.net> |
|---|---|
| date | Sat, 27 Aug 2011 20:35:23 +0100 |
| parents | dab422055bab |
| children | 56144c8593a8 |
| rev | line source |
|---|---|
| 428 | 1 /* String search routines for XEmacs. |
| 2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc. | |
| 3 Copyright (C) 1995 Sun Microsystems, Inc. | |
| 5041 | 4 Copyright (C) 2001, 2002, 2010 Ben Wing. |
| 428 | 5 |
| 6 This file is part of XEmacs. | |
| 7 | |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5089
diff
changeset
|
8 XEmacs is free software: you can redistribute it and/or modify it |
| 428 | 9 under the terms of the GNU General Public License as published by the |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5089
diff
changeset
|
10 Free Software Foundation, either version 3 of the License, or (at your |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5089
diff
changeset
|
11 option) any later version. |
| 428 | 12 |
| 13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
| 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
| 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
| 16 for more details. | |
| 17 | |
| 18 You should have received a copy of the GNU General Public License | |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5089
diff
changeset
|
19 along with XEmacs. If not, see <http://www.gnu.org/licenses/>. */ |
| 428 | 20 |
| 21 /* Synched up with: FSF 19.29, except for region-cache stuff. */ | |
| 22 | |
| 23 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */ | |
| 24 | |
| 826 | 25 /* This file has been Mule-ized. */ |
| 428 | 26 |
| 27 #include <config.h> | |
| 28 #include "lisp.h" | |
| 29 | |
| 30 #include "buffer.h" | |
| 31 #include "insdel.h" | |
| 32 #include "opaque.h" | |
| 33 #ifdef REGION_CACHE_NEEDS_WORK | |
| 34 #include "region-cache.h" | |
| 35 #endif | |
| 36 #include "syntax.h" | |
| 37 | |
| 38 #include <sys/types.h> | |
| 39 #include "regex.h" | |
| 446 | 40 #include "casetab.h" |
| 41 #include "chartab.h" | |
| 42 | |
/* Map character POS through the translation table TABLE, or yield POS
   unchanged when TABLE is nil.  POS is fully parenthesized so that an
   expression argument is cast to Ichar and substituted as a single unit.
   TABLE appears twice in the expansion, so it should be free of side
   effects.  */
#define TRANSLATE(table, pos)						\
  (!NILP (table) ? TRT_TABLE_OF (table, (Ichar) (pos)) : (pos))
| 428 | 45 |
| 46 #define REGEXP_CACHE_SIZE 20 | |
| 47 | |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
48 #ifdef DEBUG_XEMACS |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
49 |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
50 /* Used in tests/automated/case-tests.el if available. */ |
| 5041 | 51 Fixnum debug_searches; |
| 52 | |
| 53 /* Declare as int rather than Bitflags because it's used by regex.c, which | |
| 54 may be used outside of XEmacs (e.g. etags.c). */ | |
| 55 int debug_regexps; | |
| 56 Lisp_Object Vdebug_regexps; | |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
57 |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
58 Lisp_Object Qsearch_algorithm_used, Qboyer_moore, Qsimple_search; |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
59 |
| 5041 | 60 Lisp_Object Qcompilation, Qfailure_point, Qmatching; |
| 61 | |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
62 #endif |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
63 |
| 428 | 64 /* If the regexp is non-nil, then the buffer contains the compiled form |
| 65 of that regexp, suitable for searching. */ | |
| 446 | 66 struct regexp_cache |
| 67 { | |
| 428 | 68 struct regexp_cache *next; |
| 69 Lisp_Object regexp; | |
| 70 struct re_pattern_buffer buf; | |
| 71 char fastmap[0400]; | |
| 72 /* Nonzero means regexp was compiled to do full POSIX backtracking. */ | |
| 73 char posix; | |
| 74 }; | |
| 75 | |
| 76 /* The instances of that struct. */ | |
| 77 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE]; | |
| 78 | |
| 79 /* The head of the linked list; points to the most recently used buffer. */ | |
| 80 static struct regexp_cache *searchbuf_head; | |
| 81 | |
| 82 | |
| 83 /* Every call to re_match, etc., must pass &search_regs as the regs | |
| 84 argument unless you can show it is unnecessary (i.e., if re_match | |
| 85 is certainly going to be called again before region-around-match | |
| 86 can be called). | |
| 87 | |
| 88 Since the registers are now dynamically allocated, we need to make | |
| 89 sure not to refer to the Nth register before checking that it has | |
| 90 been allocated by checking search_regs.num_regs. | |
| 91 | |
| 92 The regex code keeps track of whether it has allocated the search | |
| 93 buffer using bits in the re_pattern_buffer. This means that whenever | |
| 94 you compile a new pattern, it completely forgets whether it has | |
| 95 allocated any registers, and will allocate new registers the next | |
| 96 time you call a searching or matching function. Therefore, we need | |
| 97 to call re_set_registers after compiling a new pattern or after | |
| 98 setting the match registers, so that the regex functions will be | |
| 99 able to free or re-allocate it properly. */ | |
| 100 | |
| 101 /* Note: things get trickier under Mule because the values returned from | |
| 826 | 102 the regexp routines are in Bytebpos's but we need them to be in Charbpos's. |
| 428 | 103 We take the easy way out for the moment and just convert them immediately. |
| 104 We could be more clever by not converting them until necessary, but | |
| 105 that gets real ugly real fast since the buffer might have changed and | |
| 106 the positions might be out of sync or out of range. | |
| 107 */ | |
| 108 static struct re_registers search_regs; | |
| 109 | |
| 1468 | 110 /* Every function that sets the match data _must_ clear unused search |
| 111 registers on success. An unsuccessful search or match _must_ preserve | |
| 112 the search registers. The traditional documentation implied that | |
| 113 any match operation might trash the registers, but in fact failures | |
| 114 have always preserved the match data (in GNU Emacs as well). Some | |
| 115 plausible code depends on this behavior (cf. `w3-configuration-data' | |
| 116 in library "w3-cfg"). | |
| 117 | |
| 118 Ordinary string searches use set_search_regs to set the whole-string | |
| 119 match. That function takes care of clearing the unused subexpression | |
| 1425 | 120 registers. |
| 121 */ | |
| 122 static void set_search_regs (struct buffer *buf, Charbpos beg, Charcount len); | |
| 1468 | 123 static void clear_search_regs (void); |
| 1425 | 124 |
| 428 | 125 /* The buffer in which the last search was performed, or |
| 126 Qt if the last search was done in a string; | |
| 127 Qnil if no searching has been done yet. */ | |
| 128 static Lisp_Object last_thing_searched; | |
| 129 | |
| 130 /* error condition signalled when regexp compile_pattern fails */ | |
| 131 | |
| 132 Lisp_Object Qinvalid_regexp; | |
| 133 | |
| 134 /* Regular expressions used in forward/backward-word */ | |
| 135 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp; | |
| 136 | |
| 507 | 137 Fixnum warn_about_possibly_incompatible_back_references; |
| 502 | 138 |
| 428 | 139 /* range table for use with skip_chars. Only needed for Mule. */ |
| 140 Lisp_Object Vskip_chars_range_table; | |
| 141 | |
| 867 | 142 static Charbpos simple_search (struct buffer *buf, Ibyte *base_pat, |
| 826 | 143 Bytecount len, Bytebpos pos, Bytebpos lim, |
| 144 EMACS_INT n, Lisp_Object trt); | |
| 867 | 145 static Charbpos boyer_moore (struct buffer *buf, Ibyte *base_pat, |
| 826 | 146 Bytecount len, Bytebpos pos, Bytebpos lim, |
| 147 EMACS_INT n, Lisp_Object trt, | |
| 148 Lisp_Object inverse_trt, int charset_base); | |
| 665 | 149 static Charbpos search_buffer (struct buffer *buf, Lisp_Object str, |
| 826 | 150 Charbpos charbpos, Charbpos buflim, EMACS_INT n, |
| 151 int RE, Lisp_Object trt, | |
| 152 Lisp_Object inverse_trt, int posix); | |
| 771 | 153 |
| 2268 | 154 static DECLARE_DOESNT_RETURN (matcher_overflow (void)); |
| 155 | |
| 156 static DOESNT_RETURN | |
| 157 matcher_overflow () | |
| 428 | 158 { |
| 563 | 159 stack_overflow ("Stack overflow in regexp matcher", Qunbound); |
| 428 | 160 } |
| 161 | |
| 162 /* Compile a regexp and signal a Lisp error if anything goes wrong. | |
| 163 PATTERN is the pattern to compile. | |
| 164 CP is the place to put the result. | |
| 826 | 165 TRANSLATE is a translation table for ignoring case, or Qnil for none. |
| 428 | 166 REGP is the structure that says where to store the "register" |
| 167 values that will result from matching this pattern. | |
| 168 If it is 0, we should compile the pattern not to record any | |
| 169 subexpression bounds. | |
| 170 POSIX is nonzero if we want full backtracking (POSIX style) | |
| 171 for this pattern. 0 means backtrack only enough to get a valid match. */ | |
| 172 | |
| 173 static int | |
| 174 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, | |
| 2286 | 175 struct re_registers *UNUSED (regp), Lisp_Object translate, |
| 826 | 176 int posix, Error_Behavior errb) |
| 428 | 177 { |
| 442 | 178 const char *val; |
| 428 | 179 reg_syntax_t old; |
| 180 | |
| 181 cp->regexp = Qnil; | |
| 182 cp->buf.translate = translate; | |
| 183 cp->posix = posix; | |
| 184 old = re_set_syntax (RE_SYNTAX_EMACS | |
| 185 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING)); | |
| 442 | 186 val = (const char *) |
| 428 | 187 re_compile_pattern ((char *) XSTRING_DATA (pattern), |
| 188 XSTRING_LENGTH (pattern), &cp->buf); | |
| 189 re_set_syntax (old); | |
| 190 if (val) | |
| 191 { | |
|
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
192 maybe_signal_error (Qinvalid_regexp, 0, build_cistring (val), |
| 428 | 193 Qsearch, errb); |
| 194 return 0; | |
| 195 } | |
| 196 | |
| 197 cp->regexp = Fcopy_sequence (pattern); | |
| 198 return 1; | |
| 199 } | |
| 200 | |
| 201 /* Compile a regexp if necessary, but first check to see if there's one in | |
| 202 the cache. | |
| 203 PATTERN is the pattern to compile. | |
| 826 | 204 TRANSLATE is a translation table for ignoring case, or Qnil for none. |
| 428 | 205 REGP is the structure that says where to store the "register" |
| 206 values that will result from matching this pattern. | |
| 207 If it is 0, we should compile the pattern not to record any | |
| 208 subexpression bounds. | |
| 209 POSIX is nonzero if we want full backtracking (POSIX style) | |
| 210 for this pattern. 0 means backtrack only enough to get a valid match. */ | |
| 211 | |
| 212 struct re_pattern_buffer * | |
| 213 compile_pattern (Lisp_Object pattern, struct re_registers *regp, | |
| 2286 | 214 Lisp_Object translate, Lisp_Object UNUSED (searchobj), |
| 215 struct buffer *UNUSED (searchbuf), int posix, | |
| 216 Error_Behavior errb) | |
| 428 | 217 { |
| 218 struct regexp_cache *cp, **cpp; | |
| 219 | |
| 220 for (cpp = &searchbuf_head; ; cpp = &cp->next) | |
| 221 { | |
| 222 cp = *cpp; | |
| 826 | 223 /* &&#### once we fix up the fastmap code in regex.c for 8-bit-fixed, |
| 224 we need to record and compare the buffer and format, since the | |
| 225 fastmap will reflect the state of the buffer -- and things get | |
| 226 more complicated if the buffer has changed formats or (esp.) has | |
| 227 kept the format but changed its interpretation! may need to have | |
| 228 the code that changes the interpretation go through and invalidate | |
| 229 cache entries for that buffer. */ | |
| 428 | 230 if (!NILP (Fstring_equal (cp->regexp, pattern)) |
| 446 | 231 && EQ (cp->buf.translate, translate) |
| 428 | 232 && cp->posix == posix) |
| 233 break; | |
| 234 | |
| 235 /* If we're at the end of the cache, compile into the last cell. */ | |
| 236 if (cp->next == 0) | |
| 237 { | |
| 826 | 238 if (!compile_pattern_1 (cp, pattern, regp, translate, |
| 239 posix, errb)) | |
| 428 | 240 return 0; |
| 241 break; | |
| 242 } | |
| 243 } | |
| 244 | |
| 245 /* When we get here, cp (aka *cpp) contains the compiled pattern, | |
| 246 either because we found it in the cache or because we just compiled it. | |
| 247 Move it to the front of the queue to mark it as most recently used. */ | |
| 248 *cpp = cp->next; | |
| 249 cp->next = searchbuf_head; | |
| 250 searchbuf_head = cp; | |
| 251 | |
| 252 /* Advise the searching functions about the space we have allocated | |
| 253 for register data. */ | |
| 254 if (regp) | |
| 255 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end); | |
| 256 | |
| 257 return &cp->buf; | |
| 258 } | |
| 259 | |
| 260 /* Error condition used for failing searches */ | |
| 261 Lisp_Object Qsearch_failed; | |
| 262 | |
| 2268 | 263 static DECLARE_DOESNT_RETURN (signal_failure (Lisp_Object)); |
| 264 | |
| 265 static DOESNT_RETURN | |
| 428 | 266 signal_failure (Lisp_Object arg) |
| 267 { | |
| 446 | 268 for (;;) |
| 269 Fsignal (Qsearch_failed, list1 (arg)); | |
| 428 | 270 } |
| 271 | |
| 826 | 272 /* Convert the search registers from Bytebpos's to Charbpos's. Needs to be |
| 428 | 273 done after each regexp match that uses the search regs. |
| 274 | |
| 275 We could get a potential speedup by not converting the search registers | |
| 276 until it's really necessary, e.g. when match-data or replace-match is | |
| 277 called. However, this complexifies the code a lot (e.g. the buffer | |
| 826 | 278 could have changed and the Bytebpos's stored might be invalid) and is |
| 428 | 279 probably not a great time-saver. */ |
| 280 | |
| 281 static void | |
| 282 fixup_search_regs_for_buffer (struct buffer *buf) | |
| 283 { | |
| 284 int i; | |
| 285 int num_regs = search_regs.num_regs; | |
| 286 | |
| 287 for (i = 0; i < num_regs; i++) | |
| 288 { | |
| 289 if (search_regs.start[i] >= 0) | |
| 826 | 290 search_regs.start[i] = bytebpos_to_charbpos (buf, |
| 291 search_regs.start[i]); | |
| 428 | 292 if (search_regs.end[i] >= 0) |
| 665 | 293 search_regs.end[i] = bytebpos_to_charbpos (buf, search_regs.end[i]); |
| 428 | 294 } |
| 295 } | |
| 296 | |
| 297 /* Similar but for strings. */ | |
| 298 static void | |
| 299 fixup_search_regs_for_string (Lisp_Object string) | |
| 300 { | |
| 301 int i; | |
| 302 int num_regs = search_regs.num_regs; | |
| 303 | |
| 304 /* #### bytecount_to_charcount() is not that efficient. This function | |
| 867 | 305 could be faster if it did its own conversion (using INC_IBYTEPTR() |
| 428 | 306 and such), because the register ends are likely to be somewhat ordered. |
| 307 (Even if not, you could sort them.) | |
| 308 | |
| 309 Think about this if this function is a time hog, which it's probably | |
| 310 not. */ | |
| 311 for (i = 0; i < num_regs; i++) | |
| 312 { | |
| 313 if (search_regs.start[i] > 0) | |
| 314 { | |
| 315 search_regs.start[i] = | |
| 793 | 316 string_index_byte_to_char (string, search_regs.start[i]); |
| 428 | 317 } |
| 318 if (search_regs.end[i] > 0) | |
| 319 { | |
| 320 search_regs.end[i] = | |
| 793 | 321 string_index_byte_to_char (string, search_regs.end[i]); |
| 428 | 322 } |
| 323 } | |
| 324 } | |
| 325 | |
| 326 | |
| 327 static Lisp_Object | |
| 328 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix) | |
| 329 { | |
| 330 Lisp_Object val; | |
| 665 | 331 Bytebpos p1, p2; |
| 428 | 332 Bytecount s1, s2; |
| 333 REGISTER int i; | |
| 334 struct re_pattern_buffer *bufp; | |
| 826 | 335 struct syntax_cache scache_struct; |
| 336 struct syntax_cache *scache = &scache_struct; | |
| 337 | |
| 428 | 338 CHECK_STRING (string); |
| 339 bufp = compile_pattern (string, &search_regs, | |
| 340 (!NILP (buf->case_fold_search) | |
| 446 | 341 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil), |
| 826 | 342 wrap_buffer (buf), buf, posix, ERROR_ME); |
| 428 | 343 |
| 344 QUIT; | |
| 345 | |
| 346 /* Get pointers and sizes of the two strings | |
| 347 that make up the visible portion of the buffer. */ | |
| 348 | |
| 826 | 349 p1 = BYTE_BUF_BEGV (buf); |
| 350 p2 = BYTE_BUF_CEILING_OF (buf, p1); | |
| 428 | 351 s1 = p2 - p1; |
| 826 | 352 s2 = BYTE_BUF_ZV (buf) - p2; |
| 353 | |
| 354 /* By making the regex object, regex buffer, and syntax cache arguments | |
| 355 to re_{search,match}{,_2}, we've removed the need to do nasty things | |
| 356 to deal with regex reentrancy. (See stack trace in signal.c for proof | |
| 357 that this can happen.) | |
| 358 | |
| 359 #### there is still a potential problem with the regex cache -- | |
| 360 the compiled regex could be overwritten. we'd need 20-fold | |
| 361 reentrancy, though. Fix this. */ | |
| 362 | |
| 363 i = re_match_2 (bufp, (char *) BYTE_BUF_BYTE_ADDRESS (buf, p1), | |
| 364 s1, (char *) BYTE_BUF_BYTE_ADDRESS (buf, p2), s2, | |
| 365 BYTE_BUF_PT (buf) - BYTE_BUF_BEGV (buf), &search_regs, | |
| 366 BYTE_BUF_ZV (buf) - BYTE_BUF_BEGV (buf), wrap_buffer (buf), | |
| 367 buf, scache); | |
| 428 | 368 |
| 369 if (i == -2) | |
| 370 matcher_overflow (); | |
| 371 | |
| 372 val = (0 <= i ? Qt : Qnil); | |
| 373 if (NILP (val)) | |
| 826 | 374 return Qnil; |
| 428 | 375 { |
| 376 int num_regs = search_regs.num_regs; | |
| 377 for (i = 0; i < num_regs; i++) | |
| 378 if (search_regs.start[i] >= 0) | |
| 379 { | |
| 826 | 380 search_regs.start[i] += BYTE_BUF_BEGV (buf); |
| 381 search_regs.end[i] += BYTE_BUF_BEGV (buf); | |
| 428 | 382 } |
| 383 } | |
| 793 | 384 last_thing_searched = wrap_buffer (buf); |
| 428 | 385 fixup_search_regs_for_buffer (buf); |
| 826 | 386 return val; |
| 428 | 387 } |
| 388 | |
| 389 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /* | |
| 390 Return t if text after point matches regular expression REGEXP. | |
| 1468 | 391 When the match is successful, this function modifies the match data |
| 392 that `match-beginning', `match-end' and `match-data' access; save the | |
| 393 match data with `match-data' and restore it with `store-match-data' if | |
| 394 you want to preserve them. If the match fails, the match data from the | |
| 395 previous success match is preserved. | |
| 428 | 396 |
| 397 Optional argument BUFFER defaults to the current buffer. | |
| 398 */ | |
| 399 (regexp, buffer)) | |
| 400 { | |
| 401 return looking_at_1 (regexp, decode_buffer (buffer, 0), 0); | |
| 402 } | |
| 403 | |
| 404 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /* | |
| 405 Return t if text after point matches regular expression REGEXP. | |
| 406 Find the longest match, in accord with Posix regular expression rules. | |
| 1468 | 407 When the match is successful, this function modifies the match data |
| 408 that `match-beginning', `match-end' and `match-data' access; save the | |
| 409 match data with `match-data' and restore it with `store-match-data' if | |
| 410 you want to preserve them. If the match fails, the match data from the | |
| 411 previous success match is preserved. | |
| 428 | 412 |
| 413 Optional argument BUFFER defaults to the current buffer. | |
| 414 */ | |
| 415 (regexp, buffer)) | |
| 416 { | |
| 826 | 417 return looking_at_1 (regexp, decode_buffer (buffer, 0), 1); |
| 428 | 418 } |
| 419 | |
| 420 static Lisp_Object | |
| 421 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, | |
| 2286 | 422 struct buffer *buf, int UNUSED (posix)) |
| 428 | 423 { |
| 424 Bytecount val; | |
| 425 Charcount s; | |
| 426 struct re_pattern_buffer *bufp; | |
| 427 | |
| 853 | 428 /* Some FSF junk with running_asynch_code, to preserve the match |
| 429 data. Not necessary because we don't call process filters | |
| 430 asynchronously (i.e. from within QUIT). */ | |
| 428 | 431 |
| 432 CHECK_STRING (regexp); | |
| 433 CHECK_STRING (string); | |
| 434 | |
| 435 if (NILP (start)) | |
| 436 s = 0; | |
| 437 else | |
| 438 { | |
| 826 | 439 Charcount len = string_char_length (string); |
| 428 | 440 |
| 441 CHECK_INT (start); | |
| 442 s = XINT (start); | |
| 443 if (s < 0 && -s <= len) | |
| 444 s = len + s; | |
| 445 else if (0 > s || s > len) | |
| 446 args_out_of_range (string, start); | |
| 447 } | |
| 448 | |
| 449 | |
| 450 bufp = compile_pattern (regexp, &search_regs, | |
| 451 (!NILP (buf->case_fold_search) | |
| 446 | 452 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil), |
| 826 | 453 string, buf, 0, ERROR_ME); |
| 428 | 454 QUIT; |
| 455 { | |
| 793 | 456 Bytecount bis = string_index_char_to_byte (string, s); |
| 826 | 457 struct syntax_cache scache_struct; |
| 458 struct syntax_cache *scache = &scache_struct; | |
| 459 | |
| 460 /* By making the regex object, regex buffer, and syntax cache arguments | |
| 461 to re_{search,match}{,_2}, we've removed the need to do nasty things | |
| 462 to deal with regex reentrancy. (See stack trace in signal.c for proof | |
| 463 that this can happen.) | |
| 464 | |
| 465 #### there is still a potential problem with the regex cache -- | |
| 466 the compiled regex could be overwritten. we'd need 20-fold | |
| 467 reentrancy, though. Fix this. */ | |
| 468 | |
| 428 | 469 val = re_search (bufp, (char *) XSTRING_DATA (string), |
| 470 XSTRING_LENGTH (string), bis, | |
| 471 XSTRING_LENGTH (string) - bis, | |
| 826 | 472 &search_regs, string, buf, scache); |
| 428 | 473 } |
| 474 if (val == -2) | |
| 475 matcher_overflow (); | |
| 826 | 476 if (val < 0) return Qnil; |
| 428 | 477 last_thing_searched = Qt; |
| 478 fixup_search_regs_for_string (string); | |
| 826 | 479 return make_int (string_index_byte_to_char (string, val)); |
| 428 | 480 } |
| 481 | |
| 482 DEFUN ("string-match", Fstring_match, 2, 4, 0, /* | |
| 483 Return index of start of first match for REGEXP in STRING, or nil. | |
| 484 If third arg START is non-nil, start search at that index in STRING. | |
| 485 For index of first char beyond the match, do (match-end 0). | |
| 486 `match-end' and `match-beginning' also give indices of substrings | |
| 487 matched by parenthesis constructs in the pattern. | |
| 488 | |
| 826 | 489 Optional arg BUFFER controls how case folding and syntax and category |
| 490 lookup is done (according to the value of `case-fold-search' in that buffer | |
| 491 and that buffer's case tables, syntax tables, and category table). If nil | |
| 492 or unspecified, it defaults *NOT* to the current buffer but instead: | |
| 493 | |
| 494 -- the value of `case-fold-search' in the current buffer is still respected | |
| 495 because of idioms like | |
| 496 | |
| 497 (let ((case-fold-search nil)) | |
| 498 (string-match "^foo.*bar" string)) | |
| 499 | |
| 500 but the case, syntax, and category tables come from the standard tables, | |
| 1468 | 501 which are accessed through functions `default-{case,syntax,category}-table' |
| 502 and serve as the parents of the tables in particular buffer. | |
| 503 | |
| 504 When the match is successful, this function modifies the match data | |
| 505 that `match-beginning', `match-end' and `match-data' access; save the | |
| 506 match data with `match-data' and restore it with `store-match-data' if | |
| 507 you want to preserve them. If the match fails, the match data from the | |
| 508 previous success match is preserved. | |
| 428 | 509 */ |
| 510 (regexp, string, start, buffer)) | |
| 511 { | |
| 826 | 512 /* &&#### implement new interp for buffer arg; check code to see if it |
| 513 makes more sense than prev */ | |
| 428 | 514 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0); |
| 515 } | |
| 516 | |
| 517 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /* | |
| 518 Return index of start of first match for REGEXP in STRING, or nil. | |
| 519 Find the longest match, in accord with Posix regular expression rules. | |
| 520 If third arg START is non-nil, start search at that index in STRING. | |
| 521 For index of first char beyond the match, do (match-end 0). | |
| 522 `match-end' and `match-beginning' also give indices of substrings | |
| 523 matched by parenthesis constructs in the pattern. | |
| 524 | |
| 525 Optional arg BUFFER controls how case folding is done (according to | |
| 526 the value of `case-fold-search' in that buffer and that buffer's case | |
| 527 tables) and defaults to the current buffer. | |
| 1468 | 528 |
| 529 When the match is successful, this function modifies the match data | |
| 530 that `match-beginning', `match-end' and `match-data' access; save the | |
| 531 match data with `match-data' and restore it with `store-match-data' if | |
| 532 you want to preserve them. If the match fails, the match data from the | |
| 533 previous success match is preserved. | |
| 428 | 534 */ |
| 535 (regexp, string, start, buffer)) | |
| 536 { | |
| 537 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1); | |
| 538 } | |
| 539 | |
| 540 /* Match REGEXP against STRING, searching all of STRING, | |
| 541 and return the index of the match, or negative on failure. | |
| 542 This does not clobber the match data. */ | |
| 543 | |
| 544 Bytecount | |
| 1347 | 545 fast_string_match (Lisp_Object regexp, const Ibyte *nonreloc, |
| 428 | 546 Lisp_Object reloc, Bytecount offset, |
| 547 Bytecount length, int case_fold_search, | |
| 578 | 548 Error_Behavior errb, int no_quit) |
| 428 | 549 { |
| 550 Bytecount val; | |
| 867 | 551 Ibyte *newnonreloc = (Ibyte *) nonreloc; |
| 428 | 552 struct re_pattern_buffer *bufp; |
| 826 | 553 struct syntax_cache scache_struct; |
| 554 struct syntax_cache *scache = &scache_struct; | |
| 428 | 555 |
| 556 bufp = compile_pattern (regexp, 0, | |
| 557 (case_fold_search | |
| 771 | 558 ? XCASE_TABLE_DOWNCASE (Vstandard_case_table) |
| 446 | 559 : Qnil), |
| 826 | 560 reloc, 0, 0, errb); |
| 428 | 561 if (!bufp) |
| 562 return -1; /* will only do this when errb != ERROR_ME */ | |
| 563 if (!no_quit) | |
| 564 QUIT; | |
| 565 else | |
| 566 no_quit_in_re_search = 1; | |
| 567 | |
| 568 fixup_internal_substring (nonreloc, reloc, offset, &length); | |
| 569 | |
| 771 | 570 /* Don't need to protect against GC inside of re_search() due to QUIT; |
| 571 QUIT is GC-inhibited. */ | |
| 428 | 572 if (!NILP (reloc)) |
| 771 | 573 newnonreloc = XSTRING_DATA (reloc); |
| 574 | |
| 826 | 575 /* By making the regex object, regex buffer, and syntax cache arguments |
| 576 to re_{search,match}{,_2}, we've removed the need to do nasty things | |
| 577 to deal with regex reentrancy. (See stack trace in signal.c for proof | |
| 578 that this can happen.) | |
| 579 | |
| 580 #### there is still a potential problem with the regex cache -- | |
| 581 the compiled regex could be overwritten. we'd need 20-fold | |
| 582 reentrancy, though. Fix this. */ | |
| 583 | |
| 428 | 584 val = re_search (bufp, (char *) newnonreloc + offset, length, 0, |
| 826 | 585 length, 0, reloc, 0, scache); |
| 428 | 586 |
| 587 no_quit_in_re_search = 0; | |
| 588 return val; | |
| 589 } | |
| 590 | |
| 591 Bytecount | |
| 592 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string) | |
| 593 { | |
| 594 return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0); | |
| 595 } | |
| 596 | |
| 597 | |
| 598 #ifdef REGION_CACHE_NEEDS_WORK | |
| 599 /* The newline cache: remembering which sections of text have no newlines. */ | |
| 600 | |
| 601 /* If the user has requested newline caching, make sure it's on. | |
| 602 Otherwise, make sure it's off. | |
| 603 This is our cheezy way of associating an action with the change of | |
| 604 state of a buffer-local variable. */ | |
| 605 static void | |
| 606 newline_cache_on_off (struct buffer *buf) | |
| 607 { | |
| 608 if (NILP (buf->cache_long_line_scans)) | |
| 609 { | |
| 610 /* It should be off. */ | |
| 611 if (buf->newline_cache) | |
| 612 { | |
| 613 free_region_cache (buf->newline_cache); | |
| 614 buf->newline_cache = 0; | |
| 615 } | |
| 616 } | |
| 617 else | |
| 618 { | |
| 619 /* It should be on. */ | |
| 620 if (buf->newline_cache == 0) | |
| 621 buf->newline_cache = new_region_cache (); | |
| 622 } | |
| 623 } | |
| 624 #endif | |
| 625 | |
| 626 /* Search in BUF for COUNT instances of the character TARGET between | |
| 627 START and END. | |
| 628 | |
| 629 If COUNT is positive, search forwards; END must be >= START. | |
| 630 If COUNT is negative, search backwards for the -COUNTth instance; | |
| 631 END must be <= START. | |
| 632 If COUNT is zero, do anything you please; run rogue, for all I care. | |
| 633 | |
| 634 If END is zero, use BEGV or ZV instead, as appropriate for the | |
| 635 direction indicated by COUNT. | |
| 636 | |
| 637 If we find COUNT instances, set *SHORTAGE to zero, and return the | |
| 638 position after the COUNTth match. Note that for reverse motion | |
| 639 this is not the same as the usual convention for Emacs motion commands. | |
| 640 | |
| 641 If we don't find COUNT instances before reaching END, set *SHORTAGE | |
| 642 to the number of TARGETs left unfound, and return END. | |
| 643 | |
| 644 If ALLOW_QUIT is non-zero, call QUIT periodically. */ | |
| 645 | |
| 665 | 646 static Bytebpos |
| 867 | 647 byte_scan_buffer (struct buffer *buf, Ichar target, Bytebpos st, Bytebpos en, |
| 872 | 648 EMACS_INT count, EMACS_INT *shortage, int allow_quit) |
| 428 | 649 { |
| 665 | 650 Bytebpos lim = en > 0 ? en : |
| 826 | 651 ((count > 0) ? BYTE_BUF_ZV (buf) : BYTE_BUF_BEGV (buf)); |
| 428 | 652 |
| 653 /* #### newline cache stuff in this function not yet ported */ | |
| 654 assert (count != 0); | |
| 655 | |
| 656 if (shortage) | |
| 657 *shortage = 0; | |
| 658 | |
| 659 if (count > 0) | |
| 660 { | |
| 661 #ifdef MULE | |
| 826 | 662 Internal_Format fmt = buf->text->format; |
| 663 /* Check for char that's unrepresentable in the buffer -- it | |
| 664 certainly can't be there. */ | |
| 867 | 665 if (!ichar_fits_in_format (target, fmt, wrap_buffer (buf))) |
| 428 | 666 { |
| 826 | 667 *shortage = count; |
| 668 return lim; | |
| 669 } | |
| 670 /* Due to the Mule representation of characters in a buffer, we can | |
| 671 simply search for characters in the range 0 - 127 directly; for | |
| 672 8-bit-fixed, we can do this for all characters. In other cases, | |
| 673 we do it the "hard" way. Note that this way works for all | |
| 674 characters and all formats, but the other way is faster. */ | |
| 675 else if (! (fmt == FORMAT_8_BIT_FIXED || | |
| 867 | 676 (fmt == FORMAT_DEFAULT && ichar_ascii_p (target)))) |
| 826 | 677 { |
| 867 | 678 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
| 428 | 679 while (st < lim && count > 0) |
| 680 { | |
| 826 | 681 if (BYTE_BUF_FETCH_CHAR_RAW (buf, st) == raw) |
| 428 | 682 count--; |
| 665 | 683 INC_BYTEBPOS (buf, st); |
| 428 | 684 } |
| 685 } | |
| 686 else | |
| 687 #endif | |
| 688 { | |
| 867 | 689 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
| 428 | 690 while (st < lim && count > 0) |
| 691 { | |
|
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
692 Bytebpos ceiling; |
| 867 | 693 Ibyte *bufptr; |
| 428 | 694 |
|
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
695 ceiling = BYTE_BUF_CEILING_OF (buf, st); |
|
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
696 ceiling = min (lim, ceiling); |
| 867 | 697 bufptr = (Ibyte *) memchr (BYTE_BUF_BYTE_ADDRESS (buf, st), |
|
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
698 raw, ceiling - st); |
| 428 | 699 if (bufptr) |
| 700 { | |
| 701 count--; | |
| 826 | 702 st = BYTE_BUF_PTR_BYTE_POS (buf, bufptr) + 1; |
| 428 | 703 } |
| 704 else | |
|
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
705 st = ceiling; |
| 428 | 706 } |
| 707 } | |
| 708 | |
| 709 if (shortage) | |
| 710 *shortage = count; | |
| 711 if (allow_quit) | |
| 712 QUIT; | |
| 713 return st; | |
| 714 } | |
| 715 else | |
| 716 { | |
| 717 #ifdef MULE | |
| 826 | 718 Internal_Format fmt = buf->text->format; |
| 719 /* Check for char that's unrepresentable in the buffer -- it | |
| 720 certainly can't be there. */ | |
| 867 | 721 if (!ichar_fits_in_format (target, fmt, wrap_buffer (buf))) |
| 428 | 722 { |
| 826 | 723 *shortage = -count; |
| 724 return lim; | |
| 725 } | |
| 726 else if (! (fmt == FORMAT_8_BIT_FIXED || | |
| 867 | 727 (fmt == FORMAT_DEFAULT && ichar_ascii_p (target)))) |
| 826 | 728 { |
| 867 | 729 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
| 428 | 730 while (st > lim && count < 0) |
| 731 { | |
| 665 | 732 DEC_BYTEBPOS (buf, st); |
| 826 | 733 if (BYTE_BUF_FETCH_CHAR_RAW (buf, st) == raw) |
| 428 | 734 count++; |
| 735 } | |
| 736 } | |
| 737 else | |
| 738 #endif | |
| 739 { | |
| 867 | 740 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
| 428 | 741 while (st > lim && count < 0) |
| 742 { | |
|
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
743 Bytebpos floorpos; |
| 867 | 744 Ibyte *bufptr; |
| 745 Ibyte *floorptr; | |
| 428 | 746 |
|
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
747 floorpos = BYTE_BUF_FLOOR_OF (buf, st); |
|
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
748 floorpos = max (lim, floorpos); |
| 428 | 749 /* No memrchr() ... */ |
| 826 | 750 bufptr = BYTE_BUF_BYTE_ADDRESS_BEFORE (buf, st); |
|
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
751 floorptr = BYTE_BUF_BYTE_ADDRESS (buf, floorpos); |
| 428 | 752 while (bufptr >= floorptr) |
| 753 { | |
| 754 st--; | |
| 755 /* At this point, both ST and BUFPTR refer to the same | |
| 756 character. When the loop terminates, ST will | |
| 757 always point to the last character we tried. */ | |
| 867 | 758 if (*bufptr == (Ibyte) raw) |
| 428 | 759 { |
| 760 count++; | |
| 761 break; | |
| 762 } | |
| 763 bufptr--; | |
| 764 } | |
| 765 } | |
| 766 } | |
| 767 | |
| 768 if (shortage) | |
| 769 *shortage = -count; | |
| 770 if (allow_quit) | |
| 771 QUIT; | |
| 772 if (count) | |
| 773 return st; | |
| 774 else | |
| 775 { | |
| 776 /* We found the character we were looking for; we have to return | |
| 777 the position *after* it due to the strange way that the return | |
| 778 value is defined. */ | |
| 665 | 779 INC_BYTEBPOS (buf, st); |
| 428 | 780 return st; |
| 781 } | |
| 782 } | |
| 783 } | |
| 784 | |
| 665 | 785 Charbpos |
| 867 | 786 scan_buffer (struct buffer *buf, Ichar target, Charbpos start, Charbpos end, |
| 428 | 787 EMACS_INT count, EMACS_INT *shortage, int allow_quit) |
| 788 { | |
| 826 | 789 Bytebpos byte_retval; |
| 790 Bytebpos byte_start, byte_end; | |
| 791 | |
| 792 byte_start = charbpos_to_bytebpos (buf, start); | |
| 428 | 793 if (end) |
| 826 | 794 byte_end = charbpos_to_bytebpos (buf, end); |
| 428 | 795 else |
| 826 | 796 byte_end = 0; |
| 797 byte_retval = byte_scan_buffer (buf, target, byte_start, byte_end, count, | |
| 428 | 798 shortage, allow_quit); |
| 826 | 799 return bytebpos_to_charbpos (buf, byte_retval); |
| 428 | 800 } |
| 801 | |
| 665 | 802 Bytebpos |
| 826 | 803 byte_find_next_newline_no_quit (struct buffer *buf, Bytebpos from, int count) |
| 428 | 804 { |
| 826 | 805 return byte_scan_buffer (buf, '\n', from, 0, count, 0, 0); |
| 428 | 806 } |
| 807 | |
| 665 | 808 Charbpos |
| 809 find_next_newline_no_quit (struct buffer *buf, Charbpos from, int count) | |
| 428 | 810 { |
| 811 return scan_buffer (buf, '\n', from, 0, count, 0, 0); | |
| 812 } | |
| 813 | |
| 665 | 814 Charbpos |
| 815 find_next_newline (struct buffer *buf, Charbpos from, int count) | |
| 428 | 816 { |
| 817 return scan_buffer (buf, '\n', from, 0, count, 0, 1); | |
| 818 } | |
| 819 | |
| 826 | 820 Bytecount |
| 867 | 821 byte_find_next_ichar_in_string (Lisp_Object str, Ichar target, Bytecount st, |
| 428 | 822 EMACS_INT count) |
| 823 { | |
| 793 | 824 Bytebpos lim = XSTRING_LENGTH (str) -1; |
| 867 | 825 Ibyte *s = XSTRING_DATA (str); |
| 428 | 826 |
| 827 assert (count >= 0); | |
| 828 | |
| 829 #ifdef MULE | |
| 830 /* Due to the Mule representation of characters in a buffer, | |
| 831 we can simply search for characters in the range 0 - 127 | |
| 832 directly. For other characters, we do it the "hard" way. | |
| 833 Note that this way works for all characters but the other | |
| 834 way is faster. */ | |
| 835 if (target >= 0200) | |
| 836 { | |
| 837 while (st < lim && count > 0) | |
| 838 { | |
| 867 | 839 if (string_ichar (str, st) == target) |
| 428 | 840 count--; |
| 826 | 841 INC_BYTECOUNT (s, st); |
| 428 | 842 } |
| 843 } | |
| 844 else | |
| 845 #endif | |
| 846 { | |
| 847 while (st < lim && count > 0) | |
| 848 { | |
| 867 | 849 Ibyte *bufptr = (Ibyte *) memchr (itext_n_addr (s, st), |
| 428 | 850 (int) target, lim - st); |
| 851 if (bufptr) | |
| 852 { | |
| 853 count--; | |
| 826 | 854 st = (Bytebpos) (bufptr - s) + 1; |
| 428 | 855 } |
| 856 else | |
| 857 st = lim; | |
| 858 } | |
| 859 } | |
| 860 return st; | |
| 861 } | |
| 862 | |
| 863 /* Like find_next_newline, but returns position before the newline, | |
| 864 not after, and only search up to TO. This isn't just | |
| 865 find_next_newline (...)-1, because you might hit TO. */ | |
| 665 | 866 Charbpos |
| 826 | 867 find_before_next_newline (struct buffer *buf, Charbpos from, Charbpos to, |
| 868 int count) | |
| 428 | 869 { |
| 870 EMACS_INT shortage; | |
| 665 | 871 Charbpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1); |
| 428 | 872 |
| 873 if (shortage == 0) | |
| 874 pos--; | |
| 875 | |
| 876 return pos; | |
| 877 } | |
| 878 | |
| 872 | 879 /* This function synched with FSF 21.1 */ |
| 428 | 880 static Lisp_Object |
| 881 skip_chars (struct buffer *buf, int forwardp, int syntaxp, | |
| 882 Lisp_Object string, Lisp_Object lim) | |
| 883 { | |
| 867 | 884 REGISTER Ibyte *p, *pend; |
| 885 REGISTER Ichar c; | |
| 428 | 886 /* We store the first 256 chars in an array here and the rest in |
| 887 a range table. */ | |
| 888 unsigned char fastmap[0400]; | |
| 889 int negate = 0; | |
| 890 REGISTER int i; | |
| 665 | 891 Charbpos limit; |
| 826 | 892 struct syntax_cache *scache; |
| 893 | |
| 428 | 894 if (NILP (lim)) |
| 895 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf); | |
| 896 else | |
| 897 { | |
| 898 CHECK_INT_COERCE_MARKER (lim); | |
| 899 limit = XINT (lim); | |
| 900 | |
| 901 /* In any case, don't allow scan outside bounds of buffer. */ | |
| 902 if (limit > BUF_ZV (buf)) limit = BUF_ZV (buf); | |
| 903 if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf); | |
| 904 } | |
| 905 | |
| 906 CHECK_STRING (string); | |
| 907 p = XSTRING_DATA (string); | |
| 908 pend = p + XSTRING_LENGTH (string); | |
| 909 memset (fastmap, 0, sizeof (fastmap)); | |
| 910 | |
| 911 Fclear_range_table (Vskip_chars_range_table); | |
| 912 | |
| 913 if (p != pend && *p == '^') | |
| 914 { | |
| 915 negate = 1; | |
| 916 p++; | |
| 917 } | |
| 918 | |
| 919 /* Find the characters specified and set their elements of fastmap. | |
| 920 If syntaxp, each character counts as itself. | |
| 921 Otherwise, handle backslashes and ranges specially */ | |
| 922 | |
| 923 while (p != pend) | |
| 924 { | |
| 867 | 925 c = itext_ichar (p); |
| 926 INC_IBYTEPTR (p); | |
| 428 | 927 if (syntaxp) |
| 928 { | |
|
5542
dab422055bab
Correct array bound for syntax_code_spec.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5539
diff
changeset
|
929 if (c < 0200 && syntax_spec_code[c] < (unsigned char) Smax) |
| 428 | 930 fastmap[c] = 1; |
| 931 else | |
| 831 | 932 invalid_argument ("Invalid syntax designator", make_char (c)); |
| 428 | 933 } |
| 934 else | |
| 935 { | |
| 936 if (c == '\\') | |
| 937 { | |
| 938 if (p == pend) break; | |
| 867 | 939 c = itext_ichar (p); |
| 940 INC_IBYTEPTR (p); | |
| 428 | 941 } |
| 942 if (p != pend && *p == '-') | |
| 943 { | |
| 867 | 944 Ichar cend; |
| 428 | 945 |
| 872 | 946 /* Skip over the dash. */ |
| 428 | 947 p++; |
| 948 if (p == pend) break; | |
| 867 | 949 cend = itext_ichar (p); |
| 428 | 950 while (c <= cend && c < 0400) |
| 951 { | |
| 952 fastmap[c] = 1; | |
| 953 c++; | |
| 954 } | |
| 955 if (c <= cend) | |
| 956 Fput_range_table (make_int (c), make_int (cend), Qt, | |
| 957 Vskip_chars_range_table); | |
| 867 | 958 INC_IBYTEPTR (p); |
| 428 | 959 } |
| 960 else | |
| 961 { | |
| 962 if (c < 0400) | |
| 963 fastmap[c] = 1; | |
| 964 else | |
| 965 Fput_range_table (make_int (c), make_int (c), Qt, | |
| 966 Vskip_chars_range_table); | |
| 967 } | |
| 968 } | |
| 969 } | |
| 970 | |
| 872 | 971 /* #### Not in FSF 21.1 */ |
| 428 | 972 if (syntaxp && fastmap['-'] != 0) |
| 973 fastmap[' '] = 1; | |
| 974 | |
| 975 /* If ^ was the first character, complement the fastmap. | |
| 976 We don't complement the range table, however; we just use negate | |
| 977 in the comparisons below. */ | |
| 978 | |
| 979 if (negate) | |
| 647 | 980 for (i = 0; i < (int) (sizeof (fastmap)); i++) |
| 428 | 981 fastmap[i] ^= 1; |
| 982 | |
| 983 { | |
| 665 | 984 Charbpos start_point = BUF_PT (buf); |
| 872 | 985 Charbpos pos = start_point; |
| 986 Charbpos pos_byte = BYTE_BUF_PT (buf); | |
| 428 | 987 |
| 988 if (syntaxp) | |
| 989 { | |
| 872 | 990 scache = setup_buffer_syntax_cache (buf, pos, forwardp ? 1 : -1); |
| 428 | 991 /* All syntax designators are normal chars so nothing strange |
| 992 to worry about */ | |
| 993 if (forwardp) | |
| 994 { | |
| 872 | 995 if (pos < limit) |
| 996 while (fastmap[(unsigned char) | |
| 997 syntax_code_spec | |
| 998 [(int) SYNTAX_FROM_CACHE | |
| 999 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) | |
| 1000 { | |
| 1001 pos++; | |
| 1002 INC_BYTEBPOS (buf, pos_byte); | |
| 879 | 1003 if (pos >= limit) |
| 872 | 1004 break; |
| 1005 UPDATE_SYNTAX_CACHE_FORWARD (scache, pos); | |
| 1006 } | |
| 428 | 1007 } |
| 1008 else | |
| 1009 { | |
| 872 | 1010 while (pos > limit) |
| 460 | 1011 { |
| 872 | 1012 Charbpos savepos = pos_byte; |
| 1013 pos--; | |
| 1014 DEC_BYTEBPOS (buf, pos_byte); | |
| 1015 UPDATE_SYNTAX_CACHE_BACKWARD (scache, pos); | |
| 1016 if (!fastmap[(unsigned char) | |
| 1017 syntax_code_spec | |
| 1018 [(int) SYNTAX_FROM_CACHE | |
| 1019 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) | |
| 1020 { | |
| 1021 pos++; | |
| 1022 pos_byte = savepos; | |
| 1023 break; | |
| 1024 } | |
| 460 | 1025 } |
| 428 | 1026 } |
| 1027 } | |
| 1028 else | |
| 1029 { | |
| 1030 if (forwardp) | |
| 1031 { | |
| 872 | 1032 while (pos < limit) |
| 428 | 1033 { |
| 872 | 1034 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
| 428 | 1035 if ((ch < 0400) ? fastmap[ch] : |
| 1036 (NILP (Fget_range_table (make_int (ch), | |
| 1037 Vskip_chars_range_table, | |
| 1038 Qnil)) | |
| 1039 == negate)) | |
| 872 | 1040 { |
| 1041 pos++; | |
| 1042 INC_BYTEBPOS (buf, pos_byte); | |
| 1043 } | |
| 428 | 1044 else |
| 1045 break; | |
| 1046 } | |
| 1047 } | |
| 1048 else | |
| 1049 { | |
| 872 | 1050 while (pos > limit) |
| 428 | 1051 { |
| 872 | 1052 Charbpos prev_pos_byte = pos_byte; |
| 1053 Ichar ch; | |
| 1054 | |
| 1055 DEC_BYTEBPOS (buf, prev_pos_byte); | |
| 1056 ch = BYTE_BUF_FETCH_CHAR (buf, prev_pos_byte); | |
| 428 | 1057 if ((ch < 0400) ? fastmap[ch] : |
| 1058 (NILP (Fget_range_table (make_int (ch), | |
| 1059 Vskip_chars_range_table, | |
| 1060 Qnil)) | |
| 1061 == negate)) | |
| 872 | 1062 { |
| 1063 pos--; | |
| 1064 pos_byte = prev_pos_byte; | |
| 1065 } | |
| 428 | 1066 else |
| 1067 break; | |
| 1068 } | |
| 1069 } | |
| 1070 } | |
| 1071 QUIT; | |
| 872 | 1072 BOTH_BUF_SET_PT (buf, pos, pos_byte); |
| 428 | 1073 return make_int (BUF_PT (buf) - start_point); |
| 1074 } | |
| 1075 } | |
| 1076 | |
| 1077 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /* | |
| 444 | 1078 Move point forward, stopping before a char not in STRING, or at pos LIMIT. |
| 428 | 1079 STRING is like the inside of a `[...]' in a regular expression |
| 1080 except that `]' is never special and `\\' quotes `^', `-' or `\\'. | |
| 1081 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter. | |
| 1082 With arg "^a-zA-Z", skips nonletters stopping before first letter. | |
| 1083 Returns the distance traveled, either zero or positive. | |
| 1084 | |
| 1085 Optional argument BUFFER defaults to the current buffer. | |
| 1086 */ | |
| 444 | 1087 (string, limit, buffer)) |
| 428 | 1088 { |
| 444 | 1089 return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit); |
| 428 | 1090 } |
| 1091 | |
| 1092 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /* | |
| 444 | 1093 Move point backward, stopping after a char not in STRING, or at pos LIMIT. |
| 428 | 1094 See `skip-chars-forward' for details. |
| 1095 Returns the distance traveled, either zero or negative. | |
| 1096 | |
| 1097 Optional argument BUFFER defaults to the current buffer. | |
| 1098 */ | |
| 444 | 1099 (string, limit, buffer)) |
| 428 | 1100 { |
| 444 | 1101 return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit); |
| 428 | 1102 } |
| 1103 | |
| 1104 | |
| 1105 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /* | |
| 1106 Move point forward across chars in specified syntax classes. | |
| 1107 SYNTAX is a string of syntax code characters. | |
| 444 | 1108 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT. |
| 428 | 1109 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX. |
| 1110 This function returns the distance traveled, either zero or positive. | |
| 1111 | |
| 1112 Optional argument BUFFER defaults to the current buffer. | |
| 1113 */ | |
| 444 | 1114 (syntax, limit, buffer)) |
| 428 | 1115 { |
| 444 | 1116 return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit); |
| 428 | 1117 } |
| 1118 | |
| 1119 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /* | |
| 1120 Move point backward across chars in specified syntax classes. | |
| 1121 SYNTAX is a string of syntax code characters. | |
| 444 | 1122 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT. |
| 428 | 1123 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX. |
| 1124 This function returns the distance traveled, either zero or negative. | |
| 1125 | |
| 1126 Optional argument BUFFER defaults to the current buffer. | |
| 1127 */ | |
| 444 | 1128 (syntax, limit, buffer)) |
| 428 | 1129 { |
| 444 | 1130 return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit); |
| 428 | 1131 } |
| 1132 | |
| 1133 | |
| 1134 /* Subroutines of Lisp buffer search functions. */ | |
| 1135 | |
| 1136 static Lisp_Object | |
| 444 | 1137 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror, |
| 428 | 1138 Lisp_Object count, Lisp_Object buffer, int direction, |
| 1139 int RE, int posix) | |
| 1140 { | |
| 665 | 1141 REGISTER Charbpos np; |
| 1142 Charbpos lim; | |
| 428 | 1143 EMACS_INT n = direction; |
| 1144 struct buffer *buf; | |
| 1145 | |
| 1146 if (!NILP (count)) | |
| 1147 { | |
| 1148 CHECK_INT (count); | |
| 1149 n *= XINT (count); | |
| 1150 } | |
| 1151 | |
| 1152 buf = decode_buffer (buffer, 0); | |
| 1153 CHECK_STRING (string); | |
| 444 | 1154 if (NILP (limit)) |
| 428 | 1155 lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf); |
| 1156 else | |
| 1157 { | |
| 444 | 1158 CHECK_INT_COERCE_MARKER (limit); |
| 1159 lim = XINT (limit); | |
| 428 | 1160 if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf)) |
| 563 | 1161 invalid_argument ("Invalid search limit (wrong side of point)", |
| 1162 Qunbound); | |
| 428 | 1163 if (lim > BUF_ZV (buf)) |
| 1164 lim = BUF_ZV (buf); | |
| 1165 if (lim < BUF_BEGV (buf)) | |
| 1166 lim = BUF_BEGV (buf); | |
| 1167 } | |
| 1168 | |
| 1169 np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE, | |
| 1170 (!NILP (buf->case_fold_search) | |
| 446 | 1171 ? XCASE_TABLE_CANON (buf->case_table) |
| 1172 : Qnil), | |
| 428 | 1173 (!NILP (buf->case_fold_search) |
| 446 | 1174 ? XCASE_TABLE_EQV (buf->case_table) |
| 1175 : Qnil), posix); | |
| 428 | 1176 |
| 1177 if (np <= 0) | |
| 1178 { | |
| 444 | 1179 if (NILP (noerror)) |
| 2268 | 1180 { |
| 1181 signal_failure (string); | |
| 1182 RETURN_NOT_REACHED (Qnil); | |
| 1183 } | |
| 444 | 1184 if (!EQ (noerror, Qt)) |
| 428 | 1185 { |
| 1186 if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf)) | |
| 2500 | 1187 ABORT (); |
| 428 | 1188 BUF_SET_PT (buf, lim); |
| 1189 return Qnil; | |
| 1190 #if 0 /* This would be clean, but maybe programs depend on | |
| 1191 a value of nil here. */ | |
| 1192 np = lim; | |
| 1193 #endif | |
| 1194 } | |
| 1195 else | |
| 1196 return Qnil; | |
| 1197 } | |
| 1198 | |
| 1199 if (np < BUF_BEGV (buf) || np > BUF_ZV (buf)) | |
| 2500 | 1200 ABORT (); |
| 428 | 1201 |
| 1202 BUF_SET_PT (buf, np); | |
| 1203 | |
| 1204 return make_int (np); | |
| 1205 } | |
| 1206 | |
| 1207 static int | |
| 1208 trivial_regexp_p (Lisp_Object regexp) | |
| 1209 { | |
| 1210 Bytecount len = XSTRING_LENGTH (regexp); | |
| 867 | 1211 Ibyte *s = XSTRING_DATA (regexp); |
| 428 | 1212 while (--len >= 0) |
| 1213 { | |
| 1214 switch (*s++) | |
| 1215 { | |
| 1724 | 1216 /* #### howcum ']' doesn't appear here, but ... */ |
| 428 | 1217 case '.': case '*': case '+': case '?': case '[': case '^': case '$': |
| 1218 return 0; | |
| 1219 case '\\': | |
| 1220 if (--len < 0) | |
| 1221 return 0; | |
| 1222 switch (*s++) | |
| 1223 { | |
| 1724 | 1224 /* ... ')' does appear here? ('<' and '>' can appear singly.) */ |
| 1225 /* #### are there other constructs to check? */ | |
| 428 | 1226 case '|': case '(': case ')': case '`': case '\'': case 'b': |
| 1227 case 'B': case '<': case '>': case 'w': case 'W': case 's': | |
| 1724 | 1228 case 'S': case '=': case '{': case '}': |
| 428 | 1229 #ifdef MULE |
| 1230 /* 97/2/25 jhod Added for category matches */ | |
| 1231 case 'c': case 'C': | |
| 1232 #endif /* MULE */ | |
| 1233 case '1': case '2': case '3': case '4': case '5': | |
| 1234 case '6': case '7': case '8': case '9': | |
| 1235 return 0; | |
| 1236 } | |
| 1237 } | |
| 1238 } | |
| 1239 return 1; | |
| 1240 } | |
| 1241 | |
| 1242 /* Search for the n'th occurrence of STRING in BUF, | |
| 665 | 1243 starting at position CHARBPOS and stopping at position BUFLIM, |
| 428 | 1244 treating PAT as a literal string if RE is false or as |
| 1245 a regular expression if RE is true. | |
| 1246 | |
| 1247 If N is positive, searching is forward and BUFLIM must be greater | |
| 665 | 1248 than CHARBPOS. |
| 428 | 1249 If N is negative, searching is backward and BUFLIM must be less |
| 665 | 1250 than CHARBPOS. |
| 428 | 1251 |
| 1252 Returns -x if only N-x occurrences found (x > 0), | |
| 1253 or else the position at the beginning of the Nth occurrence | |
| 1254 (if searching backward) or the end (if searching forward). | |
| 1255 | |
| 1256 POSIX is nonzero if we want full backtracking (POSIX style) | |
| 1257 for this pattern. 0 means backtrack only enough to get a valid match. */ | |
| 665 | 1258 static Charbpos |
| 1259 search_buffer (struct buffer *buf, Lisp_Object string, Charbpos charbpos, | |
| 1260 Charbpos buflim, EMACS_INT n, int RE, Lisp_Object trt, | |
| 446 | 1261 Lisp_Object inverse_trt, int posix) |
| 428 | 1262 { |
| 1263 Bytecount len = XSTRING_LENGTH (string); | |
| 867 | 1264 Ibyte *base_pat = XSTRING_DATA (string); |
| 428 | 1265 REGISTER EMACS_INT i, j; |
| 665 | 1266 Bytebpos p1, p2; |
| 428 | 1267 Bytecount s1, s2; |
| 665 | 1268 Bytebpos pos, lim; |
| 428 | 1269 |
| 853 | 1270 /* Some FSF junk with running_asynch_code, to preserve the match |
| 1271 data. Not necessary because we don't call process filters | |
| 1272 asynchronously (i.e. from within QUIT). */ | |
| 428 | 1273 |
| 1425 | 1274 /* Searching 0 times means noop---don't move, don't touch registers. */ |
| 1275 if (n == 0) | |
| 1276 return charbpos; | |
| 1277 | |
| 428 | 1278 /* Null string is found at starting position. */ |
| 1279 if (len == 0) | |
| 1280 { | |
| 665 | 1281 set_search_regs (buf, charbpos, 0); |
| 1282 return charbpos; | |
| 428 | 1283 } |
| 1284 | |
| 665 | 1285 pos = charbpos_to_bytebpos (buf, charbpos); |
| 1286 lim = charbpos_to_bytebpos (buf, buflim); | |
| 428 | 1287 if (RE && !trivial_regexp_p (string)) |
| 1288 { | |
| 1289 struct re_pattern_buffer *bufp; | |
| 826 | 1290 |
| 1291 bufp = compile_pattern (string, &search_regs, trt, | |
| 1292 wrap_buffer (buf), buf, posix, ERROR_ME); | |
| 428 | 1293 |
| 1294 /* Get pointers and sizes of the two strings | |
| 1295 that make up the visible portion of the buffer. */ | |
| 1296 | |
| 826 | 1297 p1 = BYTE_BUF_BEGV (buf); |
| 1298 p2 = BYTE_BUF_CEILING_OF (buf, p1); | |
| 428 | 1299 s1 = p2 - p1; |
| 826 | 1300 s2 = BYTE_BUF_ZV (buf) - p2; |
| 1301 | |
| 1302 while (n != 0) | |
| 428 | 1303 { |
| 1304 Bytecount val; | |
| 826 | 1305 struct syntax_cache scache_struct; |
| 1306 struct syntax_cache *scache = &scache_struct; | |
| 1307 | |
| 428 | 1308 QUIT; |
| 826 | 1309 /* By making the regex object, regex buffer, and syntax cache |
| 1310 arguments to re_{search,match}{,_2}, we've removed the need to | |
| 1311 do nasty things to deal with regex reentrancy. (See stack | |
| 1312 trace in signal.c for proof that this can happen.) | |
| 1313 | |
| 1314 #### there is still a potential problem with the regex cache -- | |
| 1315 the compiled regex could be overwritten. we'd need 20-fold | |
| 1316 reentrancy, though. Fix this. */ | |
| 1317 | |
| 428 | 1318 val = re_search_2 (bufp, |
| 826 | 1319 (char *) BYTE_BUF_BYTE_ADDRESS (buf, p1), s1, |
| 1320 (char *) BYTE_BUF_BYTE_ADDRESS (buf, p2), s2, | |
| 1321 pos - BYTE_BUF_BEGV (buf), lim - pos, &search_regs, | |
| 1322 n > 0 ? lim - BYTE_BUF_BEGV (buf) : | |
| 1323 pos - BYTE_BUF_BEGV (buf), wrap_buffer (buf), | |
| 1324 buf, scache); | |
| 428 | 1325 |
| 1326 if (val == -2) | |
| 1327 { | |
| 1328 matcher_overflow (); | |
| 1329 } | |
| 1330 if (val >= 0) | |
| 1331 { | |
| 1332 int num_regs = search_regs.num_regs; | |
| 826 | 1333 j = BYTE_BUF_BEGV (buf); |
| 428 | 1334 for (i = 0; i < num_regs; i++) |
| 1335 if (search_regs.start[i] >= 0) | |
| 1336 { | |
| 1337 search_regs.start[i] += j; | |
| 1338 search_regs.end[i] += j; | |
| 1339 } | |
| 793 | 1340 last_thing_searched = wrap_buffer (buf); |
| 428 | 1341 /* Set pos to the new position. */ |
| 826 | 1342 pos = n > 0 ? search_regs.end[0] : search_regs.start[0]; |
| 428 | 1343 fixup_search_regs_for_buffer (buf); |
| 665 | 1344 /* And charbpos too. */ |
| 826 | 1345 charbpos = n > 0 ? search_regs.end[0] : search_regs.start[0]; |
| 428 | 1346 } |
| 1347 else | |
| 826 | 1348 return (n > 0 ? 0 - n : n); |
| 1349 if (n > 0) n--; else n++; | |
| 428 | 1350 } |
| 665 | 1351 return charbpos; |
| 428 | 1352 } |
| 1353 else /* non-RE case */ | |
| 1354 { | |
| 446 | 1355 int charset_base = -1; |
| 1356 int boyer_moore_ok = 1; | |
| 2367 | 1357 Ibyte *patbuf = alloca_ibytes (len * MAX_ICHAR_LEN); |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1358 Ibyte *pat = patbuf; |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1359 |
| 446 | 1360 #ifdef MULE |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1361 int entirely_one_byte_p = buf->text->entirely_one_byte_p; |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1362 int nothing_greater_than_0xff = |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1363 buf->text->num_8_bit_fixed_chars == BUF_Z(buf) - BUF_BEG (buf); |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1364 |
| 446 | 1365 while (len > 0) |
| 1366 { | |
| 867 | 1367 Ibyte tmp_str[MAX_ICHAR_LEN]; |
| 1368 Ichar c, translated, inverse; | |
| 446 | 1369 Bytecount orig_bytelen, new_bytelen, inv_bytelen; |
| 1370 | |
| 1371 /* If we got here and the RE flag is set, it's because | |
| 1372 we're dealing with a regexp known to be trivial, so the | |
| 1373 backslash just quotes the next character. */ | |
| 1374 if (RE && *base_pat == '\\') | |
| 1375 { | |
| 1376 len--; | |
| 1377 base_pat++; | |
| 1378 } | |
| 867 | 1379 c = itext_ichar (base_pat); |
| 446 | 1380 translated = TRANSLATE (trt, c); |
| 1381 inverse = TRANSLATE (inverse_trt, c); | |
| 1382 | |
| 867 | 1383 orig_bytelen = itext_ichar_len (base_pat); |
| 1384 inv_bytelen = set_itext_ichar (tmp_str, inverse); | |
| 1385 new_bytelen = set_itext_ichar (tmp_str, translated); | |
| 446 | 1386 |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1387 if (boyer_moore_ok |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1388 /* Only do the Boyer-Moore check for characters needing |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1389 translation. */ |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1390 && (translated != c || inverse != c)) |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1391 { |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1392 Ichar starting_c = c; |
|
4421
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1393 int charset_base_code, checked = 0; |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1394 |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1395 do |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1396 { |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1397 c = TRANSLATE (inverse_trt, c); |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1398 |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1399 /* If a character cannot occur in the buffer, ignore |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1400 it. */ |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1401 if (c > 0x7F && entirely_one_byte_p) |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1402 continue; |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1403 |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1404 if (c > 0xFF && nothing_greater_than_0xff) |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1405 continue; |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1406 |
|
4421
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1407 checked = 1; |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1408 |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1409 if (-1 == charset_base) /* No charset yet specified. */ |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1410 { |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1411 /* Keep track of which charset and character set row |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1412 contains the characters that need translation. |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1413 |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1414 Zero out the bits corresponding to the last |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1415 byte. */ |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1416 charset_base = c & ~ICHAR_FIELD3_MASK; |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1417 } |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1418 else |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1419 { |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1420 charset_base_code = c & ~ICHAR_FIELD3_MASK; |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1421 |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1422 if (charset_base_code != charset_base) |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1423 { |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1424 /* If two different rows, or two different |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1425 charsets, appear, needing non-ASCII |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1426 translation, then we cannot use boyer_moore |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1427 search. See the comment at the head of |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1428 boyer_moore(). */ |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1429 boyer_moore_ok = 0; |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1430 break; |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1431 } |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1432 } |
|
4901
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1433 |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1434 if (ichar_len (c) > 2) |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1435 { |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1436 /* Case-equivalence plus repeated octets throws off |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1437 the construction of the stride table; avoid this. |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1438 |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1439 It should be possible to correct boyer_moore to |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1440 behave correctly even in this case--it doesn't have |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1441 problems with repeated octets when case conversion |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1442 is not involved--but this is not a critical |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1443 issue. */ |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1444 Ibyte encoded[MAX_ICHAR_LEN]; |
|
5016
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1445 Bytecount clen = set_itext_ichar (encoded, c); |
|
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1446 int a, b; |
|
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1447 for (a = 0; a < clen && boyer_moore_ok; ++a) |
|
4901
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1448 { |
|
5016
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1449 for (b = a + 1; b < clen && boyer_moore_ok; ++b) |
|
4901
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1450 { |
|
5016
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1451 if (encoded[a] == encoded[b]) |
|
4901
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1452 { |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1453 boyer_moore_ok = 0; |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1454 } |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1455 } |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1456 } |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1457 |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1458 if (0 == boyer_moore_ok) |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1459 { |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1460 break; |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1461 } |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1462 } |
|
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1463 |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1464 } while (c != starting_c); |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1465 |
|
4421
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1466 if (!checked) |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1467 { |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1468 #ifdef DEBUG_XEMACS |
| 5041 | 1469 if (debug_searches) |
|
4421
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1470 { |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1471 Lisp_Symbol *sym = XSYMBOL (Qsearch_algorithm_used); |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1472 sym->value = Qnil; |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1473 } |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1474 #endif |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1475 /* The "continue" clauses were used above, for every |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1476 translation of the character. As such, this character |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1477 is not to be found in the buffer and neither is the |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1478 string as a whole. Return immediately; also avoid |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1479 triggering the assertion a few lines down. */ |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1480 return n > 0 ? -n : n; |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1481 } |
|
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1482 |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1483 if (boyer_moore_ok && charset_base != -1 && |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1484 charset_base != (translated & ~ICHAR_FIELD3_MASK)) |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1485 { |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1486 /* In the rare event that the CANON entry for this |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1487 character is not in the desired set, choose one |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1488 that is, from the equivalence set. It doesn't much |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1489 matter which. */ |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1490 Ichar starting_ch = translated; |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1491 do |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1492 { |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1493 translated = TRANSLATE (inverse_trt, translated); |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1494 |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1495 if (charset_base == (translated & ~ICHAR_FIELD3_MASK)) |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1496 break; |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1497 |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1498 } while (starting_ch != translated); |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1499 |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1500 assert (starting_ch != translated); |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1501 |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1502 new_bytelen = set_itext_ichar (tmp_str, translated); |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1503 } |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1504 } |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1505 |
| 446 | 1506 memcpy (pat, tmp_str, new_bytelen); |
| 1507 pat += new_bytelen; | |
| 1508 base_pat += orig_bytelen; | |
| 1509 len -= orig_bytelen; | |
| 1510 } | |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1511 |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1512 if (-1 == charset_base) |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1513 { |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1514 charset_base = 'a' & ~ICHAR_FIELD3_MASK; /* Default to ASCII. */ |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1515 } |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1516 |
| 446 | 1517 #else /* not MULE */ |
| 1518 while (--len >= 0) | |
| 1519 { | |
| 1520 /* If we got here and the RE flag is set, it's because | |
| 1521 we're dealing with a regexp known to be trivial, so the | |
| 1522 backslash just quotes the next character. */ | |
| 1523 if (RE && *base_pat == '\\') | |
| 1524 { | |
| 1525 len--; | |
| 1526 base_pat++; | |
| 1527 } | |
| 1528 *pat++ = TRANSLATE (trt, *base_pat++); | |
| 1529 } | |
| 1530 #endif /* MULE */ | |
| 1531 len = pat - patbuf; | |
| 1532 pat = base_pat = patbuf; | |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1533 |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1534 #ifdef DEBUG_XEMACS |
| 5041 | 1535 if (debug_searches) |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1536 { |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1537 Lisp_Symbol *sym = XSYMBOL (Qsearch_algorithm_used); |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1538 sym->value = boyer_moore_ok ? Qboyer_moore : Qsimple_search; |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1539 } |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1540 #endif |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1541 |
| 446 | 1542 if (boyer_moore_ok) |
| 1543 return boyer_moore (buf, base_pat, len, pos, lim, n, | |
| 1544 trt, inverse_trt, charset_base); | |
| 1545 else | |
| 1546 return simple_search (buf, base_pat, len, pos, lim, n, trt); | |
| 1547 } | |
| 1548 } | |
| 1549 | |
| 826 | 1550 /* Do a simple string search N times for the string PAT (the BASE_PAT |
| 1551 argument), whose length in bytes is LEN, from buffer byte position POS | |
| 1552 until LIM. TRT is the translation table. | |
| 446 | 1553 |
| 1554 Return the character position where the last match was found: the end of the | |
| 1555 match when N > 0, its start otherwise. If M matches remain to be found, return -M. | |
| 1556 | |
| 1557 This kind of search works regardless of what is in PAT and | |
| 1558 regardless of what is in TRT. It is used in cases where | |
| 1559 boyer_moore cannot work. */ | |
| 1560 | |
| 665 | 1561 static Charbpos |
| 867 | 1562 simple_search (struct buffer *buf, Ibyte *base_pat, Bytecount len, |
| 826 | 1563 Bytebpos pos, Bytebpos lim, EMACS_INT n, Lisp_Object trt) |
| 446 | 1564 { |
| 1565 int forward = n > 0; | |
| 1566 Bytecount buf_len = 0; /* Shut up compiler. */ | |
| 1567 | |
| 826 | 1568 if (lim > pos) |
| 446 | 1569 while (n > 0) |
| 428 | 1570 { |
| 446 | 1571 while (1) |
| 428 | 1572 { |
| 826 | 1573 Bytecount this_len = len; |
| 1574 Bytebpos this_pos = pos; | |
| 867 | 1575 Ibyte *p = base_pat; |
| 826 | 1576 if (pos >= lim) |
| 446 | 1577 goto stop; |
| 1578 | |
| 1579 while (this_len > 0) | |
| 1580 { | |
| 867 | 1581 Ichar pat_ch, buf_ch; |
| 446 | 1582 Bytecount pat_len; |
| 1583 | |
| 867 | 1584 pat_ch = itext_ichar (p); |
| 826 | 1585 buf_ch = BYTE_BUF_FETCH_CHAR (buf, this_pos); |
| 446 | 1586 |
| 1587 buf_ch = TRANSLATE (trt, buf_ch); | |
| 1588 | |
| 1589 if (buf_ch != pat_ch) | |
| 1590 break; | |
| 1591 | |
| 867 | 1592 pat_len = itext_ichar_len (p); |
| 446 | 1593 p += pat_len; |
| 1594 this_len -= pat_len; | |
| 826 | 1595 INC_BYTEBPOS (buf, this_pos); |
| 446 | 1596 } |
| 1597 if (this_len == 0) | |
| 428 | 1598 { |
| 826 | 1599 buf_len = this_pos - pos; |
| 1600 pos = this_pos; | |
| 446 | 1601 break; |
| 428 | 1602 } |
| 826 | 1603 INC_BYTEBPOS (buf, pos); |
| 428 | 1604 } |
| 446 | 1605 n--; |
| 1606 } | |
| 1607 else | |
|
4322
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1608 { |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1609 /* If lim < len, then there are too few buffer positions to hold the |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1610 pattern between the beginning of the buffer and lim. Adjust to |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1611 ensure pattern fits. If we don't do this, we can assert in the |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1612 DEC_BYTEBPOS below. */ |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1613 if (lim < len) |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1614 lim = len; |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1615 while (n < 0) |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1616 { |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1617 while (1) |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1618 { |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1619 Bytecount this_len = len; |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1620 Bytebpos this_pos = pos; |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1621 Ibyte *p; |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1622 if (pos <= lim) |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1623 goto stop; |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1624 p = base_pat + len; |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1625 |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1626 while (this_len > 0) |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1627 { |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1628 Ichar pat_ch, buf_ch; |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1629 |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1630 DEC_IBYTEPTR (p); |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1631 DEC_BYTEBPOS (buf, this_pos); |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1632 pat_ch = itext_ichar (p); |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1633 buf_ch = BYTE_BUF_FETCH_CHAR (buf, this_pos); |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1634 |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1635 buf_ch = TRANSLATE (trt, buf_ch); |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1636 |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1637 if (buf_ch != pat_ch) |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1638 break; |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1639 |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1640 this_len -= itext_ichar_len (p); |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1641 } |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1642 if (this_len == 0) |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1643 { |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1644 buf_len = pos - this_pos; |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1645 pos = this_pos; |
| 446 | 1646 break; |
|
4322
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1647 } |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1648 DEC_BYTEBPOS (buf, pos); |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1649 } |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1650 n++; |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1651 } |
|
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1652 } |
| 446 | 1653 stop: |
| 1654 if (n == 0) | |
| 1655 { | |
| 665 | 1656 Charbpos beg, end, retval; |
| 446 | 1657 if (forward) |
| 1658 { | |
| 826 | 1659 beg = bytebpos_to_charbpos (buf, pos - buf_len); |
| 1660 retval = end = bytebpos_to_charbpos (buf, pos); | |
| 446 | 1661 } |
| 1662 else | |
| 428 | 1663 { |
| 826 | 1664 retval = beg = bytebpos_to_charbpos (buf, pos); |
| 1665 end = bytebpos_to_charbpos (buf, pos + buf_len); | |
| 428 | 1666 } |
| 446 | 1667 set_search_regs (buf, beg, end - beg); |
| 1668 | |
| 1669 return retval; | |
| 1670 } | |
| 1671 else if (n > 0) | |
| 1672 return -n; | |
| 1673 else | |
| 1674 return n; | |
| 1675 } | |
| 1676 | |
| 1677 /* Do Boyer-Moore search N times for the string PAT (the BASE_PAT | |
| 1678 argument), whose length in bytes is LEN, | |
| 1679 from buffer byte position POS until LIM. | |
| 1680 There is no separate DIRECTION argument; presumably the sign of N says | |
| 1681 which direction we search in. TRT and INVERSE_TRT are translation tables. | |
| 1682 | |
| 1683 This kind of search works if all the characters in PAT that have | |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1684 (non-ASCII) translation are the same aside from the last byte. This |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1685 makes it possible to translate just the last byte of a character, and do |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1686 so after just a simple test of the context. |
| 446 | 1687 |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1688 If that criterion is not satisfied, do not call this function. You will |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1689 get an assertion failure. */ |
| 446 | 1690 |
| 665 | 1691 static Charbpos |
| 867 | 1692 boyer_moore (struct buffer *buf, Ibyte *base_pat, Bytecount len, |
| 665 | 1693 Bytebpos pos, Bytebpos lim, EMACS_INT n, Lisp_Object trt, |
| 2333 | 1694 Lisp_Object inverse_trt, int USED_IF_MULE (charset_base)) |
| 446 | 1695 { |
| 1696 /* #### Someone really really really needs to comment the workings | |
| 1697 of this junk somewhat better. | |
| 1698 | |
| 1699 BTW "BM" stands for Boyer-Moore, which is one of the standard | |
| 1700 string-searching algorithms. It's the best string-searching | |
| 1701 algorithm out there, provided that: | |
| 1702 | |
| 1703 a) You're not fazed by algorithm complexity. (Rabin-Karp, which | |
| 1704 uses hashing, is much much easier to code but not as fast.) | |
| 1705 b) You can freely move backwards in the string that you're | |
| 1706 searching through. | |
| 1707 | |
| 1708 As the comment below tries to explain (but garbles in typical | |
| 1709 programmer-ese), the idea is that you don't have to do a | |
| 1710 string match at every successive position in the text. For | |
| 1711 example, let's say the pattern is "a very long string". We | |
| 1712 compare the last character in the string (`g') with the | |
| 1713 corresponding character in the text. If it mismatches, and | |
| 1714 it is, say, `z', then we can skip forward by the entire | |
| 1715 length of the pattern because `z' does not occur anywhere | |
| 1716 in the pattern. If the mismatching character does occur | |
| 1717 in the pattern, we can usually still skip forward by more | |
| 1718 than one: e.g. if it is `l', then we can skip forward | |
| 1719 by the length of the substring "ong string" -- i.e. the | |
| 1720 largest end section of the pattern that does not contain | |
| 1721 the mismatched character. So what we do is compute, for | |
| 1722 each possible character, the distance we can skip forward | |
| 1723 (the "stride") and use it in the string matching. This | |
| 1724 is what the BM_tab holds. */ | |
| 1725 REGISTER EMACS_INT *BM_tab; | |
| 1726 EMACS_INT *BM_tab_base; | |
| 1727 REGISTER Bytecount dirlen; | |
| 1728 EMACS_INT infinity; | |
| 665 | 1729 Bytebpos limit; |
| 446 | 1730 Bytecount stride_for_teases = 0; |
| 1731 REGISTER EMACS_INT i, j; | |
| 867 | 1732 Ibyte *pat, *pat_end; |
| 1733 REGISTER Ibyte *cursor, *p_limit, *ptr2; | |
| 1734 Ibyte simple_translate[0400]; | |
| 446 | 1735 REGISTER int direction = ((n > 0) ? 1 : -1); |
| 1736 #ifdef MULE | |
| 867 | 1737 Ibyte translate_prev_byte = 0; |
| 1738 Ibyte translate_anteprev_byte = 0; | |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1739 /* These need to be rethought in the event that the internal format |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1740 changes, or in the event that num_8_bit_fixed_chars disappears |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1741 (entirely_one_byte_p can be trivially worked out by checking is the |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1742 byte count equal to the char count.) */ |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1743 int buffer_entirely_one_byte_p = buf->text->entirely_one_byte_p; |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1744 int buffer_nothing_greater_than_0xff = |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1745 buf->text->num_8_bit_fixed_chars == BUF_Z(buf) - BUF_BEG (buf); |
| 446 | 1746 #endif |
| 1747 #ifdef C_ALLOCA | |
| 1748 EMACS_INT BM_tab_space[0400]; | |
| 1749 BM_tab = &BM_tab_space[0]; | |
| 1750 #else | |
| 1751 BM_tab = alloca_array (EMACS_INT, 256); | |
| 1752 #endif | |
| 1753 | |
| 1754 /* The general approach is that we are going to maintain that we | |
| 1755 know the first (closest to the present position, in whatever | |
| 1756 direction we're searching) character that could possibly be | |
| 1757 the last (furthest from present position) character of a | |
| 1758 valid match. We advance the state of our knowledge by | |
| 1759 looking at that character and seeing whether it indeed | |
| 1760 matches the last character of the pattern. If it does, we | |
| 1761 take a closer look. If it does not, we move our pointer (to | |
| 1762 putative last characters) as far as is logically possible. | |
| 1763 This amount of movement, which I call a stride, will be the | |
| 1764 length of the pattern if the actual character appears nowhere | |
| 1765 in the pattern, otherwise it will be the distance from the | |
| 1766 last occurrence of that character to the end of the pattern. | |
| 1767 As a coding trick, an enormous stride is coded into the table | |
| 1768 for characters that match the last character. This allows | |
| 1769 use of only a single test, a test for having gone past the | |
| 1770 end of the permissible match region, to test for both | |
| 1771 possible matches (when the stride goes past the end | |
| 1772 immediately) and failure to match (where you get nudged past | |
| 1773 the end one stride at a time). | |
| 1774 | |
| 1775 Here we make a "mickey mouse" BM table. The stride of the | |
| 1776 search is determined only by the last character of the | |
| 1777 putative match. If that character does not match, we will | |
| 1778 stride the proper distance to propose a match that | |
| 1779 superimposes it on the last instance of a character that | |
| 1780 matches it (per trt), or misses it entirely if there is | |
| 1781 none. */ | |
| 1782 | |
| 1783 dirlen = len * direction; | |
| 1784 infinity = dirlen - (lim + pos + len + len) * direction; | |
| 1785 /* Record position after the end of the pattern. */ | |
| 1786 pat_end = base_pat + len; | |
| 1787 if (direction < 0) | |
| 1788 base_pat = pat_end - 1; | |
| 1789 BM_tab_base = BM_tab; | |
| 1790 BM_tab += 0400; | |
| 1791 j = dirlen; /* to get it in a register */ | |
| 1792 /* A character that does not appear in the pattern induces a | |
| 1793 stride equal to the pattern length. */ | |
| 1794 while (BM_tab_base != BM_tab) | |
| 1795 { | |
| 1796 *--BM_tab = j; | |
| 1797 *--BM_tab = j; | |
| 1798 *--BM_tab = j; | |
| 1799 *--BM_tab = j; | |
| 1800 } | |
| 1801 /* We use this for translation, instead of TRT itself. We | |
| 1802 fill this in to handle the characters that actually occur | |
| 1803 in the pattern. Others don't matter anyway! */ | |
| 1804 xzero (simple_translate); | |
| 1805 for (i = 0; i < 0400; i++) | |
| 867 | 1806 simple_translate[i] = (Ibyte) i; |
| 446 | 1807 i = 0; |
| 1425 | 1808 |
| 446 | 1809 while (i != infinity) |
| 1810 { | |
| 867 | 1811 Ibyte *ptr = base_pat + i; |
| 446 | 1812 i += direction; |
| 1813 if (i == dirlen) | |
| 1814 i = infinity; | |
| 1815 if (!NILP (trt)) | |
| 428 | 1816 { |
| 446 | 1817 #ifdef MULE |
|
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1818 Ichar ch = -1, untranslated; |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1819 Ibyte byte; |
| 446 | 1820 int this_translated = 1; |
| 1821 | |
| 1822 /* Is *PTR the last byte of a character? */ | |
| 867 | 1823 if (pat_end - ptr == 1 || ibyte_first_byte_p (ptr[1])) |
| 428 | 1824 { |
| 867 | 1825 Ibyte *charstart = ptr; |
| 1826 while (!ibyte_first_byte_p (*charstart)) | |
| 446 | 1827 charstart--; |
| 867 | 1828 untranslated = itext_ichar (charstart); |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1829 |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1830 ch = TRANSLATE (trt, untranslated); |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1831 if (!ibyte_first_byte_p (*ptr)) |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1832 { |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1833 translate_prev_byte = ptr[-1]; |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1834 if (!ibyte_first_byte_p (translate_prev_byte)) |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1835 translate_anteprev_byte = ptr[-2]; |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1836 } |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1837 |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1838 if (ch != untranslated && /* Was translation done? */ |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1839 charset_base != (ch & ~ICHAR_FIELD3_MASK)) |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1840 { |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1841 /* In the very rare event that the CANON entry for this |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1842 character is not in the desired set, choose one that |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1843 is, from the equivalence set. It doesn't much matter |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1844 which, since we're building our own cheesy equivalence |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1845 table instead of using that belonging to the case |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1846 table directly. |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1847 |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1848 We can get here if search_buffer has worked out that |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1849 the buffer is entirely single width. */ |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1850 Ichar starting_ch = ch; |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1851 int count = 0; |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1852 do |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1853 { |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1854 ch = TRANSLATE (inverse_trt, ch); |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1855 if (charset_base == (ch & ~ICHAR_FIELD3_MASK)) |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1856 break; |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1857 ++count; |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1858 } while (starting_ch != ch); |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1859 |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1860 /* If starting_ch is equal to ch (and count is not one, |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1861 which means no translation is necessary), the case |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1862 table is corrupt. (Any mapping in the canon table |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1863 should be reflected in the equivalence table, and we |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1864 know from the canon table that untranslated maps to |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1865 starting_ch and that untranslated has the correct value |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1866 for charset_base.) */ |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1867 assert (1 == count || starting_ch != ch); |
| 446 | 1868 } |
|
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1869 { |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1870 Ibyte tmp[MAX_ICHAR_LEN]; |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1871 Bytecount chlen; |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1872 |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1873 chlen = set_itext_ichar (tmp, ch); |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1874 byte = tmp[chlen - 1]; |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1875 } |
| 428 | 1876 } |
| 1877 else | |
| 1878 { | |
|
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1879 byte = *ptr; |
| 446 | 1880 this_translated = 0; |
|
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1881 ch = -1; |
| 446 | 1882 } |
|
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1883 |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1884 /* BYTE = last byte of character CH when represented as text */ |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1885 j = byte; |
| 446 | 1886 |
| 1887 if (i == infinity) | |
| 1888 stride_for_teases = BM_tab[j]; | |
| 1889 BM_tab[j] = dirlen - i; | |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1890 /* A translation table is accompanied by its inverse -- see |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1891 comment in casetab.c. */ |
| 446 | 1892 if (this_translated) |
| 1893 { | |
| 867 | 1894 Ichar starting_ch = ch; |
| 446 | 1895 EMACS_INT starting_j = j; |
|
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1896 |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1897 text_checking_assert (valid_ichar_p (ch)); |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1898 do |
| 446 | 1899 { |
| 1900 ch = TRANSLATE (inverse_trt, ch); | |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1901 |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1902 if (ch > 0x7F && buffer_entirely_one_byte_p) |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1903 continue; |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1904 |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1905 if (ch > 0xFF && buffer_nothing_greater_than_0xff) |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1906 continue; |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1907 |
|
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1908 |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1909 /* Retrieve last byte of character CH when represented as |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1910 text */ |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1911 { |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1912 Ibyte tmp[MAX_ICHAR_LEN]; |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1913 Bytecount chlen; |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1914 |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1915 chlen = set_itext_ichar (tmp, ch); |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1916 j = tmp[chlen - 1]; |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1917 } |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1918 |
|
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1919 /* For all the characters that map into CH, set up |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1920 simple_translate to map the last byte into |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1921 STARTING_J. */ |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1922 simple_translate[j] = (Ibyte) starting_j; |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1923 BM_tab[j] = dirlen - i; |
|
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1924 |
|
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1925 } |
|
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1926 while (ch != starting_ch); |
| 446 | 1927 } |
|
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1928 #else /* not MULE */ |
| 446 | 1929 EMACS_INT k; |
| 1930 j = *ptr; | |
| 1931 k = (j = TRANSLATE (trt, j)); | |
| 1932 if (i == infinity) | |
| 1933 stride_for_teases = BM_tab[j]; | |
| 1934 BM_tab[j] = dirlen - i; | |
| 1935 /* A translation table is accompanied by its inverse -- | |
| 826 | 1936 see comment in casetab.c. */ |
| 446 | 1937 while ((j = TRANSLATE (inverse_trt, j)) != k) |
| 1938 { | |
| 867 | 1939 simple_translate[j] = (Ibyte) k; |
| 428 | 1940 BM_tab[j] = dirlen - i; |
| 1941 } | |
|
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1942 #endif /* (not) MULE */ |
| 446 | 1943 } |
| 1944 else | |
| 1945 { | |
| 1946 j = *ptr; | |
| 1947 | |
| 1948 if (i == infinity) | |
| 1949 stride_for_teases = BM_tab[j]; | |
| 1950 BM_tab[j] = dirlen - i; | |
| 428 | 1951 } |
| 446 | 1952 /* stride_for_teases tells how much to stride if we get a |
| 1953 match on the far character but are subsequently | |
| 1954 disappointed, by recording what the stride would have been | |
| 1955 for that character if the last character had been | |
| 1956 different. */ | |
| 1957 } | |
| 1958 infinity = dirlen - infinity; | |
| 1959 pos += dirlen - ((direction > 0) ? direction : 0); | |
| 1960 /* loop invariant - pos points at where last char (first char if | |
| 1961 reverse) of pattern would align in a possible match. */ | |
| 1962 while (n != 0) | |
| 1963 { | |
| 665 | 1964 Bytebpos tail_end; |
| 867 | 1965 Ibyte *tail_end_ptr; |
| 446 | 1966 /* It's been reported that some (broken) compiler thinks |
| 1967 that Boolean expressions in an arithmetic context are | |
| 1968 unsigned. Using an explicit ?1:0 prevents this. */ | |
| 1969 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0) | |
| 1970 return n * (0 - direction); | |
| 1971 /* First we do the part we can by pointers (maybe | |
| 1972 nothing) */ | |
| 1973 QUIT; | |
| 1974 pat = base_pat; | |
| 1975 limit = pos - dirlen + direction; | |
| 1976 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF | |
| 1977 have changed. See buffer.h. */ | |
| 1978 limit = ((direction > 0) | |
| 826 | 1979 ? BYTE_BUF_CEILING_OF (buf, limit) - 1 |
| 1980 : BYTE_BUF_FLOOR_OF (buf, limit + 1)); | |
| 446 | 1981 /* LIMIT is now the last (not beyond-last!) value POS can |
| 1982 take on without hitting edge of buffer or the gap. */ | |
| 1983 limit = ((direction > 0) | |
| 1984 ? min (lim - 1, min (limit, pos + 20000)) | |
| 1985 : max (lim, max (limit, pos - 20000))); | |
| 826 | 1986 tail_end = BYTE_BUF_CEILING_OF (buf, pos); |
| 1987 tail_end_ptr = BYTE_BUF_BYTE_ADDRESS (buf, tail_end); | |
| 446 | 1988 |
| 1989 if ((limit - pos) * direction > 20) | |
| 428 | 1990 { |
| 826 | 1991 /* We have to be careful because the code can generate addresses |
| 1992 that don't point to the beginning of characters. */ | |
| 1993 p_limit = BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, limit); | |
| 1994 ptr2 = (cursor = BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos)); | |
| 446 | 1995 /* In this loop, pos + cursor - ptr2 is the surrogate |
| 1996 for pos */ | |
| 1997 while (1) /* use one cursor setting as long as i can */ | |
| 1998 { | |
| 1999 if (direction > 0) /* worth duplicating */ | |
| 2000 { | |
| 2001 /* Use signed comparison if appropriate to make | |
| 2002 cursor+infinity sure to be > p_limit. | |
| 2003 Assuming that the buffer lies in a range of | |
| 2004 addresses that are all "positive" (as ints) | |
| 2005 or all "negative", either kind of comparison | |
| 2006 will work as long as we don't step by | |
| 2007 infinity. So pick the kind that works when | |
| 2008 we do step by infinity. */ | |
| 2009 if ((EMACS_INT) (p_limit + infinity) > | |
| 2010 (EMACS_INT) p_limit) | |
| 2011 while ((EMACS_INT) cursor <= | |
| 2012 (EMACS_INT) p_limit) | |
| 2013 cursor += BM_tab[*cursor]; | |
| 2014 else | |
| 2015 while ((EMACS_UINT) cursor <= | |
| 2016 (EMACS_UINT) p_limit) | |
| 2017 cursor += BM_tab[*cursor]; | |
| 2018 } | |
| 2019 else | |
| 2020 { | |
| 2021 if ((EMACS_INT) (p_limit + infinity) < | |
| 2022 (EMACS_INT) p_limit) | |
| 2023 while ((EMACS_INT) cursor >= | |
| 2024 (EMACS_INT) p_limit) | |
| 2025 cursor += BM_tab[*cursor]; | |
| 2026 else | |
| 2027 while ((EMACS_UINT) cursor >= | |
| 2028 (EMACS_UINT) p_limit) | |
| 2029 cursor += BM_tab[*cursor]; | |
| 2030 } | |
| 2031 /* If you are here, cursor is beyond the end of the | |
| 2032 searched region. This can happen if you match on | |
| 2033 the far character of the pattern, because the | |
| 2034 "stride" of that character is infinity, a number | |
| 2035 able to throw you well beyond the end of the | |
| 2036 search. It can also happen if you fail to match | |
| 2037 within the permitted region and would otherwise | |
| 2038 try a character beyond that region */ | |
| 2039 if ((cursor - p_limit) * direction <= len) | |
| 2040 break; /* a small overrun is genuine */ | |
| 2041 cursor -= infinity; /* large overrun = hit */ | |
| 2042 i = dirlen - direction; | |
| 2043 if (!NILP (trt)) | |
| 2044 { | |
| 2045 while ((i -= direction) + direction != 0) | |
| 2046 { | |
| 2047 #ifdef MULE | |
| 867 | 2048 Ichar ch; |
| 446 | 2049 cursor -= direction; |
| 2050 /* Translate only the last byte of a character. */ | |
| 2051 if ((cursor == tail_end_ptr | |
| 867 | 2052 || ibyte_first_byte_p (cursor[1])) |
| 2053 && (ibyte_first_byte_p (cursor[0]) | |
| 446 | 2054 || (translate_prev_byte == cursor[-1] |
| 867 | 2055 && (ibyte_first_byte_p (translate_prev_byte) |
| 446 | 2056 || translate_anteprev_byte == cursor[-2])))) |
| 2057 ch = simple_translate[*cursor]; | |
| 2058 else | |
| 2059 ch = *cursor; | |
| 2060 if (pat[i] != ch) | |
| 2061 break; | |
| 2062 #else | |
| 2063 if (pat[i] != TRANSLATE (trt, *(cursor -= direction))) | |
| 2064 break; | |
| 2065 #endif | |
| 2066 } | |
| 2067 } | |
| 2068 else | |
| 2069 { | |
| 2070 while ((i -= direction) + direction != 0) | |
| 2071 if (pat[i] != *(cursor -= direction)) | |
| 2072 break; | |
| 2073 } | |
| 2074 cursor += dirlen - i - direction; /* fix cursor */ | |
| 2075 if (i + direction == 0) | |
| 2076 { | |
| 2077 cursor -= direction; | |
| 2078 | |
| 2079 { | |
| 665 | 2080 Bytebpos bytstart = (pos + cursor - ptr2 + |
| 446 | 2081 ((direction > 0) |
| 2082 ? 1 - len : 0)); | |
| 665 | 2083 Charbpos bufstart = bytebpos_to_charbpos (buf, bytstart); |
| 2084 Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len); | |
| 446 | 2085 |
| 2086 set_search_regs (buf, bufstart, bufend - bufstart); | |
| 2087 } | |
| 2088 | |
| 2089 if ((n -= direction) != 0) | |
| 2090 cursor += dirlen; /* to resume search */ | |
| 2091 else | |
| 2092 return ((direction > 0) | |
| 2093 ? search_regs.end[0] : search_regs.start[0]); | |
| 2094 } | |
| 2095 else | |
| 2096 cursor += stride_for_teases; /* <sigh> we lose - */ | |
| 2097 } | |
| 2098 pos += cursor - ptr2; | |
| 2099 } | |
| 2100 else | |
| 2101 /* Now we'll pick up a clump that has to be done the hard | |
| 2102 way because it covers a discontinuity */ | |
| 2103 { | |
| 428 | 2104 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF |
| 2105 have changed. See buffer.h. */ | |
| 2106 limit = ((direction > 0) | |
| 826 | 2107 ? BYTE_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1 |
| 2108 : BYTE_BUF_FLOOR_OF (buf, pos - dirlen)); | |
| 428 | 2109 limit = ((direction > 0) |
| 446 | 2110 ? min (limit + len, lim - 1) |
| 2111 : max (limit - len, lim)); | |
| 2112 /* LIMIT is now the last value POS can have | |
| 2113 and still be valid for a possible match. */ | |
| 2114 while (1) | |
| 428 | 2115 { |
| 446 | 2116 /* This loop can be coded for space rather than |
| 2117 speed because it will usually run only once. | |
| 2118 (the reach is at most len + 21, and typically | |
| 2119 does not exceed len) */ | |
| 2120 while ((limit - pos) * direction >= 0) | |
| 826 | 2121 /* *not* BYTE_BUF_FETCH_CHAR. We are working here |
| 446 | 2122 with bytes, not characters. */ |
| 826 | 2123 pos += BM_tab[*BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos)]; |
| 446 | 2124 /* now run the same tests to distinguish going off |
| 2125 the end, a match or a phony match. */ | |
| 2126 if ((pos - limit) * direction <= len) | |
| 2127 break; /* ran off the end */ | |
| 2128 /* Found what might be a match. | |
| 2129 Set POS back to last (first if reverse) char pos. */ | |
| 2130 pos -= infinity; | |
| 2131 i = dirlen - direction; | |
| 2132 while ((i -= direction) + direction != 0) | |
| 428 | 2133 { |
| 446 | 2134 #ifdef MULE |
| 867 | 2135 Ichar ch; |
| 2136 Ibyte *ptr; | |
| 446 | 2137 #endif |
| 2138 pos -= direction; | |
| 2139 #ifdef MULE | |
| 826 | 2140 ptr = BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos); |
| 446 | 2141 if ((ptr == tail_end_ptr |
| 867 | 2142 || ibyte_first_byte_p (ptr[1])) |
| 2143 && (ibyte_first_byte_p (ptr[0]) | |
| 446 | 2144 || (translate_prev_byte == ptr[-1] |
| 867 | 2145 && (ibyte_first_byte_p (translate_prev_byte) |
| 446 | 2146 || translate_anteprev_byte == ptr[-2])))) |
| 2147 ch = simple_translate[*ptr]; | |
| 428 | 2148 else |
| 446 | 2149 ch = *ptr; |
| 2150 if (pat[i] != ch) | |
| 2151 break; | |
| 2152 | |
| 2153 #else | |
| 826 | 2154 if (pat[i] != |
| 2155 TRANSLATE (trt, | |
| 2156 *BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos))) | |
| 446 | 2157 break; |
| 2158 #endif | |
| 428 | 2159 } |
| 446 | 2160 /* Above loop has moved POS part or all the way back |
| 2161 to the first char pos (last char pos if reverse). | |
| 2162 Set it once again at the last (first if reverse) | |
| 2163 char. */ | |
| 2164 pos += dirlen - i- direction; | |
| 2165 if (i + direction == 0) | |
| 428 | 2166 { |
| 446 | 2167 pos -= direction; |
| 2168 | |
| 2169 { | |
| 665 | 2170 Bytebpos bytstart = (pos + |
| 446 | 2171 ((direction > 0) |
| 2172 ? 1 - len : 0)); | |
| 665 | 2173 Charbpos bufstart = bytebpos_to_charbpos (buf, bytstart); |
| 2174 Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len); | |
| 446 | 2175 |
| 2176 set_search_regs (buf, bufstart, bufend - bufstart); | |
| 2177 } | |
| 2178 | |
| 2179 if ((n -= direction) != 0) | |
| 2180 pos += dirlen; /* to resume search */ | |
| 428 | 2181 else |
| 446 | 2182 return ((direction > 0) |
| 2183 ? search_regs.end[0] : search_regs.start[0]); | |
| 428 | 2184 } |
| 446 | 2185 else |
| 2186 pos += stride_for_teases; | |
| 2187 } | |
| 428 | 2188 } |
| 446 | 2189 /* We have done one clump. Can we continue? */ |
| 2190 if ((lim - pos) * direction < 0) | |
| 2191 return (0 - n) * direction; | |
| 428 | 2192 } |
| 665 | 2193 return bytebpos_to_charbpos (buf, pos); |
| 428 | 2194 } |
| 2195 | |
| 1024 | 2196 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the |
| 2197 buffer for a match just found. */ | |
| 428 | 2198 |
| 2199 static void | |
| 665 | 2200 set_search_regs (struct buffer *buf, Charbpos beg, Charcount len) |
| 428 | 2201 { |
| 2202 /* Make sure we have registers in which to store | |
| 2203 the match position. */ | |
| 2204 if (search_regs.num_regs == 0) | |
| 2205 { | |
| 2206 search_regs.start = xnew (regoff_t); | |
| 2207 search_regs.end = xnew (regoff_t); | |
| 2208 search_regs.num_regs = 1; | |
| 2209 } | |
| 2210 | |
| 1468 | 2211 clear_search_regs (); |
| 428 | 2212 search_regs.start[0] = beg; |
| 2213 search_regs.end[0] = beg + len; | |
| 793 | 2214 last_thing_searched = wrap_buffer (buf); |
| 428 | 2215 } |
| 2216 | |
| 1468 | 2217 /* Clear search registers so match data will be null. */ |
| 1024 | 2218 |
| 2219 static void | |
| 1468 | 2220 clear_search_regs (void) |
| 1024 | 2221 { |
| 2222 /* This function has been Mule-ized. */ | |
| 2223 int i; | |
| 2224 | |
| 1468 | 2225 for (i = 0; i < search_regs.num_regs; i++) |
| 2226 search_regs.start[i] = search_regs.end[i] = -1; | |
| 1024 | 2227 } |
| 2228 | |
| 428 | 2229 |
| 2230 /* Given a string of words separated by word delimiters, | |
| 442 | 2231 compute a regexp that matches those exact words |
| 2232 separated by arbitrary punctuation. */ | |
| 428 | 2233 |
| 2234 static Lisp_Object | |
| 2235 wordify (Lisp_Object buffer, Lisp_Object string) | |
| 2236 { | |
| 2237 Charcount i, len; | |
| 2238 EMACS_INT punct_count = 0, word_count = 0; | |
| 2239 struct buffer *buf = decode_buffer (buffer, 0); | |
| 826 | 2240 Lisp_Object syntax_table = buf->mirror_syntax_table; |
| 428 | 2241 |
| 2242 CHECK_STRING (string); | |
| 826 | 2243 len = string_char_length (string); |
| 428 | 2244 |
| 2245 for (i = 0; i < len; i++) | |
| 867 | 2246 if (!WORD_SYNTAX_P (syntax_table, string_ichar (string, i))) |
| 428 | 2247 { |
| 2248 punct_count++; | |
| 2249 if (i > 0 && WORD_SYNTAX_P (syntax_table, | |
| 867 | 2250 string_ichar (string, i - 1))) |
| 428 | 2251 word_count++; |
| 2252 } | |
| 867 | 2253 if (WORD_SYNTAX_P (syntax_table, string_ichar (string, len - 1))) |
| 428 | 2254 word_count++; |
|
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
2255 if (!word_count) return build_ascstring (""); |
| 428 | 2256 |
| 2257 { | |
| 2258 /* The following value is an upper bound on the amount of storage we | |
| 2259 need. In non-Mule, it is exact. */ | |
| 867 | 2260 Ibyte *storage = |
| 2367 | 2261 alloca_ibytes (XSTRING_LENGTH (string) - punct_count + |
| 428 | 2262 5 * (word_count - 1) + 4); |
| 867 | 2263 Ibyte *o = storage; |
| 428 | 2264 |
| 2265 *o++ = '\\'; | |
| 2266 *o++ = 'b'; | |
| 2267 | |
| 2268 for (i = 0; i < len; i++) | |
| 2269 { | |
| 867 | 2270 Ichar ch = string_ichar (string, i); |
| 428 | 2271 |
| 2272 if (WORD_SYNTAX_P (syntax_table, ch)) | |
| 867 | 2273 o += set_itext_ichar (o, ch); |
| 428 | 2274 else if (i > 0 |
| 2275 && WORD_SYNTAX_P (syntax_table, | |
| 867 | 2276 string_ichar (string, i - 1)) |
| 428 | 2277 && --word_count) |
| 2278 { | |
| 2279 *o++ = '\\'; | |
| 2280 *o++ = 'W'; | |
| 2281 *o++ = '\\'; | |
| 2282 *o++ = 'W'; | |
| 2283 *o++ = '*'; | |
| 2284 } | |
| 2285 } | |
| 2286 | |
| 2287 *o++ = '\\'; | |
| 2288 *o++ = 'b'; | |
| 2289 | |
| 2290 return make_string (storage, o - storage); | |
| 2291 } | |
| 2292 } | |
| 2293 | |
| 2294 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /* | |
| 2295 Search backward from point for STRING. | |
| 2296 Set point to the beginning of the occurrence found, and return point. | |
| 444 | 2297 |
| 2298 Optional second argument LIMIT bounds the search; it is a buffer | |
| 2299 position. The match found must not extend before that position. | |
| 2300 The value nil is equivalent to (point-min). | |
| 2301 | |
| 2302 Optional third argument NOERROR, if t, means just return nil (no | |
| 2303 error) if the search fails. If neither nil nor t, set point to LIMIT | |
| 2304 and return nil. | |
| 2305 | |
| 2306 Optional fourth argument COUNT is a repeat count--search for | |
| 2307 successive occurrences. | |
| 2308 | |
| 428 | 2309 Optional fifth argument BUFFER specifies the buffer to search in and |
| 444 | 2310 defaults to the current buffer. |
| 2311 | |
| 1468 | 2312 When the match is successful, this function modifies the match data |
| 2313 that `match-beginning', `match-end' and `match-data' access; save the | |
| 2314 match data with `match-data' and restore it with `store-match-data' if | |
| 2315 you want to preserve them. If the match fails, the match data from the | |
| 2316 previous successful match is preserved. | |
| 2317 | |
| 2318 See also the function `replace-match'. | |
| 428 | 2319 */ |
| 444 | 2320 (string, limit, noerror, count, buffer)) |
| 428 | 2321 { |
| 444 | 2322 return search_command (string, limit, noerror, count, buffer, -1, 0, 0); |
| 428 | 2323 } |
| 2324 | |
| 2325 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /* | |
| 2326 Search forward from point for STRING. | |
| 2327 Set point to the end of the occurrence found, and return point. | |
| 444 | 2328 |
| 2329 Optional second argument LIMIT bounds the search; it is a buffer | |
| 2330 position. The match found must not extend after that position. The | |
| 2331 value nil is equivalent to (point-max). | |
| 2332 | |
| 2333 Optional third argument NOERROR, if t, means just return nil (no | |
| 2334 error) if the search fails. If neither nil nor t, set point to LIMIT | |
| 2335 and return nil. | |
| 2336 | |
| 2337 Optional fourth argument COUNT is a repeat count--search for | |
| 2338 successive occurrences. | |
| 2339 | |
| 428 | 2340 Optional fifth argument BUFFER specifies the buffer to search in and |
| 444 | 2341 defaults to the current buffer. |
| 2342 | |
| 1468 | 2343 When the match is successful, this function modifies the match data |
| 2344 that `match-beginning', `match-end' and `match-data' access; save the | |
| 2345 match data with `match-data' and restore it with `store-match-data' if | |
| 2346 you want to preserve them. If the match fails, the match data from the | |
| 2347 previous successful match is preserved. | |
| 2348 | |
| 2349 See also the function `replace-match'. | |
| 428 | 2350 */ |
| 444 | 2351 (string, limit, noerror, count, buffer)) |
| 428 | 2352 { |
| 444 | 2353 return search_command (string, limit, noerror, count, buffer, 1, 0, 0); |
| 428 | 2354 } |
| 2355 | |
| 2356 DEFUN ("word-search-backward", Fword_search_backward, 1, 5, | |
| 2357 "sWord search backward: ", /* | |
| 2358 Search backward from point for STRING, ignoring differences in punctuation. | |
| 2359 Set point to the beginning of the occurrence found, and return point. | |
| 444 | 2360 |
| 2361 Optional second argument LIMIT bounds the search; it is a buffer | |
| 2362 position. The match found must not extend before that position. | |
| 2363 The value nil is equivalent to (point-min). | |
| 2364 | |
| 2365 Optional third argument NOERROR, if t, means just return nil (no | |
| 2366 error) if the search fails. If neither nil nor t, set point to LIMIT | |
| 2367 and return nil. | |
| 2368 | |
| 2369 Optional fourth argument COUNT is a repeat count--search for | |
| 2370 successive occurrences. | |
| 2371 | |
| 428 | 2372 Optional fifth argument BUFFER specifies the buffer to search in and |
| 444 | 2373 defaults to the current buffer. |
| 2374 | |
| 1468 | 2375 When the match is successful, this function modifies the match data |
| 2376 that `match-beginning', `match-end' and `match-data' access; save the | |
| 2377 match data with `match-data' and restore it with `store-match-data' if | |
| 2378 you want to preserve them. If the match fails, the match data from the | |
| 2379 previous successful match is preserved. | |
| 2380 | |
| 2381 See also the function `replace-match'. | |
| 428 | 2382 */ |
| 444 | 2383 (string, limit, noerror, count, buffer)) |
| 428 | 2384 { |
| 444 | 2385 return search_command (wordify (buffer, string), limit, noerror, count, |
| 428 | 2386 buffer, -1, 1, 0); |
| 2387 } | |
| 2388 | |
| 2389 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /* | |
| 2390 Search forward from point for STRING, ignoring differences in punctuation. | |
| 2391 Set point to the end of the occurrence found, and return point. | |
| 444 | 2392 |
| 2393 Optional second argument LIMIT bounds the search; it is a buffer | |
| 2394 position. The match found must not extend after that position. The | |
| 2395 value nil is equivalent to (point-max). | |
| 2396 | |
| 2397 Optional third argument NOERROR, if t, means just return nil (no | |
| 2398 error) if the search fails. If neither nil nor t, set point to LIMIT | |
| 2399 and return nil. | |
| 2400 | |
| 2401 Optional fourth argument COUNT is a repeat count--search for | |
| 2402 successive occurrences. | |
| 2403 | |
| 428 | 2404 Optional fifth argument BUFFER specifies the buffer to search in and |
| 444 | 2405 defaults to the current buffer. |
| 2406 | |
| 1468 | 2407 When the match is successful, this function modifies the match data |
| 2408 that `match-beginning', `match-end' and `match-data' access; save the | |
| 2409 match data with `match-data' and restore it with `store-match-data' if | |
| 2410 you want to preserve them. If the match fails, the match data from the | |
| 2411 previous successful match is preserved. | |
| 2412 | |
| 2413 See also the function `replace-match'. | |
| 428 | 2414 */ |
| 444 | 2415 (string, limit, noerror, count, buffer)) |
| 428 | 2416 { |
| 444 | 2417 return search_command (wordify (buffer, string), limit, noerror, count, |
| 428 | 2418 buffer, 1, 1, 0); |
| 2419 } | |
| 2420 | |
| 2421 DEFUN ("re-search-backward", Fre_search_backward, 1, 5, | |
| 2422 "sRE search backward: ", /* | |
| 2423 Search backward from point for match for regular expression REGEXP. | |
| 2424 Set point to the beginning of the match, and return point. | |
| 2425 The match found is the one starting last in the buffer | |
| 2426 and yet ending before the origin of the search. | |
| 444 | 2427 |
| 2428 Optional second argument LIMIT bounds the search; it is a buffer | |
| 2429 position. The match found must not extend before that position. | |
| 2430 The value nil is equivalent to (point-min). | |
| 2431 | |
| 2432 Optional third argument NOERROR, if t, means just return nil (no | |
| 2433 error) if the search fails. If neither nil nor t, set point to LIMIT | |
| 2434 and return nil. | |
| 2435 | |
| 2436 Optional fourth argument COUNT is a repeat count--search for | |
| 2437 successive occurrences. | |
| 2438 | |
| 428 | 2439 Optional fifth argument BUFFER specifies the buffer to search in and |
| 444 | 2440 defaults to the current buffer. |
| 2441 | |
| 1468 | 2442 When the match is successful, this function modifies the match data |
| 2443 that `match-beginning', `match-end' and `match-data' access; save the | |
| 2444 match data with `match-data' and restore it with `store-match-data' if | |
| 2445 you want to preserve them. If the match fails, the match data from the | |
| 2446 previous successful match is preserved. | |
| 2447 | |
| 2448 See also the function `replace-match'. | |
| 428 | 2449 */ |
| 444 | 2450 (regexp, limit, noerror, count, buffer)) |
| 428 | 2451 { |
| 444 | 2452 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0); |
| 428 | 2453 } |
| 2454 | |
| 2455 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /* | |
| 2456 Search forward from point for regular expression REGEXP. | |
| 2457 Set point to the end of the occurrence found, and return point. | |
| 444 | 2458 |
| 2459 Optional second argument LIMIT bounds the search; it is a buffer | |
| 2460 position. The match found must not extend after that position. The | |
| 2461 value nil is equivalent to (point-max). | |
| 2462 | |
| 2463 Optional third argument NOERROR, if t, means just return nil (no | |
| 2464 error) if the search fails. If neither nil nor t, set point to LIMIT | |
| 2465 and return nil. | |
| 2466 | |
| 2467 Optional fourth argument COUNT is a repeat count--search for | |
| 2468 successive occurrences. | |
| 2469 | |
| 428 | 2470 Optional fifth argument BUFFER specifies the buffer to search in and |
| 444 | 2471 defaults to the current buffer. |
| 2472 | |
| 1468 | 2473 When the match is successful, this function modifies the match data |
| 2474 that `match-beginning', `match-end' and `match-data' access; save the | |
| 2475 match data with `match-data' and restore it with `store-match-data' if | |
| 2476 you want to preserve them. If the match fails, the match data from the | |
| 2477 previous successful match is preserved. | |
| 2478 | |
| 2479 See also the function `replace-match'. | |
| 428 | 2480 */ |
| 444 | 2481 (regexp, limit, noerror, count, buffer)) |
| 428 | 2482 { |
| 444 | 2483 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0); |
| 428 | 2484 } |
| 2485 | |
| 2486 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5, | |
| 2487 "sPosix search backward: ", /* | |
| 2488 Search backward from point for match for regular expression REGEXP. | |
| 2489 Find the longest match in accord with Posix regular expression rules. | |
| 2490 Set point to the beginning of the match, and return point. | |
| 2491 The match found is the one starting last in the buffer | |
| 2492 and yet ending before the origin of the search. | |
| 444 | 2493 |
| 2494 Optional second argument LIMIT bounds the search; it is a buffer | |
| 2495 position. The match found must not extend before that position. | |
| 2496 The value nil is equivalent to (point-min). | |
| 2497 | |
| 2498 Optional third argument NOERROR, if t, means just return nil (no | |
| 2499 error) if the search fails. If neither nil nor t, set point to LIMIT | |
| 2500 and return nil. | |
| 2501 | |
| 2502 Optional fourth argument COUNT is a repeat count--search for | |
| 2503 successive occurrences. | |
| 2504 | |
| 428 | 2505 Optional fifth argument BUFFER specifies the buffer to search in and |
| 444 | 2506 defaults to the current buffer. |
| 2507 | |
| 1468 | 2508 When the match is successful, this function modifies the match data |
| 2509 that `match-beginning', `match-end' and `match-data' access; save the | |
| 2510 match data with `match-data' and restore it with `store-match-data' if | |
| 2511 you want to preserve them. If the match fails, the match data from the | |
| 2512 previous successful match is preserved. | |
| 2513 | |
| 2514 See also the function `replace-match'. | |
| 428 | 2515 */ |
| 444 | 2516 (regexp, limit, noerror, count, buffer)) |
| 428 | 2517 { |
| 444 | 2518 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1); |
| 428 | 2519 } |
| 2520 | |
| 2521 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /* | |
| 2522 Search forward from point for regular expression REGEXP. | |
| 2523 Find the longest match in accord with Posix regular expression rules. | |
| 2524 Set point to the end of the occurrence found, and return point. | |
| 444 | 2525 |
| 2526 Optional second argument LIMIT bounds the search; it is a buffer | |
| 2527 position. The match found must not extend after that position. The | |
| 2528 value nil is equivalent to (point-max). | |
| 2529 | |
| 2530 Optional third argument NOERROR, if t, means just return nil (no | |
| 2531 error) if the search fails. If neither nil nor t, set point to LIMIT | |
| 2532 and return nil. | |
| 2533 | |
| 2534 Optional fourth argument COUNT is a repeat count--search for | |
| 2535 successive occurrences. | |
| 2536 | |
| 428 | 2537 Optional fifth argument BUFFER specifies the buffer to search in and |
| 444 | 2538 defaults to the current buffer. |
| 2539 | |
| 1468 | 2540 When the match is successful, this function modifies the match data |
| 2541 that `match-beginning', `match-end' and `match-data' access; save the | |
| 2542 match data with `match-data' and restore it with `store-match-data' if | |
| 2543 you want to preserve them. If the match fails, the match data from the | |
| 2544 previous successful match is preserved. | |
| 2545 | |
| 2546 See also the function `replace-match'. | |
| 428 | 2547 */ |
| 444 | 2548 (regexp, limit, noerror, count, buffer)) |
| 428 | 2549 { |
| 444 | 2550 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1); |
| 428 | 2551 } |
| 2552 | |
| 2553 | |
| 2554 static Lisp_Object | |
| 2555 free_created_dynarrs (Lisp_Object cons) | |
| 2556 { | |
| 2557 Dynarr_free (get_opaque_ptr (XCAR (cons))); | |
| 2558 Dynarr_free (get_opaque_ptr (XCDR (cons))); | |
| 2559 free_opaque_ptr (XCAR (cons)); | |
| 2560 free_opaque_ptr (XCDR (cons)); | |
| 853 | 2561 free_cons (cons); |
| 428 | 2562 return Qnil; |
| 2563 } | |
| 2564 | |
| 2565 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /* | |
| 444 | 2566 Replace text matched by last search with REPLACEMENT. |
| 4199 | 2567 Leaves point at end of replacement text. |
| 2568 Optional boolean FIXEDCASE inhibits matching case of REPLACEMENT to source. | |
| 2569 Optional boolean LITERAL inhibits interpretation of escape sequences. | |
| 2570 Optional STRING provides the source text to replace. | |
| 2571 Optional STRBUFFER may be a buffer, providing match context, or an integer | |
| 2572 specifying the subexpression to replace. | |
| 2573 | |
| 2574 If FIXEDCASE is non-nil, do not alter case of replacement text. | |
| 428 | 2575 Otherwise maybe capitalize the whole text, or maybe just word initials, |
| 2576 based on the replaced text. | |
| 4199 | 2577 If the replaced text has only capital letters and has at least one |
| 2578 multiletter word, convert REPLACEMENT to all caps. | |
| 428 | 2579 If the replaced text has at least one word starting with a capital letter, |
| 444 | 2580 then capitalize each word in REPLACEMENT. |
| 428 | 2581 |
| 4199 | 2582 If LITERAL is non-nil, insert REPLACEMENT literally. |
| 428 | 2583 Otherwise treat `\\' as special: |
| 444 | 2584 `\\&' in REPLACEMENT means substitute original matched text. |
| 428 | 2585 `\\N' means substitute what matched the Nth `\\(...\\)'. |
| 2586 If Nth parens didn't match, substitute nothing. | |
| 2587 `\\\\' means insert one `\\'. | |
| 2588 `\\u' means upcase the next character. | |
| 2589 `\\l' means downcase the next character. | |
| 2590 `\\U' means begin upcasing all following characters. | |
| 2591 `\\L' means begin downcasing all following characters. | |
| 2592 `\\E' means terminate the effect of any `\\U' or `\\L'. | |
| 2593 Case changes made with `\\u', `\\l', `\\U', and `\\L' override | |
| 2594 all other case changes that may be made in the replaced text. | |
| 4199 | 2595 |
| 2596 If non-nil, STRING is the source string, and a new string with the specified | |
| 2597 replacements is created and returned. Otherwise the current buffer is the | |
| 2598 source text. | |
| 2599 | |
| 2600 If non-nil, STRBUFFER may be an integer, interpreted as the index of the | |
| 2601 subexpression to replace in the source text, or a buffer to provide the | |
| 2602 syntax table and case table. If nil, then the \"subexpression\" is 0, i.e., | |
| 2603 the whole match, and the current buffer provides the syntax and case tables. | |
| 2604 If STRING is nil, STRBUFFER must be nil or an integer. | |
| 2605 | |
| 2606 Specifying a subexpression is only useful after a regular expression match, | |
| 2607 since a fixed string search has no non-trivial subexpressions. | |
| 2608 | |
| 2609 It is not possible to specify both a buffer and a subexpression. If that is | |
| 2610 desired, the idiom `(with-current-buffer BUFFER (replace-match ... INTEGER))' | |
| 2611 may be appropriate. | |
| 2612 | |
| 2613 If STRING is nil but the last thing matched (or searched) was a string, or | |
| 2614 STRING is a string but the last thing matched was a buffer, an | |
| 2615 `invalid-argument' error will be signaled. (XEmacs does not check that the | |
| 2616 last thing searched is the source string, but it is not useful to use a | |
| 2617 different string as source.) | |
| 2618 | |
| 2619 If no match (including searches) has been successful or the requested | |
| 1468 | 2620 subexpression was not matched, an `args-out-of-range' error will be |
| 2621 signaled. (If no match has ever been conducted in this instance of | |
| 2622 XEmacs, an `invalid-operation' error will be signaled. This is very | |
| 2623 rare.) | |
| 428 | 2624 */ |
| 444 | 2625 (replacement, fixedcase, literal, string, strbuffer)) |
| 428 | 2626 { |
| 2627 /* This function can GC */ | |
| 2628 enum { nochange, all_caps, cap_initial } case_action; | |
| 665 | 2629 Charbpos pos, last; |
| 428 | 2630 int some_multiletter_word; |
| 2631 int some_lowercase; | |
| 2632 int some_uppercase; | |
| 2633 int some_nonuppercase_initial; | |
| 867 | 2634 Ichar c, prevc; |
| 428 | 2635 Charcount inslen; |
| 2636 struct buffer *buf; | |
| 826 | 2637 Lisp_Object syntax_table; |
| 428 | 2638 int mc_count; |
| 2639 Lisp_Object buffer; | |
| 2640 int_dynarr *ul_action_dynarr = 0; | |
| 2641 int_dynarr *ul_pos_dynarr = 0; | |
| 502 | 2642 int sub = 0; |
| 428 | 2643 int speccount; |
| 2644 | |
| 444 | 2645 CHECK_STRING (replacement); |
| 428 | 2646 |
| 4199 | 2647 /* Because GNU decided to be incompatible here, we support the following |
| 2648 baroque and bogus API for the STRING and STRBUFFER arguments: | |
| 2649 types interpretations | |
| 2650 STRING STRBUFFER STRING STRBUFFER | |
| 2651 nil nil none 0 = index of subexpression to replace | |
| 2652 nil integer none index of subexpression to replace | |
| 2653 nil other ***** error ***** | |
| 2654 string nil source current buffer provides syntax table | |
| 2655 subexpression = 0 (whole match) | |
| 2656 string buffer source buffer providing syntax table | |
| 2657 subexpression = 0 (whole match) | |
| 2658 string integer source current buffer provides syntax table | |
| 2659 subexpression = STRBUFFER | |
| 2660 string other ***** error ***** | |
| 2661 */ | |
| 2662 | |
| 2663 /* Do STRBUFFER first; if STRING is nil, we'll overwrite BUF and BUFFER. */ | |
| 2664 | |
| 2665 /* If the match data were abstracted into a special "match data" type | |
| 2666 instead of the typical half-assed "let the implementation be visible" | |
| 2667 form it's in, we could extend it to include the last string matched | |
| 2668 and the buffer used for that matching. But of course we can't change | |
| 2669 it as it is. | |
| 2670 */ | |
| 2671 if (NILP (strbuffer) || BUFFERP (strbuffer)) | |
| 2672 { | |
| 2673 buf = decode_buffer (strbuffer, 0); | |
| 2674 } | |
| 2675 else if (!NILP (strbuffer)) | |
| 2676 { | |
| 2677 CHECK_INT (strbuffer); | |
| 2678 sub = XINT (strbuffer); | |
| 2679 if (sub < 0 || sub >= (int) search_regs.num_regs) | |
| 2680 invalid_argument ("match data register invalid", strbuffer); | |
| 2681 if (search_regs.start[sub] < 0) | |
| 2682 invalid_argument ("match data register not set", strbuffer); | |
| 2683 buf = current_buffer; | |
| 2684 } | |
| 2685 else | |
| 2686 invalid_argument ("STRBUFFER must be nil, a buffer, or an integer", | |
| 2687 strbuffer); | |
| 2688 buffer = wrap_buffer (buf); | |
| 2689 | |
| 428 | 2690 if (! NILP (string)) |
| 2691 { | |
| 2692 CHECK_STRING (string); | |
| 2693 if (!EQ (last_thing_searched, Qt)) | |
| 4199 | 2694 invalid_argument ("last thing matched was not a string", Qunbound); |
| 428 | 2695 } |
| 2696 else | |
| 2697 { | |
| 2698 if (!BUFFERP (last_thing_searched)) | |
| 4199 | 2699 invalid_argument ("last thing matched was not a buffer", Qunbound); |
| 428 | 2700 buffer = last_thing_searched; |
| 2701 buf = XBUFFER (buffer); | |
| 2702 } | |
| 2703 | |
| 826 | 2704 syntax_table = buf->mirror_syntax_table; |
| 428 | 2705 |
| 2706 case_action = nochange; /* We tried an initialization */ | |
| 2707 /* but some C compilers blew it */ | |
| 2708 | |
| 2709 if (search_regs.num_regs == 0) | |
| 826 | 2710 signal_error (Qinvalid_operation, |
| 2711 "replace-match called before any match found", Qunbound); | |
| 428 | 2712 |
| 2713 if (NILP (string)) | |
| 2714 { | |
| 469 | 2715 if (search_regs.start[sub] < BUF_BEGV (buf) |
| 2716 || search_regs.start[sub] > search_regs.end[sub] | |
| 2717 || search_regs.end[sub] > BUF_ZV (buf)) | |
| 2718 args_out_of_range (make_int (search_regs.start[sub]), | |
| 2719 make_int (search_regs.end[sub])); | |
| 428 | 2720 } |
| 2721 else | |
| 2722 { | |
| 2723 if (search_regs.start[0] < 0 | |
| 2724 || search_regs.start[0] > search_regs.end[0] | |
| 826 | 2725 || search_regs.end[0] > string_char_length (string)) |
| 428 | 2726 args_out_of_range (make_int (search_regs.start[0]), |
| 2727 make_int (search_regs.end[0])); | |
| 2728 } | |
| 2729 | |
| 2730 if (NILP (fixedcase)) | |
| 2731 { | |
| 2732 /* Decide how to casify by examining the matched text. */ | |
| 2733 | |
| 707 | 2734 last = search_regs.end[sub]; |
| 428 | 2735 prevc = '\n'; |
| 2736 case_action = all_caps; | |
| 2737 | |
| 2738 /* some_multiletter_word is set nonzero if any original word | |
| 2739 is more than one letter long. */ | |
| 2740 some_multiletter_word = 0; | |
| 2741 some_lowercase = 0; | |
| 2742 some_nonuppercase_initial = 0; | |
| 2743 some_uppercase = 0; | |
| 2744 | |
| 707 | 2745 for (pos = search_regs.start[sub]; pos < last; pos++) |
| 428 | 2746 { |
| 2747 if (NILP (string)) | |
| 2748 c = BUF_FETCH_CHAR (buf, pos); | |
| 2749 else | |
| 867 | 2750 c = string_ichar (string, pos); |
| 428 | 2751 |
| 2752 if (LOWERCASEP (buf, c)) | |
| 2753 { | |
| 2754 /* Cannot be all caps if any original char is lower case */ | |
| 2755 | |
| 2756 some_lowercase = 1; | |
| 2757 if (!WORD_SYNTAX_P (syntax_table, prevc)) | |
| 2758 some_nonuppercase_initial = 1; | |
| 2759 else | |
| 2760 some_multiletter_word = 1; | |
| 2761 } | |
| 2762 else if (!NOCASEP (buf, c)) | |
| 2763 { | |
| 2764 some_uppercase = 1; | |
| 2765 if (!WORD_SYNTAX_P (syntax_table, prevc)) | |
| 2766 ; | |
| 2767 else | |
| 2768 some_multiletter_word = 1; | |
| 2769 } | |
| 2770 else | |
| 2771 { | |
| 2772 /* If the initial is a caseless word constituent, | |
| 2773 treat that like a lowercase initial. */ | |
| 2774 if (!WORD_SYNTAX_P (syntax_table, prevc)) | |
| 2775 some_nonuppercase_initial = 1; | |
| 2776 } | |
| 2777 | |
| 2778 prevc = c; | |
| 2779 } | |
| 2780 | |
| 2781 /* Convert to all caps if the old text is all caps | |
| 2782 and has at least one multiletter word. */ | |
| 2783 if (! some_lowercase && some_multiletter_word) | |
| 2784 case_action = all_caps; | |
| 2785 /* Capitalize each word, if the old text has all capitalized words. */ | |
| 2786 else if (!some_nonuppercase_initial && some_multiletter_word) | |
| 2787 case_action = cap_initial; | |
| 2788 else if (!some_nonuppercase_initial && some_uppercase) | |
| 2789 /* Should x -> yz, operating on X, give Yz or YZ? | |
| 2790 We'll assume the latter. */ | |
| 2791 case_action = all_caps; | |
| 2792 else | |
| 2793 case_action = nochange; | |
| 2794 } | |
| 2795 | |
| 2796 /* Do replacement in a string. */ | |
| 2797 if (!NILP (string)) | |
| 2798 { | |
| 2799 Lisp_Object before, after; | |
| 2800 | |
| 2801 speccount = specpdl_depth (); | |
|
5089
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2802 before = Fsubseq (string, Qzero, make_int (search_regs.start[sub])); |
|
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2803 after = Fsubseq (string, make_int (search_regs.end[sub]), Qnil); |
| 428 | 2804 |
| 444 | 2805 /* Do case substitution into REPLACEMENT if desired. */ |
| 428 | 2806 if (NILP (literal)) |
| 2807 { | |
| 826 | 2808 Charcount stlen = string_char_length (replacement); |
| 428 | 2809 Charcount strpos; |
| 2810 /* XEmacs change: rewrote this loop somewhat to make it | |
| 2811 cleaner. Also added \U, \E, etc. */ | |
| 2812 Charcount literal_start = 0; | |
| 2813 /* We build up the substituted string in ACCUM. */ | |
| 2814 Lisp_Object accum; | |
| 2815 | |
| 2816 accum = Qnil; | |
| 2817 | |
| 2818 /* OK, the basic idea here is that we scan through the | |
| 2819 replacement string until we find a backslash, which | |
| 2820 represents a substring of the original string to be | |
| 2821 substituted. We then append onto ACCUM the literal | |
| 2822 text before the backslash (LASTPOS marks the | |
| 2823 beginning of this) followed by the substring of the | |
| 2824 original string that needs to be inserted. */ | |
| 2825 for (strpos = 0; strpos < stlen; strpos++) | |
| 2826 { | |
| 2827 /* If LITERAL_END is set, we've encountered a backslash | |
| 2828 (the end of literal text to be inserted). */ | |
| 2829 Charcount literal_end = -1; | |
| 2830 /* If SUBSTART is set, we need to also insert the | |
| 2831 text from SUBSTART to SUBEND in the original string. */ | |
| 2832 Charcount substart = -1; | |
| 2833 Charcount subend = -1; | |
| 2834 | |
| 867 | 2835 c = string_ichar (replacement, strpos); |
| 428 | 2836 if (c == '\\' && strpos < stlen - 1) |
| 2837 { | |
| 867 | 2838 c = string_ichar (replacement, ++strpos); |
| 428 | 2839 if (c == '&') |
| 2840 { | |
| 2841 literal_end = strpos - 1; | |
| 2842 substart = search_regs.start[0]; | |
| 2843 subend = search_regs.end[0]; | |
| 2844 } | |
| 4199 | 2845 /* #### This logic is totally broken, |
| 2846 since we can have backrefs like "\99", right? */ | |
| 428 | 2847 else if (c >= '1' && c <= '9' && |
| 2848 c <= search_regs.num_regs + '0') | |
| 2849 { | |
| 2850 if (search_regs.start[c - '0'] >= 0) | |
| 2851 { | |
| 2852 literal_end = strpos - 1; | |
| 2853 substart = search_regs.start[c - '0']; | |
| 2854 subend = search_regs.end[c - '0']; | |
| 2855 } | |
| 2856 } | |
| 2857 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' || | |
| 2858 c == 'E') | |
| 2859 { | |
| 2860 /* Keep track of all case changes requested, but don't | |
| 2861 make them now. Do them later so we override | |
| 2862 everything else. */ | |
| 2863 if (!ul_pos_dynarr) | |
| 2864 { | |
| 2865 ul_pos_dynarr = Dynarr_new (int); | |
| 2866 ul_action_dynarr = Dynarr_new (int); | |
| 2867 record_unwind_protect | |
| 2868 (free_created_dynarrs, | |
| 2869 noseeum_cons | |
| 2870 (make_opaque_ptr (ul_pos_dynarr), | |
| 2871 make_opaque_ptr (ul_action_dynarr))); | |
| 2872 } | |
| 2873 literal_end = strpos - 1; | |
| 2874 Dynarr_add (ul_pos_dynarr, | |
| 2875 (!NILP (accum) | |
| 826 | 2876 ? string_char_length (accum) |
| 428 | 2877 : 0) + (literal_end - literal_start)); |
| 2878 Dynarr_add (ul_action_dynarr, c); | |
| 2879 } | |
| 2880 else if (c == '\\') | |
| 2881 /* So we get just one backslash. */ | |
| 2882 literal_end = strpos; | |
| 2883 } | |
| 2884 if (literal_end >= 0) | |
| 2885 { | |
| 2886 Lisp_Object literal_text = Qnil; | |
| 2887 Lisp_Object substring = Qnil; | |
| 2888 if (literal_end != literal_start) | |
|
5089
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2889 literal_text = Fsubseq (replacement, |
|
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2890 make_int (literal_start), |
|
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2891 make_int (literal_end)); |
| 428 | 2892 if (substart >= 0 && subend != substart) |
|
5089
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2893 substring = Fsubseq (string, make_int (substart), |
|
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2894 make_int (subend)); |
| 428 | 2895 if (!NILP (literal_text) || !NILP (substring)) |
| 2896 accum = concat3 (accum, literal_text, substring); | |
| 2897 literal_start = strpos + 1; | |
| 2898 } | |
| 2899 } | |
| 2900 | |
| 2901 if (strpos != literal_start) | |
| 2902 /* some literal text at end to be inserted */ | |
|
5089
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2903 replacement = concat2 (accum, Fsubseq (replacement, |
|
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2904 make_int (literal_start), |
|
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2905 make_int (strpos))); |
| 428 | 2906 else |
| 444 | 2907 replacement = accum; |
| 428 | 2908 } |
| 2909 | |
| 444 | 2910 /* replacement can be nil. */ |
| 2911 if (NILP (replacement)) | |
|
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
2912 replacement = build_ascstring (""); |
| 444 | 2913 |
| 428 | 2914 if (case_action == all_caps) |
| 444 | 2915 replacement = Fupcase (replacement, buffer); |
| 428 | 2916 else if (case_action == cap_initial) |
| 444 | 2917 replacement = Fupcase_initials (replacement, buffer); |
| 428 | 2918 |
| 2919 /* Now finally, we need to process the \U's, \E's, etc. */ | |
| 2920 if (ul_pos_dynarr) | |
| 2921 { | |
| 2922 int i = 0; | |
| 2923 int cur_action = 'E'; | |
| 826 | 2924 Charcount stlen = string_char_length (replacement); |
| 428 | 2925 Charcount strpos; |
| 2926 | |
| 2927 for (strpos = 0; strpos < stlen; strpos++) | |
| 2928 { | |
| 867 | 2929 Ichar curchar = string_ichar (replacement, strpos); |
| 2930 Ichar newchar = -1; | |
| 428 | 2931 if (i < Dynarr_length (ul_pos_dynarr) && |
| 2932 strpos == Dynarr_at (ul_pos_dynarr, i)) | |
| 2933 { | |
| 2934 int new_action = Dynarr_at (ul_action_dynarr, i); | |
| 2935 i++; | |
| 2936 if (new_action == 'u') | |
| 2937 newchar = UPCASE (buf, curchar); | |
| 2938 else if (new_action == 'l') | |
| 2939 newchar = DOWNCASE (buf, curchar); | |
| 2940 else | |
| 2941 cur_action = new_action; | |
| 2942 } | |
| 2943 if (newchar == -1) | |
| 2944 { | |
| 2945 if (cur_action == 'U') | |
| 2946 newchar = UPCASE (buf, curchar); | |
| 2947 else if (cur_action == 'L') | |
| 2948 newchar = DOWNCASE (buf, curchar); | |
| 2949 else | |
| 2950 newchar = curchar; | |
| 2951 } | |
| 2952 if (newchar != curchar) | |
| 793 | 2953 set_string_char (replacement, strpos, newchar); |
| 428 | 2954 } |
| 2955 } | |
| 2956 | |
| 2957 /* frees the Dynarrs if necessary. */ | |
| 771 | 2958 unbind_to (speccount); |
| 444 | 2959 return concat3 (before, replacement, after); |
| 428 | 2960 } |
| 2961 | |
| 707 | 2962 mc_count = begin_multiple_change (buf, search_regs.start[sub], |
| 2963 search_regs.end[sub]); | |
| 428 | 2964 |
| 2965 /* begin_multiple_change() records an unwind-protect, so we need to | |
| 2966 record this value now. */ | |
| 2967 speccount = specpdl_depth (); | |
| 2968 | |
| 2969 /* We insert the replacement text before the old text, and then | |
| 2970 delete the original text. This means that markers at the | |
| 2971 beginning or end of the original will float to the corresponding | |
| 2972 position in the replacement. */ | |
| 707 | 2973 BUF_SET_PT (buf, search_regs.start[sub]); |
| 428 | 2974 if (!NILP (literal)) |
| 444 | 2975 Finsert (1, &replacement); |
| 428 | 2976 else |
| 2977 { | |
| 826 | 2978 Charcount stlen = string_char_length (replacement); |
| 428 | 2979 Charcount strpos; |
| 2980 struct gcpro gcpro1; | |
| 444 | 2981 GCPRO1 (replacement); |
| 428 | 2982 for (strpos = 0; strpos < stlen; strpos++) |
| 2983 { | |
| 707 | 2984 /* on the first iteration assert(offset==0), |
| 2985 exactly complementing BUF_SET_PT() above. | |
| 2986 During the loop, it keeps track of the amount inserted. | |
| 2987 */ | |
| 2988 Charcount offset = BUF_PT (buf) - search_regs.start[sub]; | |
| 428 | 2989 |
| 867 | 2990 c = string_ichar (replacement, strpos); |
| 428 | 2991 if (c == '\\' && strpos < stlen - 1) |
| 2992 { | |
| 707 | 2993 /* XXX FIXME: replacing just a substring non-literally |
| 2994 using backslash refs to the match looks dangerous. But | |
| 2995 <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe | |
| 2996 <duwe@caldera.de> claims Finsert_buffer_substring already | |
| 2997 handles this correctly. | |
| 2998 */ | |
| 867 | 2999 c = string_ichar (replacement, ++strpos); |
| 428 | 3000 if (c == '&') |
| 3001 Finsert_buffer_substring | |
| 3002 (buffer, | |
| 3003 make_int (search_regs.start[0] + offset), | |
| 3004 make_int (search_regs.end[0] + offset)); | |
| 4199 | 3005 /* #### This logic is totally broken, |
| 3006 since we can have backrefs like "\99", right? */ | |
| 428 | 3007 else if (c >= '1' && c <= '9' && |
| 3008 c <= search_regs.num_regs + '0') | |
| 3009 { | |
| 3010 if (search_regs.start[c - '0'] >= 1) | |
| 3011 Finsert_buffer_substring | |
| 3012 (buffer, | |
| 3013 make_int (search_regs.start[c - '0'] + offset), | |
| 3014 make_int (search_regs.end[c - '0'] + offset)); | |
| 3015 } | |
| 3016 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' || | |
| 3017 c == 'E') | |
| 3018 { | |
| 3019 /* Keep track of all case changes requested, but don't | |
| 3020 make them now. Do them later so we override | |
| 3021 everything else. */ | |
| 3022 if (!ul_pos_dynarr) | |
| 3023 { | |
| 3024 ul_pos_dynarr = Dynarr_new (int); | |
| 3025 ul_action_dynarr = Dynarr_new (int); | |
| 3026 record_unwind_protect | |
| 3027 (free_created_dynarrs, | |
| 3028 Fcons (make_opaque_ptr (ul_pos_dynarr), | |
| 3029 make_opaque_ptr (ul_action_dynarr))); | |
| 3030 } | |
| 3031 Dynarr_add (ul_pos_dynarr, BUF_PT (buf)); | |
| 3032 Dynarr_add (ul_action_dynarr, c); | |
| 3033 } | |
| 3034 else | |
| 3035 buffer_insert_emacs_char (buf, c); | |
| 3036 } | |
| 3037 else | |
| 3038 buffer_insert_emacs_char (buf, c); | |
| 3039 } | |
| 3040 UNGCPRO; | |
| 3041 } | |
| 3042 | |
| 707 | 3043 inslen = BUF_PT (buf) - (search_regs.start[sub]); |
| 3044 buffer_delete_range (buf, search_regs.start[sub] + inslen, | |
| 3045 search_regs.end[sub] + inslen, 0); | |
| 428 | 3046 |
| 3047 if (case_action == all_caps) | |
| 3048 Fupcase_region (make_int (BUF_PT (buf) - inslen), | |
| 3049 make_int (BUF_PT (buf)), buffer); | |
| 3050 else if (case_action == cap_initial) | |
| 3051 Fupcase_initials_region (make_int (BUF_PT (buf) - inslen), | |
| 3052 make_int (BUF_PT (buf)), buffer); | |
| 3053 | |
| 3054 /* Now go through and make all the case changes that were requested | |
| 3055 in the replacement string. */ | |
| 3056 if (ul_pos_dynarr) | |
| 3057 { | |
| 665 | 3058 Charbpos eend = BUF_PT (buf); |
| 428 | 3059 int i = 0; |
| 3060 int cur_action = 'E'; | |
| 3061 | |
| 3062 for (pos = BUF_PT (buf) - inslen; pos < eend; pos++) | |
| 3063 { | |
| 867 | 3064 Ichar curchar = BUF_FETCH_CHAR (buf, pos); |
| 3065 Ichar newchar = -1; | |
| 428 | 3066 if (i < Dynarr_length (ul_pos_dynarr) && |
| 3067 pos == Dynarr_at (ul_pos_dynarr, i)) | |
| 3068 { | |
| 3069 int new_action = Dynarr_at (ul_action_dynarr, i); | |
| 3070 i++; | |
| 3071 if (new_action == 'u') | |
| 3072 newchar = UPCASE (buf, curchar); | |
| 3073 else if (new_action == 'l') | |
| 3074 newchar = DOWNCASE (buf, curchar); | |
| 3075 else | |
| 3076 cur_action = new_action; | |
| 3077 } | |
| 3078 if (newchar == -1) | |
| 3079 { | |
| 3080 if (cur_action == 'U') | |
| 3081 newchar = UPCASE (buf, curchar); | |
| 3082 else if (cur_action == 'L') | |
| 3083 newchar = DOWNCASE (buf, curchar); | |
| 3084 else | |
| 3085 newchar = curchar; | |
| 3086 } | |
| 3087 if (newchar != curchar) | |
| 3088 buffer_replace_char (buf, pos, newchar, 0, 0); | |
| 3089 } | |
| 3090 } | |
| 3091 | |
| 3092 /* frees the Dynarrs if necessary. */ | |
| 771 | 3093 unbind_to (speccount); |
| 428 | 3094 end_multiple_change (buf, mc_count); |
| 3095 | |
| 3096 return Qnil; | |
| 3097 } | |
| 3098 | |
| 3099 static Lisp_Object | |
| 3100 match_limit (Lisp_Object num, int beginningp) | |
| 3101 { | |
| 3102 int n; | |
| 3103 | |
| 3104 CHECK_INT (num); | |
| 3105 n = XINT (num); | |
| 3106 if (n < 0 || n >= search_regs.num_regs) | |
| 3107 args_out_of_range (num, make_int (search_regs.num_regs)); | |
| 3108 if (search_regs.num_regs == 0 || | |
| 3109 search_regs.start[n] < 0) | |
| 3110 return Qnil; | |
| 3111 return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]); | |
| 3112 } | |
| 3113 | |
| 3114 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /* | |
| 3115 Return position of start of text matched by last regexp search. | |
| 3116 NUM, specifies which parenthesized expression in the last regexp. | |
| 3117 Value is nil if NUMth pair didn't match, or there were less than NUM pairs. | |
| 3118 Zero means the entire text matched by the whole regexp or whole string. | |
| 3119 */ | |
| 3120 (num)) | |
| 3121 { | |
| 3122 return match_limit (num, 1); | |
| 3123 } | |
| 3124 | |
| 3125 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /* | |
| 3126 Return position of end of text matched by last regexp search. | |
| 3127 NUM specifies which parenthesized expression in the last regexp. | |
| 3128 Value is nil if NUMth pair didn't match, or there were less than NUM pairs. | |
| 3129 Zero means the entire text matched by the whole regexp or whole string. | |
| 3130 */ | |
| 3131 (num)) | |
| 3132 { | |
| 3133 return match_limit (num, 0); | |
| 3134 } | |
| 3135 | |
| 3136 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /* | |
| 3137 Return a list containing all info on what the last regexp search matched. | |
| 3138 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'. | |
| 3139 All the elements are markers or nil (nil if the Nth pair didn't match) | |
| 3140 if the last match was on a buffer; integers or nil if a string was matched. | |
| 3141 Use `store-match-data' to reinstate the data in this list. | |
| 3142 | |
| 3143 If INTEGERS (the optional first argument) is non-nil, always use integers | |
| 3144 \(rather than markers) to represent buffer positions. | |
| 3145 If REUSE is a list, reuse it as part of the value. If REUSE is long enough | |
| 3146 to hold all the values, and if INTEGERS is non-nil, no consing is done. | |
| 3147 */ | |
| 3148 (integers, reuse)) | |
| 3149 { | |
| 3150 Lisp_Object tail, prev; | |
| 3151 Lisp_Object *data; | |
| 3152 int i; | |
| 3153 Charcount len; | |
| 3154 | |
| 3155 if (NILP (last_thing_searched)) | |
| 563 | 3156 /*error ("match-data called before any match found", Qunbound);*/ |
| 428 | 3157 return Qnil; |
| 3158 | |
| 3159 data = alloca_array (Lisp_Object, 2 * search_regs.num_regs); | |
| 3160 | |
| 3161 len = -1; | |
| 3162 for (i = 0; i < search_regs.num_regs; i++) | |
| 3163 { | |
| 665 | 3164 Charbpos start = search_regs.start[i]; |
| 428 | 3165 if (start >= 0) |
| 3166 { | |
| 3167 if (EQ (last_thing_searched, Qt) | |
| 3168 || !NILP (integers)) | |
| 3169 { | |
| 3170 data[2 * i] = make_int (start); | |
| 3171 data[2 * i + 1] = make_int (search_regs.end[i]); | |
| 3172 } | |
| 3173 else if (BUFFERP (last_thing_searched)) | |
| 3174 { | |
| 3175 data[2 * i] = Fmake_marker (); | |
| 3176 Fset_marker (data[2 * i], | |
| 3177 make_int (start), | |
| 3178 last_thing_searched); | |
| 3179 data[2 * i + 1] = Fmake_marker (); | |
| 3180 Fset_marker (data[2 * i + 1], | |
| 3181 make_int (search_regs.end[i]), | |
| 3182 last_thing_searched); | |
| 3183 } | |
| 3184 else | |
| 3185 /* last_thing_searched must always be Qt, a buffer, or Qnil. */ | |
| 2500 | 3186 ABORT (); |
| 428 | 3187 |
| 3188 len = i; | |
| 3189 } | |
| 3190 else | |
| 3191 data[2 * i] = data [2 * i + 1] = Qnil; | |
| 3192 } | |
| 3193 if (!CONSP (reuse)) | |
| 3194 return Flist (2 * len + 2, data); | |
| 3195 | |
| 3196 /* If REUSE is a list, store as many value elements as will fit | |
| 3197 into the elements of REUSE. */ | |
| 3198 for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail)) | |
| 3199 { | |
| 3200 if (i < 2 * len + 2) | |
| 3201 XCAR (tail) = data[i]; | |
| 3202 else | |
| 3203 XCAR (tail) = Qnil; | |
| 3204 prev = tail; | |
| 3205 } | |
| 3206 | |
| 3207 /* If we couldn't fit all value elements into REUSE, | |
| 3208 cons up the rest of them and add them to the end of REUSE. */ | |
| 3209 if (i < 2 * len + 2) | |
| 3210 XCDR (prev) = Flist (2 * len + 2 - i, data + i); | |
| 3211 | |
| 3212 return reuse; | |
| 3213 } | |
| 3214 | |
| 3215 | |
| 3216 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /* | |
| 3217 Set internal data on last search match from elements of LIST. | |
| 1468 | 3218 LIST should have been created by calling `match-data' previously, |
| 3219 or be nil, to clear the internal match data. | |
| 428 | 3220 */ |
| 3221 (list)) | |
| 3222 { | |
| 3223 REGISTER int i; | |
| 3224 REGISTER Lisp_Object marker; | |
| 3225 int num_regs; | |
| 3226 int length; | |
| 3227 | |
| 853 | 3228 /* Some FSF junk with running_asynch_code, to preserve the match |
| 3229 data. Not necessary because we don't call process filters | |
| 3230 asynchronously (i.e. from within QUIT). */ | |
| 428 | 3231 |
| 3232 CONCHECK_LIST (list); | |
| 3233 | |
| 3234 /* Unless we find a marker with a buffer in LIST, assume that this | |
| 3235 match data came from a string. */ | |
| 3236 last_thing_searched = Qt; | |
| 3237 | |
| 3238 /* Allocate registers if they don't already exist. */ | |
| 3239 length = XINT (Flength (list)) / 2; | |
| 3240 num_regs = search_regs.num_regs; | |
| 3241 | |
| 3242 if (length > num_regs) | |
| 3243 { | |
| 3244 if (search_regs.num_regs == 0) | |
| 3245 { | |
| 3246 search_regs.start = xnew_array (regoff_t, length); | |
| 3247 search_regs.end = xnew_array (regoff_t, length); | |
| 3248 } | |
| 3249 else | |
| 3250 { | |
| 3251 XREALLOC_ARRAY (search_regs.start, regoff_t, length); | |
| 3252 XREALLOC_ARRAY (search_regs.end, regoff_t, length); | |
| 3253 } | |
| 3254 | |
| 3255 search_regs.num_regs = length; | |
| 3256 } | |
| 3257 | |
| 3258 for (i = 0; i < num_regs; i++) | |
| 3259 { | |
| 3260 marker = Fcar (list); | |
| 3261 if (NILP (marker)) | |
| 3262 { | |
| 3263 search_regs.start[i] = -1; | |
| 3264 list = Fcdr (list); | |
| 3265 } | |
| 3266 else | |
| 3267 { | |
| 3268 if (MARKERP (marker)) | |
| 3269 { | |
| 3270 if (XMARKER (marker)->buffer == 0) | |
| 3271 marker = Qzero; | |
| 3272 else | |
| 793 | 3273 last_thing_searched = wrap_buffer (XMARKER (marker)->buffer); |
| 428 | 3274 } |
| 3275 | |
| 3276 CHECK_INT_COERCE_MARKER (marker); | |
| 3277 search_regs.start[i] = XINT (marker); | |
| 3278 list = Fcdr (list); | |
| 3279 | |
| 3280 marker = Fcar (list); | |
| 3281 if (MARKERP (marker) && XMARKER (marker)->buffer == 0) | |
| 3282 marker = Qzero; | |
| 3283 | |
| 3284 CHECK_INT_COERCE_MARKER (marker); | |
| 3285 search_regs.end[i] = XINT (marker); | |
| 3286 } | |
| 3287 list = Fcdr (list); | |
| 3288 } | |
| 3289 | |
| 3290 return Qnil; | |
| 3291 } | |
| 3292 | |
| 3293 /* Quote a string to inactivate reg-expr chars */ | |
| 3294 | |
| 3295 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /* | |
| 3296 Return a regexp string which matches exactly STRING and nothing else. | |
| 3297 */ | |
| 444 | 3298 (string)) |
| 428 | 3299 { |
| 867 | 3300 REGISTER Ibyte *in, *out, *end; |
| 3301 REGISTER Ibyte *temp; | |
| 428 | 3302 |
| 444 | 3303 CHECK_STRING (string); |
| 428 | 3304 |
| 2367 | 3305 temp = alloca_ibytes (XSTRING_LENGTH (string) * 2); |
| 428 | 3306 |
| 3307 /* Now copy the data into the new string, inserting escapes. */ | |
| 3308 | |
| 444 | 3309 in = XSTRING_DATA (string); |
| 3310 end = in + XSTRING_LENGTH (string); | |
| 428 | 3311 out = temp; |
| 3312 | |
| 3313 while (in < end) | |
| 3314 { | |
| 867 | 3315 Ichar c = itext_ichar (in); |
| 428 | 3316 |
| 3317 if (c == '[' || c == ']' | |
| 3318 || c == '*' || c == '.' || c == '\\' | |
| 3319 || c == '?' || c == '+' | |
| 3320 || c == '^' || c == '$') | |
| 3321 *out++ = '\\'; | |
| 867 | 3322 out += set_itext_ichar (out, c); |
| 3323 INC_IBYTEPTR (in); | |
| 428 | 3324 } |
| 3325 | |
| 3326 return make_string (temp, out - temp); | |
| 3327 } | |
| 3328 | |
| 3329 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /* | |
| 3330 Set the regexp to be used to match a word in regular-expression searching. | |
| 3331 #### Not yet implemented. Currently does nothing. | |
| 3332 #### Do not use this yet. Its calling interface is likely to change. | |
| 3333 */ | |
| 2286 | 3334 (UNUSED (regexp))) |
| 428 | 3335 { |
| 3336 return Qnil; | |
| 3337 } | |
| 3338 | |
| 3339 | |
| 5041 | 3340 #ifdef DEBUG_XEMACS |
| 3341 | |
| 3342 static int | |
| 3343 debug_regexps_changed (Lisp_Object UNUSED (sym), Lisp_Object *val, | |
| 3344 Lisp_Object UNUSED (in_object), | |
| 3345 int UNUSED (flags)) | |
| 3346 { | |
| 3347 int newval = 0; | |
| 3348 | |
| 3349 EXTERNAL_LIST_LOOP_2 (elt, *val) | |
| 3350 { | |
| 3351 CHECK_SYMBOL (elt); | |
| 3352 if (EQ (elt, Qcompilation)) | |
| 3353 newval |= RE_DEBUG_COMPILATION; | |
| 3354 else if (EQ (elt, Qfailure_point)) | |
| 3355 newval |= RE_DEBUG_FAILURE_POINT; | |
| 3356 else if (EQ (elt, Qmatching)) | |
| 3357 newval |= RE_DEBUG_MATCHING; | |
| 3358 else | |
| 3359 invalid_argument | |
| 3360 ("Expected `compilation', `failure-point' or `matching'", elt); | |
| 3361 } | |
| 3362 debug_regexps = newval; | |
| 3363 return 0; | |
| 3364 } | |
| 3365 | |
| 3366 #endif /* DEBUG_XEMACS */ | |
| 3367 | |
| 3368 | |
| 428 | 3369 /************************************************************************/ |
| 3370 /* initialization */ | |
| 3371 /************************************************************************/ | |
| 3372 | |
| 3373 void | |
| 3374 syms_of_search (void) | |
| 3375 { | |
| 3376 | |
| 442 | 3377 DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation); |
| 3378 DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error); | |
| 563 | 3379 Fput (Qinvalid_regexp, Qerror_lacks_explanatory_string, Qt); |
| 428 | 3380 |
| 3381 DEFSUBR (Flooking_at); | |
| 3382 DEFSUBR (Fposix_looking_at); | |
| 3383 DEFSUBR (Fstring_match); | |
| 3384 DEFSUBR (Fposix_string_match); | |
| 3385 DEFSUBR (Fskip_chars_forward); | |
| 3386 DEFSUBR (Fskip_chars_backward); | |
| 3387 DEFSUBR (Fskip_syntax_forward); | |
| 3388 DEFSUBR (Fskip_syntax_backward); | |
| 3389 DEFSUBR (Fsearch_forward); | |
| 3390 DEFSUBR (Fsearch_backward); | |
| 3391 DEFSUBR (Fword_search_forward); | |
| 3392 DEFSUBR (Fword_search_backward); | |
| 3393 DEFSUBR (Fre_search_forward); | |
| 3394 DEFSUBR (Fre_search_backward); | |
| 3395 DEFSUBR (Fposix_search_forward); | |
| 3396 DEFSUBR (Fposix_search_backward); | |
| 3397 DEFSUBR (Freplace_match); | |
| 3398 DEFSUBR (Fmatch_beginning); | |
| 3399 DEFSUBR (Fmatch_end); | |
| 3400 DEFSUBR (Fmatch_data); | |
| 3401 DEFSUBR (Fstore_match_data); | |
| 3402 DEFSUBR (Fregexp_quote); | |
| 3403 DEFSUBR (Fset_word_regexp); | |
| 3404 } | |
| 3405 | |
| 3406 void | |
| 3407 reinit_vars_of_search (void) | |
| 3408 { | |
| 3409 int i; | |
| 3410 | |
| 3411 last_thing_searched = Qnil; | |
| 3412 staticpro_nodump (&last_thing_searched); | |
| 3413 | |
| 3414 for (i = 0; i < REGEXP_CACHE_SIZE; ++i) | |
| 3415 { | |
| 3416 searchbufs[i].buf.allocated = 100; | |
| 3417 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100); | |
| 3418 searchbufs[i].buf.fastmap = searchbufs[i].fastmap; | |
| 3419 searchbufs[i].regexp = Qnil; | |
| 3420 staticpro_nodump (&searchbufs[i].regexp); | |
| 3421 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]); | |
| 3422 } | |
| 3423 searchbuf_head = &searchbufs[0]; | |
| 3424 } | |
| 3425 | |
| 3426 void | |
| 3427 vars_of_search (void) | |
| 3428 { | |
| 3429 DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /* | |
| 3430 *Regular expression to be used in `forward-word'. | |
| 3431 #### Not yet implemented. | |
| 3432 */ ); | |
| 3433 Vforward_word_regexp = Qnil; | |
| 3434 | |
| 3435 DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /* | |
| 3436 *Regular expression to be used in `backward-word'. | |
| 3437 #### Not yet implemented. | |
| 3438 */ ); | |
| 3439 Vbackward_word_regexp = Qnil; | |
| 502 | 3440 |
| 3441 DEFVAR_INT ("warn-about-possibly-incompatible-back-references", | |
| 3442 &warn_about_possibly_incompatible_back_references /* | |
| 3443 If true, issue warnings when new-semantics back references occur. | |
| 3444 This is to catch places where old code might inadvertently have changed | |
| 3445 semantics. This will occur in old code only where more than nine groups | |
| 3446 occur and a back reference to one of them is directly followed by a digit. | |
| 3447 */ ); | |
| 3448 warn_about_possibly_incompatible_back_references = 1; | |
| 814 | 3449 |
| 2421 | 3450 Vskip_chars_range_table = Fmake_range_table (Qstart_closed_end_closed); |
| 428 | 3451 staticpro (&Vskip_chars_range_table); |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3452 #ifdef DEBUG_XEMACS |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3453 DEFSYMBOL (Qsearch_algorithm_used); |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3454 DEFSYMBOL (Qboyer_moore); |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3455 DEFSYMBOL (Qsimple_search); |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3456 |
| 5041 | 3457 DEFSYMBOL (Qcompilation); |
| 3458 DEFSYMBOL (Qfailure_point); | |
| 3459 DEFSYMBOL (Qmatching); | |
| 3460 | |
| 3461 DEFVAR_INT ("debug-searches", &debug_searches /* | |
|
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3462 If non-zero, bind `search-algorithm-used' to `boyer-moore' or `simple-search', |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3463 depending on the algorithm used for each search. Used for testing. |
|
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3464 */ ); |
| 5041 | 3465 debug_searches = 0; |
| 3466 | |
| 3467 DEFVAR_LISP_MAGIC ("debug-regexps", &Vdebug_regexps, /* | |
| 3468 List of areas to display debug info about during regexp operation. | |
| 3469 The following areas are recognized: | |
| 3470 | |
| 3471 `compilation' Display the result of compiling a regexp. | |
| 3472 `failure-point' Display info about failure points reached. | |
| 3473 `matching' Display info about the process of matching a regex against | |
| 3474 text. | |
| 3475 */ debug_regexps_changed); | |
| 3476 Vdebug_regexps = Qnil; | |
| 3477 debug_regexps = 0; | |
| 3478 #endif /* DEBUG_XEMACS */ | |
| 428 | 3479 } |
