Mercurial > hg > xemacs-beta
annotate src/search.c @ 5940:c608d4b0b75e cygwin64 tip
rescue lost branch from 64bit.backup
author | Henry Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 16 Dec 2021 18:48:58 +0000 |
parents | 6e5a7278f9bf |
children |
rev | line source |
---|---|
428 | 1 /* String search routines for XEmacs. |
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
5041 | 4 Copyright (C) 2001, 2002, 2010 Ben Wing. |
428 | 5 |
6 This file is part of XEmacs. | |
7 | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5089
diff
changeset
|
8 XEmacs is free software: you can redistribute it and/or modify it |
428 | 9 under the terms of the GNU General Public License as published by the |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5089
diff
changeset
|
10 Free Software Foundation, either version 3 of the License, or (at your |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5089
diff
changeset
|
11 option) any later version. |
428 | 12 |
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5089
diff
changeset
|
19 along with XEmacs. If not, see <http://www.gnu.org/licenses/>. */ |
428 | 20 |
21 /* Synched up with: FSF 19.29, except for region-cache stuff. */ | |
22 | |
23 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */ | |
24 | |
826 | 25 /* This file has been Mule-ized. */ |
428 | 26 |
27 #include <config.h> | |
28 #include "lisp.h" | |
29 | |
30 #include "buffer.h" | |
31 #include "insdel.h" | |
32 #include "opaque.h" | |
33 #ifdef REGION_CACHE_NEEDS_WORK | |
34 #include "region-cache.h" | |
35 #endif | |
36 #include "syntax.h" | |
37 | |
38 #include <sys/types.h> | |
39 #include "regex.h" | |
446 | 40 #include "casetab.h" |
41 #include "chartab.h" | |
42 | |
/* Map the character POS through the translation table TABLE; when TABLE
   is nil, yield POS unchanged.  POS is parenthesized at each use so that
   an arbitrary expression (e.g. an assignment or a sum) can be passed
   without the cast or the conditional operator re-associating it. */
#define TRANSLATE(table, pos) \
  (!NILP (table) ? TRT_TABLE_OF (table, (Ichar) (pos)) : (pos))
428 | 45 |
46 #define REGEXP_CACHE_SIZE 20 | |
47 | |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
48 #ifdef DEBUG_XEMACS |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
49 |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
50 /* Used in tests/automated/case-tests.el if available. */ |
5041 | 51 Fixnum debug_searches; |
52 | |
53 /* Declare as int rather than Bitflags because it's used by regex.c, which | |
54 may be used outside of XEmacs (e.g. etags.c). */ | |
55 int debug_regexps; | |
56 Lisp_Object Vdebug_regexps; | |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
57 |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
58 Lisp_Object Qsearch_algorithm_used, Qboyer_moore, Qsimple_search; |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
59 |
5041 | 60 Lisp_Object Qcompilation, Qfailure_point, Qmatching; |
61 | |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
62 #endif |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
63 |
428 | 64 /* If the regexp is non-nil, then the buffer contains the compiled form |
65 of that regexp, suitable for searching. */ | |
446 | 66 struct regexp_cache |
67 { | |
428 | 68 struct regexp_cache *next; |
69 Lisp_Object regexp; | |
70 struct re_pattern_buffer buf; | |
71 char fastmap[0400]; | |
72 /* Nonzero means regexp was compiled to do full POSIX backtracking. */ | |
73 char posix; | |
74 }; | |
75 | |
76 /* The instances of that struct. */ | |
77 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE]; | |
78 | |
79 /* The head of the linked list; points to the most recently used buffer. */ | |
80 static struct regexp_cache *searchbuf_head; | |
81 | |
82 | |
83 /* Every call to re_match, etc., must pass &search_regs as the regs | |
84 argument unless you can show it is unnecessary (i.e., if re_match | |
85 is certainly going to be called again before region-around-match | |
86 can be called). | |
87 | |
88 Since the registers are now dynamically allocated, we need to make | |
89 sure not to refer to the Nth register before checking that it has | |
90 been allocated by checking search_regs.num_regs. | |
91 | |
92 The regex code keeps track of whether it has allocated the search | |
93 buffer using bits in the re_pattern_buffer. This means that whenever | |
94 you compile a new pattern, it completely forgets whether it has | |
95 allocated any registers, and will allocate new registers the next | |
96 time you call a searching or matching function. Therefore, we need | |
97 to call re_set_registers after compiling a new pattern or after | |
98 setting the match registers, so that the regex functions will be | |
99 able to free or re-allocate it properly. */ | |
100 | |
101 /* Note: things get trickier under Mule because the values returned from | |
826 | 102 the regexp routines are in Bytebpos's but we need them to be in Charbpos's. |
428 | 103 We take the easy way out for the moment and just convert them immediately. |
104 We could be more clever by not converting them until necessary, but | |
105 that gets real ugly real fast since the buffer might have changed and | |
106 the positions might be out of sync or out of range. | |
107 */ | |
108 static struct re_registers search_regs; | |
109 | |
1468 | 110 /* Every function that sets the match data _must_ clear unused search |
111 registers on success. An unsuccessful search or match _must_ preserve | |
112 the search registers. The traditional documentation implied that | |
113 any match operation might trash the registers, but in fact failures | |
114 have always preserved the match data (in GNU Emacs as well). Some | |
115 plausible code depends on this behavior (cf. `w3-configuration-data' | |
116 in library "w3-cfg"). | |
117 | |
118 Ordinary string searches use set_search_regs to set the whole-string | |
119 match. That function takes care of clearing the unused subexpression | |
1425 | 120 registers. |
121 */ | |
122 static void set_search_regs (struct buffer *buf, Charbpos beg, Charcount len); | |
1468 | 123 static void clear_search_regs (void); |
1425 | 124 |
428 | 125 /* The buffer in which the last search was performed, or |
126 Qt if the last search was done in a string; | |
127 Qnil if no searching has been done yet. */ | |
128 static Lisp_Object last_thing_searched; | |
129 | |
130 /* error condition signalled when regexp compile_pattern fails */ | |
131 | |
132 Lisp_Object Qinvalid_regexp; | |
133 | |
134 /* Regular expressions used in forward/backward-word */ | |
135 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp; | |
136 | |
507 | 137 Fixnum warn_about_possibly_incompatible_back_references; |
502 | 138 |
428 | 139 /* range table for use with skip_chars. Only needed for Mule. */ |
140 Lisp_Object Vskip_chars_range_table; | |
141 | |
867 | 142 static Charbpos simple_search (struct buffer *buf, Ibyte *base_pat, |
826 | 143 Bytecount len, Bytebpos pos, Bytebpos lim, |
144 EMACS_INT n, Lisp_Object trt); | |
867 | 145 static Charbpos boyer_moore (struct buffer *buf, Ibyte *base_pat, |
826 | 146 Bytecount len, Bytebpos pos, Bytebpos lim, |
147 EMACS_INT n, Lisp_Object trt, | |
148 Lisp_Object inverse_trt, int charset_base); | |
665 | 149 static Charbpos search_buffer (struct buffer *buf, Lisp_Object str, |
826 | 150 Charbpos charbpos, Charbpos buflim, EMACS_INT n, |
151 int RE, Lisp_Object trt, | |
152 Lisp_Object inverse_trt, int posix); | |
771 | 153 |
2268 | 154 static DECLARE_DOESNT_RETURN (matcher_overflow (void)); |
155 | |
156 static DOESNT_RETURN | |
157 matcher_overflow () | |
428 | 158 { |
563 | 159 stack_overflow ("Stack overflow in regexp matcher", Qunbound); |
428 | 160 } |
161 | |
162 /* Compile a regexp and signal a Lisp error if anything goes wrong. | |
163 PATTERN is the pattern to compile. | |
164 CP is the place to put the result. | |
826 | 165 TRANSLATE is a translation table for ignoring case, or Qnil for none. |
428 | 166 REGP is the structure that says where to store the "register" |
167 values that will result from matching this pattern. | |
168 If it is 0, we should compile the pattern not to record any | |
169 subexpression bounds. | |
170 POSIX is nonzero if we want full backtracking (POSIX style) | |
171 for this pattern. 0 means backtrack only enough to get a valid match. */ | |
172 | |
173 static int | |
174 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, | |
2286 | 175 struct re_registers *UNUSED (regp), Lisp_Object translate, |
826 | 176 int posix, Error_Behavior errb) |
428 | 177 { |
442 | 178 const char *val; |
428 | 179 reg_syntax_t old; |
180 | |
181 cp->regexp = Qnil; | |
182 cp->buf.translate = translate; | |
183 cp->posix = posix; | |
184 old = re_set_syntax (RE_SYNTAX_EMACS | |
185 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING)); | |
442 | 186 val = (const char *) |
428 | 187 re_compile_pattern ((char *) XSTRING_DATA (pattern), |
188 XSTRING_LENGTH (pattern), &cp->buf); | |
189 re_set_syntax (old); | |
190 if (val) | |
191 { | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
192 maybe_signal_error (Qinvalid_regexp, 0, build_cistring (val), |
428 | 193 Qsearch, errb); |
194 return 0; | |
195 } | |
196 | |
197 cp->regexp = Fcopy_sequence (pattern); | |
198 return 1; | |
199 } | |
200 | |
201 /* Compile a regexp if necessary, but first check to see if there's one in | |
202 the cache. | |
203 PATTERN is the pattern to compile. | |
826 | 204 TRANSLATE is a translation table for ignoring case, or Qnil for none. |
428 | 205 REGP is the structure that says where to store the "register" |
206 values that will result from matching this pattern. | |
207 If it is 0, we should compile the pattern not to record any | |
208 subexpression bounds. | |
209 POSIX is nonzero if we want full backtracking (POSIX style) | |
210 for this pattern. 0 means backtrack only enough to get a valid match. */ | |
211 | |
212 struct re_pattern_buffer * | |
213 compile_pattern (Lisp_Object pattern, struct re_registers *regp, | |
2286 | 214 Lisp_Object translate, Lisp_Object UNUSED (searchobj), |
215 struct buffer *UNUSED (searchbuf), int posix, | |
216 Error_Behavior errb) | |
428 | 217 { |
218 struct regexp_cache *cp, **cpp; | |
219 | |
220 for (cpp = &searchbuf_head; ; cpp = &cp->next) | |
221 { | |
222 cp = *cpp; | |
826 | 223 /* &&#### once we fix up the fastmap code in regex.c for 8-bit-fixed, |
224 we need to record and compare the buffer and format, since the | |
225 fastmap will reflect the state of the buffer -- and things get | |
226 more complicated if the buffer has changed formats or (esp.) has | |
227 kept the format but changed its interpretation! may need to have | |
228 the code that changes the interpretation go through and invalidate | |
229 cache entries for that buffer. */ | |
428 | 230 if (!NILP (Fstring_equal (cp->regexp, pattern)) |
446 | 231 && EQ (cp->buf.translate, translate) |
428 | 232 && cp->posix == posix) |
233 break; | |
234 | |
235 /* If we're at the end of the cache, compile into the last cell. */ | |
236 if (cp->next == 0) | |
237 { | |
826 | 238 if (!compile_pattern_1 (cp, pattern, regp, translate, |
239 posix, errb)) | |
428 | 240 return 0; |
241 break; | |
242 } | |
243 } | |
244 | |
245 /* When we get here, cp (aka *cpp) contains the compiled pattern, | |
246 either because we found it in the cache or because we just compiled it. | |
247 Move it to the front of the queue to mark it as most recently used. */ | |
248 *cpp = cp->next; | |
249 cp->next = searchbuf_head; | |
250 searchbuf_head = cp; | |
251 | |
252 /* Advise the searching functions about the space we have allocated | |
253 for register data. */ | |
254 if (regp) | |
255 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end); | |
256 | |
257 return &cp->buf; | |
258 } | |
259 | |
260 /* Error condition used for failing searches */ | |
261 Lisp_Object Qsearch_failed; | |
262 | |
2268 | 263 static DECLARE_DOESNT_RETURN (signal_failure (Lisp_Object)); |
264 | |
265 static DOESNT_RETURN | |
428 | 266 signal_failure (Lisp_Object arg) |
267 { | |
446 | 268 for (;;) |
269 Fsignal (Qsearch_failed, list1 (arg)); | |
428 | 270 } |
271 | |
826 | 272 /* Convert the search registers from Bytebpos's to Charbpos's. Needs to be |
428 | 273 done after each regexp match that uses the search regs. |
274 | |
275 We could get a potential speedup by not converting the search registers | |
276 until it's really necessary, e.g. when match-data or replace-match is | |
277 called. However, this complexifies the code a lot (e.g. the buffer | |
826 | 278 could have changed and the Bytebpos's stored might be invalid) and is |
428 | 279 probably not a great time-saver. */ |
280 | |
281 static void | |
282 fixup_search_regs_for_buffer (struct buffer *buf) | |
283 { | |
284 int i; | |
285 int num_regs = search_regs.num_regs; | |
286 | |
287 for (i = 0; i < num_regs; i++) | |
288 { | |
289 if (search_regs.start[i] >= 0) | |
826 | 290 search_regs.start[i] = bytebpos_to_charbpos (buf, |
291 search_regs.start[i]); | |
428 | 292 if (search_regs.end[i] >= 0) |
665 | 293 search_regs.end[i] = bytebpos_to_charbpos (buf, search_regs.end[i]); |
428 | 294 } |
295 } | |
296 | |
297 /* Similar but for strings. */ | |
298 static void | |
299 fixup_search_regs_for_string (Lisp_Object string) | |
300 { | |
301 int i; | |
302 int num_regs = search_regs.num_regs; | |
303 | |
304 /* #### bytecount_to_charcount() is not that efficient. This function | |
867 | 305 could be faster if it did its own conversion (using INC_IBYTEPTR() |
428 | 306 and such), because the register ends are likely to be somewhat ordered. |
307 (Even if not, you could sort them.) | |
308 | |
309 Think about this if this function is a time hog, which it's probably | |
310 not. */ | |
311 for (i = 0; i < num_regs; i++) | |
312 { | |
313 if (search_regs.start[i] > 0) | |
314 { | |
315 search_regs.start[i] = | |
793 | 316 string_index_byte_to_char (string, search_regs.start[i]); |
428 | 317 } |
318 if (search_regs.end[i] > 0) | |
319 { | |
320 search_regs.end[i] = | |
793 | 321 string_index_byte_to_char (string, search_regs.end[i]); |
428 | 322 } |
323 } | |
324 } | |
325 | |
326 | |
327 static Lisp_Object | |
328 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix) | |
329 { | |
330 Lisp_Object val; | |
665 | 331 Bytebpos p1, p2; |
428 | 332 Bytecount s1, s2; |
333 REGISTER int i; | |
334 struct re_pattern_buffer *bufp; | |
826 | 335 struct syntax_cache scache_struct; |
336 struct syntax_cache *scache = &scache_struct; | |
337 | |
428 | 338 CHECK_STRING (string); |
339 bufp = compile_pattern (string, &search_regs, | |
340 (!NILP (buf->case_fold_search) | |
446 | 341 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil), |
826 | 342 wrap_buffer (buf), buf, posix, ERROR_ME); |
428 | 343 |
344 QUIT; | |
345 | |
346 /* Get pointers and sizes of the two strings | |
347 that make up the visible portion of the buffer. */ | |
348 | |
826 | 349 p1 = BYTE_BUF_BEGV (buf); |
350 p2 = BYTE_BUF_CEILING_OF (buf, p1); | |
428 | 351 s1 = p2 - p1; |
826 | 352 s2 = BYTE_BUF_ZV (buf) - p2; |
353 | |
354 /* By making the regex object, regex buffer, and syntax cache arguments | |
355 to re_{search,match}{,_2}, we've removed the need to do nasty things | |
356 to deal with regex reentrancy. (See stack trace in signal.c for proof | |
357 that this can happen.) | |
358 | |
359 #### there is still a potential problem with the regex cache -- | |
360 the compiled regex could be overwritten. we'd need 20-fold | |
361 reentrancy, though. Fix this. */ | |
362 | |
363 i = re_match_2 (bufp, (char *) BYTE_BUF_BYTE_ADDRESS (buf, p1), | |
364 s1, (char *) BYTE_BUF_BYTE_ADDRESS (buf, p2), s2, | |
365 BYTE_BUF_PT (buf) - BYTE_BUF_BEGV (buf), &search_regs, | |
366 BYTE_BUF_ZV (buf) - BYTE_BUF_BEGV (buf), wrap_buffer (buf), | |
367 buf, scache); | |
428 | 368 |
369 if (i == -2) | |
370 matcher_overflow (); | |
371 | |
372 val = (0 <= i ? Qt : Qnil); | |
373 if (NILP (val)) | |
826 | 374 return Qnil; |
428 | 375 { |
376 int num_regs = search_regs.num_regs; | |
377 for (i = 0; i < num_regs; i++) | |
378 if (search_regs.start[i] >= 0) | |
379 { | |
826 | 380 search_regs.start[i] += BYTE_BUF_BEGV (buf); |
381 search_regs.end[i] += BYTE_BUF_BEGV (buf); | |
428 | 382 } |
383 } | |
793 | 384 last_thing_searched = wrap_buffer (buf); |
428 | 385 fixup_search_regs_for_buffer (buf); |
826 | 386 return val; |
428 | 387 } |
388 | |
389 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /* | |
390 Return t if text after point matches regular expression REGEXP. | |
1468 | 391 When the match is successful, this function modifies the match data |
392 that `match-beginning', `match-end' and `match-data' access; save the | |
393 match data with `match-data' and restore it with `store-match-data' if | |
394 you want to preserve them. If the match fails, the match data from the | |
395 previous success match is preserved. | |
428 | 396 |
397 Optional argument BUFFER defaults to the current buffer. | |
398 */ | |
399 (regexp, buffer)) | |
400 { | |
401 return looking_at_1 (regexp, decode_buffer (buffer, 0), 0); | |
402 } | |
403 | |
404 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /* | |
405 Return t if text after point matches regular expression REGEXP. | |
406 Find the longest match, in accord with Posix regular expression rules. | |
1468 | 407 When the match is successful, this function modifies the match data |
408 that `match-beginning', `match-end' and `match-data' access; save the | |
409 match data with `match-data' and restore it with `store-match-data' if | |
410 you want to preserve them. If the match fails, the match data from the | |
411 previous success match is preserved. | |
428 | 412 |
413 Optional argument BUFFER defaults to the current buffer. | |
414 */ | |
415 (regexp, buffer)) | |
416 { | |
826 | 417 return looking_at_1 (regexp, decode_buffer (buffer, 0), 1); |
428 | 418 } |
419 | |
420 static Lisp_Object | |
421 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, | |
5649
d026b665014f
Actually obey POSIX rules in #'posix-string-match, don't ignore them.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
422 struct buffer *buf, int posix) |
428 | 423 { |
424 Bytecount val; | |
425 Charcount s; | |
426 struct re_pattern_buffer *bufp; | |
427 | |
853 | 428 /* Some FSF junk with running_asynch_code, to preserve the match |
429 data. Not necessary because we don't call process filters | |
430 asynchronously (i.e. from within QUIT). */ | |
428 | 431 |
432 CHECK_STRING (regexp); | |
433 CHECK_STRING (string); | |
434 | |
435 if (NILP (start)) | |
436 s = 0; | |
437 else | |
438 { | |
826 | 439 Charcount len = string_char_length (string); |
428 | 440 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
441 CHECK_FIXNUM (start); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
442 s = XFIXNUM (start); |
428 | 443 if (s < 0 && -s <= len) |
444 s = len + s; | |
445 else if (0 > s || s > len) | |
446 args_out_of_range (string, start); | |
447 } | |
448 | |
449 | |
450 bufp = compile_pattern (regexp, &search_regs, | |
451 (!NILP (buf->case_fold_search) | |
446 | 452 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil), |
5649
d026b665014f
Actually obey POSIX rules in #'posix-string-match, don't ignore them.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
453 string, buf, posix, ERROR_ME); |
428 | 454 QUIT; |
455 { | |
793 | 456 Bytecount bis = string_index_char_to_byte (string, s); |
826 | 457 struct syntax_cache scache_struct; |
458 struct syntax_cache *scache = &scache_struct; | |
459 | |
460 /* By making the regex object, regex buffer, and syntax cache arguments | |
461 to re_{search,match}{,_2}, we've removed the need to do nasty things | |
462 to deal with regex reentrancy. (See stack trace in signal.c for proof | |
463 that this can happen.) | |
464 | |
465 #### there is still a potential problem with the regex cache -- | |
466 the compiled regex could be overwritten. we'd need 20-fold | |
467 reentrancy, though. Fix this. */ | |
468 | |
428 | 469 val = re_search (bufp, (char *) XSTRING_DATA (string), |
470 XSTRING_LENGTH (string), bis, | |
471 XSTRING_LENGTH (string) - bis, | |
826 | 472 &search_regs, string, buf, scache); |
428 | 473 } |
474 if (val == -2) | |
475 matcher_overflow (); | |
826 | 476 if (val < 0) return Qnil; |
428 | 477 last_thing_searched = Qt; |
478 fixup_search_regs_for_string (string); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
479 return make_fixnum (string_index_byte_to_char (string, val)); |
428 | 480 } |
481 | |
482 DEFUN ("string-match", Fstring_match, 2, 4, 0, /* | |
483 Return index of start of first match for REGEXP in STRING, or nil. | |
484 If third arg START is non-nil, start search at that index in STRING. | |
485 For index of first char beyond the match, do (match-end 0). | |
486 `match-end' and `match-beginning' also give indices of substrings | |
487 matched by parenthesis constructs in the pattern. | |
488 | |
826 | 489 Optional arg BUFFER controls how case folding and syntax and category |
490 lookup is done (according to the value of `case-fold-search' in that buffer | |
491 and that buffer's case tables, syntax tables, and category table). If nil | |
492 or unspecified, it defaults *NOT* to the current buffer but instead: | |
493 | |
494 -- the value of `case-fold-search' in the current buffer is still respected | |
495 because of idioms like | |
496 | |
497 (let ((case-fold-search nil)) | |
498 (string-match "^foo.*bar" string)) | |
499 | |
500 but the case, syntax, and category tables come from the standard tables, | |
1468 | 501 which are accessed through functions `default-{case,syntax,category}-table' |
502 and serve as the parents of the tables in particular buffer. | |
503 | |
504 When the match is successful, this function modifies the match data | |
505 that `match-beginning', `match-end' and `match-data' access; save the | |
506 match data with `match-data' and restore it with `store-match-data' if | |
507 you want to preserve them. If the match fails, the match data from the | |
508 previous success match is preserved. | |
428 | 509 */ |
510 (regexp, string, start, buffer)) | |
511 { | |
826 | 512 /* &&#### implement new interp for buffer arg; check code to see if it |
513 makes more sense than prev */ | |
428 | 514 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0); |
515 } | |
516 | |
517 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /* | |
518 Return index of start of first match for REGEXP in STRING, or nil. | |
519 Find the longest match, in accord with Posix regular expression rules. | |
520 If third arg START is non-nil, start search at that index in STRING. | |
521 For index of first char beyond the match, do (match-end 0). | |
522 `match-end' and `match-beginning' also give indices of substrings | |
523 matched by parenthesis constructs in the pattern. | |
524 | |
525 Optional arg BUFFER controls how case folding is done (according to | |
526 the value of `case-fold-search' in that buffer and that buffer's case | |
527 tables) and defaults to the current buffer. | |
1468 | 528 |
529 When the match is successful, this function modifies the match data | |
530 that `match-beginning', `match-end' and `match-data' access; save the | |
531 match data with `match-data' and restore it with `store-match-data' if | |
532 you want to preserve them. If the match fails, the match data from the | |
533 previous success match is preserved. | |
428 | 534 */ |
535 (regexp, string, start, buffer)) | |
536 { | |
537 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1); | |
538 } | |
539 | |
540 /* Match REGEXP against STRING, searching all of STRING, | |
541 and return the index of the match, or negative on failure. | |
542 This does not clobber the match data. */ | |
543 | |
544 Bytecount | |
1347 | 545 fast_string_match (Lisp_Object regexp, const Ibyte *nonreloc, |
428 | 546 Lisp_Object reloc, Bytecount offset, |
547 Bytecount length, int case_fold_search, | |
578 | 548 Error_Behavior errb, int no_quit) |
428 | 549 { |
550 Bytecount val; | |
867 | 551 Ibyte *newnonreloc = (Ibyte *) nonreloc; |
428 | 552 struct re_pattern_buffer *bufp; |
826 | 553 struct syntax_cache scache_struct; |
554 struct syntax_cache *scache = &scache_struct; | |
428 | 555 |
556 bufp = compile_pattern (regexp, 0, | |
557 (case_fold_search | |
771 | 558 ? XCASE_TABLE_DOWNCASE (Vstandard_case_table) |
446 | 559 : Qnil), |
826 | 560 reloc, 0, 0, errb); |
428 | 561 if (!bufp) |
562 return -1; /* will only do this when errb != ERROR_ME */ | |
563 if (!no_quit) | |
564 QUIT; | |
565 else | |
566 no_quit_in_re_search = 1; | |
567 | |
568 fixup_internal_substring (nonreloc, reloc, offset, &length); | |
569 | |
771 | 570 /* Don't need to protect against GC inside of re_search() due to QUIT; |
571 QUIT is GC-inhibited. */ | |
428 | 572 if (!NILP (reloc)) |
771 | 573 newnonreloc = XSTRING_DATA (reloc); |
574 | |
826 | 575 /* By making the regex object, regex buffer, and syntax cache arguments |
576 to re_{search,match}{,_2}, we've removed the need to do nasty things | |
577 to deal with regex reentrancy. (See stack trace in signal.c for proof | |
578 that this can happen.) | |
579 | |
580 #### there is still a potential problem with the regex cache -- | |
581 the compiled regex could be overwritten. we'd need 20-fold | |
582 reentrancy, though. Fix this. */ | |
583 | |
428 | 584 val = re_search (bufp, (char *) newnonreloc + offset, length, 0, |
826 | 585 length, 0, reloc, 0, scache); |
428 | 586 |
587 no_quit_in_re_search = 0; | |
588 return val; | |
589 } | |
590 | |
591 Bytecount | |
592 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string) | |
593 { | |
594 return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0); | |
595 } | |
596 | |
597 | |
598 #ifdef REGION_CACHE_NEEDS_WORK | |
599 /* The newline cache: remembering which sections of text have no newlines. */ | |
600 | |
601 /* If the user has requested newline caching, make sure it's on. | |
602 Otherwise, make sure it's off. | |
603 This is our cheezy way of associating an action with the change of | |
604 state of a buffer-local variable. */ | |
605 static void | |
606 newline_cache_on_off (struct buffer *buf) | |
607 { | |
608 if (NILP (buf->cache_long_line_scans)) | |
609 { | |
610 /* It should be off. */ | |
611 if (buf->newline_cache) | |
612 { | |
613 free_region_cache (buf->newline_cache); | |
614 buf->newline_cache = 0; | |
615 } | |
616 } | |
617 else | |
618 { | |
619 /* It should be on. */ | |
620 if (buf->newline_cache == 0) | |
621 buf->newline_cache = new_region_cache (); | |
622 } | |
623 } | |
624 #endif | |
625 | |
626 /* Search in BUF for COUNT instances of the character TARGET between | |
627 START and END. | |
628 | |
629 If COUNT is positive, search forwards; END must be >= START. | |
630 If COUNT is negative, search backwards for the -COUNTth instance; | |
631 END must be <= START. | |
632 If COUNT is zero, do anything you please; run rogue, for all I care. | |
633 | |
634 If END is zero, use BEGV or ZV instead, as appropriate for the | |
635 direction indicated by COUNT. | |
636 | |
637 If we find COUNT instances, set *SHORTAGE to zero, and return the | |
638 position after the COUNTth match. Note that for reverse motion | |
639 this is not the same as the usual convention for Emacs motion commands. | |
640 | |
641 If we don't find COUNT instances before reaching END, set *SHORTAGE | |
642 to the number of TARGETs left unfound, and return END. | |
643 | |
644 If ALLOW_QUIT is non-zero, call QUIT periodically. */ | |
645 | |
665 | 646 static Bytebpos |
867 | 647 byte_scan_buffer (struct buffer *buf, Ichar target, Bytebpos st, Bytebpos en, |
872 | 648 EMACS_INT count, EMACS_INT *shortage, int allow_quit) |
428 | 649 { |
665 | 650 Bytebpos lim = en > 0 ? en : |
826 | 651 ((count > 0) ? BYTE_BUF_ZV (buf) : BYTE_BUF_BEGV (buf)); |
428 | 652 |
653 /* #### newline cache stuff in this function not yet ported */ | |
654 assert (count != 0); | |
655 | |
656 if (shortage) | |
657 *shortage = 0; | |
658 | |
659 if (count > 0) | |
660 { | |
661 #ifdef MULE | |
826 | 662 Internal_Format fmt = buf->text->format; |
663 /* Check for char that's unrepresentable in the buffer -- it | |
664 certainly can't be there. */ | |
867 | 665 if (!ichar_fits_in_format (target, fmt, wrap_buffer (buf))) |
428 | 666 { |
826 | 667 *shortage = count; |
668 return lim; | |
669 } | |
670 /* Due to the Mule representation of characters in a buffer, we can | |
671 simply search for characters in the range 0 - 127 directly; for | |
672 8-bit-fixed, we can do this for all characters. In other cases, | |
673 we do it the "hard" way. Note that this way works for all | |
674 characters and all formats, but the other way is faster. */ | |
675 else if (! (fmt == FORMAT_8_BIT_FIXED || | |
867 | 676 (fmt == FORMAT_DEFAULT && ichar_ascii_p (target)))) |
826 | 677 { |
867 | 678 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
428 | 679 while (st < lim && count > 0) |
680 { | |
826 | 681 if (BYTE_BUF_FETCH_CHAR_RAW (buf, st) == raw) |
428 | 682 count--; |
665 | 683 INC_BYTEBPOS (buf, st); |
428 | 684 } |
685 } | |
686 else | |
687 #endif | |
688 { | |
867 | 689 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
428 | 690 while (st < lim && count > 0) |
691 { | |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
692 Bytebpos ceiling; |
867 | 693 Ibyte *bufptr; |
428 | 694 |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
695 ceiling = BYTE_BUF_CEILING_OF (buf, st); |
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
696 ceiling = min (lim, ceiling); |
867 | 697 bufptr = (Ibyte *) memchr (BYTE_BUF_BYTE_ADDRESS (buf, st), |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
698 raw, ceiling - st); |
428 | 699 if (bufptr) |
700 { | |
701 count--; | |
826 | 702 st = BYTE_BUF_PTR_BYTE_POS (buf, bufptr) + 1; |
428 | 703 } |
704 else | |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
705 st = ceiling; |
428 | 706 } |
707 } | |
708 | |
709 if (shortage) | |
710 *shortage = count; | |
711 if (allow_quit) | |
712 QUIT; | |
713 return st; | |
714 } | |
715 else | |
716 { | |
717 #ifdef MULE | |
826 | 718 Internal_Format fmt = buf->text->format; |
719 /* Check for char that's unrepresentable in the buffer -- it | |
720 certainly can't be there. */ | |
867 | 721 if (!ichar_fits_in_format (target, fmt, wrap_buffer (buf))) |
428 | 722 { |
826 | 723 *shortage = -count; |
724 return lim; | |
725 } | |
726 else if (! (fmt == FORMAT_8_BIT_FIXED || | |
867 | 727 (fmt == FORMAT_DEFAULT && ichar_ascii_p (target)))) |
826 | 728 { |
867 | 729 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
428 | 730 while (st > lim && count < 0) |
731 { | |
665 | 732 DEC_BYTEBPOS (buf, st); |
826 | 733 if (BYTE_BUF_FETCH_CHAR_RAW (buf, st) == raw) |
428 | 734 count++; |
735 } | |
736 } | |
737 else | |
738 #endif | |
739 { | |
867 | 740 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
428 | 741 while (st > lim && count < 0) |
742 { | |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
743 Bytebpos floorpos; |
867 | 744 Ibyte *bufptr; |
745 Ibyte *floorptr; | |
428 | 746 |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
747 floorpos = BYTE_BUF_FLOOR_OF (buf, st); |
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
748 floorpos = max (lim, floorpos); |
428 | 749 /* No memrchr() ... */ |
826 | 750 bufptr = BYTE_BUF_BYTE_ADDRESS_BEFORE (buf, st); |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
751 floorptr = BYTE_BUF_BYTE_ADDRESS (buf, floorpos); |
428 | 752 while (bufptr >= floorptr) |
753 { | |
754 st--; | |
755 /* At this point, both ST and BUFPTR refer to the same | |
756 character. When the loop terminates, ST will | |
757 always point to the last character we tried. */ | |
867 | 758 if (*bufptr == (Ibyte) raw) |
428 | 759 { |
760 count++; | |
761 break; | |
762 } | |
763 bufptr--; | |
764 } | |
765 } | |
766 } | |
767 | |
768 if (shortage) | |
769 *shortage = -count; | |
770 if (allow_quit) | |
771 QUIT; | |
772 if (count) | |
773 return st; | |
774 else | |
775 { | |
776 /* We found the character we were looking for; we have to return | |
777 the position *after* it due to the strange way that the return | |
778 value is defined. */ | |
665 | 779 INC_BYTEBPOS (buf, st); |
428 | 780 return st; |
781 } | |
782 } | |
783 } | |
784 | |
665 | 785 Charbpos |
867 | 786 scan_buffer (struct buffer *buf, Ichar target, Charbpos start, Charbpos end, |
428 | 787 EMACS_INT count, EMACS_INT *shortage, int allow_quit) |
788 { | |
826 | 789 Bytebpos byte_retval; |
790 Bytebpos byte_start, byte_end; | |
791 | |
792 byte_start = charbpos_to_bytebpos (buf, start); | |
428 | 793 if (end) |
826 | 794 byte_end = charbpos_to_bytebpos (buf, end); |
428 | 795 else |
826 | 796 byte_end = 0; |
797 byte_retval = byte_scan_buffer (buf, target, byte_start, byte_end, count, | |
428 | 798 shortage, allow_quit); |
826 | 799 return bytebpos_to_charbpos (buf, byte_retval); |
428 | 800 } |
801 | |
665 | 802 Bytebpos |
826 | 803 byte_find_next_newline_no_quit (struct buffer *buf, Bytebpos from, int count) |
428 | 804 { |
826 | 805 return byte_scan_buffer (buf, '\n', from, 0, count, 0, 0); |
428 | 806 } |
807 | |
665 | 808 Charbpos |
809 find_next_newline_no_quit (struct buffer *buf, Charbpos from, int count) | |
428 | 810 { |
811 return scan_buffer (buf, '\n', from, 0, count, 0, 0); | |
812 } | |
813 | |
665 | 814 Charbpos |
815 find_next_newline (struct buffer *buf, Charbpos from, int count) | |
428 | 816 { |
817 return scan_buffer (buf, '\n', from, 0, count, 0, 1); | |
818 } | |
819 | |
826 | 820 Bytecount |
867 | 821 byte_find_next_ichar_in_string (Lisp_Object str, Ichar target, Bytecount st, |
428 | 822 EMACS_INT count) |
823 { | |
793 | 824 Bytebpos lim = XSTRING_LENGTH (str) -1; |
867 | 825 Ibyte *s = XSTRING_DATA (str); |
428 | 826 |
827 assert (count >= 0); | |
828 | |
829 #ifdef MULE | |
830 /* Due to the Mule representation of characters in a buffer, | |
831 we can simply search for characters in the range 0 - 127 | |
832 directly. For other characters, we do it the "hard" way. | |
833 Note that this way works for all characters but the other | |
834 way is faster. */ | |
835 if (target >= 0200) | |
836 { | |
837 while (st < lim && count > 0) | |
838 { | |
867 | 839 if (string_ichar (str, st) == target) |
428 | 840 count--; |
826 | 841 INC_BYTECOUNT (s, st); |
428 | 842 } |
843 } | |
844 else | |
845 #endif | |
846 { | |
847 while (st < lim && count > 0) | |
848 { | |
867 | 849 Ibyte *bufptr = (Ibyte *) memchr (itext_n_addr (s, st), |
428 | 850 (int) target, lim - st); |
851 if (bufptr) | |
852 { | |
853 count--; | |
826 | 854 st = (Bytebpos) (bufptr - s) + 1; |
428 | 855 } |
856 else | |
857 st = lim; | |
858 } | |
859 } | |
860 return st; | |
861 } | |
862 | |
863 /* Like find_next_newline, but returns position before the newline, | |
864 not after, and only search up to TO. This isn't just | |
865 find_next_newline (...)-1, because you might hit TO. */ | |
665 | 866 Charbpos |
826 | 867 find_before_next_newline (struct buffer *buf, Charbpos from, Charbpos to, |
868 int count) | |
428 | 869 { |
870 EMACS_INT shortage; | |
665 | 871 Charbpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1); |
428 | 872 |
873 if (shortage == 0) | |
874 pos--; | |
875 | |
876 return pos; | |
877 } | |
878 | |
872 | 879 /* This function synched with FSF 21.1 */ |
428 | 880 static Lisp_Object |
881 skip_chars (struct buffer *buf, int forwardp, int syntaxp, | |
882 Lisp_Object string, Lisp_Object lim) | |
883 { | |
867 | 884 REGISTER Ibyte *p, *pend; |
885 REGISTER Ichar c; | |
428 | 886 /* We store the first 256 chars in an array here and the rest in |
887 a range table. */ | |
888 unsigned char fastmap[0400]; | |
889 int negate = 0; | |
665 | 890 Charbpos limit; |
826 | 891 struct syntax_cache *scache; |
5653
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
892 Bitbyte class_bits = 0; |
826 | 893 |
428 | 894 if (NILP (lim)) |
895 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf); | |
896 else | |
897 { | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
898 CHECK_FIXNUM_COERCE_MARKER (lim); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
899 limit = XFIXNUM (lim); |
428 | 900 |
901 /* In any case, don't allow scan outside bounds of buffer. */ | |
902 if (limit > BUF_ZV (buf)) limit = BUF_ZV (buf); | |
903 if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf); | |
904 } | |
905 | |
906 CHECK_STRING (string); | |
907 p = XSTRING_DATA (string); | |
908 pend = p + XSTRING_LENGTH (string); | |
909 memset (fastmap, 0, sizeof (fastmap)); | |
910 | |
911 Fclear_range_table (Vskip_chars_range_table); | |
912 | |
913 if (p != pend && *p == '^') | |
914 { | |
915 negate = 1; | |
916 p++; | |
917 } | |
918 | |
919 /* Find the characters specified and set their elements of fastmap. | |
920 If syntaxp, each character counts as itself. | |
921 Otherwise, handle backslashes and ranges specially */ | |
922 | |
923 while (p != pend) | |
924 { | |
867 | 925 c = itext_ichar (p); |
926 INC_IBYTEPTR (p); | |
428 | 927 if (syntaxp) |
928 { | |
5542
dab422055bab
Correct array bound for syntax_code_spec.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5539
diff
changeset
|
929 if (c < 0200 && syntax_spec_code[c] < (unsigned char) Smax) |
428 | 930 fastmap[c] = 1; |
931 else | |
831 | 932 invalid_argument ("Invalid syntax designator", make_char (c)); |
428 | 933 } |
934 else | |
935 { | |
936 if (c == '\\') | |
937 { | |
938 if (p == pend) break; | |
867 | 939 c = itext_ichar (p); |
940 INC_IBYTEPTR (p); | |
428 | 941 } |
942 if (p != pend && *p == '-') | |
943 { | |
867 | 944 Ichar cend; |
428 | 945 |
872 | 946 /* Skip over the dash. */ |
428 | 947 p++; |
948 if (p == pend) break; | |
867 | 949 cend = itext_ichar (p); |
428 | 950 while (c <= cend && c < 0400) |
951 { | |
952 fastmap[c] = 1; | |
953 c++; | |
954 } | |
955 if (c <= cend) | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
956 Fput_range_table (make_fixnum (c), make_fixnum (cend), Qt, |
428 | 957 Vskip_chars_range_table); |
867 | 958 INC_IBYTEPTR (p); |
428 | 959 } |
5653
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
960 else if ('[' == c && p != pend && *p == ':') |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
961 { |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
962 Ibyte *colonp; |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
963 Extbyte *classname; |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
964 int ch = 0; |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
965 re_wctype_t cc; |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
966 |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
967 INC_IBYTEPTR (p); |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
968 |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
969 if (p == pend) |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
970 { |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
971 fastmap ['['] = fastmap[':'] = 1; |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
972 break; |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
973 } |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
974 |
5661
6e5a7278f9bf
Add cast to Ibyte *, search.c, thank you Jeff Sparkes.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5653
diff
changeset
|
975 colonp = (Ibyte *) memchr (p, ':', pend - p); |
5653
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
976 if (NULL == colonp || (colonp + 1) == pend || colonp[1] != ']') |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
977 { |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
978 fastmap ['['] = fastmap[':'] = 1; |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
979 continue; |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
980 } |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
981 |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
982 classname = alloca_extbytes (colonp - p + 1); |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
983 memmove (classname, p, colonp - p); |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
984 classname[colonp - p] = '\0'; |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
985 cc = re_wctype (classname); |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
986 |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
987 if (cc == RECC_ERROR) |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
988 { |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
989 invalid_argument ("Invalid character class", |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
990 build_extstring (classname, Qbinary)); |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
991 } |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
992 |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
993 for (ch = 0; ch < countof (fastmap); ++ch) |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
994 { |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
995 if (re_iswctype (ch, cc, buf)) |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
996 { |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
997 fastmap[ch] = 1; |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
998 } |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
999 } |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1000 |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1001 compile_char_class (cc, Vskip_chars_range_table, &class_bits); |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1002 |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1003 p = colonp + 2; |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1004 } |
428 | 1005 else |
1006 { | |
1007 if (c < 0400) | |
1008 fastmap[c] = 1; | |
1009 else | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1010 Fput_range_table (make_fixnum (c), make_fixnum (c), Qt, |
428 | 1011 Vskip_chars_range_table); |
1012 } | |
1013 } | |
1014 } | |
1015 | |
872 | 1016 /* #### Not in FSF 21.1 */ |
428 | 1017 if (syntaxp && fastmap['-'] != 0) |
1018 fastmap[' '] = 1; | |
1019 | |
1020 { | |
665 | 1021 Charbpos start_point = BUF_PT (buf); |
872 | 1022 Charbpos pos = start_point; |
1023 Charbpos pos_byte = BYTE_BUF_PT (buf); | |
428 | 1024 |
1025 if (syntaxp) | |
1026 { | |
872 | 1027 scache = setup_buffer_syntax_cache (buf, pos, forwardp ? 1 : -1); |
428 | 1028 /* All syntax designators are normal chars so nothing strange |
1029 to worry about */ | |
1030 if (forwardp) | |
1031 { | |
872 | 1032 if (pos < limit) |
1033 while (fastmap[(unsigned char) | |
1034 syntax_code_spec | |
1035 [(int) SYNTAX_FROM_CACHE | |
5653
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1036 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]] |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1037 != negate) |
872 | 1038 { |
1039 pos++; | |
1040 INC_BYTEBPOS (buf, pos_byte); | |
879 | 1041 if (pos >= limit) |
872 | 1042 break; |
1043 UPDATE_SYNTAX_CACHE_FORWARD (scache, pos); | |
1044 } | |
428 | 1045 } |
1046 else | |
1047 { | |
872 | 1048 while (pos > limit) |
460 | 1049 { |
872 | 1050 Charbpos savepos = pos_byte; |
1051 pos--; | |
1052 DEC_BYTEBPOS (buf, pos_byte); | |
1053 UPDATE_SYNTAX_CACHE_BACKWARD (scache, pos); | |
5653
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1054 if (fastmap[(unsigned char) |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1055 syntax_code_spec |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1056 [(int) SYNTAX_FROM_CACHE |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1057 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]] |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1058 == negate) |
872 | 1059 { |
1060 pos++; | |
1061 pos_byte = savepos; | |
1062 break; | |
1063 } | |
460 | 1064 } |
428 | 1065 } |
1066 } | |
1067 else | |
1068 { | |
5653
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1069 struct buffer *lispbuf = buf; |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1070 |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1071 #define CLASS_BIT_CHECK(c) \ |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1072 (class_bits && ((class_bits & BIT_ALPHA && ISALPHA (c)) \ |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1073 || (class_bits & BIT_SPACE && ISSPACE (c)) \ |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1074 || (class_bits & BIT_PUNCT && ISPUNCT (c)) \ |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1075 || (class_bits & BIT_WORD && ISWORD (c)) \ |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1076 || (NILP (buf->case_fold_search) ? \ |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1077 ((class_bits & BIT_UPPER && ISUPPER (c)) \ |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1078 || (class_bits & BIT_LOWER && ISLOWER (c))) \ |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1079 : (class_bits & (BIT_UPPER | BIT_LOWER) \ |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1080 && !NOCASEP (buf, c))))) |
428 | 1081 if (forwardp) |
1082 { | |
872 | 1083 while (pos < limit) |
428 | 1084 { |
872 | 1085 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
5653
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1086 |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1087 if ((ch < countof (fastmap) ? fastmap[ch] |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1088 : (CLASS_BIT_CHECK (ch) || |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1089 (EQ (Qt, Fget_range_table (make_fixnum (ch), |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1090 Vskip_chars_range_table, |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1091 Qnil))))) |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1092 != negate) |
872 | 1093 { |
1094 pos++; | |
1095 INC_BYTEBPOS (buf, pos_byte); | |
1096 } | |
428 | 1097 else |
1098 break; | |
1099 } | |
1100 } | |
1101 else | |
1102 { | |
872 | 1103 while (pos > limit) |
428 | 1104 { |
872 | 1105 Charbpos prev_pos_byte = pos_byte; |
1106 Ichar ch; | |
1107 | |
1108 DEC_BYTEBPOS (buf, prev_pos_byte); | |
1109 ch = BYTE_BUF_FETCH_CHAR (buf, prev_pos_byte); | |
5653
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1110 if ((ch < countof (fastmap) ? fastmap[ch] |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1111 : (CLASS_BIT_CHECK (ch) || |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1112 (EQ (Qt, Fget_range_table (make_fixnum (ch), |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1113 Vskip_chars_range_table, |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1114 Qnil))))) |
3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
Aidan Kehoe <kehoea@parhasard.net>
parents:
5649
diff
changeset
|
1115 != negate) |
872 | 1116 { |
1117 pos--; | |
1118 pos_byte = prev_pos_byte; | |
1119 } | |
428 | 1120 else |
1121 break; | |
1122 } | |
1123 } | |
1124 } | |
1125 QUIT; | |
872 | 1126 BOTH_BUF_SET_PT (buf, pos, pos_byte); |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1127 return make_fixnum (BUF_PT (buf) - start_point); |
428 | 1128 } |
1129 } | |
1130 | |
1131 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /* | |
444 | 1132 Move point forward, stopping before a char not in STRING, or at pos LIMIT. |
428 | 1133 STRING is like the inside of a `[...]' in a regular expression |
1134 except that `]' is never special and `\\' quotes `^', `-' or `\\'. | |
1135 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter. | |
1136 With arg "^a-zA-Z", skips nonletters stopping before first letter. | |
1137 Returns the distance traveled, either zero or positive. | |
1138 | |
1139 Optional argument BUFFER defaults to the current buffer. | |
1140 */ | |
444 | 1141 (string, limit, buffer)) |
428 | 1142 { |
444 | 1143 return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit); |
428 | 1144 } |
1145 | |
1146 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /* | |
444 | 1147 Move point backward, stopping after a char not in STRING, or at pos LIMIT. |
428 | 1148 See `skip-chars-forward' for details. |
1149 Returns the distance traveled, either zero or negative. | |
1150 | |
1151 Optional argument BUFFER defaults to the current buffer. | |
1152 */ | |
444 | 1153 (string, limit, buffer)) |
428 | 1154 { |
444 | 1155 return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit); |
428 | 1156 } |
1157 | |
1158 | |
1159 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /* | |
1160 Move point forward across chars in specified syntax classes. | |
1161 SYNTAX is a string of syntax code characters. | |
444 | 1162 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT. |
428 | 1163 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX. |
1164 This function returns the distance traveled, either zero or positive. | |
1165 | |
1166 Optional argument BUFFER defaults to the current buffer. | |
1167 */ | |
444 | 1168 (syntax, limit, buffer)) |
428 | 1169 { |
444 | 1170 return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit); |
428 | 1171 } |
1172 | |
1173 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /* | |
1174 Move point backward across chars in specified syntax classes. | |
1175 SYNTAX is a string of syntax code characters. | |
444 | 1176 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT. |
428 | 1177 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX. |
1178 This function returns the distance traveled, either zero or negative. | |
1179 | |
1180 Optional argument BUFFER defaults to the current buffer. | |
1181 */ | |
444 | 1182 (syntax, limit, buffer)) |
428 | 1183 { |
444 | 1184 return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit); |
428 | 1185 } |
1186 | |
1187 | |
1188 /* Subroutines of Lisp buffer search functions. */ | |
1189 | |
1190 static Lisp_Object | |
444 | 1191 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror, |
428 | 1192 Lisp_Object count, Lisp_Object buffer, int direction, |
1193 int RE, int posix) | |
1194 { | |
665 | 1195 REGISTER Charbpos np; |
1196 Charbpos lim; | |
428 | 1197 EMACS_INT n = direction; |
1198 struct buffer *buf; | |
1199 | |
1200 if (!NILP (count)) | |
1201 { | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1202 CHECK_FIXNUM (count); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1203 n *= XFIXNUM (count); |
428 | 1204 } |
1205 | |
1206 buf = decode_buffer (buffer, 0); | |
1207 CHECK_STRING (string); | |
444 | 1208 if (NILP (limit)) |
428 | 1209 lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf); |
1210 else | |
1211 { | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1212 CHECK_FIXNUM_COERCE_MARKER (limit); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1213 lim = XFIXNUM (limit); |
428 | 1214 if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf)) |
563 | 1215 invalid_argument ("Invalid search limit (wrong side of point)", |
1216 Qunbound); | |
428 | 1217 if (lim > BUF_ZV (buf)) |
1218 lim = BUF_ZV (buf); | |
1219 if (lim < BUF_BEGV (buf)) | |
1220 lim = BUF_BEGV (buf); | |
1221 } | |
1222 | |
1223 np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE, | |
1224 (!NILP (buf->case_fold_search) | |
446 | 1225 ? XCASE_TABLE_CANON (buf->case_table) |
1226 : Qnil), | |
428 | 1227 (!NILP (buf->case_fold_search) |
446 | 1228 ? XCASE_TABLE_EQV (buf->case_table) |
1229 : Qnil), posix); | |
428 | 1230 |
1231 if (np <= 0) | |
1232 { | |
444 | 1233 if (NILP (noerror)) |
2268 | 1234 { |
1235 signal_failure (string); | |
1236 RETURN_NOT_REACHED (Qnil); | |
1237 } | |
444 | 1238 if (!EQ (noerror, Qt)) |
428 | 1239 { |
1240 if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf)) | |
2500 | 1241 ABORT (); |
428 | 1242 BUF_SET_PT (buf, lim); |
1243 return Qnil; | |
1244 #if 0 /* This would be clean, but maybe programs depend on | |
1245 a value of nil here. */ | |
1246 np = lim; | |
1247 #endif | |
1248 } | |
1249 else | |
1250 return Qnil; | |
1251 } | |
1252 | |
1253 if (np < BUF_BEGV (buf) || np > BUF_ZV (buf)) | |
2500 | 1254 ABORT (); |
428 | 1255 |
1256 BUF_SET_PT (buf, np); | |
1257 | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1258 return make_fixnum (np); |
428 | 1259 } |
1260 | |
1261 static int | |
1262 trivial_regexp_p (Lisp_Object regexp) | |
1263 { | |
1264 Bytecount len = XSTRING_LENGTH (regexp); | |
867 | 1265 Ibyte *s = XSTRING_DATA (regexp); |
428 | 1266 while (--len >= 0) |
1267 { | |
1268 switch (*s++) | |
1269 { | |
1724 | 1270 /* #### howcum ']' doesn't appear here, but ... */ |
428 | 1271 case '.': case '*': case '+': case '?': case '[': case '^': case '$': |
1272 return 0; | |
1273 case '\\': | |
1274 if (--len < 0) | |
1275 return 0; | |
1276 switch (*s++) | |
1277 { | |
1724 | 1278 /* ... ')' does appear here? ('<' and '>' can appear singly.) */ |
1279 /* #### are there other constructs to check? */ | |
428 | 1280 case '|': case '(': case ')': case '`': case '\'': case 'b': |
1281 case 'B': case '<': case '>': case 'w': case 'W': case 's': | |
1724 | 1282 case 'S': case '=': case '{': case '}': |
428 | 1283 #ifdef MULE |
1284 /* 97/2/25 jhod Added for category matches */ | |
1285 case 'c': case 'C': | |
1286 #endif /* MULE */ | |
1287 case '1': case '2': case '3': case '4': case '5': | |
1288 case '6': case '7': case '8': case '9': | |
1289 return 0; | |
1290 } | |
1291 } | |
1292 } | |
1293 return 1; | |
1294 } | |
1295 | |
1296 /* Search for the n'th occurrence of STRING in BUF, | |
665 | 1297 starting at position CHARBPOS and stopping at position BUFLIM, |
428 | 1298 treating PAT as a literal string if RE is false or as |
1299 a regular expression if RE is true. | |
1300 | |
1301 If N is positive, searching is forward and BUFLIM must be greater | |
665 | 1302 than CHARBPOS. |
428 | 1303 If N is negative, searching is backward and BUFLIM must be less |
665 | 1304 than CHARBPOS. |
428 | 1305 |
1306 Returns -x if only N-x occurrences found (x > 0), | |
1307 or else the position at the beginning of the Nth occurrence | |
1308 (if searching backward) or the end (if searching forward). | |
1309 | |
1310 POSIX is nonzero if we want full backtracking (POSIX style) | |
1311 for this pattern. 0 means backtrack only enough to get a valid match. */ | |
665 | 1312 static Charbpos |
1313 search_buffer (struct buffer *buf, Lisp_Object string, Charbpos charbpos, | |
1314 Charbpos buflim, EMACS_INT n, int RE, Lisp_Object trt, | |
446 | 1315 Lisp_Object inverse_trt, int posix) |
428 | 1316 { |
1317 Bytecount len = XSTRING_LENGTH (string); | |
867 | 1318 Ibyte *base_pat = XSTRING_DATA (string); |
428 | 1319 REGISTER EMACS_INT i, j; |
665 | 1320 Bytebpos p1, p2; |
428 | 1321 Bytecount s1, s2; |
665 | 1322 Bytebpos pos, lim; |
428 | 1323 |
853 | 1324 /* Some FSF junk with running_asynch_code, to preserve the match |
1325 data. Not necessary because we don't call process filters | |
1326 asynchronously (i.e. from within QUIT). */ | |
428 | 1327 |
1425 | 1328 /* Searching 0 times means noop---don't move, don't touch registers. */ |
1329 if (n == 0) | |
1330 return charbpos; | |
1331 | |
428 | 1332 /* Null string is found at starting position. */ |
1333 if (len == 0) | |
1334 { | |
665 | 1335 set_search_regs (buf, charbpos, 0); |
1336 return charbpos; | |
428 | 1337 } |
1338 | |
665 | 1339 pos = charbpos_to_bytebpos (buf, charbpos); |
1340 lim = charbpos_to_bytebpos (buf, buflim); | |
428 | 1341 if (RE && !trivial_regexp_p (string)) |
1342 { | |
1343 struct re_pattern_buffer *bufp; | |
826 | 1344 |
1345 bufp = compile_pattern (string, &search_regs, trt, | |
1346 wrap_buffer (buf), buf, posix, ERROR_ME); | |
428 | 1347 |
1348 /* Get pointers and sizes of the two strings | |
1349 that make up the visible portion of the buffer. */ | |
1350 | |
826 | 1351 p1 = BYTE_BUF_BEGV (buf); |
1352 p2 = BYTE_BUF_CEILING_OF (buf, p1); | |
428 | 1353 s1 = p2 - p1; |
826 | 1354 s2 = BYTE_BUF_ZV (buf) - p2; |
1355 | |
1356 while (n != 0) | |
428 | 1357 { |
1358 Bytecount val; | |
826 | 1359 struct syntax_cache scache_struct; |
1360 struct syntax_cache *scache = &scache_struct; | |
1361 | |
428 | 1362 QUIT; |
826 | 1363 /* By making the regex object, regex buffer, and syntax cache |
1364 arguments to re_{search,match}{,_2}, we've removed the need to | |
1365 do nasty things to deal with regex reentrancy. (See stack | |
1366 trace in signal.c for proof that this can happen.) | |
1367 | |
1368 #### there is still a potential problem with the regex cache -- | |
1369 the compiled regex could be overwritten. we'd need 20-fold | |
1370 reentrancy, though. Fix this. */ | |
1371 | |
428 | 1372 val = re_search_2 (bufp, |
826 | 1373 (char *) BYTE_BUF_BYTE_ADDRESS (buf, p1), s1, |
1374 (char *) BYTE_BUF_BYTE_ADDRESS (buf, p2), s2, | |
1375 pos - BYTE_BUF_BEGV (buf), lim - pos, &search_regs, | |
1376 n > 0 ? lim - BYTE_BUF_BEGV (buf) : | |
1377 pos - BYTE_BUF_BEGV (buf), wrap_buffer (buf), | |
1378 buf, scache); | |
428 | 1379 |
1380 if (val == -2) | |
1381 { | |
1382 matcher_overflow (); | |
1383 } | |
1384 if (val >= 0) | |
1385 { | |
1386 int num_regs = search_regs.num_regs; | |
826 | 1387 j = BYTE_BUF_BEGV (buf); |
428 | 1388 for (i = 0; i < num_regs; i++) |
1389 if (search_regs.start[i] >= 0) | |
1390 { | |
1391 search_regs.start[i] += j; | |
1392 search_regs.end[i] += j; | |
1393 } | |
793 | 1394 last_thing_searched = wrap_buffer (buf); |
428 | 1395 /* Set pos to the new position. */ |
826 | 1396 pos = n > 0 ? search_regs.end[0] : search_regs.start[0]; |
428 | 1397 fixup_search_regs_for_buffer (buf); |
665 | 1398 /* And charbpos too. */ |
826 | 1399 charbpos = n > 0 ? search_regs.end[0] : search_regs.start[0]; |
428 | 1400 } |
1401 else | |
826 | 1402 return (n > 0 ? 0 - n : n); |
1403 if (n > 0) n--; else n++; | |
428 | 1404 } |
665 | 1405 return charbpos; |
428 | 1406 } |
1407 else /* non-RE case */ | |
1408 { | |
446 | 1409 int charset_base = -1; |
1410 int boyer_moore_ok = 1; | |
2367 | 1411 Ibyte *patbuf = alloca_ibytes (len * MAX_ICHAR_LEN); |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1412 Ibyte *pat = patbuf; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1413 |
446 | 1414 #ifdef MULE |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1415 int entirely_one_byte_p = buf->text->entirely_one_byte_p; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1416 int nothing_greater_than_0xff = |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1417 buf->text->num_8_bit_fixed_chars == BUF_Z(buf) - BUF_BEG (buf); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1418 |
446 | 1419 while (len > 0) |
1420 { | |
867 | 1421 Ibyte tmp_str[MAX_ICHAR_LEN]; |
1422 Ichar c, translated, inverse; | |
446 | 1423 Bytecount orig_bytelen, new_bytelen, inv_bytelen; |
1424 | |
1425 /* If we got here and the RE flag is set, it's because | |
1426 we're dealing with a regexp known to be trivial, so the | |
1427 backslash just quotes the next character. */ | |
1428 if (RE && *base_pat == '\\') | |
1429 { | |
1430 len--; | |
1431 base_pat++; | |
1432 } | |
867 | 1433 c = itext_ichar (base_pat); |
446 | 1434 translated = TRANSLATE (trt, c); |
1435 inverse = TRANSLATE (inverse_trt, c); | |
1436 | |
867 | 1437 orig_bytelen = itext_ichar_len (base_pat); |
1438 inv_bytelen = set_itext_ichar (tmp_str, inverse); | |
1439 new_bytelen = set_itext_ichar (tmp_str, translated); | |
446 | 1440 |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1441 if (boyer_moore_ok |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1442 /* Only do the Boyer-Moore check for characters needing |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1443 translation. */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1444 && (translated != c || inverse != c)) |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1445 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1446 Ichar starting_c = c; |
4421
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1447 int charset_base_code, checked = 0; |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1448 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1449 do |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1450 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1451 c = TRANSLATE (inverse_trt, c); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1452 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1453 /* If a character cannot occur in the buffer, ignore |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1454 it. */ |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1455 if (c > 0x7F && entirely_one_byte_p) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1456 continue; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1457 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1458 if (c > 0xFF && nothing_greater_than_0xff) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1459 continue; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1460 |
4421
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1461 checked = 1; |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1462 |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1463 if (-1 == charset_base) /* No charset yet specified. */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1464 { |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1465 /* Keep track of which charset and character set row |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1466 contains the characters that need translation. |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1467 |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1468 Zero out the bits corresponding to the last |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1469 byte. */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1470 charset_base = c & ~ICHAR_FIELD3_MASK; |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1471 } |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1472 else |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1473 { |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1474 charset_base_code = c & ~ICHAR_FIELD3_MASK; |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1475 |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1476 if (charset_base_code != charset_base) |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1477 { |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1478 /* If two different rows, or two different |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1479 charsets, appear, needing non-ASCII |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1480 translation, then we cannot use boyer_moore |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1481 search. See the comment at the head of |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1482 boyer_moore(). */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1483 boyer_moore_ok = 0; |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1484 break; |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1485 } |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1486 } |
4901
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1487 |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1488 if (ichar_len (c) > 2) |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1489 { |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1490 /* Case-equivalence plus repeated octets throws off |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1491 the construction of the stride table; avoid this. |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1492 |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1493 It should be possible to correct boyer_moore to |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1494 behave correctly even in this case--it doesn't have |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1495 problems with repeated octets when case conversion |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1496 is not involved--but this is not a critical |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1497 issue. */ |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1498 Ibyte encoded[MAX_ICHAR_LEN]; |
5016
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1499 Bytecount clen = set_itext_ichar (encoded, c); |
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1500 int a, b; |
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1501 for (a = 0; a < clen && boyer_moore_ok; ++a) |
4901
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1502 { |
5016
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1503 for (b = a + 1; b < clen && boyer_moore_ok; ++b) |
4901
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1504 { |
5016
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1505 if (encoded[a] == encoded[b]) |
4901
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1506 { |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1507 boyer_moore_ok = 0; |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1508 } |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1509 } |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1510 } |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1511 |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1512 if (0 == boyer_moore_ok) |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1513 { |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1514 break; |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1515 } |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1516 } |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1517 |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1518 } while (c != starting_c); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1519 |
4421
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1520 if (!checked) |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1521 { |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1522 #ifdef DEBUG_XEMACS |
5041 | 1523 if (debug_searches) |
4421
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1524 { |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1525 Lisp_Symbol *sym = XSYMBOL (Qsearch_algorithm_used); |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1526 sym->value = Qnil; |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1527 } |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1528 #endif |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1529 /* The "continue" clauses were used above, for every |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1530 translation of the character. As such, this character |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1531 is not to be found in the buffer and neither is the |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1532 string as a whole. Return immediately; also avoid |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1533 triggering the assertion a few lines down. */ |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1534 return n > 0 ? -n : n; |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1535 } |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1536 |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1537 if (boyer_moore_ok && charset_base != -1 && |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1538 charset_base != (translated & ~ICHAR_FIELD3_MASK)) |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1539 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1540 /* In the rare event that the CANON entry for this |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1541 character is not in the desired set, choose one |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1542 that is, from the equivalence set. It doesn't much |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1543 matter which. */ |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1544 Ichar starting_ch = translated; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1545 do |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1546 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1547 translated = TRANSLATE (inverse_trt, translated); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1548 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1549 if (charset_base == (translated & ~ICHAR_FIELD3_MASK)) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1550 break; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1551 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1552 } while (starting_ch != translated); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1553 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1554 assert (starting_ch != translated); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1555 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1556 new_bytelen = set_itext_ichar (tmp_str, translated); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1557 } |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1558 } |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1559 |
446 | 1560 memcpy (pat, tmp_str, new_bytelen); |
1561 pat += new_bytelen; | |
1562 base_pat += orig_bytelen; | |
1563 len -= orig_bytelen; | |
1564 } | |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1565 |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1566 if (-1 == charset_base) |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1567 { |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1568 charset_base = 'a' & ~ICHAR_FIELD3_MASK; /* Default to ASCII. */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1569 } |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1570 |
446 | 1571 #else /* not MULE */ |
1572 while (--len >= 0) | |
1573 { | |
1574 /* If we got here and the RE flag is set, it's because | |
1575 we're dealing with a regexp known to be trivial, so the | |
1576 backslash just quotes the next character. */ | |
1577 if (RE && *base_pat == '\\') | |
1578 { | |
1579 len--; | |
1580 base_pat++; | |
1581 } | |
1582 *pat++ = TRANSLATE (trt, *base_pat++); | |
1583 } | |
1584 #endif /* MULE */ | |
1585 len = pat - patbuf; | |
1586 pat = base_pat = patbuf; | |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1587 |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1588 #ifdef DEBUG_XEMACS |
5041 | 1589 if (debug_searches) |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1590 { |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1591 Lisp_Symbol *sym = XSYMBOL (Qsearch_algorithm_used); |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1592 sym->value = boyer_moore_ok ? Qboyer_moore : Qsimple_search; |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1593 } |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1594 #endif |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1595 |
446 | 1596 if (boyer_moore_ok) |
1597 return boyer_moore (buf, base_pat, len, pos, lim, n, | |
1598 trt, inverse_trt, charset_base); | |
1599 else | |
1600 return simple_search (buf, base_pat, len, pos, lim, n, trt); | |
1601 } | |
1602 } | |
1603 | |
826 | 1604 /* Do a simple string search N times for the string PAT, whose length is |
1605 LEN bytes, from buffer byte position POS until LIM. TRT is the | |
1606 translation table. | |
446 | 1607 |
1608 Return the character position where the match is found. | |
1609 Otherwise, if M matches remained to be found, return -M. | |
1610 | |
1611 This kind of search works regardless of what is in PAT and | |
1612 regardless of what is in TRT. It is used in cases where | |
1613 boyer_moore cannot work. */ | |
1614 | |
665 | 1615 static Charbpos |
867 | 1616 simple_search (struct buffer *buf, Ibyte *base_pat, Bytecount len, |
826 | 1617 Bytebpos pos, Bytebpos lim, EMACS_INT n, Lisp_Object trt) |
446 | 1618 { |
1619 int forward = n > 0; | |
1620 Bytecount buf_len = 0; /* Shut up compiler. */ | |
1621 | |
826 | 1622 if (lim > pos) |
446 | 1623 while (n > 0) |
428 | 1624 { |
446 | 1625 while (1) |
428 | 1626 { |
826 | 1627 Bytecount this_len = len; |
1628 Bytebpos this_pos = pos; | |
867 | 1629 Ibyte *p = base_pat; |
826 | 1630 if (pos >= lim) |
446 | 1631 goto stop; |
1632 | |
1633 while (this_len > 0) | |
1634 { | |
867 | 1635 Ichar pat_ch, buf_ch; |
446 | 1636 Bytecount pat_len; |
1637 | |
867 | 1638 pat_ch = itext_ichar (p); |
826 | 1639 buf_ch = BYTE_BUF_FETCH_CHAR (buf, this_pos); |
446 | 1640 |
1641 buf_ch = TRANSLATE (trt, buf_ch); | |
1642 | |
1643 if (buf_ch != pat_ch) | |
1644 break; | |
1645 | |
867 | 1646 pat_len = itext_ichar_len (p); |
446 | 1647 p += pat_len; |
1648 this_len -= pat_len; | |
826 | 1649 INC_BYTEBPOS (buf, this_pos); |
446 | 1650 } |
1651 if (this_len == 0) | |
428 | 1652 { |
826 | 1653 buf_len = this_pos - pos; |
1654 pos = this_pos; | |
446 | 1655 break; |
428 | 1656 } |
826 | 1657 INC_BYTEBPOS (buf, pos); |
428 | 1658 } |
446 | 1659 n--; |
1660 } | |
1661 else | |
4322
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1662 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1663 /* If lim < len, then there are too few buffer positions to hold the |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1664 pattern between the beginning of the buffer and lim. Adjust to |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1665 ensure pattern fits. If we don't do this, we can assert in the |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1666 DEC_BYTEBPOS below. */ |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1667 if (lim < len) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1668 lim = len; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1669 while (n < 0) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1670 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1671 while (1) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1672 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1673 Bytecount this_len = len; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1674 Bytebpos this_pos = pos; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1675 Ibyte *p; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1676 if (pos <= lim) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1677 goto stop; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1678 p = base_pat + len; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1679 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1680 while (this_len > 0) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1681 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1682 Ichar pat_ch, buf_ch; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1683 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1684 DEC_IBYTEPTR (p); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1685 DEC_BYTEBPOS (buf, this_pos); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1686 pat_ch = itext_ichar (p); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1687 buf_ch = BYTE_BUF_FETCH_CHAR (buf, this_pos); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1688 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1689 buf_ch = TRANSLATE (trt, buf_ch); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1690 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1691 if (buf_ch != pat_ch) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1692 break; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1693 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1694 this_len -= itext_ichar_len (p); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1695 } |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1696 if (this_len == 0) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1697 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1698 buf_len = pos - this_pos; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1699 pos = this_pos; |
446 | 1700 break; |
4322
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1701 } |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1702 DEC_BYTEBPOS (buf, pos); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1703 } |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1704 n++; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1705 } |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1706 } |
446 | 1707 stop: |
1708 if (n == 0) | |
1709 { | |
665 | 1710 Charbpos beg, end, retval; |
446 | 1711 if (forward) |
1712 { | |
826 | 1713 beg = bytebpos_to_charbpos (buf, pos - buf_len); |
1714 retval = end = bytebpos_to_charbpos (buf, pos); | |
446 | 1715 } |
1716 else | |
428 | 1717 { |
826 | 1718 retval = beg = bytebpos_to_charbpos (buf, pos); |
1719 end = bytebpos_to_charbpos (buf, pos + buf_len); | |
428 | 1720 } |
446 | 1721 set_search_regs (buf, beg, end - beg); |
1722 | |
1723 return retval; | |
1724 } | |
1725 else if (n > 0) | |
1726 return -n; | |
1727 else | |
1728 return n; | |
1729 } | |
1730 | |
1731 /* Do Boyer-Moore search N times for the string PAT, | |
1732 whose length is LEN bytes, | |
1733 from buffer byte position POS until LIM. | |
1734 The sign of N says which direction we search in. | |
1735 TRT and INVERSE_TRT are translation tables. | |
1736 | |
1737 This kind of search works if all the characters in PAT that have | |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1738 (non-ASCII) translation are the same aside from the last byte. This |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1739 makes it possible to translate just the last byte of a character, and do |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1740 so after just a simple test of the context. |
446 | 1741 |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1742 If that criterion is not satisfied, do not call this function. You will |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1743 get an assertion failure. */ |
446 | 1744 |
665 | 1745 static Charbpos |
867 | 1746 boyer_moore (struct buffer *buf, Ibyte *base_pat, Bytecount len, |
665 | 1747 Bytebpos pos, Bytebpos lim, EMACS_INT n, Lisp_Object trt, |
2333 | 1748 Lisp_Object inverse_trt, int USED_IF_MULE (charset_base)) |
446 | 1749 { |
1750 /* #### Someone really really really needs to comment the workings | |
1751 of this junk somewhat better. | |
1752 | |
1753 BTW "BM" stands for Boyer-Moore, which is one of the standard | |
1754 string-searching algorithms. It's the best string-searching | |
1755 algorithm out there, provided that: | |
1756 | |
1757 a) You're not fazed by algorithm complexity. (Rabin-Karp, which | |
1758 uses hashing, is much much easier to code but not as fast.) | |
1759 b) You can freely move backwards in the string that you're | |
1760 searching through. | |
1761 | |
1762 As the comment below tries to explain (but garbles in typical | |
1763 programmer-ese), the idea is that you don't have to do a | |
1764 string match at every successive position in the text. For | |
1765 example, let's say the pattern is "a very long string". We | |
1766 compare the last character in the string (`g') with the | |
1767 corresponding character in the text. If it mismatches, and | |
1768 it is, say, `z', then we can skip forward by the entire | |
1769 length of the pattern because `z' does not occur anywhere | |
1770 in the pattern. If the mismatching character does occur | |
1771 in the pattern, we can usually still skip forward by more | |
1772 than one: e.g. if it is `l', then we can skip forward | |
1773 by the length of the substring "ong string" -- i.e. the | |
1774 largest end section of the pattern that does not contain | |
1775 the mismatched character. So what we do is compute, for | |
1776 each possible character, the distance we can skip forward | |
1777 (the "stride") and use it in the string matching. This | |
1778 is what the BM_tab holds. */ | |
1779 REGISTER EMACS_INT *BM_tab; | |
1780 EMACS_INT *BM_tab_base; | |
1781 REGISTER Bytecount dirlen; | |
1782 EMACS_INT infinity; | |
665 | 1783 Bytebpos limit; |
446 | 1784 Bytecount stride_for_teases = 0; |
1785 REGISTER EMACS_INT i, j; | |
867 | 1786 Ibyte *pat, *pat_end; |
1787 REGISTER Ibyte *cursor, *p_limit, *ptr2; | |
1788 Ibyte simple_translate[0400]; | |
446 | 1789 REGISTER int direction = ((n > 0) ? 1 : -1); |
1790 #ifdef MULE | |
867 | 1791 Ibyte translate_prev_byte = 0; |
1792 Ibyte translate_anteprev_byte = 0; | |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1793 /* These need to be rethought in the event that the internal format |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1794 changes, or in the event that num_8_bit_fixed_chars disappears |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1795 (entirely_one_byte_p can be trivially worked out by checking for a |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1796 byte count equal to the char count.) */ |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1797 int buffer_entirely_one_byte_p = buf->text->entirely_one_byte_p; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1798 int buffer_nothing_greater_than_0xff = |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1799 buf->text->num_8_bit_fixed_chars == BUF_Z(buf) - BUF_BEG (buf); |
446 | 1800 #endif |
1801 #ifdef C_ALLOCA | |
1802 EMACS_INT BM_tab_space[0400]; | |
1803 BM_tab = &BM_tab_space[0]; | |
1804 #else | |
1805 BM_tab = alloca_array (EMACS_INT, 256); | |
1806 #endif | |
1807 | |
1808 /* The general approach is that we are going to maintain that we | |
1809 know the first (closest to the present position, in whatever | |
1810 direction we're searching) character that could possibly be | |
1811 the last (furthest from present position) character of a | |
1812 valid match. We advance the state of our knowledge by | |
1813 looking at that character and seeing whether it indeed | |
1814 matches the last character of the pattern. If it does, we | |
1815 take a closer look. If it does not, we move our pointer (to | |
1816 putative last characters) as far as is logically possible. | |
1817 This amount of movement, which I call a stride, will be the | |
1818 length of the pattern if the actual character appears nowhere | |
1819 in the pattern, otherwise it will be the distance from the | |
1820 last occurrence of that character to the end of the pattern. | |
1821 As a coding trick, an enormous stride is coded into the table | |
1822 for characters that match the last character. This allows | |
1823 use of only a single test, a test for having gone past the | |
1824 end of the permissible match region, to test for both | |
1825 possible matches (when the stride goes past the end | |
1826 immediately) and failure to match (where you get nudged past | |
1827 the end one stride at a time). | |
1828 | |
1829 Here we make a "mickey mouse" BM table. The stride of the | |
1830 search is determined only by the last character of the | |
1831 putative match. If that character does not match, we will | |
1832 stride the proper distance to propose a match that | |
1833 superimposes it on the last instance of a character that | |
1834 matches it (per trt), or misses it entirely if there is | |
1835 none. */ | |
1836 | |
1837 dirlen = len * direction; | |
1838 infinity = dirlen - (lim + pos + len + len) * direction; | |
1839 /* Record position after the end of the pattern. */ | |
1840 pat_end = base_pat + len; | |
1841 if (direction < 0) | |
1842 base_pat = pat_end - 1; | |
1843 BM_tab_base = BM_tab; | |
1844 BM_tab += 0400; | |
1845 j = dirlen; /* to get it in a register */ | |
1846 /* A character that does not appear in the pattern induces a | |
1847 stride equal to the pattern length. */ | |
1848 while (BM_tab_base != BM_tab) | |
1849 { | |
1850 *--BM_tab = j; | |
1851 *--BM_tab = j; | |
1852 *--BM_tab = j; | |
1853 *--BM_tab = j; | |
1854 } | |
1855 /* We use this for translation, instead of TRT itself. We | |
1856 fill this in to handle the characters that actually occur | |
1857 in the pattern. Others don't matter anyway! */ | |
1858 xzero (simple_translate); | |
1859 for (i = 0; i < 0400; i++) | |
867 | 1860 simple_translate[i] = (Ibyte) i; |
446 | 1861 i = 0; |
1425 | 1862 |
446 | 1863 while (i != infinity) |
1864 { | |
867 | 1865 Ibyte *ptr = base_pat + i; |
446 | 1866 i += direction; |
1867 if (i == dirlen) | |
1868 i = infinity; | |
1869 if (!NILP (trt)) | |
428 | 1870 { |
446 | 1871 #ifdef MULE |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1872 Ichar ch = -1, untranslated; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1873 Ibyte byte; |
446 | 1874 int this_translated = 1; |
1875 | |
1876 /* Is *PTR the last byte of a character? */ | |
867 | 1877 if (pat_end - ptr == 1 || ibyte_first_byte_p (ptr[1])) |
428 | 1878 { |
867 | 1879 Ibyte *charstart = ptr; |
1880 while (!ibyte_first_byte_p (*charstart)) | |
446 | 1881 charstart--; |
867 | 1882 untranslated = itext_ichar (charstart); |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1883 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1884 ch = TRANSLATE (trt, untranslated); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1885 if (!ibyte_first_byte_p (*ptr)) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1886 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1887 translate_prev_byte = ptr[-1]; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1888 if (!ibyte_first_byte_p (translate_prev_byte)) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1889 translate_anteprev_byte = ptr[-2]; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1890 } |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1891 |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1892 if (ch != untranslated && /* Was translation done? */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1893 charset_base != (ch & ~ICHAR_FIELD3_MASK)) |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1894 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1895 /* In the very rare event that the CANON entry for this |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1896 character is not in the desired set, choose one that |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1897 is, from the equivalence set. It doesn't much matter |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1898 which, since we're building our own cheesy equivalence |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1899 table instead of using that belonging to the case |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1900 table directly. |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1901 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1902 We can get here if search_buffer has worked out that |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1903 the buffer is entirely single width. */ |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1904 Ichar starting_ch = ch; |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1905 int count = 0; |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1906 do |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1907 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1908 ch = TRANSLATE (inverse_trt, ch); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1909 if (charset_base == (ch & ~ICHAR_FIELD3_MASK)) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1910 break; |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1911 ++count; |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1912 } while (starting_ch != ch); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1913 |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1914 /* If starting_ch is equal to ch (and count is not one, |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1915 which means no translation is necessary), the case |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1916 table is corrupt. (Any mapping in the canon table |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1917 should be reflected in the equivalence table, and we |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1918 know from the canon table that untranslated maps to |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1919 starting_ch and that untranslated has the correct value |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1920 for charset_base.) */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1921 assert (1 == count || starting_ch != ch); |
446 | 1922 } |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1923 { |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1924 Ibyte tmp[MAX_ICHAR_LEN]; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1925 Bytecount chlen; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1926 |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1927 chlen = set_itext_ichar (tmp, ch); |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1928 byte = tmp[chlen - 1]; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1929 } |
428 | 1930 } |
1931 else | |
1932 { | |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1933 byte = *ptr; |
446 | 1934 this_translated = 0; |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1935 ch = -1; |
446 | 1936 } |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1937 |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1938 /* BYTE = last byte of character CH when represented as text */ |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1939 j = byte; |
446 | 1940 |
1941 if (i == infinity) | |
1942 stride_for_teases = BM_tab[j]; | |
1943 BM_tab[j] = dirlen - i; | |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1944 /* A translation table is accompanied by its inverse -- see |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1945 comment in casetab.c. */ |
446 | 1946 if (this_translated) |
1947 { | |
867 | 1948 Ichar starting_ch = ch; |
446 | 1949 EMACS_INT starting_j = j; |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1950 |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1951 text_checking_assert (valid_ichar_p (ch)); |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1952 do |
446 | 1953 { |
1954 ch = TRANSLATE (inverse_trt, ch); | |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1955 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1956 if (ch > 0x7F && buffer_entirely_one_byte_p) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1957 continue; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1958 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1959 if (ch > 0xFF && buffer_nothing_greater_than_0xff) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1960 continue; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1961 |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1962 |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1963 /* Retrieve last byte of character CH when represented as |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1964 text */ |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1965 { |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1966 Ibyte tmp[MAX_ICHAR_LEN]; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1967 Bytecount chlen; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1968 |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1969 chlen = set_itext_ichar (tmp, ch); |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1970 j = tmp[chlen - 1]; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1971 } |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1972 |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1973 /* For all the characters that map into CH, set up |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1974 simple_translate to map the last byte into |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1975 STARTING_J. */ |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1976 simple_translate[j] = (Ibyte) starting_j; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1977 BM_tab[j] = dirlen - i; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1978 |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1979 } |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1980 while (ch != starting_ch); |
446 | 1981 } |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1982 #else /* not MULE */ |
446 | 1983 EMACS_INT k; |
1984 j = *ptr; | |
1985 k = (j = TRANSLATE (trt, j)); | |
1986 if (i == infinity) | |
1987 stride_for_teases = BM_tab[j]; | |
1988 BM_tab[j] = dirlen - i; | |
1989 /* A translation table is accompanied by its inverse -- | |
826 | 1990 see comment in casetab.c. */ |
446 | 1991 while ((j = TRANSLATE (inverse_trt, j)) != k) |
1992 { | |
867 | 1993 simple_translate[j] = (Ibyte) k; |
428 | 1994 BM_tab[j] = dirlen - i; |
1995 } | |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1996 #endif /* (not) MULE */ |
446 | 1997 } |
1998 else | |
1999 { | |
2000 j = *ptr; | |
2001 | |
2002 if (i == infinity) | |
2003 stride_for_teases = BM_tab[j]; | |
2004 BM_tab[j] = dirlen - i; | |
428 | 2005 } |
446 | 2006 /* stride_for_teases tells how much to stride if we get a |
2007 match on the far character but are subsequently | |
2008 disappointed, by recording what the stride would have been | |
2009 for that character if the last character had been | |
2010 different. */ | |
2011 } | |
2012 infinity = dirlen - infinity; | |
2013 pos += dirlen - ((direction > 0) ? direction : 0); | |
2014 /* loop invariant - pos points at where last char (first char if | |
2015 reverse) of pattern would align in a possible match. */ | |
2016 while (n != 0) | |
2017 { | |
665 | 2018 Bytebpos tail_end; |
867 | 2019 Ibyte *tail_end_ptr; |
446 | 2020 /* It's been reported that some (broken) compiler thinks |
2021 that Boolean expressions in an arithmetic context are | |
2022 unsigned. Using an explicit ?1:0 prevents this. */ | |
2023 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0) | |
2024 return n * (0 - direction); | |
2025 /* First we do the part we can by pointers (maybe | |
2026 nothing) */ | |
2027 QUIT; | |
2028 pat = base_pat; | |
2029 limit = pos - dirlen + direction; | |
2030 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF | |
2031 have changed. See buffer.h. */ | |
2032 limit = ((direction > 0) | |
826 | 2033 ? BYTE_BUF_CEILING_OF (buf, limit) - 1 |
2034 : BYTE_BUF_FLOOR_OF (buf, limit + 1)); | |
446 | 2035 /* LIMIT is now the last (not beyond-last!) value POS can |
2036 take on without hitting edge of buffer or the gap. */ | |
2037 limit = ((direction > 0) | |
2038 ? min (lim - 1, min (limit, pos + 20000)) | |
2039 : max (lim, max (limit, pos - 20000))); | |
826 | 2040 tail_end = BYTE_BUF_CEILING_OF (buf, pos); |
2041 tail_end_ptr = BYTE_BUF_BYTE_ADDRESS (buf, tail_end); | |
446 | 2042 |
2043 if ((limit - pos) * direction > 20) | |
428 | 2044 { |
826 | 2045 /* We have to be careful because the code can generate addresses |
2046 that don't point to the beginning of characters. */ | |
2047 p_limit = BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, limit); | |
2048 ptr2 = (cursor = BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos)); | |
446 | 2049 /* In this loop, pos + cursor - ptr2 is the surrogate |
2050 for pos */ | |
2051 while (1) /* use one cursor setting as long as i can */ | |
2052 { | |
2053 if (direction > 0) /* worth duplicating */ | |
2054 { | |
2055 /* Use signed comparison if appropriate to make | |
2056 cursor+infinity sure to be > p_limit. | |
2057 Assuming that the buffer lies in a range of | |
2058 addresses that are all "positive" (as ints) | |
2059 or all "negative", either kind of comparison | |
2060 will work as long as we don't step by | |
2061 infinity. So pick the kind that works when | |
2062 we do step by infinity. */ | |
2063 if ((EMACS_INT) (p_limit + infinity) > | |
2064 (EMACS_INT) p_limit) | |
2065 while ((EMACS_INT) cursor <= | |
2066 (EMACS_INT) p_limit) | |
2067 cursor += BM_tab[*cursor]; | |
2068 else | |
2069 while ((EMACS_UINT) cursor <= | |
2070 (EMACS_UINT) p_limit) | |
2071 cursor += BM_tab[*cursor]; | |
2072 } | |
2073 else | |
2074 { | |
2075 if ((EMACS_INT) (p_limit + infinity) < | |
2076 (EMACS_INT) p_limit) | |
2077 while ((EMACS_INT) cursor >= | |
2078 (EMACS_INT) p_limit) | |
2079 cursor += BM_tab[*cursor]; | |
2080 else | |
2081 while ((EMACS_UINT) cursor >= | |
2082 (EMACS_UINT) p_limit) | |
2083 cursor += BM_tab[*cursor]; | |
2084 } | |
2085 /* If you are here, cursor is beyond the end of the | |
2086 searched region. This can happen if you match on | |
2087 the far character of the pattern, because the | |
2088 "stride" of that character is infinity, a number | |
2089 able to throw you well beyond the end of the | |
2090 search. It can also happen if you fail to match | |
2091 within the permitted region and would otherwise | |
2092 try a character beyond that region */ | |
2093 if ((cursor - p_limit) * direction <= len) | |
2094 break; /* a small overrun is genuine */ | |
2095 cursor -= infinity; /* large overrun = hit */ | |
2096 i = dirlen - direction; | |
2097 if (!NILP (trt)) | |
2098 { | |
2099 while ((i -= direction) + direction != 0) | |
2100 { | |
2101 #ifdef MULE | |
867 | 2102 Ichar ch; |
446 | 2103 cursor -= direction; |
2104 /* Translate only the last byte of a character. */ | |
2105 if ((cursor == tail_end_ptr | |
867 | 2106 || ibyte_first_byte_p (cursor[1])) |
2107 && (ibyte_first_byte_p (cursor[0]) | |
446 | 2108 || (translate_prev_byte == cursor[-1] |
867 | 2109 && (ibyte_first_byte_p (translate_prev_byte) |
446 | 2110 || translate_anteprev_byte == cursor[-2])))) |
2111 ch = simple_translate[*cursor]; | |
2112 else | |
2113 ch = *cursor; | |
2114 if (pat[i] != ch) | |
2115 break; | |
2116 #else | |
2117 if (pat[i] != TRANSLATE (trt, *(cursor -= direction))) | |
2118 break; | |
2119 #endif | |
2120 } | |
2121 } | |
2122 else | |
2123 { | |
2124 while ((i -= direction) + direction != 0) | |
2125 if (pat[i] != *(cursor -= direction)) | |
2126 break; | |
2127 } | |
2128 cursor += dirlen - i - direction; /* fix cursor */ | |
2129 if (i + direction == 0) | |
2130 { | |
2131 cursor -= direction; | |
2132 | |
2133 { | |
665 | 2134 Bytebpos bytstart = (pos + cursor - ptr2 + |
446 | 2135 ((direction > 0) |
2136 ? 1 - len : 0)); | |
665 | 2137 Charbpos bufstart = bytebpos_to_charbpos (buf, bytstart); |
2138 Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len); | |
446 | 2139 |
2140 set_search_regs (buf, bufstart, bufend - bufstart); | |
2141 } | |
2142 | |
2143 if ((n -= direction) != 0) | |
2144 cursor += dirlen; /* to resume search */ | |
2145 else | |
2146 return ((direction > 0) | |
2147 ? search_regs.end[0] : search_regs.start[0]); | |
2148 } | |
2149 else | |
2150 cursor += stride_for_teases; /* <sigh> we lose - */ | |
2151 } | |
2152 pos += cursor - ptr2; | |
2153 } | |
2154 else | |
2155 /* Now we'll pick up a clump that has to be done the hard | |
2156 way because it covers a discontinuity */ | |
2157 { | |
428 | 2158 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF |
2159 have changed. See buffer.h. */ | |
2160 limit = ((direction > 0) | |
826 | 2161 ? BYTE_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1 |
2162 : BYTE_BUF_FLOOR_OF (buf, pos - dirlen)); | |
428 | 2163 limit = ((direction > 0) |
446 | 2164 ? min (limit + len, lim - 1) |
2165 : max (limit - len, lim)); | |
2166 /* LIMIT is now the last value POS can have | |
2167 and still be valid for a possible match. */ | |
2168 while (1) | |
428 | 2169 { |
446 | 2170 /* This loop can be coded for space rather than |
2171 speed because it will usually run only once. | |
2172 (the reach is at most len + 21, and typically | |
2173 does not exceed len) */ | |
2174 while ((limit - pos) * direction >= 0) | |
826 | 2175 /* *not* BYTE_BUF_FETCH_CHAR. We are working here |
446 | 2176 with bytes, not characters. */ |
826 | 2177 pos += BM_tab[*BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos)]; |
446 | 2178 /* now run the same tests to distinguish going off |
2179 the end, a match or a phony match. */ | |
2180 if ((pos - limit) * direction <= len) | |
2181 break; /* ran off the end */ | |
2182 /* Found what might be a match. | |
2183 Set POS back to last (first if reverse) char pos. */ | |
2184 pos -= infinity; | |
2185 i = dirlen - direction; | |
2186 while ((i -= direction) + direction != 0) | |
428 | 2187 { |
446 | 2188 #ifdef MULE |
867 | 2189 Ichar ch; |
2190 Ibyte *ptr; | |
446 | 2191 #endif |
2192 pos -= direction; | |
2193 #ifdef MULE | |
826 | 2194 ptr = BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos); |
446 | 2195 if ((ptr == tail_end_ptr |
867 | 2196 || ibyte_first_byte_p (ptr[1])) |
2197 && (ibyte_first_byte_p (ptr[0]) | |
446 | 2198 || (translate_prev_byte == ptr[-1] |
867 | 2199 && (ibyte_first_byte_p (translate_prev_byte) |
446 | 2200 || translate_anteprev_byte == ptr[-2])))) |
2201 ch = simple_translate[*ptr]; | |
428 | 2202 else |
446 | 2203 ch = *ptr; |
2204 if (pat[i] != ch) | |
2205 break; | |
2206 | |
2207 #else | |
826 | 2208 if (pat[i] != |
2209 TRANSLATE (trt, | |
2210 *BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos))) | |
446 | 2211 break; |
2212 #endif | |
428 | 2213 } |
446 | 2214 /* Above loop has moved POS part or all the way back |
2215 to the first char pos (last char pos if reverse). | |
2216 Set it once again at the last (first if reverse) | |
2217 char. */ | |
2218 pos += dirlen - i- direction; | |
2219 if (i + direction == 0) | |
428 | 2220 { |
446 | 2221 pos -= direction; |
2222 | |
2223 { | |
665 | 2224 Bytebpos bytstart = (pos + |
446 | 2225 ((direction > 0) |
2226 ? 1 - len : 0)); | |
665 | 2227 Charbpos bufstart = bytebpos_to_charbpos (buf, bytstart); |
2228 Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len); | |
446 | 2229 |
2230 set_search_regs (buf, bufstart, bufend - bufstart); | |
2231 } | |
2232 | |
2233 if ((n -= direction) != 0) | |
2234 pos += dirlen; /* to resume search */ | |
428 | 2235 else |
446 | 2236 return ((direction > 0) |
2237 ? search_regs.end[0] : search_regs.start[0]); | |
428 | 2238 } |
446 | 2239 else |
2240 pos += stride_for_teases; | |
2241 } | |
428 | 2242 } |
446 | 2243 /* We have done one clump. Can we continue? */ |
2244 if ((lim - pos) * direction < 0) | |
2245 return (0 - n) * direction; | |
428 | 2246 } |
665 | 2247 return bytebpos_to_charbpos (buf, pos); |
428 | 2248 } |
2249 | |
1024 | 2250 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the |
2251 buffer for a match just found. */ | |
428 | 2252 |
2253 static void | |
665 | 2254 set_search_regs (struct buffer *buf, Charbpos beg, Charcount len) |
428 | 2255 { |
2256 /* Make sure we have registers in which to store | |
2257 the match position. */ | |
2258 if (search_regs.num_regs == 0) | |
2259 { | |
2260 search_regs.start = xnew (regoff_t); | |
2261 search_regs.end = xnew (regoff_t); | |
2262 search_regs.num_regs = 1; | |
2263 } | |
2264 | |
1468 | 2265 clear_search_regs (); |
428 | 2266 search_regs.start[0] = beg; |
2267 search_regs.end[0] = beg + len; | |
793 | 2268 last_thing_searched = wrap_buffer (buf); |
428 | 2269 } |
2270 | |
1468 | 2271 /* Clear search registers so match data will be null. */ |
1024 | 2272 |
2273 static void | |
1468 | 2274 clear_search_regs (void) |
1024 | 2275 { |
2276 /* This function has been Mule-ized. */ | |
2277 int i; | |
2278 | |
1468 | 2279 for (i = 0; i < search_regs.num_regs; i++) |
2280 search_regs.start[i] = search_regs.end[i] = -1; | |
1024 | 2281 } |
2282 | |
428 | 2283 |
2284 /* Given a string of words separated by word delimiters, | |
442 | 2285 compute a regexp that matches those exact words |
2286 separated by arbitrary punctuation. */ | |
428 | 2287 |
2288 static Lisp_Object | |
2289 wordify (Lisp_Object buffer, Lisp_Object string) | |
2290 { | |
2291 Charcount i, len; | |
2292 EMACS_INT punct_count = 0, word_count = 0; | |
2293 struct buffer *buf = decode_buffer (buffer, 0); | |
826 | 2294 Lisp_Object syntax_table = buf->mirror_syntax_table; |
428 | 2295 |
2296 CHECK_STRING (string); | |
826 | 2297 len = string_char_length (string); |
428 | 2298 |
2299 for (i = 0; i < len; i++) | |
867 | 2300 if (!WORD_SYNTAX_P (syntax_table, string_ichar (string, i))) |
428 | 2301 { |
2302 punct_count++; | |
2303 if (i > 0 && WORD_SYNTAX_P (syntax_table, | |
867 | 2304 string_ichar (string, i - 1))) |
428 | 2305 word_count++; |
2306 } | |
867 | 2307 if (WORD_SYNTAX_P (syntax_table, string_ichar (string, len - 1))) |
428 | 2308 word_count++; |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
2309 if (!word_count) return build_ascstring (""); |
428 | 2310 |
2311 { | |
2312 /* The following value is an upper bound on the amount of storage we | |
2313 need. In non-Mule, it is exact. */ | |
867 | 2314 Ibyte *storage = |
2367 | 2315 alloca_ibytes (XSTRING_LENGTH (string) - punct_count + |
428 | 2316 5 * (word_count - 1) + 4); |
867 | 2317 Ibyte *o = storage; |
428 | 2318 |
2319 *o++ = '\\'; | |
2320 *o++ = 'b'; | |
2321 | |
2322 for (i = 0; i < len; i++) | |
2323 { | |
867 | 2324 Ichar ch = string_ichar (string, i); |
428 | 2325 |
2326 if (WORD_SYNTAX_P (syntax_table, ch)) | |
867 | 2327 o += set_itext_ichar (o, ch); |
428 | 2328 else if (i > 0 |
2329 && WORD_SYNTAX_P (syntax_table, | |
867 | 2330 string_ichar (string, i - 1)) |
428 | 2331 && --word_count) |
2332 { | |
2333 *o++ = '\\'; | |
2334 *o++ = 'W'; | |
2335 *o++ = '\\'; | |
2336 *o++ = 'W'; | |
2337 *o++ = '*'; | |
2338 } | |
2339 } | |
2340 | |
2341 *o++ = '\\'; | |
2342 *o++ = 'b'; | |
2343 | |
2344 return make_string (storage, o - storage); | |
2345 } | |
2346 } | |
2347 | |
2348 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /* | |
2349 Search backward from point for STRING. | |
2350 Set point to the beginning of the occurrence found, and return point. | |
444 | 2351 |
2352 Optional second argument LIMIT bounds the search; it is a buffer | |
2353 position. The match found must not extend before that position. | |
2354 The value nil is equivalent to (point-min). | |
2355 | |
2356 Optional third argument NOERROR, if t, means just return nil (no | |
2357 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2358 and return nil. | |
2359 | |
2360 Optional fourth argument COUNT is a repeat count--search for | |
2361 successive occurrences. | |
2362 | |
428 | 2363 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2364 defaults to the current buffer. |
2365 | |
1468 | 2366 When the match is successful, this function modifies the match data |
2367 that `match-beginning', `match-end' and `match-data' access; save the | |
2368 match data with `match-data' and restore it with `store-match-data' if | |
2369 you want to preserve them. If the match fails, the match data from the | |
2370 previous successful match is preserved. | |
2371 | |
2372 See also the function `replace-match'. | |
428 | 2373 */ |
444 | 2374 (string, limit, noerror, count, buffer)) |
428 | 2375 { |
/* Trailing arguments -1, 0, 0: presumably direction (backward), regexp
   flag and POSIX flag -- confirm against search_command's definition. */
444 | 2376 return search_command (string, limit, noerror, count, buffer, -1, 0, 0); |
428 | 2377 } |
2378 | |
2379 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /* | |
2380 Search forward from point for STRING. | |
2381 Set point to the end of the occurrence found, and return point. | |
444 | 2382 |
2383 Optional second argument LIMIT bounds the search; it is a buffer | |
2384 position. The match found must not extend after that position. The | |
2385 value nil is equivalent to (point-max). | |
2386 | |
2387 Optional third argument NOERROR, if t, means just return nil (no | |
2388 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2389 and return nil. | |
2390 | |
2391 Optional fourth argument COUNT is a repeat count--search for | |
2392 successive occurrences. | |
2393 | |
428 | 2394 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2395 defaults to the current buffer. |
2396 | |
1468 | 2397 When the match is successful, this function modifies the match data |
2398 that `match-beginning', `match-end' and `match-data' access; save the | |
2399 match data with `match-data' and restore it with `store-match-data' if | |
2400 you want to preserve them. If the match fails, the match data from the | |
2401 previous successful match is preserved. | |
2402 | |
2403 See also the function `replace-match'. | |
428 | 2404 */ |
444 | 2405 (string, limit, noerror, count, buffer)) |
428 | 2406 { |
/* Trailing arguments 1, 0, 0: presumably direction (forward), regexp
   flag and POSIX flag -- confirm against search_command's definition. */
444 | 2407 return search_command (string, limit, noerror, count, buffer, 1, 0, 0); |
428 | 2408 } |
2409 | |
2410 DEFUN ("word-search-backward", Fword_search_backward, 1, 5, | |
2411 "sWord search backward: ", /* | |
2412 Search backward from point for STRING, ignoring differences in punctuation. | |
2413 Set point to the beginning of the occurrence found, and return point. | |
444 | 2414 |
2415 Optional second argument LIMIT bounds the search; it is a buffer | |
2416 position. The match found must not extend before that position. | |
2417 The value nil is equivalent to (point-min). | |
2418 | |
2419 Optional third argument NOERROR, if t, means just return nil (no | |
2420 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2421 and return nil. | |
2422 | |
2423 Optional fourth argument COUNT is a repeat count--search for | |
2424 successive occurrences. | |
2425 | |
428 | 2426 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2427 defaults to the current buffer. |
2428 | |
1468 | 2429 When the match is successful, this function modifies the match data |
2430 that `match-beginning', `match-end' and `match-data' access; save the | |
2431 match data with `match-data' and restore it with `store-match-data' if | |
2432 you want to preserve them. If the match fails, the match data from the | |
2433 previous successful match is preserved. | |
2434 | |
2435 See also the function `replace-match'. | |
428 | 2436 */ |
444 | 2437 (string, limit, noerror, count, buffer)) |
428 | 2438 { |
/* STRING is first turned into a regexp by wordify ().  Trailing
   arguments -1, 1, 0: presumably direction (backward), regexp flag and
   POSIX flag -- confirm against search_command's definition. */
444 | 2439 return search_command (wordify (buffer, string), limit, noerror, count, |
428 | 2440 buffer, -1, 1, 0); |
2441 } | |
2442 | |
2443 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /* | |
2444 Search forward from point for STRING, ignoring differences in punctuation. | |
2445 Set point to the end of the occurrence found, and return point. | |
444 | 2446 |
2447 Optional second argument LIMIT bounds the search; it is a buffer | |
2448 position. The match found must not extend after that position. The | |
2449 value nil is equivalent to (point-max). | |
2450 | |
2451 Optional third argument NOERROR, if t, means just return nil (no | |
2452 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2453 and return nil. | |
2454 | |
2455 Optional fourth argument COUNT is a repeat count--search for | |
2456 successive occurrences. | |
2457 | |
428 | 2458 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2459 defaults to the current buffer. |
2460 | |
1468 | 2461 When the match is successful, this function modifies the match data |
2462 that `match-beginning', `match-end' and `match-data' access; save the | |
2463 match data with `match-data' and restore it with `store-match-data' if | |
2464 you want to preserve them. If the match fails, the match data from the | |
2465 previous successful match is preserved. | |
2466 | |
2467 See also the function `replace-match'. | |
428 | 2468 */ |
444 | 2469 (string, limit, noerror, count, buffer)) |
428 | 2470 { |
/* STRING is first turned into a regexp by wordify ().  Trailing
   arguments 1, 1, 0: presumably direction (forward), regexp flag and
   POSIX flag -- confirm against search_command's definition. */
444 | 2471 return search_command (wordify (buffer, string), limit, noerror, count, |
428 | 2472 buffer, 1, 1, 0); |
2473 } | |
2474 | |
2475 DEFUN ("re-search-backward", Fre_search_backward, 1, 5, | |
2476 "sRE search backward: ", /* | |
2477 Search backward from point for match for regular expression REGEXP. | |
2478 Set point to the beginning of the match, and return point. | |
2479 The match found is the one starting last in the buffer | |
2480 and yet ending before the origin of the search. | |
444 | 2481 |
2482 Optional second argument LIMIT bounds the search; it is a buffer | |
2483 position. The match found must not extend before that position. | |
2484 The value nil is equivalent to (point-min). | |
2485 | |
2486 Optional third argument NOERROR, if t, means just return nil (no | |
2487 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2488 and return nil. | |
2489 | |
2490 Optional fourth argument COUNT is a repeat count--search for | |
2491 successive occurrences. | |
2492 | |
428 | 2493 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2494 defaults to the current buffer. |
2495 | |
1468 | 2496 When the match is successful, this function modifies the match data |
2497 that `match-beginning', `match-end' and `match-data' access; save the | |
2498 match data with `match-data' and restore it with `store-match-data' if | |
2499 you want to preserve them. If the match fails, the match data from the | |
2500 previous successful match is preserved. | |
2501 | |
2502 See also the function `replace-match'. | |
428 | 2503 */ |
444 | 2504 (regexp, limit, noerror, count, buffer)) |
428 | 2505 { |
/* Trailing arguments -1, 1, 0: presumably direction (backward), regexp
   flag and POSIX flag -- confirm against search_command's definition. */
444 | 2506 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0); |
428 | 2507 } |
2508 | |
2509 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /* | |
2510 Search forward from point for regular expression REGEXP. | |
2511 Set point to the end of the occurrence found, and return point. | |
444 | 2512 |
2513 Optional second argument LIMIT bounds the search; it is a buffer | |
2514 position. The match found must not extend after that position. The | |
2515 value nil is equivalent to (point-max). | |
2516 | |
2517 Optional third argument NOERROR, if t, means just return nil (no | |
2518 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2519 and return nil. | |
2520 | |
2521 Optional fourth argument COUNT is a repeat count--search for | |
2522 successive occurrences. | |
2523 | |
428 | 2524 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2525 defaults to the current buffer. |
2526 | |
1468 | 2527 When the match is successful, this function modifies the match data |
2528 that `match-beginning', `match-end' and `match-data' access; save the | |
2529 match data with `match-data' and restore it with `store-match-data' if | |
2530 you want to preserve them. If the match fails, the match data from the | |
2531 previous successful match is preserved. | |
2532 | |
2533 See also the function `replace-match'. | |
428 | 2534 */ |
444 | 2535 (regexp, limit, noerror, count, buffer)) |
428 | 2536 { |
/* Trailing arguments 1, 1, 0: presumably direction (forward), regexp
   flag and POSIX flag -- confirm against search_command's definition. */
444 | 2537 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0); |
428 | 2538 } |
2539 | |
2540 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5, | |
2541 "sPosix search backward: ", /* | |
2542 Search backward from point for match for regular expression REGEXP. | |
2543 Find the longest match in accord with Posix regular expression rules. | |
2544 Set point to the beginning of the match, and return point. | |
2545 The match found is the one starting last in the buffer | |
2546 and yet ending before the origin of the search. | |
444 | 2547 |
2548 Optional second argument LIMIT bounds the search; it is a buffer | |
2549 position. The match found must not extend before that position. | |
2550 The value nil is equivalent to (point-min). | |
2551 | |
2552 Optional third argument NOERROR, if t, means just return nil (no | |
2553 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2554 and return nil. | |
2555 | |
2556 Optional fourth argument COUNT is a repeat count--search for | |
2557 successive occurrences. | |
2558 | |
428 | 2559 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2560 defaults to the current buffer. |
2561 | |
1468 | 2562 When the match is successful, this function modifies the match data |
2563 that `match-beginning', `match-end' and `match-data' access; save the | |
2564 match data with `match-data' and restore it with `store-match-data' if | |
2565 you want to preserve them. If the match fails, the match data from the | |
2566 previous successful match is preserved. | |
2567 | |
2568 See also the function `replace-match'. | |
428 | 2569 */ |
444 | 2570 (regexp, limit, noerror, count, buffer)) |
428 | 2571 { |
/* Trailing arguments -1, 1, 1: presumably direction (backward), regexp
   flag and POSIX flag -- confirm against search_command's definition. */
444 | 2572 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1); |
428 | 2573 } |
2574 | |
2575 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /* | |
2576 Search forward from point for regular expression REGEXP. | |
2577 Find the longest match in accord with Posix regular expression rules. | |
2578 Set point to the end of the occurrence found, and return point. | |
444 | 2579 |
2580 Optional second argument LIMIT bounds the search; it is a buffer | |
2581 position. The match found must not extend after that position. The | |
2582 value nil is equivalent to (point-max). | |
2583 | |
2584 Optional third argument NOERROR, if t, means just return nil (no | |
2585 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2586 and return nil. | |
2587 | |
2588 Optional fourth argument COUNT is a repeat count--search for | |
2589 successive occurrences. | |
2590 | |
428 | 2591 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2592 defaults to the current buffer. |
2593 | |
1468 | 2594 When the match is successful, this function modifies the match data |
2595 that `match-beginning', `match-end' and `match-data' access; save the | |
2596 match data with `match-data' and restore it with `store-match-data' if | |
2597 you want to preserve them. If the match fails, the match data from the | |
2598 previous successful match is preserved. | |
2599 | |
2600 See also the function `replace-match'. | |
428 | 2601 */ |
444 | 2602 (regexp, limit, noerror, count, buffer)) |
428 | 2603 { |
/* Trailing arguments 1, 1, 1: presumably direction (forward), regexp
   flag and POSIX flag -- confirm against search_command's definition. */
444 | 2604 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1); |
428 | 2605 } |
2606 | |
2607 | |
2608 static Lisp_Object | |
2609 free_created_dynarrs (Lisp_Object cons) | |
2610 { | |
2611 Dynarr_free (get_opaque_ptr (XCAR (cons))); | |
2612 Dynarr_free (get_opaque_ptr (XCDR (cons))); | |
2613 free_opaque_ptr (XCAR (cons)); | |
2614 free_opaque_ptr (XCDR (cons)); | |
853 | 2615 free_cons (cons); |
428 | 2616 return Qnil; |
2617 } | |
2618 | |
2619 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /* | |
444 | 2620 Replace text matched by last search with REPLACEMENT. |
4199 | 2621 Leaves point at end of replacement text. |
2622 Optional boolean FIXEDCASE inhibits matching case of REPLACEMENT to source. | |
2623 Optional boolean LITERAL inhibits interpretation of escape sequences. | |
2624 Optional STRING provides the source text to replace. | |
2625 Optional STRBUFFER may be a buffer, providing match context, or an integer | |
2626 specifying the subexpression to replace. | |
2627 | |
2628 If FIXEDCASE is non-nil, do not alter case of replacement text. | |
428 | 2629 Otherwise maybe capitalize the whole text, or maybe just word initials, |
2630 based on the replaced text. | |
4199 | 2631 If the replaced text has only capital letters and has at least one |
2632 multiletter word, convert REPLACEMENT to all caps. | |
428 | 2633 If the replaced text has at least one word starting with a capital letter, |
444 | 2634 then capitalize each word in REPLACEMENT. |
428 | 2635 |
4199 | 2636 If LITERAL is non-nil, insert REPLACEMENT literally. |
428 | 2637 Otherwise treat `\\' as special: |
444 | 2638 `\\&' in REPLACEMENT means substitute original matched text. |
428 | 2639 `\\N' means substitute what matched the Nth `\\(...\\)'. |
2640 If Nth parens didn't match, substitute nothing. | |
2641 `\\\\' means insert one `\\'. | |
2642 `\\u' means upcase the next character. | |
2643 `\\l' means downcase the next character. | |
2644 `\\U' means begin upcasing all following characters. | |
2645 `\\L' means begin downcasing all following characters. | |
2646 `\\E' means terminate the effect of any `\\U' or `\\L'. | |
2647 Case changes made with `\\u', `\\l', `\\U', and `\\L' override | |
2648 all other case changes that may be made in the replaced text. | |
4199 | 2649 |
2650 If non-nil, STRING is the source string, and a new string with the specified | |
2651 replacements is created and returned. Otherwise the current buffer is the | |
2652 source text. | |
2653 | |
2654 If non-nil, STRBUFFER may be an integer, interpreted as the index of the | |
2655 subexpression to replace in the source text, or a buffer to provide the | |
2656 syntax table and case table. If nil, then the \"subexpression\" is 0, i.e., | |
2657 the whole match, and the current buffer provides the syntax and case tables. | |
2658 If STRING is nil, STRBUFFER must be nil or an integer. | |
2659 | |
2660 Specifying a subexpression is only useful after a regular expression match, | |
2661 since a fixed string search has no non-trivial subexpressions. | |
2662 | |
2663 It is not possible to specify both a buffer and a subexpression. If that is | |
2664 desired, the idiom `(with-current-buffer BUFFER (replace-match ... INTEGER))' | |
2665 may be appropriate. | |
2666 | |
2667 If STRING is nil but the last thing matched (or searched) was a string, or | |
2668 STRING is a string but the last thing matched was a buffer, an | |
2669 `invalid-argument' error will be signaled. (XEmacs does not check that the | |
2670 last thing searched is the source string, but it is not useful to use a | |
2671 different string as source.) | |
2672 | |
2673 If no match (including searches) has been successful or the requested | |
1468 | 2674 subexpression was not matched, an `args-out-of-range' error will be |
2675 signaled. (If no match has ever been conducted in this instance of | |
2676 XEmacs, an `invalid-operation' error will be signaled. This is very | |
2677 rare.) | |
428 | 2678 */ |
444 | 2679 (replacement, fixedcase, literal, string, strbuffer)) |
428 | 2680 { |
2681 /* This function can GC */ | |
2682 enum { nochange, all_caps, cap_initial } case_action; | |
665 | 2683 Charbpos pos, last; |
428 | 2684 int some_multiletter_word; |
2685 int some_lowercase; | |
2686 int some_uppercase; | |
2687 int some_nonuppercase_initial; | |
867 | 2688 Ichar c, prevc; |
428 | 2689 Charcount inslen; |
2690 struct buffer *buf; | |
826 | 2691 Lisp_Object syntax_table; |
428 | 2692 int mc_count; |
2693 Lisp_Object buffer; | |
2694 int_dynarr *ul_action_dynarr = 0; | |
2695 int_dynarr *ul_pos_dynarr = 0; | |
502 | 2696 int sub = 0; |
428 | 2697 int speccount; |
2698 | |
444 | 2699 CHECK_STRING (replacement); |
428 | 2700 |
4199 | 2701 /* Because GNU decided to be incompatible here, we support the following |
2702 baroque and bogus API for the STRING and STRBUFFER arguments: | |
2703 types interpretations | |
2704 STRING STRBUFFER STRING STRBUFFER | |
2705 nil nil none 0 = index of subexpression to replace | |
2706 nil integer none index of subexpression to replace | |
2707 nil other ***** error ***** | |
2708 string nil source current buffer provides syntax table | |
2709 subexpression = 0 (whole match) | |
2710 string buffer source buffer providing syntax table | |
2711 subexpression = 0 (whole match) | |
2712 string integer source current buffer provides syntax table | |
2713 subexpression = STRBUFFER | |
2714 string other ***** error ***** | |
2715 */ | |
2716 | |
2717 /* Do STRBUFFER first; if STRING is nil, we'll overwrite BUF and BUFFER. */ | |
2718 | |
2719 /* If the match data were abstracted into a special "match data" type | |
2720 instead of the typical half-assed "let the implementation be visible" | |
2721 form it's in, we could extend it to include the last string matched | |
2722 and the buffer used for that matching. But of course we can't change | |
2723 it as it is. | |
2724 */ | |
2725 if (NILP (strbuffer) || BUFFERP (strbuffer)) | |
2726 { | |
2727 buf = decode_buffer (strbuffer, 0); | |
2728 } | |
2729 else if (!NILP (strbuffer)) | |
2730 { | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2731 CHECK_FIXNUM (strbuffer); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2732 sub = XFIXNUM (strbuffer); |
4199 | 2733 if (sub < 0 || sub >= (int) search_regs.num_regs) |
2734 invalid_argument ("match data register invalid", strbuffer); | |
2735 if (search_regs.start[sub] < 0) | |
2736 invalid_argument ("match data register not set", strbuffer); | |
2737 buf = current_buffer; | |
2738 } | |
2739 else | |
2740 invalid_argument ("STRBUFFER must be nil, a buffer, or an integer", | |
2741 strbuffer); | |
2742 buffer = wrap_buffer (buf); | |
2743 | |
428 | 2744 if (! NILP (string)) |
2745 { | |
2746 CHECK_STRING (string); | |
2747 if (!EQ (last_thing_searched, Qt)) | |
4199 | 2748 invalid_argument ("last thing matched was not a string", Qunbound); |
428 | 2749 } |
2750 else | |
2751 { | |
2752 if (!BUFFERP (last_thing_searched)) | |
4199 | 2753 invalid_argument ("last thing matched was not a buffer", Qunbound); |
428 | 2754 buffer = last_thing_searched; |
2755 buf = XBUFFER (buffer); | |
2756 } | |
2757 | |
826 | 2758 syntax_table = buf->mirror_syntax_table; |
428 | 2759 |
2760 case_action = nochange; /* We tried an initialization */ | |
2761 /* but some C compilers blew it */ | |
2762 | |
2763 if (search_regs.num_regs == 0) | |
826 | 2764 signal_error (Qinvalid_operation, |
2765 "replace-match called before any match found", Qunbound); | |
428 | 2766 |
2767 if (NILP (string)) | |
2768 { | |
469 | 2769 if (search_regs.start[sub] < BUF_BEGV (buf) |
2770 || search_regs.start[sub] > search_regs.end[sub] | |
2771 || search_regs.end[sub] > BUF_ZV (buf)) | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2772 args_out_of_range (make_fixnum (search_regs.start[sub]), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2773 make_fixnum (search_regs.end[sub])); |
428 | 2774 } |
2775 else | |
2776 { | |
2777 if (search_regs.start[0] < 0 | |
2778 || search_regs.start[0] > search_regs.end[0] | |
826 | 2779 || search_regs.end[0] > string_char_length (string)) |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2780 args_out_of_range (make_fixnum (search_regs.start[0]), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2781 make_fixnum (search_regs.end[0])); |
428 | 2782 } |
2783 | |
2784 if (NILP (fixedcase)) | |
2785 { | |
2786 /* Decide how to casify by examining the matched text. */ | |
2787 | |
707 | 2788 last = search_regs.end[sub]; |
428 | 2789 prevc = '\n'; |
2790 case_action = all_caps; | |
2791 | |
2792 /* some_multiletter_word is set nonzero if any original word | |
2793 is more than one letter long. */ | |
2794 some_multiletter_word = 0; | |
2795 some_lowercase = 0; | |
2796 some_nonuppercase_initial = 0; | |
2797 some_uppercase = 0; | |
2798 | |
707 | 2799 for (pos = search_regs.start[sub]; pos < last; pos++) |
428 | 2800 { |
2801 if (NILP (string)) | |
2802 c = BUF_FETCH_CHAR (buf, pos); | |
2803 else | |
867 | 2804 c = string_ichar (string, pos); |
428 | 2805 |
2806 if (LOWERCASEP (buf, c)) | |
2807 { | |
2808 /* Cannot be all caps if any original char is lower case */ | |
2809 | |
2810 some_lowercase = 1; | |
2811 if (!WORD_SYNTAX_P (syntax_table, prevc)) | |
2812 some_nonuppercase_initial = 1; | |
2813 else | |
2814 some_multiletter_word = 1; | |
2815 } | |
2816 else if (!NOCASEP (buf, c)) | |
2817 { | |
2818 some_uppercase = 1; | |
2819 if (!WORD_SYNTAX_P (syntax_table, prevc)) | |
2820 ; | |
2821 else | |
2822 some_multiletter_word = 1; | |
2823 } | |
2824 else | |
2825 { | |
2826 /* If the initial is a caseless word constituent, | |
2827 treat that like a lowercase initial. */ | |
2828 if (!WORD_SYNTAX_P (syntax_table, prevc)) | |
2829 some_nonuppercase_initial = 1; | |
2830 } | |
2831 | |
2832 prevc = c; | |
2833 } | |
2834 | |
2835 /* Convert to all caps if the old text is all caps | |
2836 and has at least one multiletter word. */ | |
2837 if (! some_lowercase && some_multiletter_word) | |
2838 case_action = all_caps; | |
2839 /* Capitalize each word, if the old text has all capitalized words. */ | |
2840 else if (!some_nonuppercase_initial && some_multiletter_word) | |
2841 case_action = cap_initial; | |
2842 else if (!some_nonuppercase_initial && some_uppercase) | |
2843 /* Should x -> yz, operating on X, give Yz or YZ? | |
2844 We'll assume the latter. */ | |
2845 case_action = all_caps; | |
2846 else | |
2847 case_action = nochange; | |
2848 } | |
2849 | |
2850 /* Do replacement in a string. */ | |
2851 if (!NILP (string)) | |
2852 { | |
2853 Lisp_Object before, after; | |
2854 | |
2855 speccount = specpdl_depth (); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2856 before = Fsubseq (string, Qzero, make_fixnum (search_regs.start[sub])); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2857 after = Fsubseq (string, make_fixnum (search_regs.end[sub]), Qnil); |
428 | 2858 |
444 | 2859 /* Do case substitution into REPLACEMENT if desired. */ |
428 | 2860 if (NILP (literal)) |
2861 { | |
826 | 2862 Charcount stlen = string_char_length (replacement); |
428 | 2863 Charcount strpos; |
2864 /* XEmacs change: rewrote this loop somewhat to make it | |
2865 cleaner. Also added \U, \E, etc. */ | |
2866 Charcount literal_start = 0; | |
2867 /* We build up the substituted string in ACCUM. */ | |
2868 Lisp_Object accum; | |
2869 | |
2870 accum = Qnil; | |
2871 | |
2872 /* OK, the basic idea here is that we scan through the | |
2873 replacement string until we find a backslash, which | |
2874 represents a substring of the original string to be | |
2875 substituted. We then append onto ACCUM the literal | |
2876 text before the backslash (LASTPOS marks the | |
2877 beginning of this) followed by the substring of the | |
2878 original string that needs to be inserted. */ | |
2879 for (strpos = 0; strpos < stlen; strpos++) | |
2880 { | |
2881 /* If LITERAL_END is set, we've encountered a backslash | |
2882 (the end of literal text to be inserted). */ | |
2883 Charcount literal_end = -1; | |
2884 /* If SUBSTART is set, we need to also insert the | |
2885 text from SUBSTART to SUBEND in the original string. */ | |
2886 Charcount substart = -1; | |
2887 Charcount subend = -1; | |
2888 | |
867 | 2889 c = string_ichar (replacement, strpos); |
428 | 2890 if (c == '\\' && strpos < stlen - 1) |
2891 { | |
867 | 2892 c = string_ichar (replacement, ++strpos); |
428 | 2893 if (c == '&') |
2894 { | |
2895 literal_end = strpos - 1; | |
2896 substart = search_regs.start[0]; | |
2897 subend = search_regs.end[0]; | |
2898 } | |
4199 | 2899 /* #### This logic is totally broken, |
2900 since we can have backrefs like "\99", right? */ | |
428 | 2901 else if (c >= '1' && c <= '9' && |
2902 c <= search_regs.num_regs + '0') | |
2903 { | |
2904 if (search_regs.start[c - '0'] >= 0) | |
2905 { | |
2906 literal_end = strpos - 1; | |
2907 substart = search_regs.start[c - '0']; | |
2908 subend = search_regs.end[c - '0']; | |
2909 } | |
2910 } | |
2911 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' || | |
2912 c == 'E') | |
2913 { | |
2914 /* Keep track of all case changes requested, but don't | |
2915 make them now. Do them later so we override | |
2916 everything else. */ | |
2917 if (!ul_pos_dynarr) | |
2918 { | |
2919 ul_pos_dynarr = Dynarr_new (int); | |
2920 ul_action_dynarr = Dynarr_new (int); | |
2921 record_unwind_protect | |
2922 (free_created_dynarrs, | |
2923 noseeum_cons | |
2924 (make_opaque_ptr (ul_pos_dynarr), | |
2925 make_opaque_ptr (ul_action_dynarr))); | |
2926 } | |
2927 literal_end = strpos - 1; | |
2928 Dynarr_add (ul_pos_dynarr, | |
2929 (!NILP (accum) | |
826 | 2930 ? string_char_length (accum) |
428 | 2931 : 0) + (literal_end - literal_start)); |
2932 Dynarr_add (ul_action_dynarr, c); | |
2933 } | |
2934 else if (c == '\\') | |
2935 /* So we get just one backslash. */ | |
2936 literal_end = strpos; | |
2937 } | |
2938 if (literal_end >= 0) | |
2939 { | |
2940 Lisp_Object literal_text = Qnil; | |
2941 Lisp_Object substring = Qnil; | |
2942 if (literal_end != literal_start) | |
5089
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2943 literal_text = Fsubseq (replacement, |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2944 make_fixnum (literal_start), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2945 make_fixnum (literal_end)); |
428 | 2946 if (substart >= 0 && subend != substart) |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2947 substring = Fsubseq (string, make_fixnum (substart), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2948 make_fixnum (subend)); |
428 | 2949 if (!NILP (literal_text) || !NILP (substring)) |
2950 accum = concat3 (accum, literal_text, substring); | |
2951 literal_start = strpos + 1; | |
2952 } | |
2953 } | |
2954 | |
2955 if (strpos != literal_start) | |
2956 /* some literal text at end to be inserted */ | |
5089
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2957 replacement = concat2 (accum, Fsubseq (replacement, |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2958 make_fixnum (literal_start), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2959 make_fixnum (strpos))); |
428 | 2960 else |
444 | 2961 replacement = accum; |
428 | 2962 } |
2963 | |
444 | 2964 /* replacement can be nil. */ |
2965 if (NILP (replacement)) | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
2966 replacement = build_ascstring (""); |
444 | 2967 |
428 | 2968 if (case_action == all_caps) |
444 | 2969 replacement = Fupcase (replacement, buffer); |
428 | 2970 else if (case_action == cap_initial) |
444 | 2971 replacement = Fupcase_initials (replacement, buffer); |
428 | 2972 |
2973 /* Now finally, we need to process the \U's, \E's, etc. */ | |
2974 if (ul_pos_dynarr) | |
2975 { | |
2976 int i = 0; | |
2977 int cur_action = 'E'; | |
826 | 2978 Charcount stlen = string_char_length (replacement); |
428 | 2979 Charcount strpos; |
2980 | |
2981 for (strpos = 0; strpos < stlen; strpos++) | |
2982 { | |
867 | 2983 Ichar curchar = string_ichar (replacement, strpos); |
2984 Ichar newchar = -1; | |
428 | 2985 if (i < Dynarr_length (ul_pos_dynarr) && |
2986 strpos == Dynarr_at (ul_pos_dynarr, i)) | |
2987 { | |
2988 int new_action = Dynarr_at (ul_action_dynarr, i); | |
2989 i++; | |
2990 if (new_action == 'u') | |
2991 newchar = UPCASE (buf, curchar); | |
2992 else if (new_action == 'l') | |
2993 newchar = DOWNCASE (buf, curchar); | |
2994 else | |
2995 cur_action = new_action; | |
2996 } | |
2997 if (newchar == -1) | |
2998 { | |
2999 if (cur_action == 'U') | |
3000 newchar = UPCASE (buf, curchar); | |
3001 else if (cur_action == 'L') | |
3002 newchar = DOWNCASE (buf, curchar); | |
3003 else | |
3004 newchar = curchar; | |
3005 } | |
3006 if (newchar != curchar) | |
793 | 3007 set_string_char (replacement, strpos, newchar); |
428 | 3008 } |
3009 } | |
3010 | |
3011 /* frees the Dynarrs if necessary. */ | |
771 | 3012 unbind_to (speccount); |
444 | 3013 return concat3 (before, replacement, after); |
428 | 3014 } |
3015 | |
707 | 3016 mc_count = begin_multiple_change (buf, search_regs.start[sub], |
3017 search_regs.end[sub]); | |
428 | 3018 |
3019 /* begin_multiple_change() records an unwind-protect, so we need to | |
3020 record this value now. */ | |
3021 speccount = specpdl_depth (); | |
3022 | |
3023 /* We insert the replacement text before the old text, and then | |
3024 delete the original text. This means that markers at the | |
3025 beginning or end of the original will float to the corresponding | |
3026 position in the replacement. */ | |
707 | 3027 BUF_SET_PT (buf, search_regs.start[sub]); |
428 | 3028 if (!NILP (literal)) |
444 | 3029 Finsert (1, &replacement); |
428 | 3030 else |
3031 { | |
826 | 3032 Charcount stlen = string_char_length (replacement); |
428 | 3033 Charcount strpos; |
3034 struct gcpro gcpro1; | |
444 | 3035 GCPRO1 (replacement); |
428 | 3036 for (strpos = 0; strpos < stlen; strpos++) |
3037 { | |
707 | 3038 /* on the first iteration assert(offset==0), |
3039 exactly complementing BUF_SET_PT() above. | |
3040 During the loop, it keeps track of the amount inserted. | |
3041 */ | |
3042 Charcount offset = BUF_PT (buf) - search_regs.start[sub]; | |
428 | 3043 |
867 | 3044 c = string_ichar (replacement, strpos); |
428 | 3045 if (c == '\\' && strpos < stlen - 1) |
3046 { | |
707 | 3047 /* XXX FIXME: replacing just a substring non-literally |
3048 using backslash refs to the match looks dangerous. But | |
3049 <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe | |
3050 <duwe@caldera.de> claims Finsert_buffer_substring already | |
3051 handles this correctly. | |
3052 */ | |
867 | 3053 c = string_ichar (replacement, ++strpos); |
428 | 3054 if (c == '&') |
3055 Finsert_buffer_substring | |
3056 (buffer, | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3057 make_fixnum (search_regs.start[0] + offset), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3058 make_fixnum (search_regs.end[0] + offset)); |
4199 | 3059 /* #### This logic is totally broken, |
3060 since we can have backrefs like "\99", right? */ | |
428 | 3061 else if (c >= '1' && c <= '9' && |
3062 c <= search_regs.num_regs + '0') | |
3063 { | |
3064 if (search_regs.start[c - '0'] >= 1) | |
3065 Finsert_buffer_substring | |
3066 (buffer, | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3067 make_fixnum (search_regs.start[c - '0'] + offset), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3068 make_fixnum (search_regs.end[c - '0'] + offset)); |
428 | 3069 } |
3070 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' || | |
3071 c == 'E') | |
3072 { | |
3073 /* Keep track of all case changes requested, but don't | |
3074 make them now. Do them later so we override | |
3075 everything else. */ | |
3076 if (!ul_pos_dynarr) | |
3077 { | |
3078 ul_pos_dynarr = Dynarr_new (int); | |
3079 ul_action_dynarr = Dynarr_new (int); | |
3080 record_unwind_protect | |
3081 (free_created_dynarrs, | |
3082 Fcons (make_opaque_ptr (ul_pos_dynarr), | |
3083 make_opaque_ptr (ul_action_dynarr))); | |
3084 } | |
3085 Dynarr_add (ul_pos_dynarr, BUF_PT (buf)); | |
3086 Dynarr_add (ul_action_dynarr, c); | |
3087 } | |
3088 else | |
3089 buffer_insert_emacs_char (buf, c); | |
3090 } | |
3091 else | |
3092 buffer_insert_emacs_char (buf, c); | |
3093 } | |
3094 UNGCPRO; | |
3095 } | |
3096 | |
707 | 3097 inslen = BUF_PT (buf) - (search_regs.start[sub]); |
3098 buffer_delete_range (buf, search_regs.start[sub] + inslen, | |
3099 search_regs.end[sub] + inslen, 0); | |
428 | 3100 |
3101 if (case_action == all_caps) | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3102 Fupcase_region (make_fixnum (BUF_PT (buf) - inslen), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3103 make_fixnum (BUF_PT (buf)), buffer); |
428 | 3104 else if (case_action == cap_initial) |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3105 Fupcase_initials_region (make_fixnum (BUF_PT (buf) - inslen), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3106 make_fixnum (BUF_PT (buf)), buffer); |
428 | 3107 |
3108 /* Now go through and make all the case changes that were requested | |
3109 in the replacement string. */ | |
3110 if (ul_pos_dynarr) | |
3111 { | |
665 | 3112 Charbpos eend = BUF_PT (buf); |
428 | 3113 int i = 0; |
3114 int cur_action = 'E'; | |
3115 | |
3116 for (pos = BUF_PT (buf) - inslen; pos < eend; pos++) | |
3117 { | |
867 | 3118 Ichar curchar = BUF_FETCH_CHAR (buf, pos); |
3119 Ichar newchar = -1; | |
428 | 3120 if (i < Dynarr_length (ul_pos_dynarr) && |
3121 pos == Dynarr_at (ul_pos_dynarr, i)) | |
3122 { | |
3123 int new_action = Dynarr_at (ul_action_dynarr, i); | |
3124 i++; | |
3125 if (new_action == 'u') | |
3126 newchar = UPCASE (buf, curchar); | |
3127 else if (new_action == 'l') | |
3128 newchar = DOWNCASE (buf, curchar); | |
3129 else | |
3130 cur_action = new_action; | |
3131 } | |
3132 if (newchar == -1) | |
3133 { | |
3134 if (cur_action == 'U') | |
3135 newchar = UPCASE (buf, curchar); | |
3136 else if (cur_action == 'L') | |
3137 newchar = DOWNCASE (buf, curchar); | |
3138 else | |
3139 newchar = curchar; | |
3140 } | |
3141 if (newchar != curchar) | |
3142 buffer_replace_char (buf, pos, newchar, 0, 0); | |
3143 } | |
3144 } | |
3145 | |
3146 /* frees the Dynarrs if necessary. */ | |
771 | 3147 unbind_to (speccount); |
428 | 3148 end_multiple_change (buf, mc_count); |
3149 | |
3150 return Qnil; | |
3151 } | |
3152 | |
3153 static Lisp_Object | |
3154 match_limit (Lisp_Object num, int beginningp) | |
3155 { | |
3156 int n; | |
3157 | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3158 CHECK_FIXNUM (num); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3159 n = XFIXNUM (num); |
428 | 3160 if (n < 0 || n >= search_regs.num_regs) |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3161 args_out_of_range (num, make_fixnum (search_regs.num_regs)); |
428 | 3162 if (search_regs.num_regs == 0 || |
3163 search_regs.start[n] < 0) | |
3164 return Qnil; | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3165 return make_fixnum (beginningp ? search_regs.start[n] : search_regs.end[n]); |
428 | 3166 } |
3167 | |
3168 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /* | |
3169 Return position of start of text matched by last regexp search. | |
3170 NUM, specifies which parenthesized expression in the last regexp. | |
3171 Value is nil if NUMth pair didn't match, or there were less than NUM pairs. | |
3172 Zero means the entire text matched by the whole regexp or whole string. | |
3173 */ | |
3174 (num)) | |
3175 { | |
3176 return match_limit (num, 1); | |
3177 } | |
3178 | |
3179 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /* | |
3180 Return position of end of text matched by last regexp search. | |
3181 NUM specifies which parenthesized expression in the last regexp. | |
3182 Value is nil if NUMth pair didn't match, or there were less than NUM pairs. | |
3183 Zero means the entire text matched by the whole regexp or whole string. | |
3184 */ | |
3185 (num)) | |
3186 { | |
3187 return match_limit (num, 0); | |
3188 } | |
3189 | |
3190 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /* | |
3191 Return a list containing all info on what the last regexp search matched. | |
3192 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'. | |
3193 All the elements are markers or nil (nil if the Nth pair didn't match) | |
3194 if the last match was on a buffer; integers or nil if a string was matched. | |
3195 Use `store-match-data' to reinstate the data in this list. | |
3196 | |
3197 If INTEGERS (the optional first argument) is non-nil, always use integers | |
3198 \(rather than markers) to represent buffer positions. | |
3199 If REUSE is a list, reuse it as part of the value. If REUSE is long enough | |
3200 to hold all the values, and if INTEGERS is non-nil, no consing is done. | |
3201 */ | |
3202 (integers, reuse)) | |
3203 { | |
3204 Lisp_Object tail, prev; | |
3205 Lisp_Object *data; | |
3206 int i; | |
3207 Charcount len; | |
3208 | |
3209 if (NILP (last_thing_searched)) | |
563 | 3210 /*error ("match-data called before any match found", Qunbound);*/ |
428 | 3211 return Qnil; |
3212 | |
3213 data = alloca_array (Lisp_Object, 2 * search_regs.num_regs); | |
3214 | |
3215 len = -1; | |
3216 for (i = 0; i < search_regs.num_regs; i++) | |
3217 { | |
665 | 3218 Charbpos start = search_regs.start[i]; |
428 | 3219 if (start >= 0) |
3220 { | |
3221 if (EQ (last_thing_searched, Qt) | |
3222 || !NILP (integers)) | |
3223 { | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3224 data[2 * i] = make_fixnum (start); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3225 data[2 * i + 1] = make_fixnum (search_regs.end[i]); |
428 | 3226 } |
3227 else if (BUFFERP (last_thing_searched)) | |
3228 { | |
3229 data[2 * i] = Fmake_marker (); | |
3230 Fset_marker (data[2 * i], | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3231 make_fixnum (start), |
428 | 3232 last_thing_searched); |
3233 data[2 * i + 1] = Fmake_marker (); | |
3234 Fset_marker (data[2 * i + 1], | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3235 make_fixnum (search_regs.end[i]), |
428 | 3236 last_thing_searched); |
3237 } | |
3238 else | |
3239 /* last_thing_searched must always be Qt, a buffer, or Qnil. */ | |
2500 | 3240 ABORT (); |
428 | 3241 |
3242 len = i; | |
3243 } | |
3244 else | |
3245 data[2 * i] = data [2 * i + 1] = Qnil; | |
3246 } | |
3247 if (!CONSP (reuse)) | |
3248 return Flist (2 * len + 2, data); | |
3249 | |
3250 /* If REUSE is a list, store as many value elements as will fit | |
3251 into the elements of REUSE. */ | |
3252 for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail)) | |
3253 { | |
3254 if (i < 2 * len + 2) | |
3255 XCAR (tail) = data[i]; | |
3256 else | |
3257 XCAR (tail) = Qnil; | |
3258 prev = tail; | |
3259 } | |
3260 | |
3261 /* If we couldn't fit all value elements into REUSE, | |
3262 cons up the rest of them and add them to the end of REUSE. */ | |
3263 if (i < 2 * len + 2) | |
3264 XCDR (prev) = Flist (2 * len + 2 - i, data + i); | |
3265 | |
3266 return reuse; | |
3267 } | |
3268 | |
3269 | |
3270 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /* | |
3271 Set internal data on last search match from elements of LIST. | |
1468 | 3272 LIST should have been created by calling `match-data' previously, |
3273 or be nil, to clear the internal match data. | |
428 | 3274 */ |
3275 (list)) | |
3276 { | |
3277 REGISTER int i; | |
3278 REGISTER Lisp_Object marker; | |
3279 int num_regs; | |
3280 int length; | |
3281 | |
853 | 3282 /* Some FSF junk with running_asynch_code, to preserve the match |
3283 data. Not necessary because we don't call process filters | |
3284 asynchronously (i.e. from within QUIT). */ | |
428 | 3285 |
3286 CONCHECK_LIST (list); | |
3287 | |
3288 /* Unless we find a marker with a buffer in LIST, assume that this | |
3289 match data came from a string. */ | |
3290 last_thing_searched = Qt; | |
3291 | |
3292 /* Allocate registers if they don't already exist. */ | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3293 length = XFIXNUM (Flength (list)) / 2; |
428 | 3294 num_regs = search_regs.num_regs; |
3295 | |
3296 if (length > num_regs) | |
3297 { | |
3298 if (search_regs.num_regs == 0) | |
3299 { | |
3300 search_regs.start = xnew_array (regoff_t, length); | |
3301 search_regs.end = xnew_array (regoff_t, length); | |
3302 } | |
3303 else | |
3304 { | |
3305 XREALLOC_ARRAY (search_regs.start, regoff_t, length); | |
3306 XREALLOC_ARRAY (search_regs.end, regoff_t, length); | |
3307 } | |
3308 | |
3309 search_regs.num_regs = length; | |
3310 } | |
3311 | |
3312 for (i = 0; i < num_regs; i++) | |
3313 { | |
3314 marker = Fcar (list); | |
3315 if (NILP (marker)) | |
3316 { | |
3317 search_regs.start[i] = -1; | |
3318 list = Fcdr (list); | |
3319 } | |
3320 else | |
3321 { | |
3322 if (MARKERP (marker)) | |
3323 { | |
3324 if (XMARKER (marker)->buffer == 0) | |
3325 marker = Qzero; | |
3326 else | |
793 | 3327 last_thing_searched = wrap_buffer (XMARKER (marker)->buffer); |
428 | 3328 } |
3329 | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3330 CHECK_FIXNUM_COERCE_MARKER (marker); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3331 search_regs.start[i] = XFIXNUM (marker); |
428 | 3332 list = Fcdr (list); |
3333 | |
3334 marker = Fcar (list); | |
3335 if (MARKERP (marker) && XMARKER (marker)->buffer == 0) | |
3336 marker = Qzero; | |
3337 | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3338 CHECK_FIXNUM_COERCE_MARKER (marker); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3339 search_regs.end[i] = XFIXNUM (marker); |
428 | 3340 } |
3341 list = Fcdr (list); | |
3342 } | |
3343 | |
3344 return Qnil; | |
3345 } | |
3346 | |
3347 /* Quote a string to inactivate reg-expr chars */ | |
3348 | |
3349 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /* | |
3350 Return a regexp string which matches exactly STRING and nothing else. | |
3351 */ | |
444 | 3352 (string)) |
428 | 3353 { |
867 | 3354 REGISTER Ibyte *in, *out, *end; |
3355 REGISTER Ibyte *temp; | |
428 | 3356 |
444 | 3357 CHECK_STRING (string); |
428 | 3358 |
2367 | 3359 temp = alloca_ibytes (XSTRING_LENGTH (string) * 2); |
428 | 3360 |
3361 /* Now copy the data into the new string, inserting escapes. */ | |
3362 | |
444 | 3363 in = XSTRING_DATA (string); |
3364 end = in + XSTRING_LENGTH (string); | |
428 | 3365 out = temp; |
3366 | |
3367 while (in < end) | |
3368 { | |
867 | 3369 Ichar c = itext_ichar (in); |
428 | 3370 |
3371 if (c == '[' || c == ']' | |
3372 || c == '*' || c == '.' || c == '\\' | |
3373 || c == '?' || c == '+' | |
3374 || c == '^' || c == '$') | |
3375 *out++ = '\\'; | |
867 | 3376 out += set_itext_ichar (out, c); |
3377 INC_IBYTEPTR (in); | |
428 | 3378 } |
3379 | |
3380 return make_string (temp, out - temp); | |
3381 } | |
3382 | |
3383 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /* | |
3384 Set the regexp to be used to match a word in regular-expression searching. | |
3385 #### Not yet implemented. Currently does nothing. | |
3386 #### Do not use this yet. Its calling interface is likely to change. | |
3387 */ | |
2286 | 3388 (UNUSED (regexp))) |
428 | 3389 { |
3390 return Qnil; | |
3391 } | |
3392 | |
3393 | |
5041 | 3394 #ifdef DEBUG_XEMACS |
3395 | |
3396 static int | |
3397 debug_regexps_changed (Lisp_Object UNUSED (sym), Lisp_Object *val, | |
3398 Lisp_Object UNUSED (in_object), | |
3399 int UNUSED (flags)) | |
3400 { | |
3401 int newval = 0; | |
3402 | |
3403 EXTERNAL_LIST_LOOP_2 (elt, *val) | |
3404 { | |
3405 CHECK_SYMBOL (elt); | |
3406 if (EQ (elt, Qcompilation)) | |
3407 newval |= RE_DEBUG_COMPILATION; | |
3408 else if (EQ (elt, Qfailure_point)) | |
3409 newval |= RE_DEBUG_FAILURE_POINT; | |
3410 else if (EQ (elt, Qmatching)) | |
3411 newval |= RE_DEBUG_MATCHING; | |
3412 else | |
3413 invalid_argument | |
3414 ("Expected `compilation', `failure-point' or `matching'", elt); | |
3415 } | |
3416 debug_regexps = newval; | |
3417 return 0; | |
3418 } | |
3419 | |
3420 #endif /* DEBUG_XEMACS */ | |
3421 | |
3422 | |
428 | 3423 /************************************************************************/ |
3424 /* initialization */ | |
3425 /************************************************************************/ | |
3426 | |
3427 void | |
3428 syms_of_search (void) | |
3429 { | |
3430 | |
442 | 3431 DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation); |
3432 DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error); | |
563 | 3433 Fput (Qinvalid_regexp, Qerror_lacks_explanatory_string, Qt); |
428 | 3434 |
3435 DEFSUBR (Flooking_at); | |
3436 DEFSUBR (Fposix_looking_at); | |
3437 DEFSUBR (Fstring_match); | |
3438 DEFSUBR (Fposix_string_match); | |
3439 DEFSUBR (Fskip_chars_forward); | |
3440 DEFSUBR (Fskip_chars_backward); | |
3441 DEFSUBR (Fskip_syntax_forward); | |
3442 DEFSUBR (Fskip_syntax_backward); | |
3443 DEFSUBR (Fsearch_forward); | |
3444 DEFSUBR (Fsearch_backward); | |
3445 DEFSUBR (Fword_search_forward); | |
3446 DEFSUBR (Fword_search_backward); | |
3447 DEFSUBR (Fre_search_forward); | |
3448 DEFSUBR (Fre_search_backward); | |
3449 DEFSUBR (Fposix_search_forward); | |
3450 DEFSUBR (Fposix_search_backward); | |
3451 DEFSUBR (Freplace_match); | |
3452 DEFSUBR (Fmatch_beginning); | |
3453 DEFSUBR (Fmatch_end); | |
3454 DEFSUBR (Fmatch_data); | |
3455 DEFSUBR (Fstore_match_data); | |
3456 DEFSUBR (Fregexp_quote); | |
3457 DEFSUBR (Fset_word_regexp); | |
3458 } | |
3459 | |
3460 void | |
3461 reinit_vars_of_search (void) | |
3462 { | |
3463 int i; | |
3464 | |
3465 last_thing_searched = Qnil; | |
3466 staticpro_nodump (&last_thing_searched); | |
3467 | |
3468 for (i = 0; i < REGEXP_CACHE_SIZE; ++i) | |
3469 { | |
3470 searchbufs[i].buf.allocated = 100; | |
3471 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100); | |
3472 searchbufs[i].buf.fastmap = searchbufs[i].fastmap; | |
3473 searchbufs[i].regexp = Qnil; | |
3474 staticpro_nodump (&searchbufs[i].regexp); | |
3475 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]); | |
3476 } | |
3477 searchbuf_head = &searchbufs[0]; | |
3478 } | |
3479 | |
3480 void | |
3481 vars_of_search (void) | |
3482 { | |
3483 DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /* | |
3484 *Regular expression to be used in `forward-word'. | |
3485 #### Not yet implemented. | |
3486 */ ); | |
3487 Vforward_word_regexp = Qnil; | |
3488 | |
3489 DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /* | |
3490 *Regular expression to be used in `backward-word'. | |
3491 #### Not yet implemented. | |
3492 */ ); | |
3493 Vbackward_word_regexp = Qnil; | |
502 | 3494 |
3495 DEFVAR_INT ("warn-about-possibly-incompatible-back-references", | |
3496 &warn_about_possibly_incompatible_back_references /* | |
3497 If true, issue warnings when new-semantics back references occur. | |
3498 This is to catch places where old code might inadvertently have changed | |
3499 semantics. This will occur in old code only where more than nine groups | |
3500 occur and a back reference to one of them is directly followed by a digit. | |
3501 */ ); | |
3502 warn_about_possibly_incompatible_back_references = 1; | |
814 | 3503 |
2421 | 3504 Vskip_chars_range_table = Fmake_range_table (Qstart_closed_end_closed); |
428 | 3505 staticpro (&Vskip_chars_range_table); |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3506 #ifdef DEBUG_XEMACS |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3507 DEFSYMBOL (Qsearch_algorithm_used); |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3508 DEFSYMBOL (Qboyer_moore); |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3509 DEFSYMBOL (Qsimple_search); |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3510 |
5041 | 3511 DEFSYMBOL (Qcompilation); |
3512 DEFSYMBOL (Qfailure_point); | |
3513 DEFSYMBOL (Qmatching); | |
3514 | |
3515 DEFVAR_INT ("debug-searches", &debug_searches /* | |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3516 If non-zero, bind `search-algorithm-used' to `boyer-moore' or `simple-search', |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3517 depending on the algorithm used for each search. Used for testing. |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
3518 */ ); |
5041 | 3519 debug_searches = 0; |
3520 | |
3521 DEFVAR_LISP_MAGIC ("debug-regexps", &Vdebug_regexps, /* | |
3522 List of areas to display debug info about during regexp operation. | |
3523 The following areas are recognized: | |
3524 | |
3525 `compilation' Display the result of compiling a regexp. | |
3526 `failure-point' Display info about failure points reached. | |
3527 `matching' Display info about the process of matching a regex against | |
3528 text. | |
3529 */ debug_regexps_changed); | |
3530 Vdebug_regexps = Qnil; | |
3531 debug_regexps = 0; | |
3532 #endif /* DEBUG_XEMACS */ | |
428 | 3533 } |