Mercurial > hg > xemacs-beta
annotate src/search.c @ 4383:1e04b9c8125b
Correct the make-temp-name docstring.
Take the opportunity to correct the format of ChangeLog.
2008-01-03 Aidan Kehoe <kehoea@parhasard.net>
* fileio.c (Fmake_temp_name): Correct the comment to cross
reference to make-temp-file, and not to this function.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Thu, 03 Jan 2008 22:36:04 +0100 |
parents | f70e56bb52a7 |
children | 4ee73bbe4f8e |
rev | line source |
---|---|
428 | 1 /* String search routines for XEmacs. |
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
793 | 4 Copyright (C) 2001, 2002 Ben Wing. |
428 | 5 |
6 This file is part of XEmacs. | |
7 | |
8 XEmacs is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
10 Free Software Foundation; either version 2, or (at your option) any | |
11 later version. | |
12 | |
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with XEmacs; see the file COPYING. If not, write to | |
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 Boston, MA 02111-1307, USA. */ | |
22 | |
23 /* Synched up with: FSF 19.29, except for region-cache stuff. */ | |
24 | |
25 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */ | |
26 | |
826 | 27 /* This file has been Mule-ized. */ |
428 | 28 |
29 #include <config.h> | |
30 #include "lisp.h" | |
31 | |
32 #include "buffer.h" | |
33 #include "insdel.h" | |
34 #include "opaque.h" | |
35 #ifdef REGION_CACHE_NEEDS_WORK | |
36 #include "region-cache.h" | |
37 #endif | |
38 #include "syntax.h" | |
39 | |
40 #include <sys/types.h> | |
41 #include "regex.h" | |
446 | 42 #include "casetab.h" |
43 #include "chartab.h" | |
44 | |
/* Map the character POS through the translation table TABLE; when TABLE
   is nil, POS is yielded untranslated.  POS is parenthesized in the
   expansion so that an expression argument is cast to Ichar and used as
   a single unit.  Only one arm of the conditional is evaluated, so POS
   is evaluated once.  */
#define TRANSLATE(table, pos) \
  (!NILP (table) ? TRT_TABLE_OF (table, (Ichar) (pos)) : (pos))
428 | 47 |
48 #define REGEXP_CACHE_SIZE 20 | |
49 | |
50 /* If the regexp is non-nil, then the buffer contains the compiled form | |
51 of that regexp, suitable for searching. */ | |
446 | 52 struct regexp_cache |
53 { | |
428 | 54 struct regexp_cache *next; |
55 Lisp_Object regexp; | |
56 struct re_pattern_buffer buf; | |
57 char fastmap[0400]; | |
58 /* Nonzero means regexp was compiled to do full POSIX backtracking. */ | |
59 char posix; | |
60 }; | |
61 | |
62 /* The instances of that struct. */ | |
63 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE]; | |
64 | |
65 /* The head of the linked list; points to the most recently used buffer. */ | |
66 static struct regexp_cache *searchbuf_head; | |
67 | |
68 | |
69 /* Every call to re_match, etc., must pass &search_regs as the regs | |
70 argument unless you can show it is unnecessary (i.e., if re_match | |
71 is certainly going to be called again before region-around-match | |
72 can be called). | |
73 | |
74 Since the registers are now dynamically allocated, we need to make | |
75 sure not to refer to the Nth register before checking that it has | |
76 been allocated by checking search_regs.num_regs. | |
77 | |
78 The regex code keeps track of whether it has allocated the search | |
79 buffer using bits in the re_pattern_buffer. This means that whenever | |
80 you compile a new pattern, it completely forgets whether it has | |
81 allocated any registers, and will allocate new registers the next | |
82 time you call a searching or matching function. Therefore, we need | |
83 to call re_set_registers after compiling a new pattern or after | |
84 setting the match registers, so that the regex functions will be | |
85 able to free or re-allocate it properly. */ | |
86 | |
87 /* Note: things get trickier under Mule because the values returned from | |
826 | 88 the regexp routines are in Bytebpos's but we need them to be in Charbpos's. |
428 | 89 We take the easy way out for the moment and just convert them immediately. |
90 We could be more clever by not converting them until necessary, but | |
91 that gets real ugly real fast since the buffer might have changed and | |
92 the positions might be out of sync or out of range. | |
93 */ | |
94 static struct re_registers search_regs; | |
95 | |
1468 | 96 /* Every function that sets the match data _must_ clear unused search |
97 registers on success. An unsuccessful search or match _must_ preserve | |
98 the search registers. The traditional documentation implied that | |
99 any match operation might trash the registers, but in fact failures | |
100 have always preserved the match data (in GNU Emacs as well). Some | |
101 plausible code depends on this behavior (cf. `w3-configuration-data' | |
102 in library "w3-cfg"). | |
103 | |
104 Ordinary string searchs use set_search_regs to set the whole-string | |
105 match. That function takes care of clearing the unused subexpression | |
1425 | 106 registers. |
107 */ | |
108 static void set_search_regs (struct buffer *buf, Charbpos beg, Charcount len); | |
1468 | 109 static void clear_search_regs (void); |
1425 | 110 |
428 | 111 /* The buffer in which the last search was performed, or |
112 Qt if the last search was done in a string; | |
113 Qnil if no searching has been done yet. */ | |
114 static Lisp_Object last_thing_searched; | |
115 | |
116 /* error condition signalled when regexp compile_pattern fails */ | |
117 | |
118 Lisp_Object Qinvalid_regexp; | |
119 | |
120 /* Regular expressions used in forward/backward-word */ | |
121 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp; | |
122 | |
507 | 123 Fixnum warn_about_possibly_incompatible_back_references; |
502 | 124 |
428 | 125 /* range table for use with skip_chars. Only needed for Mule. */ |
126 Lisp_Object Vskip_chars_range_table; | |
127 | |
867 | 128 static Charbpos simple_search (struct buffer *buf, Ibyte *base_pat, |
826 | 129 Bytecount len, Bytebpos pos, Bytebpos lim, |
130 EMACS_INT n, Lisp_Object trt); | |
867 | 131 static Charbpos boyer_moore (struct buffer *buf, Ibyte *base_pat, |
826 | 132 Bytecount len, Bytebpos pos, Bytebpos lim, |
133 EMACS_INT n, Lisp_Object trt, | |
134 Lisp_Object inverse_trt, int charset_base); | |
665 | 135 static Charbpos search_buffer (struct buffer *buf, Lisp_Object str, |
826 | 136 Charbpos charbpos, Charbpos buflim, EMACS_INT n, |
137 int RE, Lisp_Object trt, | |
138 Lisp_Object inverse_trt, int posix); | |
771 | 139 |
2268 | 140 static DECLARE_DOESNT_RETURN (matcher_overflow (void)); |
141 | |
142 static DOESNT_RETURN | |
143 matcher_overflow () | |
428 | 144 { |
563 | 145 stack_overflow ("Stack overflow in regexp matcher", Qunbound); |
428 | 146 } |
147 | |
148 /* Compile a regexp and signal a Lisp error if anything goes wrong. | |
149 PATTERN is the pattern to compile. | |
150 CP is the place to put the result. | |
826 | 151 TRANSLATE is a translation table for ignoring case, or Qnil for none. |
428 | 152 REGP is the structure that says where to store the "register" |
153 values that will result from matching this pattern. | |
154 If it is 0, we should compile the pattern not to record any | |
155 subexpression bounds. | |
156 POSIX is nonzero if we want full backtracking (POSIX style) | |
157 for this pattern. 0 means backtrack only enough to get a valid match. */ | |
158 | |
159 static int | |
160 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, | |
2286 | 161 struct re_registers *UNUSED (regp), Lisp_Object translate, |
826 | 162 int posix, Error_Behavior errb) |
428 | 163 { |
442 | 164 const char *val; |
428 | 165 reg_syntax_t old; |
166 | |
167 cp->regexp = Qnil; | |
168 cp->buf.translate = translate; | |
169 cp->posix = posix; | |
170 old = re_set_syntax (RE_SYNTAX_EMACS | |
171 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING)); | |
442 | 172 val = (const char *) |
428 | 173 re_compile_pattern ((char *) XSTRING_DATA (pattern), |
174 XSTRING_LENGTH (pattern), &cp->buf); | |
175 re_set_syntax (old); | |
176 if (val) | |
177 { | |
563 | 178 maybe_signal_error (Qinvalid_regexp, 0, build_string (val), |
428 | 179 Qsearch, errb); |
180 return 0; | |
181 } | |
182 | |
183 cp->regexp = Fcopy_sequence (pattern); | |
184 return 1; | |
185 } | |
186 | |
187 /* Compile a regexp if necessary, but first check to see if there's one in | |
188 the cache. | |
189 PATTERN is the pattern to compile. | |
826 | 190 TRANSLATE is a translation table for ignoring case, or Qnil for none. |
428 | 191 REGP is the structure that says where to store the "register" |
192 values that will result from matching this pattern. | |
193 If it is 0, we should compile the pattern not to record any | |
194 subexpression bounds. | |
195 POSIX is nonzero if we want full backtracking (POSIX style) | |
196 for this pattern. 0 means backtrack only enough to get a valid match. */ | |
197 | |
198 struct re_pattern_buffer * | |
199 compile_pattern (Lisp_Object pattern, struct re_registers *regp, | |
2286 | 200 Lisp_Object translate, Lisp_Object UNUSED (searchobj), |
201 struct buffer *UNUSED (searchbuf), int posix, | |
202 Error_Behavior errb) | |
428 | 203 { |
204 struct regexp_cache *cp, **cpp; | |
205 | |
206 for (cpp = &searchbuf_head; ; cpp = &cp->next) | |
207 { | |
208 cp = *cpp; | |
826 | 209 /* &&#### once we fix up the fastmap code in regex.c for 8-bit-fixed, |
210 we need to record and compare the buffer and format, since the | |
211 fastmap will reflect the state of the buffer -- and things get | |
212 more complicated if the buffer has changed formats or (esp.) has | |
213 kept the format but changed its interpretation! may need to have | |
214 the code that changes the interpretation go through and invalidate | |
215 cache entries for that buffer. */ | |
428 | 216 if (!NILP (Fstring_equal (cp->regexp, pattern)) |
446 | 217 && EQ (cp->buf.translate, translate) |
428 | 218 && cp->posix == posix) |
219 break; | |
220 | |
221 /* If we're at the end of the cache, compile into the last cell. */ | |
222 if (cp->next == 0) | |
223 { | |
826 | 224 if (!compile_pattern_1 (cp, pattern, regp, translate, |
225 posix, errb)) | |
428 | 226 return 0; |
227 break; | |
228 } | |
229 } | |
230 | |
231 /* When we get here, cp (aka *cpp) contains the compiled pattern, | |
232 either because we found it in the cache or because we just compiled it. | |
233 Move it to the front of the queue to mark it as most recently used. */ | |
234 *cpp = cp->next; | |
235 cp->next = searchbuf_head; | |
236 searchbuf_head = cp; | |
237 | |
238 /* Advise the searching functions about the space we have allocated | |
239 for register data. */ | |
240 if (regp) | |
241 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end); | |
242 | |
243 return &cp->buf; | |
244 } | |
245 | |
246 /* Error condition used for failing searches */ | |
247 Lisp_Object Qsearch_failed; | |
248 | |
2268 | 249 static DECLARE_DOESNT_RETURN (signal_failure (Lisp_Object)); |
250 | |
251 static DOESNT_RETURN | |
428 | 252 signal_failure (Lisp_Object arg) |
253 { | |
446 | 254 for (;;) |
255 Fsignal (Qsearch_failed, list1 (arg)); | |
428 | 256 } |
257 | |
826 | 258 /* Convert the search registers from Bytebpos's to Charbpos's. Needs to be |
428 | 259 done after each regexp match that uses the search regs. |
260 | |
261 We could get a potential speedup by not converting the search registers | |
262 until it's really necessary, e.g. when match-data or replace-match is | |
263 called. However, this complexifies the code a lot (e.g. the buffer | |
826 | 264 could have changed and the Bytebpos's stored might be invalid) and is |
428 | 265 probably not a great time-saver. */ |
266 | |
267 static void | |
268 fixup_search_regs_for_buffer (struct buffer *buf) | |
269 { | |
270 int i; | |
271 int num_regs = search_regs.num_regs; | |
272 | |
273 for (i = 0; i < num_regs; i++) | |
274 { | |
275 if (search_regs.start[i] >= 0) | |
826 | 276 search_regs.start[i] = bytebpos_to_charbpos (buf, |
277 search_regs.start[i]); | |
428 | 278 if (search_regs.end[i] >= 0) |
665 | 279 search_regs.end[i] = bytebpos_to_charbpos (buf, search_regs.end[i]); |
428 | 280 } |
281 } | |
282 | |
283 /* Similar but for strings. */ | |
284 static void | |
285 fixup_search_regs_for_string (Lisp_Object string) | |
286 { | |
287 int i; | |
288 int num_regs = search_regs.num_regs; | |
289 | |
290 /* #### bytecount_to_charcount() is not that efficient. This function | |
867 | 291 could be faster if it did its own conversion (using INC_IBYTEPTR() |
428 | 292 and such), because the register ends are likely to be somewhat ordered. |
293 (Even if not, you could sort them.) | |
294 | |
295 Think about this if this function is a time hog, which it's probably | |
296 not. */ | |
297 for (i = 0; i < num_regs; i++) | |
298 { | |
299 if (search_regs.start[i] > 0) | |
300 { | |
301 search_regs.start[i] = | |
793 | 302 string_index_byte_to_char (string, search_regs.start[i]); |
428 | 303 } |
304 if (search_regs.end[i] > 0) | |
305 { | |
306 search_regs.end[i] = | |
793 | 307 string_index_byte_to_char (string, search_regs.end[i]); |
428 | 308 } |
309 } | |
310 } | |
311 | |
312 | |
313 static Lisp_Object | |
314 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix) | |
315 { | |
316 Lisp_Object val; | |
665 | 317 Bytebpos p1, p2; |
428 | 318 Bytecount s1, s2; |
319 REGISTER int i; | |
320 struct re_pattern_buffer *bufp; | |
826 | 321 struct syntax_cache scache_struct; |
322 struct syntax_cache *scache = &scache_struct; | |
323 | |
428 | 324 CHECK_STRING (string); |
325 bufp = compile_pattern (string, &search_regs, | |
326 (!NILP (buf->case_fold_search) | |
446 | 327 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil), |
826 | 328 wrap_buffer (buf), buf, posix, ERROR_ME); |
428 | 329 |
330 QUIT; | |
331 | |
332 /* Get pointers and sizes of the two strings | |
333 that make up the visible portion of the buffer. */ | |
334 | |
826 | 335 p1 = BYTE_BUF_BEGV (buf); |
336 p2 = BYTE_BUF_CEILING_OF (buf, p1); | |
428 | 337 s1 = p2 - p1; |
826 | 338 s2 = BYTE_BUF_ZV (buf) - p2; |
339 | |
340 /* By making the regex object, regex buffer, and syntax cache arguments | |
341 to re_{search,match}{,_2}, we've removed the need to do nasty things | |
342 to deal with regex reentrancy. (See stack trace in signal.c for proof | |
343 that this can happen.) | |
344 | |
345 #### there is still a potential problem with the regex cache -- | |
346 the compiled regex could be overwritten. we'd need 20-fold | |
347 reentrancy, though. Fix this. */ | |
348 | |
349 i = re_match_2 (bufp, (char *) BYTE_BUF_BYTE_ADDRESS (buf, p1), | |
350 s1, (char *) BYTE_BUF_BYTE_ADDRESS (buf, p2), s2, | |
351 BYTE_BUF_PT (buf) - BYTE_BUF_BEGV (buf), &search_regs, | |
352 BYTE_BUF_ZV (buf) - BYTE_BUF_BEGV (buf), wrap_buffer (buf), | |
353 buf, scache); | |
428 | 354 |
355 if (i == -2) | |
356 matcher_overflow (); | |
357 | |
358 val = (0 <= i ? Qt : Qnil); | |
359 if (NILP (val)) | |
826 | 360 return Qnil; |
428 | 361 { |
362 int num_regs = search_regs.num_regs; | |
363 for (i = 0; i < num_regs; i++) | |
364 if (search_regs.start[i] >= 0) | |
365 { | |
826 | 366 search_regs.start[i] += BYTE_BUF_BEGV (buf); |
367 search_regs.end[i] += BYTE_BUF_BEGV (buf); | |
428 | 368 } |
369 } | |
793 | 370 last_thing_searched = wrap_buffer (buf); |
428 | 371 fixup_search_regs_for_buffer (buf); |
826 | 372 return val; |
428 | 373 } |
374 | |
375 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /* | |
376 Return t if text after point matches regular expression REGEXP. | |
1468 | 377 When the match is successful, this function modifies the match data |
378 that `match-beginning', `match-end' and `match-data' access; save the | |
379 match data with `match-data' and restore it with `store-match-data' if | |
380 you want to preserve them. If the match fails, the match data from the | |
381 previous success match is preserved. | |
428 | 382 |
383 Optional argument BUFFER defaults to the current buffer. | |
384 */ | |
385 (regexp, buffer)) | |
386 { | |
387 return looking_at_1 (regexp, decode_buffer (buffer, 0), 0); | |
388 } | |
389 | |
390 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /* | |
391 Return t if text after point matches regular expression REGEXP. | |
392 Find the longest match, in accord with Posix regular expression rules. | |
1468 | 393 When the match is successful, this function modifies the match data |
394 that `match-beginning', `match-end' and `match-data' access; save the | |
395 match data with `match-data' and restore it with `store-match-data' if | |
396 you want to preserve them. If the match fails, the match data from the | |
397 previous success match is preserved. | |
428 | 398 |
399 Optional argument BUFFER defaults to the current buffer. | |
400 */ | |
401 (regexp, buffer)) | |
402 { | |
826 | 403 return looking_at_1 (regexp, decode_buffer (buffer, 0), 1); |
428 | 404 } |
405 | |
406 static Lisp_Object | |
407 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, | |
2286 | 408 struct buffer *buf, int UNUSED (posix)) |
428 | 409 { |
410 Bytecount val; | |
411 Charcount s; | |
412 struct re_pattern_buffer *bufp; | |
413 | |
853 | 414 /* Some FSF junk with running_asynch_code, to preserve the match |
415 data. Not necessary because we don't call process filters | |
416 asynchronously (i.e. from within QUIT). */ | |
428 | 417 |
418 CHECK_STRING (regexp); | |
419 CHECK_STRING (string); | |
420 | |
421 if (NILP (start)) | |
422 s = 0; | |
423 else | |
424 { | |
826 | 425 Charcount len = string_char_length (string); |
428 | 426 |
427 CHECK_INT (start); | |
428 s = XINT (start); | |
429 if (s < 0 && -s <= len) | |
430 s = len + s; | |
431 else if (0 > s || s > len) | |
432 args_out_of_range (string, start); | |
433 } | |
434 | |
435 | |
436 bufp = compile_pattern (regexp, &search_regs, | |
437 (!NILP (buf->case_fold_search) | |
446 | 438 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil), |
826 | 439 string, buf, 0, ERROR_ME); |
428 | 440 QUIT; |
441 { | |
793 | 442 Bytecount bis = string_index_char_to_byte (string, s); |
826 | 443 struct syntax_cache scache_struct; |
444 struct syntax_cache *scache = &scache_struct; | |
445 | |
446 /* By making the regex object, regex buffer, and syntax cache arguments | |
447 to re_{search,match}{,_2}, we've removed the need to do nasty things | |
448 to deal with regex reentrancy. (See stack trace in signal.c for proof | |
449 that this can happen.) | |
450 | |
451 #### there is still a potential problem with the regex cache -- | |
452 the compiled regex could be overwritten. we'd need 20-fold | |
453 reentrancy, though. Fix this. */ | |
454 | |
428 | 455 val = re_search (bufp, (char *) XSTRING_DATA (string), |
456 XSTRING_LENGTH (string), bis, | |
457 XSTRING_LENGTH (string) - bis, | |
826 | 458 &search_regs, string, buf, scache); |
428 | 459 } |
460 if (val == -2) | |
461 matcher_overflow (); | |
826 | 462 if (val < 0) return Qnil; |
428 | 463 last_thing_searched = Qt; |
464 fixup_search_regs_for_string (string); | |
826 | 465 return make_int (string_index_byte_to_char (string, val)); |
428 | 466 } |
467 | |
468 DEFUN ("string-match", Fstring_match, 2, 4, 0, /* | |
469 Return index of start of first match for REGEXP in STRING, or nil. | |
470 If third arg START is non-nil, start search at that index in STRING. | |
471 For index of first char beyond the match, do (match-end 0). | |
472 `match-end' and `match-beginning' also give indices of substrings | |
473 matched by parenthesis constructs in the pattern. | |
474 | |
826 | 475 Optional arg BUFFER controls how case folding and syntax and category |
476 lookup is done (according to the value of `case-fold-search' in that buffer | |
477 and that buffer's case tables, syntax tables, and category table). If nil | |
478 or unspecified, it defaults *NOT* to the current buffer but instead: | |
479 | |
480 -- the value of `case-fold-search' in the current buffer is still respected | |
481 because of idioms like | |
482 | |
483 (let ((case-fold-search nil)) | |
484 (string-match "^foo.*bar" string)) | |
485 | |
486 but the case, syntax, and category tables come from the standard tables, | |
1468 | 487 which are accessed through functions `default-{case,syntax,category}-table' |
488 and serve as the parents of the tables in particular buffer. | |
489 | |
490 When the match is successful, this function modifies the match data | |
491 that `match-beginning', `match-end' and `match-data' access; save the | |
492 match data with `match-data' and restore it with `store-match-data' if | |
493 you want to preserve them. If the match fails, the match data from the | |
494 previous success match is preserved. | |
428 | 495 */ |
496 (regexp, string, start, buffer)) | |
497 { | |
826 | 498 /* &&#### implement new interp for buffer arg; check code to see if it |
499 makes more sense than prev */ | |
428 | 500 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0); |
501 } | |
502 | |
503 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /* | |
504 Return index of start of first match for REGEXP in STRING, or nil. | |
505 Find the longest match, in accord with Posix regular expression rules. | |
506 If third arg START is non-nil, start search at that index in STRING. | |
507 For index of first char beyond the match, do (match-end 0). | |
508 `match-end' and `match-beginning' also give indices of substrings | |
509 matched by parenthesis constructs in the pattern. | |
510 | |
511 Optional arg BUFFER controls how case folding is done (according to | |
512 the value of `case-fold-search' in that buffer and that buffer's case | |
513 tables) and defaults to the current buffer. | |
1468 | 514 |
515 When the match is successful, this function modifies the match data | |
516 that `match-beginning', `match-end' and `match-data' access; save the | |
517 match data with `match-data' and restore it with `store-match-data' if | |
518 you want to preserve them. If the match fails, the match data from the | |
519 previous success match is preserved. | |
428 | 520 */ |
521 (regexp, string, start, buffer)) | |
522 { | |
523 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1); | |
524 } | |
525 | |
526 /* Match REGEXP against STRING, searching all of STRING, | |
527 and return the index of the match, or negative on failure. | |
528 This does not clobber the match data. */ | |
529 | |
530 Bytecount | |
1347 | 531 fast_string_match (Lisp_Object regexp, const Ibyte *nonreloc, |
428 | 532 Lisp_Object reloc, Bytecount offset, |
533 Bytecount length, int case_fold_search, | |
578 | 534 Error_Behavior errb, int no_quit) |
428 | 535 { |
536 Bytecount val; | |
867 | 537 Ibyte *newnonreloc = (Ibyte *) nonreloc; |
428 | 538 struct re_pattern_buffer *bufp; |
826 | 539 struct syntax_cache scache_struct; |
540 struct syntax_cache *scache = &scache_struct; | |
428 | 541 |
542 bufp = compile_pattern (regexp, 0, | |
543 (case_fold_search | |
771 | 544 ? XCASE_TABLE_DOWNCASE (Vstandard_case_table) |
446 | 545 : Qnil), |
826 | 546 reloc, 0, 0, errb); |
428 | 547 if (!bufp) |
548 return -1; /* will only do this when errb != ERROR_ME */ | |
549 if (!no_quit) | |
550 QUIT; | |
551 else | |
552 no_quit_in_re_search = 1; | |
553 | |
554 fixup_internal_substring (nonreloc, reloc, offset, &length); | |
555 | |
771 | 556 /* Don't need to protect against GC inside of re_search() due to QUIT; |
557 QUIT is GC-inhibited. */ | |
428 | 558 if (!NILP (reloc)) |
771 | 559 newnonreloc = XSTRING_DATA (reloc); |
560 | |
826 | 561 /* By making the regex object, regex buffer, and syntax cache arguments |
562 to re_{search,match}{,_2}, we've removed the need to do nasty things | |
563 to deal with regex reentrancy. (See stack trace in signal.c for proof | |
564 that this can happen.) | |
565 | |
566 #### there is still a potential problem with the regex cache -- | |
567 the compiled regex could be overwritten. we'd need 20-fold | |
568 reentrancy, though. Fix this. */ | |
569 | |
428 | 570 val = re_search (bufp, (char *) newnonreloc + offset, length, 0, |
826 | 571 length, 0, reloc, 0, scache); |
428 | 572 |
573 no_quit_in_re_search = 0; | |
574 return val; | |
575 } | |
576 | |
577 Bytecount | |
578 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string) | |
579 { | |
580 return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0); | |
581 } | |
582 | |
583 | |
#ifdef REGION_CACHE_NEEDS_WORK
/* The newline cache: remembering which sections of text have no newlines. */

/* Bring BUF's newline cache in line with the user's request: create it
   if `cache_long_line_scans' is non-nil and no cache exists yet, free it
   if the variable is nil and a cache exists.
   This is our cheezy way of associating an action with the change of
   state of a buffer-local variable. */
static void
newline_cache_on_off (struct buffer *buf)
{
  int wanted = !NILP (buf->cache_long_line_scans);

  if (wanted)
    {
      /* It should be on. */
      if (buf->newline_cache == 0)
        buf->newline_cache = new_region_cache ();
      return;
    }

  /* It should be off. */
  if (buf->newline_cache)
    {
      free_region_cache (buf->newline_cache);
      buf->newline_cache = 0;
    }
}
#endif
611 | |
612 /* Search in BUF for COUNT instances of the character TARGET between | |
613 START and END. | |
614 | |
615 If COUNT is positive, search forwards; END must be >= START. | |
616 If COUNT is negative, search backwards for the -COUNTth instance; | |
617 END must be <= START. | |
618 If COUNT is zero, do anything you please; run rogue, for all I care. | |
619 | |
620 If END is zero, use BEGV or ZV instead, as appropriate for the | |
621 direction indicated by COUNT. | |
622 | |
623 If we find COUNT instances, set *SHORTAGE to zero, and return the | |
624 position after the COUNTth match. Note that for reverse motion | |
625 this is not the same as the usual convention for Emacs motion commands. | |
626 | |
627 If we don't find COUNT instances before reaching END, set *SHORTAGE | |
628 to the number of TARGETs left unfound, and return END. | |
629 | |
630 If ALLOW_QUIT is non-zero, call QUIT periodically. */ | |
631 | |
665 | 632 static Bytebpos |
867 | 633 byte_scan_buffer (struct buffer *buf, Ichar target, Bytebpos st, Bytebpos en, |
872 | 634 EMACS_INT count, EMACS_INT *shortage, int allow_quit) |
428 | 635 { |
665 | 636 Bytebpos lim = en > 0 ? en : |
826 | 637 ((count > 0) ? BYTE_BUF_ZV (buf) : BYTE_BUF_BEGV (buf)); |
428 | 638 |
639 /* #### newline cache stuff in this function not yet ported */ | |
640 assert (count != 0); | |
641 | |
642 if (shortage) | |
643 *shortage = 0; | |
644 | |
645 if (count > 0) | |
646 { | |
647 #ifdef MULE | |
826 | 648 Internal_Format fmt = buf->text->format; |
649 /* Check for char that's unrepresentable in the buffer -- it | |
650 certainly can't be there. */ | |
867 | 651 if (!ichar_fits_in_format (target, fmt, wrap_buffer (buf))) |
428 | 652 { |
826 | 653 *shortage = count; |
654 return lim; | |
655 } | |
656 /* Due to the Mule representation of characters in a buffer, we can | |
657 simply search for characters in the range 0 - 127 directly; for | |
658 8-bit-fixed, we can do this for all characters. In other cases, | |
659 we do it the "hard" way. Note that this way works for all | |
660 characters and all formats, but the other way is faster. */ | |
661 else if (! (fmt == FORMAT_8_BIT_FIXED || | |
867 | 662 (fmt == FORMAT_DEFAULT && ichar_ascii_p (target)))) |
826 | 663 { |
867 | 664 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
428 | 665 while (st < lim && count > 0) |
666 { | |
826 | 667 if (BYTE_BUF_FETCH_CHAR_RAW (buf, st) == raw) |
428 | 668 count--; |
665 | 669 INC_BYTEBPOS (buf, st); |
428 | 670 } |
671 } | |
672 else | |
673 #endif | |
674 { | |
867 | 675 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
428 | 676 while (st < lim && count > 0) |
677 { | |
665 | 678 Bytebpos ceil; |
867 | 679 Ibyte *bufptr; |
428 | 680 |
826 | 681 ceil = BYTE_BUF_CEILING_OF (buf, st); |
428 | 682 ceil = min (lim, ceil); |
867 | 683 bufptr = (Ibyte *) memchr (BYTE_BUF_BYTE_ADDRESS (buf, st), |
826 | 684 raw, ceil - st); |
428 | 685 if (bufptr) |
686 { | |
687 count--; | |
826 | 688 st = BYTE_BUF_PTR_BYTE_POS (buf, bufptr) + 1; |
428 | 689 } |
690 else | |
691 st = ceil; | |
692 } | |
693 } | |
694 | |
695 if (shortage) | |
696 *shortage = count; | |
697 if (allow_quit) | |
698 QUIT; | |
699 return st; | |
700 } | |
701 else | |
702 { | |
703 #ifdef MULE | |
826 | 704 Internal_Format fmt = buf->text->format; |
705 /* Check for char that's unrepresentable in the buffer -- it | |
706 certainly can't be there. */ | |
867 | 707 if (!ichar_fits_in_format (target, fmt, wrap_buffer (buf))) |
428 | 708 { |
826 | 709 *shortage = -count; |
710 return lim; | |
711 } | |
712 else if (! (fmt == FORMAT_8_BIT_FIXED || | |
867 | 713 (fmt == FORMAT_DEFAULT && ichar_ascii_p (target)))) |
826 | 714 { |
867 | 715 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
428 | 716 while (st > lim && count < 0) |
717 { | |
665 | 718 DEC_BYTEBPOS (buf, st); |
826 | 719 if (BYTE_BUF_FETCH_CHAR_RAW (buf, st) == raw) |
428 | 720 count++; |
721 } | |
722 } | |
723 else | |
724 #endif | |
725 { | |
867 | 726 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
428 | 727 while (st > lim && count < 0) |
728 { | |
665 | 729 Bytebpos floor; |
867 | 730 Ibyte *bufptr; |
731 Ibyte *floorptr; | |
428 | 732 |
826 | 733 floor = BYTE_BUF_FLOOR_OF (buf, st); |
428 | 734 floor = max (lim, floor); |
735 /* No memrchr() ... */ | |
826 | 736 bufptr = BYTE_BUF_BYTE_ADDRESS_BEFORE (buf, st); |
737 floorptr = BYTE_BUF_BYTE_ADDRESS (buf, floor); | |
428 | 738 while (bufptr >= floorptr) |
739 { | |
740 st--; | |
741 /* At this point, both ST and BUFPTR refer to the same | |
742 character. When the loop terminates, ST will | |
743 always point to the last character we tried. */ | |
867 | 744 if (*bufptr == (Ibyte) raw) |
428 | 745 { |
746 count++; | |
747 break; | |
748 } | |
749 bufptr--; | |
750 } | |
751 } | |
752 } | |
753 | |
754 if (shortage) | |
755 *shortage = -count; | |
756 if (allow_quit) | |
757 QUIT; | |
758 if (count) | |
759 return st; | |
760 else | |
761 { | |
762 /* We found the character we were looking for; we have to return | |
763 the position *after* it due to the strange way that the return | |
764 value is defined. */ | |
665 | 765 INC_BYTEBPOS (buf, st); |
428 | 766 return st; |
767 } | |
768 } | |
769 } | |
770 | |
665 | 771 Charbpos |
867 | 772 scan_buffer (struct buffer *buf, Ichar target, Charbpos start, Charbpos end, |
428 | 773 EMACS_INT count, EMACS_INT *shortage, int allow_quit) |
774 { | |
826 | 775 Bytebpos byte_retval; |
776 Bytebpos byte_start, byte_end; | |
777 | |
778 byte_start = charbpos_to_bytebpos (buf, start); | |
428 | 779 if (end) |
826 | 780 byte_end = charbpos_to_bytebpos (buf, end); |
428 | 781 else |
826 | 782 byte_end = 0; |
783 byte_retval = byte_scan_buffer (buf, target, byte_start, byte_end, count, | |
428 | 784 shortage, allow_quit); |
826 | 785 return bytebpos_to_charbpos (buf, byte_retval); |
428 | 786 } |
787 | |
665 | 788 Bytebpos |
826 | 789 byte_find_next_newline_no_quit (struct buffer *buf, Bytebpos from, int count) |
428 | 790 { |
826 | 791 return byte_scan_buffer (buf, '\n', from, 0, count, 0, 0); |
428 | 792 } |
793 | |
665 | 794 Charbpos |
795 find_next_newline_no_quit (struct buffer *buf, Charbpos from, int count) | |
428 | 796 { |
797 return scan_buffer (buf, '\n', from, 0, count, 0, 0); | |
798 } | |
799 | |
665 | 800 Charbpos |
801 find_next_newline (struct buffer *buf, Charbpos from, int count) | |
428 | 802 { |
803 return scan_buffer (buf, '\n', from, 0, count, 0, 1); | |
804 } | |
805 | |
826 | 806 Bytecount |
867 | 807 byte_find_next_ichar_in_string (Lisp_Object str, Ichar target, Bytecount st, |
428 | 808 EMACS_INT count) |
809 { | |
793 | 810 Bytebpos lim = XSTRING_LENGTH (str) -1; |
867 | 811 Ibyte *s = XSTRING_DATA (str); |
428 | 812 |
813 assert (count >= 0); | |
814 | |
815 #ifdef MULE | |
816 /* Due to the Mule representation of characters in a buffer, | |
817 we can simply search for characters in the range 0 - 127 | |
818 directly. For other characters, we do it the "hard" way. | |
819 Note that this way works for all characters but the other | |
820 way is faster. */ | |
821 if (target >= 0200) | |
822 { | |
823 while (st < lim && count > 0) | |
824 { | |
867 | 825 if (string_ichar (str, st) == target) |
428 | 826 count--; |
826 | 827 INC_BYTECOUNT (s, st); |
428 | 828 } |
829 } | |
830 else | |
831 #endif | |
832 { | |
833 while (st < lim && count > 0) | |
834 { | |
867 | 835 Ibyte *bufptr = (Ibyte *) memchr (itext_n_addr (s, st), |
428 | 836 (int) target, lim - st); |
837 if (bufptr) | |
838 { | |
839 count--; | |
826 | 840 st = (Bytebpos) (bufptr - s) + 1; |
428 | 841 } |
842 else | |
843 st = lim; | |
844 } | |
845 } | |
846 return st; | |
847 } | |
848 | |
849 /* Like find_next_newline, but returns position before the newline, | |
850 not after, and only search up to TO. This isn't just | |
851 find_next_newline (...)-1, because you might hit TO. */ | |
665 | 852 Charbpos |
826 | 853 find_before_next_newline (struct buffer *buf, Charbpos from, Charbpos to, |
854 int count) | |
428 | 855 { |
856 EMACS_INT shortage; | |
665 | 857 Charbpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1); |
428 | 858 |
859 if (shortage == 0) | |
860 pos--; | |
861 | |
862 return pos; | |
863 } | |
864 | |
872 | 865 /* This function synched with FSF 21.1 */ |
428 | 866 static Lisp_Object |
867 skip_chars (struct buffer *buf, int forwardp, int syntaxp, | |
868 Lisp_Object string, Lisp_Object lim) | |
869 { | |
867 | 870 REGISTER Ibyte *p, *pend; |
871 REGISTER Ichar c; | |
428 | 872 /* We store the first 256 chars in an array here and the rest in |
873 a range table. */ | |
874 unsigned char fastmap[0400]; | |
875 int negate = 0; | |
876 REGISTER int i; | |
665 | 877 Charbpos limit; |
826 | 878 struct syntax_cache *scache; |
879 | |
428 | 880 if (NILP (lim)) |
881 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf); | |
882 else | |
883 { | |
884 CHECK_INT_COERCE_MARKER (lim); | |
885 limit = XINT (lim); | |
886 | |
887 /* In any case, don't allow scan outside bounds of buffer. */ | |
888 if (limit > BUF_ZV (buf)) limit = BUF_ZV (buf); | |
889 if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf); | |
890 } | |
891 | |
892 CHECK_STRING (string); | |
893 p = XSTRING_DATA (string); | |
894 pend = p + XSTRING_LENGTH (string); | |
895 memset (fastmap, 0, sizeof (fastmap)); | |
896 | |
897 Fclear_range_table (Vskip_chars_range_table); | |
898 | |
899 if (p != pend && *p == '^') | |
900 { | |
901 negate = 1; | |
902 p++; | |
903 } | |
904 | |
905 /* Find the characters specified and set their elements of fastmap. | |
906 If syntaxp, each character counts as itself. | |
907 Otherwise, handle backslashes and ranges specially */ | |
908 | |
909 while (p != pend) | |
910 { | |
867 | 911 c = itext_ichar (p); |
912 INC_IBYTEPTR (p); | |
428 | 913 if (syntaxp) |
914 { | |
915 if (c < 0400 && syntax_spec_code[c] < (unsigned char) Smax) | |
916 fastmap[c] = 1; | |
917 else | |
831 | 918 invalid_argument ("Invalid syntax designator", make_char (c)); |
428 | 919 } |
920 else | |
921 { | |
922 if (c == '\\') | |
923 { | |
924 if (p == pend) break; | |
867 | 925 c = itext_ichar (p); |
926 INC_IBYTEPTR (p); | |
428 | 927 } |
928 if (p != pend && *p == '-') | |
929 { | |
867 | 930 Ichar cend; |
428 | 931 |
872 | 932 /* Skip over the dash. */ |
428 | 933 p++; |
934 if (p == pend) break; | |
867 | 935 cend = itext_ichar (p); |
428 | 936 while (c <= cend && c < 0400) |
937 { | |
938 fastmap[c] = 1; | |
939 c++; | |
940 } | |
941 if (c <= cend) | |
942 Fput_range_table (make_int (c), make_int (cend), Qt, | |
943 Vskip_chars_range_table); | |
867 | 944 INC_IBYTEPTR (p); |
428 | 945 } |
946 else | |
947 { | |
948 if (c < 0400) | |
949 fastmap[c] = 1; | |
950 else | |
951 Fput_range_table (make_int (c), make_int (c), Qt, | |
952 Vskip_chars_range_table); | |
953 } | |
954 } | |
955 } | |
956 | |
872 | 957 /* #### Not in FSF 21.1 */ |
428 | 958 if (syntaxp && fastmap['-'] != 0) |
959 fastmap[' '] = 1; | |
960 | |
961 /* If ^ was the first character, complement the fastmap. | |
962 We don't complement the range table, however; we just use negate | |
963 in the comparisons below. */ | |
964 | |
965 if (negate) | |
647 | 966 for (i = 0; i < (int) (sizeof (fastmap)); i++) |
428 | 967 fastmap[i] ^= 1; |
968 | |
969 { | |
665 | 970 Charbpos start_point = BUF_PT (buf); |
872 | 971 Charbpos pos = start_point; |
972 Charbpos pos_byte = BYTE_BUF_PT (buf); | |
428 | 973 |
974 if (syntaxp) | |
975 { | |
872 | 976 scache = setup_buffer_syntax_cache (buf, pos, forwardp ? 1 : -1); |
428 | 977 /* All syntax designators are normal chars so nothing strange |
978 to worry about */ | |
979 if (forwardp) | |
980 { | |
872 | 981 if (pos < limit) |
982 while (fastmap[(unsigned char) | |
983 syntax_code_spec | |
984 [(int) SYNTAX_FROM_CACHE | |
985 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) | |
986 { | |
987 pos++; | |
988 INC_BYTEBPOS (buf, pos_byte); | |
879 | 989 if (pos >= limit) |
872 | 990 break; |
991 UPDATE_SYNTAX_CACHE_FORWARD (scache, pos); | |
992 } | |
428 | 993 } |
994 else | |
995 { | |
872 | 996 while (pos > limit) |
460 | 997 { |
872 | 998 Charbpos savepos = pos_byte; |
999 pos--; | |
1000 DEC_BYTEBPOS (buf, pos_byte); | |
1001 UPDATE_SYNTAX_CACHE_BACKWARD (scache, pos); | |
1002 if (!fastmap[(unsigned char) | |
1003 syntax_code_spec | |
1004 [(int) SYNTAX_FROM_CACHE | |
1005 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) | |
1006 { | |
1007 pos++; | |
1008 pos_byte = savepos; | |
1009 break; | |
1010 } | |
460 | 1011 } |
428 | 1012 } |
1013 } | |
1014 else | |
1015 { | |
1016 if (forwardp) | |
1017 { | |
872 | 1018 while (pos < limit) |
428 | 1019 { |
872 | 1020 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
428 | 1021 if ((ch < 0400) ? fastmap[ch] : |
1022 (NILP (Fget_range_table (make_int (ch), | |
1023 Vskip_chars_range_table, | |
1024 Qnil)) | |
1025 == negate)) | |
872 | 1026 { |
1027 pos++; | |
1028 INC_BYTEBPOS (buf, pos_byte); | |
1029 } | |
428 | 1030 else |
1031 break; | |
1032 } | |
1033 } | |
1034 else | |
1035 { | |
872 | 1036 while (pos > limit) |
428 | 1037 { |
872 | 1038 Charbpos prev_pos_byte = pos_byte; |
1039 Ichar ch; | |
1040 | |
1041 DEC_BYTEBPOS (buf, prev_pos_byte); | |
1042 ch = BYTE_BUF_FETCH_CHAR (buf, prev_pos_byte); | |
428 | 1043 if ((ch < 0400) ? fastmap[ch] : |
1044 (NILP (Fget_range_table (make_int (ch), | |
1045 Vskip_chars_range_table, | |
1046 Qnil)) | |
1047 == negate)) | |
872 | 1048 { |
1049 pos--; | |
1050 pos_byte = prev_pos_byte; | |
1051 } | |
428 | 1052 else |
1053 break; | |
1054 } | |
1055 } | |
1056 } | |
1057 QUIT; | |
872 | 1058 BOTH_BUF_SET_PT (buf, pos, pos_byte); |
428 | 1059 return make_int (BUF_PT (buf) - start_point); |
1060 } | |
1061 } | |
1062 | |
1063 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /* | |
444 | 1064 Move point forward, stopping before a char not in STRING, or at pos LIMIT. |
428 | 1065 STRING is like the inside of a `[...]' in a regular expression |
1066 except that `]' is never special and `\\' quotes `^', `-' or `\\'. | |
1067 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter. | |
1068 With arg "^a-zA-Z", skips nonletters stopping before first letter. | |
1069 Returns the distance traveled, either zero or positive. | |
1070 | |
1071 Optional argument BUFFER defaults to the current buffer. | |
1072 */ | |
444 | 1073 (string, limit, buffer)) |
428 | 1074 { |
444 | 1075 return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit); |
428 | 1076 } |
1077 | |
1078 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /* | |
444 | 1079 Move point backward, stopping after a char not in STRING, or at pos LIMIT. |
428 | 1080 See `skip-chars-forward' for details. |
1081 Returns the distance traveled, either zero or negative. | |
1082 | |
1083 Optional argument BUFFER defaults to the current buffer. | |
1084 */ | |
444 | 1085 (string, limit, buffer)) |
428 | 1086 { |
444 | 1087 return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit); |
428 | 1088 } |
1089 | |
1090 | |
1091 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /* | |
1092 Move point forward across chars in specified syntax classes. | |
1093 SYNTAX is a string of syntax code characters. | |
444 | 1094 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT. |
428 | 1095 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX. |
1096 This function returns the distance traveled, either zero or positive. | |
1097 | |
1098 Optional argument BUFFER defaults to the current buffer. | |
1099 */ | |
444 | 1100 (syntax, limit, buffer)) |
428 | 1101 { |
444 | 1102 return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit); |
428 | 1103 } |
1104 | |
1105 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /* | |
1106 Move point backward across chars in specified syntax classes. | |
1107 SYNTAX is a string of syntax code characters. | |
444 | 1108 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT. |
428 | 1109 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX. |
1110 This function returns the distance traveled, either zero or negative. | |
1111 | |
1112 Optional argument BUFFER defaults to the current buffer. | |
1113 */ | |
444 | 1114 (syntax, limit, buffer)) |
428 | 1115 { |
444 | 1116 return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit); |
428 | 1117 } |
1118 | |
1119 | |
1120 /* Subroutines of Lisp buffer search functions. */ | |
1121 | |
1122 static Lisp_Object | |
444 | 1123 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror, |
428 | 1124 Lisp_Object count, Lisp_Object buffer, int direction, |
1125 int RE, int posix) | |
1126 { | |
665 | 1127 REGISTER Charbpos np; |
1128 Charbpos lim; | |
428 | 1129 EMACS_INT n = direction; |
1130 struct buffer *buf; | |
1131 | |
1132 if (!NILP (count)) | |
1133 { | |
1134 CHECK_INT (count); | |
1135 n *= XINT (count); | |
1136 } | |
1137 | |
1138 buf = decode_buffer (buffer, 0); | |
1139 CHECK_STRING (string); | |
444 | 1140 if (NILP (limit)) |
428 | 1141 lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf); |
1142 else | |
1143 { | |
444 | 1144 CHECK_INT_COERCE_MARKER (limit); |
1145 lim = XINT (limit); | |
428 | 1146 if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf)) |
563 | 1147 invalid_argument ("Invalid search limit (wrong side of point)", |
1148 Qunbound); | |
428 | 1149 if (lim > BUF_ZV (buf)) |
1150 lim = BUF_ZV (buf); | |
1151 if (lim < BUF_BEGV (buf)) | |
1152 lim = BUF_BEGV (buf); | |
1153 } | |
1154 | |
1155 np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE, | |
1156 (!NILP (buf->case_fold_search) | |
446 | 1157 ? XCASE_TABLE_CANON (buf->case_table) |
1158 : Qnil), | |
428 | 1159 (!NILP (buf->case_fold_search) |
446 | 1160 ? XCASE_TABLE_EQV (buf->case_table) |
1161 : Qnil), posix); | |
428 | 1162 |
1163 if (np <= 0) | |
1164 { | |
444 | 1165 if (NILP (noerror)) |
2268 | 1166 { |
1167 signal_failure (string); | |
1168 RETURN_NOT_REACHED (Qnil); | |
1169 } | |
444 | 1170 if (!EQ (noerror, Qt)) |
428 | 1171 { |
1172 if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf)) | |
2500 | 1173 ABORT (); |
428 | 1174 BUF_SET_PT (buf, lim); |
1175 return Qnil; | |
1176 #if 0 /* This would be clean, but maybe programs depend on | |
1177 a value of nil here. */ | |
1178 np = lim; | |
1179 #endif | |
1180 } | |
1181 else | |
1182 return Qnil; | |
1183 } | |
1184 | |
1185 if (np < BUF_BEGV (buf) || np > BUF_ZV (buf)) | |
2500 | 1186 ABORT (); |
428 | 1187 |
1188 BUF_SET_PT (buf, np); | |
1189 | |
1190 return make_int (np); | |
1191 } | |
1192 | |
1193 static int | |
1194 trivial_regexp_p (Lisp_Object regexp) | |
1195 { | |
1196 Bytecount len = XSTRING_LENGTH (regexp); | |
867 | 1197 Ibyte *s = XSTRING_DATA (regexp); |
428 | 1198 while (--len >= 0) |
1199 { | |
1200 switch (*s++) | |
1201 { | |
1724 | 1202 /* #### howcum ']' doesn't appear here, but ... */ |
428 | 1203 case '.': case '*': case '+': case '?': case '[': case '^': case '$': |
1204 return 0; | |
1205 case '\\': | |
1206 if (--len < 0) | |
1207 return 0; | |
1208 switch (*s++) | |
1209 { | |
1724 | 1210 /* ... ')' does appear here? ('<' and '>' can appear singly.) */ |
1211 /* #### are there other constructs to check? */ | |
428 | 1212 case '|': case '(': case ')': case '`': case '\'': case 'b': |
1213 case 'B': case '<': case '>': case 'w': case 'W': case 's': | |
1724 | 1214 case 'S': case '=': case '{': case '}': |
428 | 1215 #ifdef MULE |
1216 /* 97/2/25 jhod Added for category matches */ | |
1217 case 'c': case 'C': | |
1218 #endif /* MULE */ | |
1219 case '1': case '2': case '3': case '4': case '5': | |
1220 case '6': case '7': case '8': case '9': | |
1221 return 0; | |
1222 } | |
1223 } | |
1224 } | |
1225 return 1; | |
1226 } | |
1227 | |
1228 /* Search for the n'th occurrence of STRING in BUF, | |
665 | 1229 starting at position CHARBPOS and stopping at position BUFLIM, |
428 | 1230 treating PAT as a literal string if RE is false or as |
1231 a regular expression if RE is true. | |
1232 | |
1233 If N is positive, searching is forward and BUFLIM must be greater | |
665 | 1234 than CHARBPOS. |
428 | 1235 If N is negative, searching is backward and BUFLIM must be less |
665 | 1236 than CHARBPOS. |
428 | 1237 |
1238 Returns -x if only N-x occurrences found (x > 0), | |
1239 or else the position at the beginning of the Nth occurrence | |
1240 (if searching backward) or the end (if searching forward). | |
1241 | |
1242 POSIX is nonzero if we want full backtracking (POSIX style) | |
1243 for this pattern. 0 means backtrack only enough to get a valid match. */ | |
665 | 1244 static Charbpos |
1245 search_buffer (struct buffer *buf, Lisp_Object string, Charbpos charbpos, | |
1246 Charbpos buflim, EMACS_INT n, int RE, Lisp_Object trt, | |
446 | 1247 Lisp_Object inverse_trt, int posix) |
428 | 1248 { |
1249 Bytecount len = XSTRING_LENGTH (string); | |
867 | 1250 Ibyte *base_pat = XSTRING_DATA (string); |
428 | 1251 REGISTER EMACS_INT i, j; |
665 | 1252 Bytebpos p1, p2; |
428 | 1253 Bytecount s1, s2; |
665 | 1254 Bytebpos pos, lim; |
428 | 1255 |
853 | 1256 /* Some FSF junk with running_asynch_code, to preserve the match |
1257 data. Not necessary because we don't call process filters | |
1258 asynchronously (i.e. from within QUIT). */ | |
428 | 1259 |
1425 | 1260 /* Searching 0 times means noop---don't move, don't touch registers. */ |
1261 if (n == 0) | |
1262 return charbpos; | |
1263 | |
428 | 1264 /* Null string is found at starting position. */ |
1265 if (len == 0) | |
1266 { | |
665 | 1267 set_search_regs (buf, charbpos, 0); |
1268 return charbpos; | |
428 | 1269 } |
1270 | |
665 | 1271 pos = charbpos_to_bytebpos (buf, charbpos); |
1272 lim = charbpos_to_bytebpos (buf, buflim); | |
428 | 1273 if (RE && !trivial_regexp_p (string)) |
1274 { | |
1275 struct re_pattern_buffer *bufp; | |
826 | 1276 |
1277 bufp = compile_pattern (string, &search_regs, trt, | |
1278 wrap_buffer (buf), buf, posix, ERROR_ME); | |
428 | 1279 |
1280 /* Get pointers and sizes of the two strings | |
1281 that make up the visible portion of the buffer. */ | |
1282 | |
826 | 1283 p1 = BYTE_BUF_BEGV (buf); |
1284 p2 = BYTE_BUF_CEILING_OF (buf, p1); | |
428 | 1285 s1 = p2 - p1; |
826 | 1286 s2 = BYTE_BUF_ZV (buf) - p2; |
1287 | |
1288 while (n != 0) | |
428 | 1289 { |
1290 Bytecount val; | |
826 | 1291 struct syntax_cache scache_struct; |
1292 struct syntax_cache *scache = &scache_struct; | |
1293 | |
428 | 1294 QUIT; |
826 | 1295 /* By making the regex object, regex buffer, and syntax cache |
1296 arguments to re_{search,match}{,_2}, we've removed the need to | |
1297 do nasty things to deal with regex reentrancy. (See stack | |
1298 trace in signal.c for proof that this can happen.) | |
1299 | |
1300 #### there is still a potential problem with the regex cache -- | |
1301 the compiled regex could be overwritten. we'd need 20-fold | |
1302 reentrancy, though. Fix this. */ | |
1303 | |
428 | 1304 val = re_search_2 (bufp, |
826 | 1305 (char *) BYTE_BUF_BYTE_ADDRESS (buf, p1), s1, |
1306 (char *) BYTE_BUF_BYTE_ADDRESS (buf, p2), s2, | |
1307 pos - BYTE_BUF_BEGV (buf), lim - pos, &search_regs, | |
1308 n > 0 ? lim - BYTE_BUF_BEGV (buf) : | |
1309 pos - BYTE_BUF_BEGV (buf), wrap_buffer (buf), | |
1310 buf, scache); | |
428 | 1311 |
1312 if (val == -2) | |
1313 { | |
1314 matcher_overflow (); | |
1315 } | |
1316 if (val >= 0) | |
1317 { | |
1318 int num_regs = search_regs.num_regs; | |
826 | 1319 j = BYTE_BUF_BEGV (buf); |
428 | 1320 for (i = 0; i < num_regs; i++) |
1321 if (search_regs.start[i] >= 0) | |
1322 { | |
1323 search_regs.start[i] += j; | |
1324 search_regs.end[i] += j; | |
1325 } | |
793 | 1326 last_thing_searched = wrap_buffer (buf); |
428 | 1327 /* Set pos to the new position. */ |
826 | 1328 pos = n > 0 ? search_regs.end[0] : search_regs.start[0]; |
428 | 1329 fixup_search_regs_for_buffer (buf); |
665 | 1330 /* And charbpos too. */ |
826 | 1331 charbpos = n > 0 ? search_regs.end[0] : search_regs.start[0]; |
428 | 1332 } |
1333 else | |
826 | 1334 return (n > 0 ? 0 - n : n); |
1335 if (n > 0) n--; else n++; | |
428 | 1336 } |
665 | 1337 return charbpos; |
428 | 1338 } |
1339 else /* non-RE case */ | |
1340 { | |
446 | 1341 int charset_base = -1; |
1342 int boyer_moore_ok = 1; | |
867 | 1343 Ibyte *pat = 0; |
2367 | 1344 Ibyte *patbuf = alloca_ibytes (len * MAX_ICHAR_LEN); |
446 | 1345 pat = patbuf; |
1346 #ifdef MULE | |
826 | 1347 /* &&#### needs some 8-bit work here */ |
446 | 1348 while (len > 0) |
1349 { | |
867 | 1350 Ibyte tmp_str[MAX_ICHAR_LEN]; |
1351 Ichar c, translated, inverse; | |
446 | 1352 Bytecount orig_bytelen, new_bytelen, inv_bytelen; |
1353 | |
1354 /* If we got here and the RE flag is set, it's because | |
1355 we're dealing with a regexp known to be trivial, so the | |
1356 backslash just quotes the next character. */ | |
1357 if (RE && *base_pat == '\\') | |
1358 { | |
1359 len--; | |
1360 base_pat++; | |
1361 } | |
867 | 1362 c = itext_ichar (base_pat); |
446 | 1363 translated = TRANSLATE (trt, c); |
1364 inverse = TRANSLATE (inverse_trt, c); | |
1365 | |
867 | 1366 orig_bytelen = itext_ichar_len (base_pat); |
1367 inv_bytelen = set_itext_ichar (tmp_str, inverse); | |
1368 new_bytelen = set_itext_ichar (tmp_str, translated); | |
446 | 1369 |
1370 if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen) | |
1371 boyer_moore_ok = 0; | |
1372 if (translated != c || inverse != c) | |
1373 { | |
4322
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1374 /* Keep track of which charset and character set row |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1375 contains the characters that need translation. |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1376 Zero out the bits corresponding to the last byte. |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1377 */ |
867 | 1378 int charset_base_code = c & ~ICHAR_FIELD3_MASK; |
446 | 1379 if (charset_base == -1) |
1380 charset_base = charset_base_code; | |
1381 else if (charset_base != charset_base_code) | |
4322
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1382 /* If two different rows appear, needing translation, then |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1383 we cannot use boyer_moore search. See the comment at the |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1384 head of boyer_moore(). */ |
446 | 1385 boyer_moore_ok = 0; |
1386 } | |
1387 memcpy (pat, tmp_str, new_bytelen); | |
1388 pat += new_bytelen; | |
1389 base_pat += orig_bytelen; | |
1390 len -= orig_bytelen; | |
1391 } | |
1392 #else /* not MULE */ | |
1393 while (--len >= 0) | |
1394 { | |
1395 /* If we got here and the RE flag is set, it's because | |
1396 we're dealing with a regexp known to be trivial, so the | |
1397 backslash just quotes the next character. */ | |
1398 if (RE && *base_pat == '\\') | |
1399 { | |
1400 len--; | |
1401 base_pat++; | |
1402 } | |
1403 *pat++ = TRANSLATE (trt, *base_pat++); | |
1404 } | |
1405 #endif /* MULE */ | |
1406 len = pat - patbuf; | |
1407 pat = base_pat = patbuf; | |
1408 if (boyer_moore_ok) | |
1409 return boyer_moore (buf, base_pat, len, pos, lim, n, | |
1410 trt, inverse_trt, charset_base); | |
1411 else | |
1412 return simple_search (buf, base_pat, len, pos, lim, n, trt); | |
1413 } | |
1414 } | |
1415 | |
826 | 1416 /* Do a simple string search N times for the string PAT, whose length is |
1417 LEN/LEN_BYTE, from buffer position POS until LIM. TRT is the | |
1418 translation table. | |
446 | 1419 |
1420 Return the character position where the match is found. | |
1421 Otherwise, if M matches remained to be found, return -M. | |
1422 | |
1423 This kind of search works regardless of what is in PAT and | |
1424 regardless of what is in TRT. It is used in cases where | |
1425 boyer_moore cannot work. */ | |
1426 | |
665 | 1427 static Charbpos |
867 | 1428 simple_search (struct buffer *buf, Ibyte *base_pat, Bytecount len, |
826 | 1429 Bytebpos pos, Bytebpos lim, EMACS_INT n, Lisp_Object trt) |
446 | 1430 { |
1431 int forward = n > 0; | |
1432 Bytecount buf_len = 0; /* Shut up compiler. */ | |
1433 | |
826 | 1434 if (lim > pos) |
446 | 1435 while (n > 0) |
428 | 1436 { |
446 | 1437 while (1) |
428 | 1438 { |
826 | 1439 Bytecount this_len = len; |
1440 Bytebpos this_pos = pos; | |
867 | 1441 Ibyte *p = base_pat; |
826 | 1442 if (pos >= lim) |
446 | 1443 goto stop; |
1444 | |
1445 while (this_len > 0) | |
1446 { | |
867 | 1447 Ichar pat_ch, buf_ch; |
446 | 1448 Bytecount pat_len; |
1449 | |
867 | 1450 pat_ch = itext_ichar (p); |
826 | 1451 buf_ch = BYTE_BUF_FETCH_CHAR (buf, this_pos); |
446 | 1452 |
1453 buf_ch = TRANSLATE (trt, buf_ch); | |
1454 | |
1455 if (buf_ch != pat_ch) | |
1456 break; | |
1457 | |
867 | 1458 pat_len = itext_ichar_len (p); |
446 | 1459 p += pat_len; |
1460 this_len -= pat_len; | |
826 | 1461 INC_BYTEBPOS (buf, this_pos); |
446 | 1462 } |
1463 if (this_len == 0) | |
428 | 1464 { |
826 | 1465 buf_len = this_pos - pos; |
1466 pos = this_pos; | |
446 | 1467 break; |
428 | 1468 } |
826 | 1469 INC_BYTEBPOS (buf, pos); |
428 | 1470 } |
446 | 1471 n--; |
1472 } | |
1473 else | |
4322
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1474 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1475 /* If lim < len, then there are too few buffer positions to hold the |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1476 pattern between the beginning of the buffer and lim. Adjust to |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1477 ensure pattern fits. If we don't do this, we can assert in the |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1478 DEC_BYTEBPOS below. */ |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1479 if (lim < len) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1480 lim = len; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1481 while (n < 0) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1482 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1483 while (1) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1484 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1485 Bytecount this_len = len; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1486 Bytebpos this_pos = pos; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1487 Ibyte *p; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1488 if (pos <= lim) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1489 goto stop; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1490 p = base_pat + len; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1491 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1492 while (this_len > 0) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1493 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1494 Ichar pat_ch, buf_ch; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1495 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1496 DEC_IBYTEPTR (p); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1497 DEC_BYTEBPOS (buf, this_pos); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1498 pat_ch = itext_ichar (p); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1499 buf_ch = BYTE_BUF_FETCH_CHAR (buf, this_pos); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1500 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1501 buf_ch = TRANSLATE (trt, buf_ch); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1502 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1503 if (buf_ch != pat_ch) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1504 break; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1505 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1506 this_len -= itext_ichar_len (p); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1507 } |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1508 if (this_len == 0) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1509 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1510 buf_len = pos - this_pos; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1511 pos = this_pos; |
446 | 1512 break; |
4322
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1513 } |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1514 DEC_BYTEBPOS (buf, pos); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1515 } |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1516 n++; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1517 } |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1518 } |
446 | 1519 stop: |
1520 if (n == 0) | |
1521 { | |
665 | 1522 Charbpos beg, end, retval; |
446 | 1523 if (forward) |
1524 { | |
826 | 1525 beg = bytebpos_to_charbpos (buf, pos - buf_len); |
1526 retval = end = bytebpos_to_charbpos (buf, pos); | |
446 | 1527 } |
1528 else | |
428 | 1529 { |
826 | 1530 retval = beg = bytebpos_to_charbpos (buf, pos); |
1531 end = bytebpos_to_charbpos (buf, pos + buf_len); | |
428 | 1532 } |
446 | 1533 set_search_regs (buf, beg, end - beg); |
1534 | |
1535 return retval; | |
1536 } | |
1537 else if (n > 0) | |
1538 return -n; | |
1539 else | |
1540 return n; | |
1541 } | |
1542 | |
/* Do Boyer-Moore search N times for the string BASE_PAT, whose length
   is LEN bytes, in buffer BUF, starting from byte position POS and
   going no further than byte position LIM.
   The sign of N says which direction we search in (positive means
   forward, otherwise backward); N is stepped toward zero by one for
   each match found.
   TRT and INVERSE_TRT are translation tables.
   CHARSET_BASE is only used under MULE: a pattern character is
   translated only when its value with ICHAR_FIELD3_MASK cleared equals
   CHARSET_BASE.

   This kind of search works if all the characters in PAT that have
   nontrivial translation are the same aside from the last byte. This
   makes it possible to translate just the last byte of a character,
   and do so after just a simple test of the context.

   If that criterion is not satisfied, do not call this function.

   When the final requested match is found, the match is recorded with
   set_search_regs () and the return value is search_regs.end[0] for a
   forward search or search_regs.start[0] for a backward search.  When
   the search region is exhausted first, the return value is negative:
   minus the absolute value of the count of matches still outstanding. */

static Charbpos
boyer_moore (struct buffer *buf, Ibyte *base_pat, Bytecount len,
             Bytebpos pos, Bytebpos lim, EMACS_INT n, Lisp_Object trt,
             Lisp_Object inverse_trt, int USED_IF_MULE (charset_base))
{
  /* &&#### needs some 8-bit work here */
  /* #### Someone really really really needs to comment the workings
     of this junk somewhat better.

     BTW "BM" stands for Boyer-Moore, which is one of the standard
     string-searching algorithms. It's the best string-searching
     algorithm out there, provided that:

     a) You're not fazed by algorithm complexity. (Rabin-Karp, which
     uses hashing, is much much easier to code but not as fast.)
     b) You can freely move backwards in the string that you're
     searching through.

     As the comment below tries to explain (but garbles in typical
     programmer-ese), the idea is that you don't have to do a
     string match at every successive position in the text. For
     example, let's say the pattern is "a very long string". We
     compare the last character in the string (`g') with the
     corresponding character in the text. If it mismatches, and
     it is, say, `z', then we can skip forward by the entire
     length of the pattern because `z' does not occur anywhere
     in the pattern. If the mismatching character does occur
     in the pattern, we can usually still skip forward by more
     than one: e.g. if it is `l', then we can skip forward
     by the length of the substring "ong string" -- i.e. the
     largest end section of the pattern that does not contain
     the mismatched character. So what we do is compute, for
     each possible character, the distance we can skip forward
     (the "stride") and use it in the string matching. This
     is what the BM_tab holds. */
  REGISTER EMACS_INT *BM_tab;
  EMACS_INT *BM_tab_base;
  REGISTER Bytecount dirlen;
  EMACS_INT infinity;
  Bytebpos limit;
  Bytecount stride_for_teases = 0;
  REGISTER EMACS_INT i, j;
  Ibyte *pat, *pat_end;
  REGISTER Ibyte *cursor, *p_limit, *ptr2;
  Ibyte simple_translate[0400];
  REGISTER int direction = ((n > 0) ? 1 : -1);
#ifdef MULE
  Ibyte translate_prev_byte = 0;
  Ibyte translate_anteprev_byte = 0;
#endif
#ifdef C_ALLOCA
  EMACS_INT BM_tab_space[0400];
  BM_tab = &BM_tab_space[0];
#else
  BM_tab = alloca_array (EMACS_INT, 256);
#endif

  /* The general approach is that we are going to maintain that we
     know the first (closest to the present position, in whatever
     direction we're searching) character that could possibly be
     the last (furthest from present position) character of a
     valid match. We advance the state of our knowledge by
     looking at that character and seeing whether it indeed
     matches the last character of the pattern. If it does, we
     take a closer look. If it does not, we move our pointer (to
     putative last characters) as far as is logically possible.
     This amount of movement, which I call a stride, will be the
     length of the pattern if the actual character appears nowhere
     in the pattern, otherwise it will be the distance from the
     last occurrence of that character to the end of the pattern.
     As a coding trick, an enormous stride is coded into the table
     for characters that match the last character. This allows
     use of only a single test, a test for having gone past the
     end of the permissible match region, to test for both
     possible matches (when the stride goes past the end
     immediately) and failure to match (where you get nudged past
     the end one stride at a time).

     Here we make a "mickey mouse" BM table. The stride of the
     search is determined only by the last character of the
     putative match. If that character does not match, we will
     stride the proper distance to propose a match that
     superimposes it on the last instance of a character that
     matches it (per trt), or misses it entirely if there is
     none. */

  /* DIRLEN is the pattern length carrying the sign of the search
     direction. */
  dirlen = len * direction;
  infinity = dirlen - (lim + pos + len + len) * direction;
  /* Record position after the end of the pattern. */
  pat_end = base_pat + len;
  /* For a backward search the pattern is walked from its last byte. */
  if (direction < 0)
    base_pat = pat_end - 1;
  BM_tab_base = BM_tab;
  BM_tab += 0400;
  j = dirlen; /* to get it in a register */
  /* A character that does not appear in the pattern induces a
     stride equal to the pattern length. */
  /* The table is filled from its end back to BM_tab_base, four entries
     per iteration. */
  while (BM_tab_base != BM_tab)
    {
      *--BM_tab = j;
      *--BM_tab = j;
      *--BM_tab = j;
      *--BM_tab = j;
    }
  /* We use this for translation, instead of TRT itself. We
     fill this in to handle the characters that actually occur
     in the pattern. Others don't matter anyway! */
  /* NOTE(review): the xzero looks redundant, since the loop right after
     it overwrites every entry with the identity mapping. */
  xzero (simple_translate);
  for (i = 0; i < 0400; i++)
    simple_translate[i] = (Ibyte) i;
  i = 0;

  /* Walk the pattern one byte at a time in the search direction.  I is
     replaced by INFINITY once the final byte has been reached, which
     both marks that byte and terminates the loop. */
  while (i != infinity)
    {
      Ibyte *ptr = base_pat + i;
      i += direction;
      if (i == dirlen)
        i = infinity;
      if (!NILP (trt))
        {
#ifdef MULE
          Ichar ch, untranslated;
          int this_translated = 1;

          /* Is *PTR the last byte of a character? */
          if (pat_end - ptr == 1 || ibyte_first_byte_p (ptr[1]))
            {
              Ibyte *charstart = ptr;
              while (!ibyte_first_byte_p (*charstart))
                charstart--;
              untranslated = itext_ichar (charstart);
              if (charset_base == (untranslated & ~ICHAR_FIELD3_MASK))
                {
                  ch = TRANSLATE (trt, untranslated);
                  /* Remember the byte(s) preceding the translated last
                     byte; the match loops below compare against them
                     before applying simple_translate. */
                  if (!ibyte_first_byte_p (*ptr))
                    {
                      translate_prev_byte = ptr[-1];
                      if (!ibyte_first_byte_p (translate_prev_byte))
                        translate_anteprev_byte = ptr[-2];
                    }
                }
              else
                {
                  this_translated = 0;
                  ch = *ptr;
                }
            }
          else
            {
              ch = *ptr;
              this_translated = 0;
            }
          if (ch > 0400)
            j = ((unsigned char) ch | 0200);
          else
            j = (unsigned char) ch;

          if (i == infinity)
            stride_for_teases = BM_tab[j];
          BM_tab[j] = dirlen - i;
          /* A translation table is accompanied by its inverse --
             see comment in casetab.c. */
          if (this_translated)
            {
              Ichar starting_ch = ch;
              EMACS_INT starting_j = j;
              while (1)
                {
                  ch = TRANSLATE (inverse_trt, ch);
                  if (ch > 0400)
                    j = ((unsigned char) ch | 0200);
                  else
                    j = (unsigned char) ch;

                  /* For all the characters that map into CH,
                     set up simple_translate to map the last byte
                     into STARTING_J. */
                  simple_translate[j] = (Ibyte) starting_j;
                  if (ch == starting_ch)
                    break;
                  BM_tab[j] = dirlen - i;
                }
            }
#else
          EMACS_INT k;
          j = *ptr;
          k = (j = TRANSLATE (trt, j));
          if (i == infinity)
            stride_for_teases = BM_tab[j];
          BM_tab[j] = dirlen - i;
          /* A translation table is accompanied by its inverse --
             see comment in casetab.c. */
          while ((j = TRANSLATE (inverse_trt, j)) != k)
            {
              simple_translate[j] = (Ibyte) k;
              BM_tab[j] = dirlen - i;
            }
#endif
        }
      else
        {
          j = *ptr;

          if (i == infinity)
            stride_for_teases = BM_tab[j];
          BM_tab[j] = dirlen - i;
        }
      /* stride_for_teases tells how much to stride if we get a
         match on the far character but are subsequently
         disappointed, by recording what the stride would have been
         for that character if the last character had been
         different. */
    }
  /* From here on INFINITY equals the stride that was stored in BM_tab
     for the final pattern byte (dirlen minus the old value), so
     subtracting it undoes that jump. */
  infinity = dirlen - infinity;
  pos += dirlen - ((direction > 0) ? direction : 0);
  /* loop invariant - pos points at where last char (first char if
     reverse) of pattern would align in a possible match. */
  while (n != 0)
    {
      Bytebpos tail_end;
      Ibyte *tail_end_ptr;
      /* It's been reported that some (broken) compiler thinks
         that Boolean expressions in an arithmetic context are
         unsigned. Using an explicit ?1:0 prevents this. */
      if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
        return n * (0 - direction);
      /* First we do the part we can by pointers (maybe
         nothing) */
      QUIT;
      pat = base_pat;
      limit = pos - dirlen + direction;
      /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
         have changed. See buffer.h. */
      limit = ((direction > 0)
               ? BYTE_BUF_CEILING_OF (buf, limit) - 1
               : BYTE_BUF_FLOOR_OF (buf, limit + 1));
      /* LIMIT is now the last (not beyond-last!) value POS can
         take on without hitting edge of buffer or the gap. */
      limit = ((direction > 0)
               ? min (lim - 1, min (limit, pos + 20000))
               : max (lim, max (limit, pos - 20000)));
      tail_end = BYTE_BUF_CEILING_OF (buf, pos);
      tail_end_ptr = BYTE_BUF_BYTE_ADDRESS (buf, tail_end);

      if ((limit - pos) * direction > 20)
        {
          /* We have to be careful because the code can generate addresses
             that don't point to the beginning of characters. */
          p_limit = BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, limit);
          ptr2 = (cursor = BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos));
          /* In this loop, pos + cursor - ptr2 is the surrogate
             for pos */
          while (1) /* use one cursor setting as long as i can */
            {
              if (direction > 0) /* worth duplicating */
                {
                  /* Use signed comparison if appropriate to make
                     cursor+infinity sure to be > p_limit.
                     Assuming that the buffer lies in a range of
                     addresses that are all "positive" (as ints)
                     or all "negative", either kind of comparison
                     will work as long as we don't step by
                     infinity. So pick the kind that works when
                     we do step by infinity. */
                  if ((EMACS_INT) (p_limit + infinity) >
                      (EMACS_INT) p_limit)
                    while ((EMACS_INT) cursor <=
                           (EMACS_INT) p_limit)
                      cursor += BM_tab[*cursor];
                  else
                    while ((EMACS_UINT) cursor <=
                           (EMACS_UINT) p_limit)
                      cursor += BM_tab[*cursor];
                }
              else
                {
                  if ((EMACS_INT) (p_limit + infinity) <
                      (EMACS_INT) p_limit)
                    while ((EMACS_INT) cursor >=
                           (EMACS_INT) p_limit)
                      cursor += BM_tab[*cursor];
                  else
                    while ((EMACS_UINT) cursor >=
                           (EMACS_UINT) p_limit)
                      cursor += BM_tab[*cursor];
                }
              /* If you are here, cursor is beyond the end of the
                 searched region. This can happen if you match on
                 the far character of the pattern, because the
                 "stride" of that character is infinity, a number
                 able to throw you well beyond the end of the
                 search. It can also happen if you fail to match
                 within the permitted region and would otherwise
                 try a character beyond that region */
              if ((cursor - p_limit) * direction <= len)
                break; /* a small overrun is genuine */
              cursor -= infinity; /* large overrun = hit */
              i = dirlen - direction;
              if (!NILP (trt))
                {
                  while ((i -= direction) + direction != 0)
                    {
#ifdef MULE
                      Ichar ch;
                      cursor -= direction;
                      /* Translate only the last byte of a character. */
                      if ((cursor == tail_end_ptr
                           || ibyte_first_byte_p (cursor[1]))
                          && (ibyte_first_byte_p (cursor[0])
                              || (translate_prev_byte == cursor[-1]
                                  && (ibyte_first_byte_p (translate_prev_byte)
                                      || translate_anteprev_byte == cursor[-2]))))
                        ch = simple_translate[*cursor];
                      else
                        ch = *cursor;
                      if (pat[i] != ch)
                        break;
#else
                      if (pat[i] != TRANSLATE (trt, *(cursor -= direction)))
                        break;
#endif
                    }
                }
              else
                {
                  while ((i -= direction) + direction != 0)
                    if (pat[i] != *(cursor -= direction))
                      break;
                }
              cursor += dirlen - i - direction; /* fix cursor */
              /* The comparison loops above leave I + DIRECTION at zero
                 only when they ran to completion without a mismatch. */
              if (i + direction == 0)
                {
                  cursor -= direction;

                  {
                    Bytebpos bytstart = (pos + cursor - ptr2 +
                                         ((direction > 0)
                                          ? 1 - len : 0));
                    Charbpos bufstart = bytebpos_to_charbpos (buf, bytstart);
                    Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len);

                    set_search_regs (buf, bufstart, bufend - bufstart);
                  }

                  if ((n -= direction) != 0)
                    cursor += dirlen; /* to resume search */
                  else
                    return ((direction > 0)
                            ? search_regs.end[0] : search_regs.start[0]);
                }
              else
                cursor += stride_for_teases; /* <sigh> we lose - */
            }
          pos += cursor - ptr2;
        }
      else
        /* Now we'll pick up a clump that has to be done the hard
           way because it covers a discontinuity */
        {
          /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
             have changed. See buffer.h. */
          limit = ((direction > 0)
                   ? BYTE_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
                   : BYTE_BUF_FLOOR_OF (buf, pos - dirlen));
          limit = ((direction > 0)
                   ? min (limit + len, lim - 1)
                   : max (limit - len, lim));
          /* LIMIT is now the last value POS can have
             and still be valid for a possible match. */
          while (1)
            {
              /* This loop can be coded for space rather than
                 speed because it will usually run only once.
                 (the reach is at most len + 21, and typically
                 does not exceed len) */
              while ((limit - pos) * direction >= 0)
                /* *not* BYTE_BUF_FETCH_CHAR. We are working here
                   with bytes, not characters. */
                pos += BM_tab[*BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos)];
              /* now run the same tests to distinguish going off
                 the end, a match or a phony match. */
              if ((pos - limit) * direction <= len)
                break; /* ran off the end */
              /* Found what might be a match.
                 Set POS back to last (first if reverse) char pos. */
              pos -= infinity;
              i = dirlen - direction;
              while ((i -= direction) + direction != 0)
                {
#ifdef MULE
                  Ichar ch;
                  Ibyte *ptr;
#endif
                  pos -= direction;
#ifdef MULE
                  ptr = BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos);
                  if ((ptr == tail_end_ptr
                       || ibyte_first_byte_p (ptr[1]))
                      && (ibyte_first_byte_p (ptr[0])
                          || (translate_prev_byte == ptr[-1]
                              && (ibyte_first_byte_p (translate_prev_byte)
                                  || translate_anteprev_byte == ptr[-2]))))
                    ch = simple_translate[*ptr];
                  else
                    ch = *ptr;
                  if (pat[i] != ch)
                    break;

#else
                  if (pat[i] !=
                      TRANSLATE (trt,
                                 *BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos)))
                    break;
#endif
                }
              /* Above loop has moved POS part or all the way back
                 to the first char pos (last char pos if reverse).
                 Set it once again at the last (first if reverse)
                 char. */
              pos += dirlen - i- direction;
              if (i + direction == 0)
                {
                  pos -= direction;

                  {
                    Bytebpos bytstart = (pos +
                                         ((direction > 0)
                                          ? 1 - len : 0));
                    Charbpos bufstart = bytebpos_to_charbpos (buf, bytstart);
                    Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len);

                    set_search_regs (buf, bufstart, bufend - bufstart);
                  }

                  if ((n -= direction) != 0)
                    pos += dirlen; /* to resume search */
                  else
                    return ((direction > 0)
                            ? search_regs.end[0] : search_regs.start[0]);
                }
              else
                pos += stride_for_teases;
            }
        }
      /* We have done one clump. Can we continue? */
      if ((lim - pos) * direction < 0)
        return (0 - n) * direction;
    }
  /* Only reached when N was already zero on entry to the loop. */
  return bytebpos_to_charbpos (buf, pos);
}
2006 | |
1024 | 2007 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the |
2008 buffer for a match just found. */ | |
428 | 2009 |
2010 static void | |
665 | 2011 set_search_regs (struct buffer *buf, Charbpos beg, Charcount len) |
428 | 2012 { |
2013 /* Make sure we have registers in which to store | |
2014 the match position. */ | |
2015 if (search_regs.num_regs == 0) | |
2016 { | |
2017 search_regs.start = xnew (regoff_t); | |
2018 search_regs.end = xnew (regoff_t); | |
2019 search_regs.num_regs = 1; | |
2020 } | |
2021 | |
1468 | 2022 clear_search_regs (); |
428 | 2023 search_regs.start[0] = beg; |
2024 search_regs.end[0] = beg + len; | |
793 | 2025 last_thing_searched = wrap_buffer (buf); |
428 | 2026 } |
2027 | |
1468 | 2028 /* Clear search registers so match data will be null. */ |
1024 | 2029 |
2030 static void | |
1468 | 2031 clear_search_regs (void) |
1024 | 2032 { |
2033 /* This function has been Mule-ized. */ | |
2034 int i; | |
2035 | |
1468 | 2036 for (i = 0; i < search_regs.num_regs; i++) |
2037 search_regs.start[i] = search_regs.end[i] = -1; | |
1024 | 2038 } |
2039 | |
428 | 2040 |
2041 /* Given a string of words separated by word delimiters, | |
442 | 2042 compute a regexp that matches those exact words |
2043 separated by arbitrary punctuation. */ | |
428 | 2044 |
2045 static Lisp_Object | |
2046 wordify (Lisp_Object buffer, Lisp_Object string) | |
2047 { | |
2048 Charcount i, len; | |
2049 EMACS_INT punct_count = 0, word_count = 0; | |
2050 struct buffer *buf = decode_buffer (buffer, 0); | |
826 | 2051 Lisp_Object syntax_table = buf->mirror_syntax_table; |
428 | 2052 |
2053 CHECK_STRING (string); | |
826 | 2054 len = string_char_length (string); |
428 | 2055 |
2056 for (i = 0; i < len; i++) | |
867 | 2057 if (!WORD_SYNTAX_P (syntax_table, string_ichar (string, i))) |
428 | 2058 { |
2059 punct_count++; | |
2060 if (i > 0 && WORD_SYNTAX_P (syntax_table, | |
867 | 2061 string_ichar (string, i - 1))) |
428 | 2062 word_count++; |
2063 } | |
867 | 2064 if (WORD_SYNTAX_P (syntax_table, string_ichar (string, len - 1))) |
428 | 2065 word_count++; |
2066 if (!word_count) return build_string (""); | |
2067 | |
2068 { | |
2069 /* The following value is an upper bound on the amount of storage we | |
2070 need. In non-Mule, it is exact. */ | |
867 | 2071 Ibyte *storage = |
2367 | 2072 alloca_ibytes (XSTRING_LENGTH (string) - punct_count + |
428 | 2073 5 * (word_count - 1) + 4); |
867 | 2074 Ibyte *o = storage; |
428 | 2075 |
2076 *o++ = '\\'; | |
2077 *o++ = 'b'; | |
2078 | |
2079 for (i = 0; i < len; i++) | |
2080 { | |
867 | 2081 Ichar ch = string_ichar (string, i); |
428 | 2082 |
2083 if (WORD_SYNTAX_P (syntax_table, ch)) | |
867 | 2084 o += set_itext_ichar (o, ch); |
428 | 2085 else if (i > 0 |
2086 && WORD_SYNTAX_P (syntax_table, | |
867 | 2087 string_ichar (string, i - 1)) |
428 | 2088 && --word_count) |
2089 { | |
2090 *o++ = '\\'; | |
2091 *o++ = 'W'; | |
2092 *o++ = '\\'; | |
2093 *o++ = 'W'; | |
2094 *o++ = '*'; | |
2095 } | |
2096 } | |
2097 | |
2098 *o++ = '\\'; | |
2099 *o++ = 'b'; | |
2100 | |
2101 return make_string (storage, o - storage); | |
2102 } | |
2103 } | |
2104 | |
DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /*
Search backward from point for STRING.
Set point to the beginning of the occurrence found, and return point.

Optional second argument LIMIT bounds the search; it is a buffer
position. The match found must not extend before that position.
The value nil is equivalent to (point-min).

Optional third argument NOERROR, if t, means just return nil (no
error) if the search fails. If neither nil nor t, set point to LIMIT
and return nil.

Optional fourth argument COUNT is a repeat count--search for
successive occurrences.

Optional fifth argument BUFFER specifies the buffer to search in and
defaults to the current buffer.

When the match is successful, this function modifies the match data
that `match-beginning', `match-end' and `match-data' access; save the
match data with `match-data' and restore it with `store-match-data' if
you want to preserve it. If the match fails, the match data from the
previous successful match is preserved.

See also the function `replace-match'.
*/
       (string, limit, noerror, count, buffer))
{
  /* NOTE(review): judging by the sibling search commands, the last three
     arguments look like direction, regexp flag and POSIX flag -- confirm
     against search_command. */
  return search_command (string, limit, noerror, count, buffer, -1, 0, 0);
}
2135 | |
DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /*
Search forward from point for STRING.
Set point to the end of the occurrence found, and return point.

Optional second argument LIMIT bounds the search; it is a buffer
position. The match found must not extend after that position. The
value nil is equivalent to (point-max).

Optional third argument NOERROR, if t, means just return nil (no
error) if the search fails. If neither nil nor t, set point to LIMIT
and return nil.

Optional fourth argument COUNT is a repeat count--search for
successive occurrences.

Optional fifth argument BUFFER specifies the buffer to search in and
defaults to the current buffer.

When the match is successful, this function modifies the match data
that `match-beginning', `match-end' and `match-data' access; save the
match data with `match-data' and restore it with `store-match-data' if
you want to preserve it. If the match fails, the match data from the
previous successful match is preserved.

See also the function `replace-match'.
*/
       (string, limit, noerror, count, buffer))
{
  /* NOTE(review): judging by the sibling search commands, the last three
     arguments look like direction, regexp flag and POSIX flag -- confirm
     against search_command. */
  return search_command (string, limit, noerror, count, buffer, 1, 0, 0);
}
2166 | |
DEFUN ("word-search-backward", Fword_search_backward, 1, 5,
       "sWord search backward: ", /*
Search backward from point for STRING, ignoring differences in punctuation.
Set point to the beginning of the occurrence found, and return point.

Optional second argument LIMIT bounds the search; it is a buffer
position. The match found must not extend before that position.
The value nil is equivalent to (point-min).

Optional third argument NOERROR, if t, means just return nil (no
error) if the search fails. If neither nil nor t, set point to LIMIT
and return nil.

Optional fourth argument COUNT is a repeat count--search for
successive occurrences.

Optional fifth argument BUFFER specifies the buffer to search in and
defaults to the current buffer.

When the match is successful, this function modifies the match data
that `match-beginning', `match-end' and `match-data' access; save the
match data with `match-data' and restore it with `store-match-data' if
you want to preserve it. If the match fails, the match data from the
previous successful match is preserved.

See also the function `replace-match'.
*/
       (string, limit, noerror, count, buffer))
{
  /* STRING is first turned into a regexp by wordify ().
     NOTE(review): judging by the sibling search commands, the last three
     arguments look like direction, regexp flag and POSIX flag -- confirm
     against search_command. */
  return search_command (wordify (buffer, string), limit, noerror, count,
                         buffer, -1, 1, 0);
}
2199 | |
DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /*
Search forward from point for STRING, ignoring differences in punctuation.
Set point to the end of the occurrence found, and return point.

Optional second argument LIMIT bounds the search; it is a buffer
position. The match found must not extend after that position. The
value nil is equivalent to (point-max).

Optional third argument NOERROR, if t, means just return nil (no
error) if the search fails. If neither nil nor t, set point to LIMIT
and return nil.

Optional fourth argument COUNT is a repeat count--search for
successive occurrences.

Optional fifth argument BUFFER specifies the buffer to search in and
defaults to the current buffer.

When the match is successful, this function modifies the match data
that `match-beginning', `match-end' and `match-data' access; save the
match data with `match-data' and restore it with `store-match-data' if
you want to preserve it. If the match fails, the match data from the
previous successful match is preserved.

See also the function `replace-match'.
*/
       (string, limit, noerror, count, buffer))
{
  /* STRING is first turned into a regexp by wordify ().
     NOTE(review): judging by the sibling search commands, the last three
     arguments look like direction, regexp flag and POSIX flag -- confirm
     against search_command. */
  return search_command (wordify (buffer, string), limit, noerror, count,
                         buffer, 1, 1, 0);
}
2231 | |
DEFUN ("re-search-backward", Fre_search_backward, 1, 5,
       "sRE search backward: ", /*
Search backward from point for match for regular expression REGEXP.
Set point to the beginning of the match, and return point.
The match found is the one starting last in the buffer
and yet ending before the origin of the search.

Optional second argument LIMIT bounds the search; it is a buffer
position. The match found must not extend before that position.
The value nil is equivalent to (point-min).

Optional third argument NOERROR, if t, means just return nil (no
error) if the search fails. If neither nil nor t, set point to LIMIT
and return nil.

Optional fourth argument COUNT is a repeat count--search for
successive occurrences.

Optional fifth argument BUFFER specifies the buffer to search in and
defaults to the current buffer.

When the match is successful, this function modifies the match data
that `match-beginning', `match-end' and `match-data' access; save the
match data with `match-data' and restore it with `store-match-data' if
you want to preserve it. If the match fails, the match data from the
previous successful match is preserved.

See also the function `replace-match'.
*/
       (regexp, limit, noerror, count, buffer))
{
  /* NOTE(review): judging by the sibling search commands, the last three
     arguments look like direction, regexp flag and POSIX flag -- confirm
     against search_command. */
  return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0);
}
2265 | |
DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /*
Search forward from point for regular expression REGEXP.
Set point to the end of the occurrence found, and return point.

Optional second argument LIMIT bounds the search; it is a buffer
position. The match found must not extend after that position. The
value nil is equivalent to (point-max).

Optional third argument NOERROR, if t, means just return nil (no
error) if the search fails. If neither nil nor t, set point to LIMIT
and return nil.

Optional fourth argument COUNT is a repeat count--search for
successive occurrences.

Optional fifth argument BUFFER specifies the buffer to search in and
defaults to the current buffer.

When the match is successful, this function modifies the match data
that `match-beginning', `match-end' and `match-data' access; save the
match data with `match-data' and restore it with `store-match-data' if
you want to preserve it. If the match fails, the match data from the
previous successful match is preserved.

See also the function `replace-match'.
*/
       (regexp, limit, noerror, count, buffer))
{
  /* NOTE(review): judging by the sibling search commands, the last three
     arguments look like direction, regexp flag and POSIX flag -- confirm
     against search_command. */
  return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0);
}
2296 | |
DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5,
       "sPosix search backward: ", /*
Search backward from point for match for regular expression REGEXP.
Find the longest match in accord with Posix regular expression rules.
Set point to the beginning of the match, and return point.
The match found is the one starting last in the buffer
and yet ending before the origin of the search.

Optional second argument LIMIT bounds the search; it is a buffer
position. The match found must not extend before that position.
The value nil is equivalent to (point-min).

Optional third argument NOERROR, if t, means just return nil (no
error) if the search fails. If neither nil nor t, set point to LIMIT
and return nil.

Optional fourth argument COUNT is a repeat count--search for
successive occurrences.

Optional fifth argument BUFFER specifies the buffer to search in and
defaults to the current buffer.

When the match is successful, this function modifies the match data
that `match-beginning', `match-end' and `match-data' access; save the
match data with `match-data' and restore it with `store-match-data' if
you want to preserve it. If the match fails, the match data from the
previous successful match is preserved.

See also the function `replace-match'.
*/
       (regexp, limit, noerror, count, buffer))
{
  /* NOTE(review): judging by the sibling search commands, the last three
     arguments look like direction, regexp flag and POSIX flag -- confirm
     against search_command. */
  return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1);
}
2331 | |
DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /*
Search forward from point for regular expression REGEXP.
Find the longest match in accord with Posix regular expression rules.
Set point to the end of the occurrence found, and return point.

Optional second argument LIMIT bounds the search; it is a buffer
position. The match found must not extend after that position. The
value nil is equivalent to (point-max).

Optional third argument NOERROR, if t, means just return nil (no
error) if the search fails. If neither nil nor t, set point to LIMIT
and return nil.

Optional fourth argument COUNT is a repeat count--search for
successive occurrences.

Optional fifth argument BUFFER specifies the buffer to search in and
defaults to the current buffer.

When the match is successful, this function modifies the match data
that `match-beginning', `match-end' and `match-data' access; save the
match data with `match-data' and restore it with `store-match-data' if
you want to preserve it. If the match fails, the match data from the
previous successful match is preserved.

See also the function `replace-match'.
*/
       (regexp, limit, noerror, count, buffer))
{
  /* NOTE(review): judging by the sibling search commands, the last three
     arguments look like direction, regexp flag and POSIX flag -- confirm
     against search_command. */
  return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1);
}
2363 | |
2364 | |
2365 static Lisp_Object | |
2366 free_created_dynarrs (Lisp_Object cons) | |
2367 { | |
2368 Dynarr_free (get_opaque_ptr (XCAR (cons))); | |
2369 Dynarr_free (get_opaque_ptr (XCDR (cons))); | |
2370 free_opaque_ptr (XCAR (cons)); | |
2371 free_opaque_ptr (XCDR (cons)); | |
853 | 2372 free_cons (cons); |
428 | 2373 return Qnil; |
2374 } | |
2375 | |
2376 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /* | |
444 | 2377 Replace text matched by last search with REPLACEMENT. |
4199 | 2378 Leaves point at end of replacement text. |
2379 Optional boolean FIXEDCASE inhibits matching case of REPLACEMENT to source. | |
2380 Optional boolean LITERAL inhibits interpretation of escape sequences. | |
2381 Optional STRING provides the source text to replace. | |
2382 Optional STRBUFFER may be a buffer, providing match context, or an integer | |
2383 specifying the subexpression to replace. | |
2384 | |
2385 If FIXEDCASE is non-nil, do not alter case of replacement text. | |
428 | 2386 Otherwise maybe capitalize the whole text, or maybe just word initials, |
2387 based on the replaced text. | |
4199 | 2388 If the replaced text has only capital letters and has at least one |
2389 multiletter word, convert REPLACEMENT to all caps. | |
428 | 2390 If the replaced text has at least one word starting with a capital letter, |
444 | 2391 then capitalize each word in REPLACEMENT. |
428 | 2392 |
4199 | 2393 If LITERAL is non-nil, insert REPLACEMENT literally. |
428 | 2394 Otherwise treat `\\' as special: |
444 | 2395 `\\&' in REPLACEMENT means substitute original matched text. |
428 | 2396 `\\N' means substitute what matched the Nth `\\(...\\)'. |
2397 If Nth parens didn't match, substitute nothing. | |
2398 `\\\\' means insert one `\\'. | |
2399 `\\u' means upcase the next character. | |
2400 `\\l' means downcase the next character. | |
2401 `\\U' means begin upcasing all following characters. | |
2402 `\\L' means begin downcasing all following characters. | |
2403 `\\E' means terminate the effect of any `\\U' or `\\L'. | |
2404 Case changes made with `\\u', `\\l', `\\U', and `\\L' override | |
2405 all other case changes that may be made in the replaced text. | |
4199 | 2406 |
2407 If non-nil, STRING is the source string, and a new string with the specified | |
2408 replacements is created and returned. Otherwise the current buffer is the | |
2409 source text. | |
2410 | |
2411 If non-nil, STRBUFFER may be an integer, interpreted as the index of the | |
2412 subexpression to replace in the source text, or a buffer to provide the | |
2413 syntax table and case table. If nil, then the \"subexpression\" is 0, i.e., | |
2414 the whole match, and the current buffer provides the syntax and case tables. | |
2415 If STRING is nil, STRBUFFER must be nil or an integer. | |
2416 | |
2417 Specifying a subexpression is only useful after a regular expression match, | |
2418 since a fixed string search has no non-trivial subexpressions. | |
2419 | |
2420 It is not possible to specify both a buffer and a subexpression. If that is | |
2421 desired, the idiom `(with-current-buffer BUFFER (replace-match ... INTEGER))' | |
2422 may be appropriate. | |
2423 | |
2424 If STRING is nil but the last thing matched (or searched) was a string, or | |
2425 STRING is a string but the last thing matched was a buffer, an | |
2426 `invalid-argument' error will be signaled. (XEmacs does not check that the | |
2427 last thing searched is the source string, but it is not useful to use a | |
2428 different string as source.) | |
2429 | |
2430 If no match (including searches) has been successful or the requested | |
1468 | 2431 subexpression was not matched, an `args-out-of-range' error will be |
2432 signaled. (If no match has ever been conducted in this instance of | |
2433 XEmacs, an `invalid-operation' error will be signaled. This is very | |
2434 rare.) | |
428 | 2435 */ |
444 | 2436 (replacement, fixedcase, literal, string, strbuffer)) |
428 | 2437 { |
2438 /* This function can GC */ | |
2439 enum { nochange, all_caps, cap_initial } case_action; | |
665 | 2440 Charbpos pos, last; |
428 | 2441 int some_multiletter_word; |
2442 int some_lowercase; | |
2443 int some_uppercase; | |
2444 int some_nonuppercase_initial; | |
867 | 2445 Ichar c, prevc; |
428 | 2446 Charcount inslen; |
2447 struct buffer *buf; | |
826 | 2448 Lisp_Object syntax_table; |
428 | 2449 int mc_count; |
2450 Lisp_Object buffer; | |
2451 int_dynarr *ul_action_dynarr = 0; | |
2452 int_dynarr *ul_pos_dynarr = 0; | |
502 | 2453 int sub = 0; |
428 | 2454 int speccount; |
2455 | |
444 | 2456 CHECK_STRING (replacement); |
428 | 2457 |
4199 | 2458 /* Because GNU decided to be incompatible here, we support the following |
2459 baroque and bogus API for the STRING and STRBUFFER arguments: | |
2460 types interpretations | |
2461 STRING STRBUFFER STRING STRBUFFER | |
2462 nil nil none 0 = index of subexpression to replace | |
2463 nil integer none index of subexpression to replace | |
2464 nil other ***** error ***** | |
2465 string nil source current buffer provides syntax table | |
2466 subexpression = 0 (whole match) | |
2467 string buffer source buffer providing syntax table | |
2468 subexpression = 0 (whole match) | |
2469 string integer source current buffer provides syntax table | |
2470 subexpression = STRBUFFER | |
2471 string other ***** error ***** | |
2472 */ | |
2473 | |
2474 /* Do STRBUFFER first; if STRING is nil, we'll overwrite BUF and BUFFER. */ | |
2475 | |
2476 /* If the match data were abstracted into a special "match data" type | |
2477 instead of the typical half-assed "let the implementation be visible" | |
2478 form it's in, we could extend it to include the last string matched | |
2479 and the buffer used for that matching. But of course we can't change | |
2480 it as it is. | |
2481 */ | |
2482 if (NILP (strbuffer) || BUFFERP (strbuffer)) | |
2483 { | |
2484 buf = decode_buffer (strbuffer, 0); | |
2485 } | |
2486 else if (!NILP (strbuffer)) | |
2487 { | |
2488 CHECK_INT (strbuffer); | |
2489 sub = XINT (strbuffer); | |
2490 if (sub < 0 || sub >= (int) search_regs.num_regs) | |
2491 invalid_argument ("match data register invalid", strbuffer); | |
2492 if (search_regs.start[sub] < 0) | |
2493 invalid_argument ("match data register not set", strbuffer); | |
2494 buf = current_buffer; | |
2495 } | |
2496 else | |
2497 invalid_argument ("STRBUFFER must be nil, a buffer, or an integer", | |
2498 strbuffer); | |
2499 buffer = wrap_buffer (buf); | |
2500 | |
428 | 2501 if (! NILP (string)) |
2502 { | |
2503 CHECK_STRING (string); | |
2504 if (!EQ (last_thing_searched, Qt)) | |
4199 | 2505 invalid_argument ("last thing matched was not a string", Qunbound); |
428 | 2506 } |
2507 else | |
2508 { | |
2509 if (!BUFFERP (last_thing_searched)) | |
4199 | 2510 invalid_argument ("last thing matched was not a buffer", Qunbound); |
428 | 2511 buffer = last_thing_searched; |
2512 buf = XBUFFER (buffer); | |
2513 } | |
2514 | |
826 | 2515 syntax_table = buf->mirror_syntax_table; |
428 | 2516 |
2517 case_action = nochange; /* We tried an initialization */ | |
2518 /* but some C compilers blew it */ | |
2519 | |
2520 if (search_regs.num_regs == 0) | |
826 | 2521 signal_error (Qinvalid_operation, |
2522 "replace-match called before any match found", Qunbound); | |
428 | 2523 |
2524 if (NILP (string)) | |
2525 { | |
469 | 2526 if (search_regs.start[sub] < BUF_BEGV (buf) |
2527 || search_regs.start[sub] > search_regs.end[sub] | |
2528 || search_regs.end[sub] > BUF_ZV (buf)) | |
2529 args_out_of_range (make_int (search_regs.start[sub]), | |
2530 make_int (search_regs.end[sub])); | |
428 | 2531 } |
2532 else | |
2533 { | |
2534 if (search_regs.start[0] < 0 | |
2535 || search_regs.start[0] > search_regs.end[0] | |
826 | 2536 || search_regs.end[0] > string_char_length (string)) |
428 | 2537 args_out_of_range (make_int (search_regs.start[0]), |
2538 make_int (search_regs.end[0])); | |
2539 } | |
2540 | |
2541 if (NILP (fixedcase)) | |
2542 { | |
2543 /* Decide how to casify by examining the matched text. */ | |
2544 | |
707 | 2545 last = search_regs.end[sub]; |
428 | 2546 prevc = '\n'; |
2547 case_action = all_caps; | |
2548 | |
2549 /* some_multiletter_word is set nonzero if any original word | |
2550 is more than one letter long. */ | |
2551 some_multiletter_word = 0; | |
2552 some_lowercase = 0; | |
2553 some_nonuppercase_initial = 0; | |
2554 some_uppercase = 0; | |
2555 | |
707 | 2556 for (pos = search_regs.start[sub]; pos < last; pos++) |
428 | 2557 { |
2558 if (NILP (string)) | |
2559 c = BUF_FETCH_CHAR (buf, pos); | |
2560 else | |
867 | 2561 c = string_ichar (string, pos); |
428 | 2562 |
2563 if (LOWERCASEP (buf, c)) | |
2564 { | |
2565 /* Cannot be all caps if any original char is lower case */ | |
2566 | |
2567 some_lowercase = 1; | |
2568 if (!WORD_SYNTAX_P (syntax_table, prevc)) | |
2569 some_nonuppercase_initial = 1; | |
2570 else | |
2571 some_multiletter_word = 1; | |
2572 } | |
2573 else if (!NOCASEP (buf, c)) | |
2574 { | |
2575 some_uppercase = 1; | |
2576 if (!WORD_SYNTAX_P (syntax_table, prevc)) | |
2577 ; | |
2578 else | |
2579 some_multiletter_word = 1; | |
2580 } | |
2581 else | |
2582 { | |
2583 /* If the initial is a caseless word constituent, | |
2584 treat that like a lowercase initial. */ | |
2585 if (!WORD_SYNTAX_P (syntax_table, prevc)) | |
2586 some_nonuppercase_initial = 1; | |
2587 } | |
2588 | |
2589 prevc = c; | |
2590 } | |
2591 | |
2592 /* Convert to all caps if the old text is all caps | |
2593 and has at least one multiletter word. */ | |
2594 if (! some_lowercase && some_multiletter_word) | |
2595 case_action = all_caps; | |
2596 /* Capitalize each word, if the old text has all capitalized words. */ | |
2597 else if (!some_nonuppercase_initial && some_multiletter_word) | |
2598 case_action = cap_initial; | |
2599 else if (!some_nonuppercase_initial && some_uppercase) | |
2600 /* Should x -> yz, operating on X, give Yz or YZ? | |
2601 We'll assume the latter. */ | |
2602 case_action = all_caps; | |
2603 else | |
2604 case_action = nochange; | |
2605 } | |
2606 | |
2607 /* Do replacement in a string. */ | |
2608 if (!NILP (string)) | |
2609 { | |
2610 Lisp_Object before, after; | |
2611 | |
2612 speccount = specpdl_depth (); | |
4199 | 2613 before = Fsubstring (string, Qzero, make_int (search_regs.start[sub])); |
2614 after = Fsubstring (string, make_int (search_regs.end[sub]), Qnil); | |
428 | 2615 |
444 | 2616 /* Do case substitution into REPLACEMENT if desired. */ |
428 | 2617 if (NILP (literal)) |
2618 { | |
826 | 2619 Charcount stlen = string_char_length (replacement); |
428 | 2620 Charcount strpos; |
2621 /* XEmacs change: rewrote this loop somewhat to make it | |
2622 cleaner. Also added \U, \E, etc. */ | |
2623 Charcount literal_start = 0; | |
2624 /* We build up the substituted string in ACCUM. */ | |
2625 Lisp_Object accum; | |
2626 | |
2627 accum = Qnil; | |
2628 | |
2629 /* OK, the basic idea here is that we scan through the | |
2630 replacement string until we find a backslash, which | |
2631 represents a substring of the original string to be | |
2632 substituted. We then append onto ACCUM the literal | |
2633 text before the backslash (LASTPOS marks the | |
2634 beginning of this) followed by the substring of the | |
2635 original string that needs to be inserted. */ | |
2636 for (strpos = 0; strpos < stlen; strpos++) | |
2637 { | |
2638 /* If LITERAL_END is set, we've encountered a backslash | |
2639 (the end of literal text to be inserted). */ | |
2640 Charcount literal_end = -1; | |
2641 /* If SUBSTART is set, we need to also insert the | |
2642 text from SUBSTART to SUBEND in the original string. */ | |
2643 Charcount substart = -1; | |
2644 Charcount subend = -1; | |
2645 | |
867 | 2646 c = string_ichar (replacement, strpos); |
428 | 2647 if (c == '\\' && strpos < stlen - 1) |
2648 { | |
867 | 2649 c = string_ichar (replacement, ++strpos); |
428 | 2650 if (c == '&') |
2651 { | |
2652 literal_end = strpos - 1; | |
2653 substart = search_regs.start[0]; | |
2654 subend = search_regs.end[0]; | |
2655 } | |
4199 | 2656 /* #### This logic is totally broken, |
2657 since we can have backrefs like "\99", right? */ | |
428 | 2658 else if (c >= '1' && c <= '9' && |
2659 c <= search_regs.num_regs + '0') | |
2660 { | |
2661 if (search_regs.start[c - '0'] >= 0) | |
2662 { | |
2663 literal_end = strpos - 1; | |
2664 substart = search_regs.start[c - '0']; | |
2665 subend = search_regs.end[c - '0']; | |
2666 } | |
2667 } | |
2668 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' || | |
2669 c == 'E') | |
2670 { | |
2671 /* Keep track of all case changes requested, but don't | |
2672 make them now. Do them later so we override | |
2673 everything else. */ | |
2674 if (!ul_pos_dynarr) | |
2675 { | |
2676 ul_pos_dynarr = Dynarr_new (int); | |
2677 ul_action_dynarr = Dynarr_new (int); | |
2678 record_unwind_protect | |
2679 (free_created_dynarrs, | |
2680 noseeum_cons | |
2681 (make_opaque_ptr (ul_pos_dynarr), | |
2682 make_opaque_ptr (ul_action_dynarr))); | |
2683 } | |
2684 literal_end = strpos - 1; | |
2685 Dynarr_add (ul_pos_dynarr, | |
2686 (!NILP (accum) | |
826 | 2687 ? string_char_length (accum) |
428 | 2688 : 0) + (literal_end - literal_start)); |
2689 Dynarr_add (ul_action_dynarr, c); | |
2690 } | |
2691 else if (c == '\\') | |
2692 /* So we get just one backslash. */ | |
2693 literal_end = strpos; | |
2694 } | |
2695 if (literal_end >= 0) | |
2696 { | |
2697 Lisp_Object literal_text = Qnil; | |
2698 Lisp_Object substring = Qnil; | |
2699 if (literal_end != literal_start) | |
444 | 2700 literal_text = Fsubstring (replacement, |
428 | 2701 make_int (literal_start), |
2702 make_int (literal_end)); | |
2703 if (substart >= 0 && subend != substart) | |
2704 substring = Fsubstring (string, | |
2705 make_int (substart), | |
2706 make_int (subend)); | |
2707 if (!NILP (literal_text) || !NILP (substring)) | |
2708 accum = concat3 (accum, literal_text, substring); | |
2709 literal_start = strpos + 1; | |
2710 } | |
2711 } | |
2712 | |
2713 if (strpos != literal_start) | |
2714 /* some literal text at end to be inserted */ | |
444 | 2715 replacement = concat2 (accum, Fsubstring (replacement, |
2716 make_int (literal_start), | |
2717 make_int (strpos))); | |
428 | 2718 else |
444 | 2719 replacement = accum; |
428 | 2720 } |
2721 | |
444 | 2722 /* replacement can be nil. */ |
2723 if (NILP (replacement)) | |
2724 replacement = build_string (""); | |
2725 | |
428 | 2726 if (case_action == all_caps) |
444 | 2727 replacement = Fupcase (replacement, buffer); |
428 | 2728 else if (case_action == cap_initial) |
444 | 2729 replacement = Fupcase_initials (replacement, buffer); |
428 | 2730 |
2731 /* Now finally, we need to process the \U's, \E's, etc. */ | |
2732 if (ul_pos_dynarr) | |
2733 { | |
2734 int i = 0; | |
2735 int cur_action = 'E'; | |
826 | 2736 Charcount stlen = string_char_length (replacement); |
428 | 2737 Charcount strpos; |
2738 | |
2739 for (strpos = 0; strpos < stlen; strpos++) | |
2740 { | |
867 | 2741 Ichar curchar = string_ichar (replacement, strpos); |
2742 Ichar newchar = -1; | |
428 | 2743 if (i < Dynarr_length (ul_pos_dynarr) && |
2744 strpos == Dynarr_at (ul_pos_dynarr, i)) | |
2745 { | |
2746 int new_action = Dynarr_at (ul_action_dynarr, i); | |
2747 i++; | |
2748 if (new_action == 'u') | |
2749 newchar = UPCASE (buf, curchar); | |
2750 else if (new_action == 'l') | |
2751 newchar = DOWNCASE (buf, curchar); | |
2752 else | |
2753 cur_action = new_action; | |
2754 } | |
2755 if (newchar == -1) | |
2756 { | |
2757 if (cur_action == 'U') | |
2758 newchar = UPCASE (buf, curchar); | |
2759 else if (cur_action == 'L') | |
2760 newchar = DOWNCASE (buf, curchar); | |
2761 else | |
2762 newchar = curchar; | |
2763 } | |
2764 if (newchar != curchar) | |
793 | 2765 set_string_char (replacement, strpos, newchar); |
428 | 2766 } |
2767 } | |
2768 | |
2769 /* frees the Dynarrs if necessary. */ | |
771 | 2770 unbind_to (speccount); |
444 | 2771 return concat3 (before, replacement, after); |
428 | 2772 } |
2773 | |
707 | 2774 mc_count = begin_multiple_change (buf, search_regs.start[sub], |
2775 search_regs.end[sub]); | |
428 | 2776 |
2777 /* begin_multiple_change() records an unwind-protect, so we need to | |
2778 record this value now. */ | |
2779 speccount = specpdl_depth (); | |
2780 | |
2781 /* We insert the replacement text before the old text, and then | |
2782 delete the original text. This means that markers at the | |
2783 beginning or end of the original will float to the corresponding | |
2784 position in the replacement. */ | |
707 | 2785 BUF_SET_PT (buf, search_regs.start[sub]); |
428 | 2786 if (!NILP (literal)) |
444 | 2787 Finsert (1, &replacement); |
428 | 2788 else |
2789 { | |
826 | 2790 Charcount stlen = string_char_length (replacement); |
428 | 2791 Charcount strpos; |
2792 struct gcpro gcpro1; | |
444 | 2793 GCPRO1 (replacement); |
428 | 2794 for (strpos = 0; strpos < stlen; strpos++) |
2795 { | |
707 | 2796 /* on the first iteration assert(offset==0), |
2797 exactly complementing BUF_SET_PT() above. | |
2798 During the loop, it keeps track of the amount inserted. | |
2799 */ | |
2800 Charcount offset = BUF_PT (buf) - search_regs.start[sub]; | |
428 | 2801 |
867 | 2802 c = string_ichar (replacement, strpos); |
428 | 2803 if (c == '\\' && strpos < stlen - 1) |
2804 { | |
707 | 2805 /* XXX FIXME: replacing just a substring non-literally |
2806 using backslash refs to the match looks dangerous. But | |
2807 <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe | |
2808 <duwe@caldera.de> claims Finsert_buffer_substring already | |
2809 handles this correctly. | |
2810 */ | |
867 | 2811 c = string_ichar (replacement, ++strpos); |
428 | 2812 if (c == '&') |
2813 Finsert_buffer_substring | |
2814 (buffer, | |
2815 make_int (search_regs.start[0] + offset), | |
2816 make_int (search_regs.end[0] + offset)); | |
4199 | 2817 /* #### This logic is totally broken, |
2818 since we can have backrefs like "\99", right? */ | |
428 | 2819 else if (c >= '1' && c <= '9' && |
2820 c <= search_regs.num_regs + '0') | |
2821 { | |
2822 if (search_regs.start[c - '0'] >= 1) | |
2823 Finsert_buffer_substring | |
2824 (buffer, | |
2825 make_int (search_regs.start[c - '0'] + offset), | |
2826 make_int (search_regs.end[c - '0'] + offset)); | |
2827 } | |
2828 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' || | |
2829 c == 'E') | |
2830 { | |
2831 /* Keep track of all case changes requested, but don't | |
2832 make them now. Do them later so we override | |
2833 everything else. */ | |
2834 if (!ul_pos_dynarr) | |
2835 { | |
2836 ul_pos_dynarr = Dynarr_new (int); | |
2837 ul_action_dynarr = Dynarr_new (int); | |
2838 record_unwind_protect | |
2839 (free_created_dynarrs, | |
2840 Fcons (make_opaque_ptr (ul_pos_dynarr), | |
2841 make_opaque_ptr (ul_action_dynarr))); | |
2842 } | |
2843 Dynarr_add (ul_pos_dynarr, BUF_PT (buf)); | |
2844 Dynarr_add (ul_action_dynarr, c); | |
2845 } | |
2846 else | |
2847 buffer_insert_emacs_char (buf, c); | |
2848 } | |
2849 else | |
2850 buffer_insert_emacs_char (buf, c); | |
2851 } | |
2852 UNGCPRO; | |
2853 } | |
2854 | |
707 | 2855 inslen = BUF_PT (buf) - (search_regs.start[sub]); |
2856 buffer_delete_range (buf, search_regs.start[sub] + inslen, | |
2857 search_regs.end[sub] + inslen, 0); | |
428 | 2858 |
2859 if (case_action == all_caps) | |
2860 Fupcase_region (make_int (BUF_PT (buf) - inslen), | |
2861 make_int (BUF_PT (buf)), buffer); | |
2862 else if (case_action == cap_initial) | |
2863 Fupcase_initials_region (make_int (BUF_PT (buf) - inslen), | |
2864 make_int (BUF_PT (buf)), buffer); | |
2865 | |
2866 /* Now go through and make all the case changes that were requested | |
2867 in the replacement string. */ | |
2868 if (ul_pos_dynarr) | |
2869 { | |
665 | 2870 Charbpos eend = BUF_PT (buf); |
428 | 2871 int i = 0; |
2872 int cur_action = 'E'; | |
2873 | |
2874 for (pos = BUF_PT (buf) - inslen; pos < eend; pos++) | |
2875 { | |
867 | 2876 Ichar curchar = BUF_FETCH_CHAR (buf, pos); |
2877 Ichar newchar = -1; | |
428 | 2878 if (i < Dynarr_length (ul_pos_dynarr) && |
2879 pos == Dynarr_at (ul_pos_dynarr, i)) | |
2880 { | |
2881 int new_action = Dynarr_at (ul_action_dynarr, i); | |
2882 i++; | |
2883 if (new_action == 'u') | |
2884 newchar = UPCASE (buf, curchar); | |
2885 else if (new_action == 'l') | |
2886 newchar = DOWNCASE (buf, curchar); | |
2887 else | |
2888 cur_action = new_action; | |
2889 } | |
2890 if (newchar == -1) | |
2891 { | |
2892 if (cur_action == 'U') | |
2893 newchar = UPCASE (buf, curchar); | |
2894 else if (cur_action == 'L') | |
2895 newchar = DOWNCASE (buf, curchar); | |
2896 else | |
2897 newchar = curchar; | |
2898 } | |
2899 if (newchar != curchar) | |
2900 buffer_replace_char (buf, pos, newchar, 0, 0); | |
2901 } | |
2902 } | |
2903 | |
2904 /* frees the Dynarrs if necessary. */ | |
771 | 2905 unbind_to (speccount); |
428 | 2906 end_multiple_change (buf, mc_count); |
2907 | |
2908 return Qnil; | |
2909 } | |
2910 | |
2911 static Lisp_Object | |
2912 match_limit (Lisp_Object num, int beginningp) | |
2913 { | |
2914 int n; | |
2915 | |
2916 CHECK_INT (num); | |
2917 n = XINT (num); | |
2918 if (n < 0 || n >= search_regs.num_regs) | |
2919 args_out_of_range (num, make_int (search_regs.num_regs)); | |
2920 if (search_regs.num_regs == 0 || | |
2921 search_regs.start[n] < 0) | |
2922 return Qnil; | |
2923 return make_int (beginningp ? search_regs.start[n] : search_regs.end[n]); | |
2924 } | |
2925 | |
2926 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /* | |
2927 Return position of start of text matched by last regexp search. | |
2928 NUM, specifies which parenthesized expression in the last regexp. | |
2929 Value is nil if NUMth pair didn't match, or there were less than NUM pairs. | |
2930 Zero means the entire text matched by the whole regexp or whole string. | |
2931 */ | |
2932 (num)) | |
2933 { | |
2934 return match_limit (num, 1); | |
2935 } | |
2936 | |
2937 DEFUN ("match-end", Fmatch_end, 1, 1, 0, /* | |
2938 Return position of end of text matched by last regexp search. | |
2939 NUM specifies which parenthesized expression in the last regexp. | |
2940 Value is nil if NUMth pair didn't match, or there were less than NUM pairs. | |
2941 Zero means the entire text matched by the whole regexp or whole string. | |
2942 */ | |
2943 (num)) | |
2944 { | |
2945 return match_limit (num, 0); | |
2946 } | |
2947 | |
2948 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /* | |
2949 Return a list containing all info on what the last regexp search matched. | |
2950 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'. | |
2951 All the elements are markers or nil (nil if the Nth pair didn't match) | |
2952 if the last match was on a buffer; integers or nil if a string was matched. | |
2953 Use `store-match-data' to reinstate the data in this list. | |
2954 | |
2955 If INTEGERS (the optional first argument) is non-nil, always use integers | |
2956 \(rather than markers) to represent buffer positions. | |
2957 If REUSE is a list, reuse it as part of the value. If REUSE is long enough | |
2958 to hold all the values, and if INTEGERS is non-nil, no consing is done. | |
2959 */ | |
2960 (integers, reuse)) | |
2961 { | |
2962 Lisp_Object tail, prev; | |
2963 Lisp_Object *data; | |
2964 int i; | |
2965 Charcount len; | |
2966 | |
2967 if (NILP (last_thing_searched)) | |
563 | 2968 /*error ("match-data called before any match found", Qunbound);*/ |
428 | 2969 return Qnil; |
2970 | |
2971 data = alloca_array (Lisp_Object, 2 * search_regs.num_regs); | |
2972 | |
2973 len = -1; | |
2974 for (i = 0; i < search_regs.num_regs; i++) | |
2975 { | |
665 | 2976 Charbpos start = search_regs.start[i]; |
428 | 2977 if (start >= 0) |
2978 { | |
2979 if (EQ (last_thing_searched, Qt) | |
2980 || !NILP (integers)) | |
2981 { | |
2982 data[2 * i] = make_int (start); | |
2983 data[2 * i + 1] = make_int (search_regs.end[i]); | |
2984 } | |
2985 else if (BUFFERP (last_thing_searched)) | |
2986 { | |
2987 data[2 * i] = Fmake_marker (); | |
2988 Fset_marker (data[2 * i], | |
2989 make_int (start), | |
2990 last_thing_searched); | |
2991 data[2 * i + 1] = Fmake_marker (); | |
2992 Fset_marker (data[2 * i + 1], | |
2993 make_int (search_regs.end[i]), | |
2994 last_thing_searched); | |
2995 } | |
2996 else | |
2997 /* last_thing_searched must always be Qt, a buffer, or Qnil. */ | |
2500 | 2998 ABORT (); |
428 | 2999 |
3000 len = i; | |
3001 } | |
3002 else | |
3003 data[2 * i] = data [2 * i + 1] = Qnil; | |
3004 } | |
3005 if (!CONSP (reuse)) | |
3006 return Flist (2 * len + 2, data); | |
3007 | |
3008 /* If REUSE is a list, store as many value elements as will fit | |
3009 into the elements of REUSE. */ | |
3010 for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail)) | |
3011 { | |
3012 if (i < 2 * len + 2) | |
3013 XCAR (tail) = data[i]; | |
3014 else | |
3015 XCAR (tail) = Qnil; | |
3016 prev = tail; | |
3017 } | |
3018 | |
3019 /* If we couldn't fit all value elements into REUSE, | |
3020 cons up the rest of them and add them to the end of REUSE. */ | |
3021 if (i < 2 * len + 2) | |
3022 XCDR (prev) = Flist (2 * len + 2 - i, data + i); | |
3023 | |
3024 return reuse; | |
3025 } | |
3026 | |
3027 | |
3028 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /* | |
3029 Set internal data on last search match from elements of LIST. | |
1468 | 3030 LIST should have been created by calling `match-data' previously, |
3031 or be nil, to clear the internal match data. | |
428 | 3032 */ |
3033 (list)) | |
3034 { | |
3035 REGISTER int i; | |
3036 REGISTER Lisp_Object marker; | |
3037 int num_regs; | |
3038 int length; | |
3039 | |
853 | 3040 /* Some FSF junk with running_asynch_code, to preserve the match |
3041 data. Not necessary because we don't call process filters | |
3042 asynchronously (i.e. from within QUIT). */ | |
428 | 3043 |
3044 CONCHECK_LIST (list); | |
3045 | |
3046 /* Unless we find a marker with a buffer in LIST, assume that this | |
3047 match data came from a string. */ | |
3048 last_thing_searched = Qt; | |
3049 | |
3050 /* Allocate registers if they don't already exist. */ | |
3051 length = XINT (Flength (list)) / 2; | |
3052 num_regs = search_regs.num_regs; | |
3053 | |
3054 if (length > num_regs) | |
3055 { | |
3056 if (search_regs.num_regs == 0) | |
3057 { | |
3058 search_regs.start = xnew_array (regoff_t, length); | |
3059 search_regs.end = xnew_array (regoff_t, length); | |
3060 } | |
3061 else | |
3062 { | |
3063 XREALLOC_ARRAY (search_regs.start, regoff_t, length); | |
3064 XREALLOC_ARRAY (search_regs.end, regoff_t, length); | |
3065 } | |
3066 | |
3067 search_regs.num_regs = length; | |
3068 } | |
3069 | |
3070 for (i = 0; i < num_regs; i++) | |
3071 { | |
3072 marker = Fcar (list); | |
3073 if (NILP (marker)) | |
3074 { | |
3075 search_regs.start[i] = -1; | |
3076 list = Fcdr (list); | |
3077 } | |
3078 else | |
3079 { | |
3080 if (MARKERP (marker)) | |
3081 { | |
3082 if (XMARKER (marker)->buffer == 0) | |
3083 marker = Qzero; | |
3084 else | |
793 | 3085 last_thing_searched = wrap_buffer (XMARKER (marker)->buffer); |
428 | 3086 } |
3087 | |
3088 CHECK_INT_COERCE_MARKER (marker); | |
3089 search_regs.start[i] = XINT (marker); | |
3090 list = Fcdr (list); | |
3091 | |
3092 marker = Fcar (list); | |
3093 if (MARKERP (marker) && XMARKER (marker)->buffer == 0) | |
3094 marker = Qzero; | |
3095 | |
3096 CHECK_INT_COERCE_MARKER (marker); | |
3097 search_regs.end[i] = XINT (marker); | |
3098 } | |
3099 list = Fcdr (list); | |
3100 } | |
3101 | |
3102 return Qnil; | |
3103 } | |
3104 | |
3105 /* Quote a string to inactivate reg-expr chars */ | |
3106 | |
3107 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /* | |
3108 Return a regexp string which matches exactly STRING and nothing else. | |
3109 */ | |
444 | 3110 (string)) |
428 | 3111 { |
867 | 3112 REGISTER Ibyte *in, *out, *end; |
3113 REGISTER Ibyte *temp; | |
428 | 3114 |
444 | 3115 CHECK_STRING (string); |
428 | 3116 |
2367 | 3117 temp = alloca_ibytes (XSTRING_LENGTH (string) * 2); |
428 | 3118 |
3119 /* Now copy the data into the new string, inserting escapes. */ | |
3120 | |
444 | 3121 in = XSTRING_DATA (string); |
3122 end = in + XSTRING_LENGTH (string); | |
428 | 3123 out = temp; |
3124 | |
3125 while (in < end) | |
3126 { | |
867 | 3127 Ichar c = itext_ichar (in); |
428 | 3128 |
3129 if (c == '[' || c == ']' | |
3130 || c == '*' || c == '.' || c == '\\' | |
3131 || c == '?' || c == '+' | |
3132 || c == '^' || c == '$') | |
3133 *out++ = '\\'; | |
867 | 3134 out += set_itext_ichar (out, c); |
3135 INC_IBYTEPTR (in); | |
428 | 3136 } |
3137 | |
3138 return make_string (temp, out - temp); | |
3139 } | |
3140 | |
3141 DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /* | |
3142 Set the regexp to be used to match a word in regular-expression searching. | |
3143 #### Not yet implemented. Currently does nothing. | |
3144 #### Do not use this yet. Its calling interface is likely to change. | |
3145 */ | |
2286 | 3146 (UNUSED (regexp))) |
428 | 3147 { |
3148 return Qnil; | |
3149 } | |
3150 | |
3151 | |
3152 /************************************************************************/ | |
3153 /* initialization */ | |
3154 /************************************************************************/ | |
3155 | |
3156 void | |
3157 syms_of_search (void) | |
3158 { | |
3159 | |
442 | 3160 DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation); |
3161 DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error); | |
563 | 3162 Fput (Qinvalid_regexp, Qerror_lacks_explanatory_string, Qt); |
428 | 3163 |
3164 DEFSUBR (Flooking_at); | |
3165 DEFSUBR (Fposix_looking_at); | |
3166 DEFSUBR (Fstring_match); | |
3167 DEFSUBR (Fposix_string_match); | |
3168 DEFSUBR (Fskip_chars_forward); | |
3169 DEFSUBR (Fskip_chars_backward); | |
3170 DEFSUBR (Fskip_syntax_forward); | |
3171 DEFSUBR (Fskip_syntax_backward); | |
3172 DEFSUBR (Fsearch_forward); | |
3173 DEFSUBR (Fsearch_backward); | |
3174 DEFSUBR (Fword_search_forward); | |
3175 DEFSUBR (Fword_search_backward); | |
3176 DEFSUBR (Fre_search_forward); | |
3177 DEFSUBR (Fre_search_backward); | |
3178 DEFSUBR (Fposix_search_forward); | |
3179 DEFSUBR (Fposix_search_backward); | |
3180 DEFSUBR (Freplace_match); | |
3181 DEFSUBR (Fmatch_beginning); | |
3182 DEFSUBR (Fmatch_end); | |
3183 DEFSUBR (Fmatch_data); | |
3184 DEFSUBR (Fstore_match_data); | |
3185 DEFSUBR (Fregexp_quote); | |
3186 DEFSUBR (Fset_word_regexp); | |
3187 } | |
3188 | |
3189 void | |
3190 reinit_vars_of_search (void) | |
3191 { | |
3192 int i; | |
3193 | |
3194 last_thing_searched = Qnil; | |
3195 staticpro_nodump (&last_thing_searched); | |
3196 | |
3197 for (i = 0; i < REGEXP_CACHE_SIZE; ++i) | |
3198 { | |
3199 searchbufs[i].buf.allocated = 100; | |
3200 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100); | |
3201 searchbufs[i].buf.fastmap = searchbufs[i].fastmap; | |
3202 searchbufs[i].regexp = Qnil; | |
3203 staticpro_nodump (&searchbufs[i].regexp); | |
3204 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]); | |
3205 } | |
3206 searchbuf_head = &searchbufs[0]; | |
3207 } | |
3208 | |
3209 void | |
3210 vars_of_search (void) | |
3211 { | |
3212 DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /* | |
3213 *Regular expression to be used in `forward-word'. | |
3214 #### Not yet implemented. | |
3215 */ ); | |
3216 Vforward_word_regexp = Qnil; | |
3217 | |
3218 DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /* | |
3219 *Regular expression to be used in `backward-word'. | |
3220 #### Not yet implemented. | |
3221 */ ); | |
3222 Vbackward_word_regexp = Qnil; | |
502 | 3223 |
3224 DEFVAR_INT ("warn-about-possibly-incompatible-back-references", | |
3225 &warn_about_possibly_incompatible_back_references /* | |
3226 If true, issue warnings when new-semantics back references occur. | |
3227 This is to catch places where old code might inadvertently have changed | |
3228 semantics. This will occur in old code only where more than nine groups | |
3229 occur and a back reference to one of them is directly followed by a digit. | |
3230 */ ); | |
3231 warn_about_possibly_incompatible_back_references = 1; | |
814 | 3232 |
2421 | 3233 Vskip_chars_range_table = Fmake_range_table (Qstart_closed_end_closed); |
428 | 3234 staticpro (&Vskip_chars_range_table); |
3235 } |