Mercurial > hg > xemacs-beta
annotate src/search.c @ 5602:c9e5612f5424
Support the MP library on recent FreeBSD, have it pass relevant tests.
src/ChangeLog addition:
2011-11-26 Aidan Kehoe <kehoea@parhasard.net>
* number-mp.c (bignum_to_string):
Don't overwrite the accumulator we've just set up for this
function.
* number-mp.c (BIGNUM_TO_TYPE):
mp_itom() doesn't necessarily do what this code used to think with
negative numbers, it can treat them as unsigned ints. Subtract
numbers from bignum_zero instead of multiplying them by -1 to
convert them to their negative equivalents.
* number-mp.c (bignum_to_int):
* number-mp.c (bignum_to_uint):
* number-mp.c (bignum_to_long):
* number-mp.c (bignum_to_ulong):
* number-mp.c (bignum_to_double):
Use the changed BIGNUM_TO_TYPE() in these functions.
* number-mp.c (bignum_ceil):
* number-mp.c (bignum_floor):
In these functions, be more careful about rounding to positive and
negative infinity, respectively. Don't use the sign of QUOTIENT
when working out whether to add or subtract one, rather use
the sign QUOTIENT would have if arbitrary-precision division were
done.
* number-mp.h:
* number-mp.h (MP_GCD):
Wrap #include <mp.h> in BEGIN_C_DECLS/END_C_DECLS.
* number.c (Fbigfloat_get_precision):
* number.c (Fbigfloat_set_precision):
Don't attempt to call XBIGFLOAT_GET_PREC if this build doesn't
support big floats.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Sat, 26 Nov 2011 17:59:14 +0000 |
parents | 56144c8593a8 |
children | d026b665014f |
rev | line source |
---|---|
428 | 1 /* String search routines for XEmacs. |
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
5041 | 4 Copyright (C) 2001, 2002, 2010 Ben Wing. |
428 | 5 |
6 This file is part of XEmacs. | |
7 | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5089
diff
changeset
|
8 XEmacs is free software: you can redistribute it and/or modify it |
428 | 9 under the terms of the GNU General Public License as published by the |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5089
diff
changeset
|
10 Free Software Foundation, either version 3 of the License, or (at your |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5089
diff
changeset
|
11 option) any later version. |
428 | 12 |
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5089
diff
changeset
|
19 along with XEmacs. If not, see <http://www.gnu.org/licenses/>. */ |
428 | 20 |
21 /* Synched up with: FSF 19.29, except for region-cache stuff. */ | |
22 | |
23 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */ | |
24 | |
826 | 25 /* This file has been Mule-ized. */ |
428 | 26 |
27 #include <config.h> | |
28 #include "lisp.h" | |
29 | |
30 #include "buffer.h" | |
31 #include "insdel.h" | |
32 #include "opaque.h" | |
33 #ifdef REGION_CACHE_NEEDS_WORK | |
34 #include "region-cache.h" | |
35 #endif | |
36 #include "syntax.h" | |
37 | |
38 #include <sys/types.h> | |
39 #include "regex.h" | |
446 | 40 #include "casetab.h" |
41 #include "chartab.h" | |
42 | |
/* Map character POS through the translation table TABLE, or yield POS
   unchanged when TABLE is nil.  POS is parenthesized at every use so that
   a compound argument (e.g. `a + b') is cast and returned as a whole.
   Note that POS and TABLE may each be evaluated more than once. */
#define TRANSLATE(table, pos)						\
 (!NILP (table) ? TRT_TABLE_OF (table, (Ichar) (pos)) : (pos))
428 | 45 |
46 #define REGEXP_CACHE_SIZE 20 | |
47 | |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
48 #ifdef DEBUG_XEMACS |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
49 |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
50 /* Used in tests/automated/case-tests.el if available. */ |
5041 | 51 Fixnum debug_searches; |
52 | |
53 /* Declare as int rather than Bitflags because it's used by regex.c, which | |
54 may be used outside of XEmacs (e.g. etags.c). */ | |
55 int debug_regexps; | |
56 Lisp_Object Vdebug_regexps; | |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
57 |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
58 Lisp_Object Qsearch_algorithm_used, Qboyer_moore, Qsimple_search; |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
59 |
5041 | 60 Lisp_Object Qcompilation, Qfailure_point, Qmatching; |
61 | |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
62 #endif |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
63 |
428 | 64 /* If the regexp is non-nil, then the buffer contains the compiled form |
65 of that regexp, suitable for searching. */ | |
446 | 66 struct regexp_cache |
67 { | |
428 | 68 struct regexp_cache *next; |
69 Lisp_Object regexp; | |
70 struct re_pattern_buffer buf; | |
71 char fastmap[0400]; | |
72 /* Nonzero means regexp was compiled to do full POSIX backtracking. */ | |
73 char posix; | |
74 }; | |
75 | |
76 /* The instances of that struct. */ | |
77 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE]; | |
78 | |
79 /* The head of the linked list; points to the most recently used buffer. */ | |
80 static struct regexp_cache *searchbuf_head; | |
81 | |
82 | |
83 /* Every call to re_match, etc., must pass &search_regs as the regs | |
84 argument unless you can show it is unnecessary (i.e., if re_match | |
85 is certainly going to be called again before region-around-match | |
86 can be called). | |
87 | |
88 Since the registers are now dynamically allocated, we need to make | |
89 sure not to refer to the Nth register before checking that it has | |
90 been allocated by checking search_regs.num_regs. | |
91 | |
92 The regex code keeps track of whether it has allocated the search | |
93 buffer using bits in the re_pattern_buffer. This means that whenever | |
94 you compile a new pattern, it completely forgets whether it has | |
95 allocated any registers, and will allocate new registers the next | |
96 time you call a searching or matching function. Therefore, we need | |
97 to call re_set_registers after compiling a new pattern or after | |
98 setting the match registers, so that the regex functions will be | |
99 able to free or re-allocate it properly. */ | |
100 | |
101 /* Note: things get trickier under Mule because the values returned from | |
826 | 102 the regexp routines are in Bytebpos's but we need them to be in Charbpos's. |
428 | 103 We take the easy way out for the moment and just convert them immediately. |
104 We could be more clever by not converting them until necessary, but | |
105 that gets real ugly real fast since the buffer might have changed and | |
106 the positions might be out of sync or out of range. | |
107 */ | |
108 static struct re_registers search_regs; | |
109 | |
1468 | 110 /* Every function that sets the match data _must_ clear unused search |
111 registers on success. An unsuccessful search or match _must_ preserve | |
112 the search registers. The traditional documentation implied that | |
113 any match operation might trash the registers, but in fact failures | |
114 have always preserved the match data (in GNU Emacs as well). Some | |
115 plausible code depends on this behavior (cf. `w3-configuration-data' | |
116 in library "w3-cfg"). | |
117 | |
118 Ordinary string searches use set_search_regs to set the whole-string | |
119 match. That function takes care of clearing the unused subexpression | |
1425 | 120 registers. |
121 */ | |
122 static void set_search_regs (struct buffer *buf, Charbpos beg, Charcount len); | |
1468 | 123 static void clear_search_regs (void); |
1425 | 124 |
428 | 125 /* The buffer in which the last search was performed, or |
126 Qt if the last search was done in a string; | |
127 Qnil if no searching has been done yet. */ | |
128 static Lisp_Object last_thing_searched; | |
129 | |
130 /* error condition signalled when regexp compile_pattern fails */ | |
131 | |
132 Lisp_Object Qinvalid_regexp; | |
133 | |
134 /* Regular expressions used in forward/backward-word */ | |
135 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp; | |
136 | |
507 | 137 Fixnum warn_about_possibly_incompatible_back_references; |
502 | 138 |
428 | 139 /* range table for use with skip_chars. Only needed for Mule. */ |
140 Lisp_Object Vskip_chars_range_table; | |
141 | |
867 | 142 static Charbpos simple_search (struct buffer *buf, Ibyte *base_pat, |
826 | 143 Bytecount len, Bytebpos pos, Bytebpos lim, |
144 EMACS_INT n, Lisp_Object trt); | |
867 | 145 static Charbpos boyer_moore (struct buffer *buf, Ibyte *base_pat, |
826 | 146 Bytecount len, Bytebpos pos, Bytebpos lim, |
147 EMACS_INT n, Lisp_Object trt, | |
148 Lisp_Object inverse_trt, int charset_base); | |
665 | 149 static Charbpos search_buffer (struct buffer *buf, Lisp_Object str, |
826 | 150 Charbpos charbpos, Charbpos buflim, EMACS_INT n, |
151 int RE, Lisp_Object trt, | |
152 Lisp_Object inverse_trt, int posix); | |
771 | 153 |
2268 | 154 static DECLARE_DOESNT_RETURN (matcher_overflow (void)); |
155 | |
156 static DOESNT_RETURN | |
157 matcher_overflow () | |
428 | 158 { |
563 | 159 stack_overflow ("Stack overflow in regexp matcher", Qunbound); |
428 | 160 } |
161 | |
162 /* Compile a regexp and signal a Lisp error if anything goes wrong. | |
163 PATTERN is the pattern to compile. | |
164 CP is the place to put the result. | |
826 | 165 TRANSLATE is a translation table for ignoring case, or Qnil for none. |
428 | 166 REGP is the structure that says where to store the "register" |
167 values that will result from matching this pattern. | |
168 If it is 0, we should compile the pattern not to record any | |
169 subexpression bounds. | |
170 POSIX is nonzero if we want full backtracking (POSIX style) | |
171 for this pattern. 0 means backtrack only enough to get a valid match. */ | |
172 | |
173 static int | |
174 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, | |
2286 | 175 struct re_registers *UNUSED (regp), Lisp_Object translate, |
826 | 176 int posix, Error_Behavior errb) |
428 | 177 { |
442 | 178 const char *val; |
428 | 179 reg_syntax_t old; |
180 | |
181 cp->regexp = Qnil; | |
182 cp->buf.translate = translate; | |
183 cp->posix = posix; | |
184 old = re_set_syntax (RE_SYNTAX_EMACS | |
185 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING)); | |
442 | 186 val = (const char *) |
428 | 187 re_compile_pattern ((char *) XSTRING_DATA (pattern), |
188 XSTRING_LENGTH (pattern), &cp->buf); | |
189 re_set_syntax (old); | |
190 if (val) | |
191 { | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
192 maybe_signal_error (Qinvalid_regexp, 0, build_cistring (val), |
428 | 193 Qsearch, errb); |
194 return 0; | |
195 } | |
196 | |
197 cp->regexp = Fcopy_sequence (pattern); | |
198 return 1; | |
199 } | |
200 | |
201 /* Compile a regexp if necessary, but first check to see if there's one in | |
202 the cache. | |
203 PATTERN is the pattern to compile. | |
826 | 204 TRANSLATE is a translation table for ignoring case, or Qnil for none. |
428 | 205 REGP is the structure that says where to store the "register" |
206 values that will result from matching this pattern. | |
207 If it is 0, we should compile the pattern not to record any | |
208 subexpression bounds. | |
209 POSIX is nonzero if we want full backtracking (POSIX style) | |
210 for this pattern. 0 means backtrack only enough to get a valid match. */ | |
211 | |
212 struct re_pattern_buffer * | |
213 compile_pattern (Lisp_Object pattern, struct re_registers *regp, | |
2286 | 214 Lisp_Object translate, Lisp_Object UNUSED (searchobj), |
215 struct buffer *UNUSED (searchbuf), int posix, | |
216 Error_Behavior errb) | |
428 | 217 { |
218 struct regexp_cache *cp, **cpp; | |
219 | |
220 for (cpp = &searchbuf_head; ; cpp = &cp->next) | |
221 { | |
222 cp = *cpp; | |
826 | 223 /* &&#### once we fix up the fastmap code in regex.c for 8-bit-fixed, |
224 we need to record and compare the buffer and format, since the | |
225 fastmap will reflect the state of the buffer -- and things get | |
226 more complicated if the buffer has changed formats or (esp.) has | |
227 kept the format but changed its interpretation! may need to have | |
228 the code that changes the interpretation go through and invalidate | |
229 cache entries for that buffer. */ | |
428 | 230 if (!NILP (Fstring_equal (cp->regexp, pattern)) |
446 | 231 && EQ (cp->buf.translate, translate) |
428 | 232 && cp->posix == posix) |
233 break; | |
234 | |
235 /* If we're at the end of the cache, compile into the last cell. */ | |
236 if (cp->next == 0) | |
237 { | |
826 | 238 if (!compile_pattern_1 (cp, pattern, regp, translate, |
239 posix, errb)) | |
428 | 240 return 0; |
241 break; | |
242 } | |
243 } | |
244 | |
245 /* When we get here, cp (aka *cpp) contains the compiled pattern, | |
246 either because we found it in the cache or because we just compiled it. | |
247 Move it to the front of the queue to mark it as most recently used. */ | |
248 *cpp = cp->next; | |
249 cp->next = searchbuf_head; | |
250 searchbuf_head = cp; | |
251 | |
252 /* Advise the searching functions about the space we have allocated | |
253 for register data. */ | |
254 if (regp) | |
255 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end); | |
256 | |
257 return &cp->buf; | |
258 } | |
259 | |
260 /* Error condition used for failing searches */ | |
261 Lisp_Object Qsearch_failed; | |
262 | |
2268 | 263 static DECLARE_DOESNT_RETURN (signal_failure (Lisp_Object)); |
264 | |
265 static DOESNT_RETURN | |
428 | 266 signal_failure (Lisp_Object arg) |
267 { | |
446 | 268 for (;;) |
269 Fsignal (Qsearch_failed, list1 (arg)); | |
428 | 270 } |
271 | |
826 | 272 /* Convert the search registers from Bytebpos's to Charbpos's. Needs to be |
428 | 273 done after each regexp match that uses the search regs. |
274 | |
275 We could get a potential speedup by not converting the search registers | |
276 until it's really necessary, e.g. when match-data or replace-match is | |
277 called. However, this complexifies the code a lot (e.g. the buffer | |
826 | 278 could have changed and the Bytebpos's stored might be invalid) and is |
428 | 279 probably not a great time-saver. */ |
280 | |
281 static void | |
282 fixup_search_regs_for_buffer (struct buffer *buf) | |
283 { | |
284 int i; | |
285 int num_regs = search_regs.num_regs; | |
286 | |
287 for (i = 0; i < num_regs; i++) | |
288 { | |
289 if (search_regs.start[i] >= 0) | |
826 | 290 search_regs.start[i] = bytebpos_to_charbpos (buf, |
291 search_regs.start[i]); | |
428 | 292 if (search_regs.end[i] >= 0) |
665 | 293 search_regs.end[i] = bytebpos_to_charbpos (buf, search_regs.end[i]); |
428 | 294 } |
295 } | |
296 | |
297 /* Similar but for strings. */ | |
298 static void | |
299 fixup_search_regs_for_string (Lisp_Object string) | |
300 { | |
301 int i; | |
302 int num_regs = search_regs.num_regs; | |
303 | |
304 /* #### bytecount_to_charcount() is not that efficient. This function | |
867 | 305 could be faster if it did its own conversion (using INC_IBYTEPTR() |
428 | 306 and such), because the register ends are likely to be somewhat ordered. |
307 (Even if not, you could sort them.) | |
308 | |
309 Think about this if this function is a time hog, which it's probably | |
310 not. */ | |
311 for (i = 0; i < num_regs; i++) | |
312 { | |
313 if (search_regs.start[i] > 0) | |
314 { | |
315 search_regs.start[i] = | |
793 | 316 string_index_byte_to_char (string, search_regs.start[i]); |
428 | 317 } |
318 if (search_regs.end[i] > 0) | |
319 { | |
320 search_regs.end[i] = | |
793 | 321 string_index_byte_to_char (string, search_regs.end[i]); |
428 | 322 } |
323 } | |
324 } | |
325 | |
326 | |
327 static Lisp_Object | |
328 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix) | |
329 { | |
330 Lisp_Object val; | |
665 | 331 Bytebpos p1, p2; |
428 | 332 Bytecount s1, s2; |
333 REGISTER int i; | |
334 struct re_pattern_buffer *bufp; | |
826 | 335 struct syntax_cache scache_struct; |
336 struct syntax_cache *scache = &scache_struct; | |
337 | |
428 | 338 CHECK_STRING (string); |
339 bufp = compile_pattern (string, &search_regs, | |
340 (!NILP (buf->case_fold_search) | |
446 | 341 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil), |
826 | 342 wrap_buffer (buf), buf, posix, ERROR_ME); |
428 | 343 |
344 QUIT; | |
345 | |
346 /* Get pointers and sizes of the two strings | |
347 that make up the visible portion of the buffer. */ | |
348 | |
826 | 349 p1 = BYTE_BUF_BEGV (buf); |
350 p2 = BYTE_BUF_CEILING_OF (buf, p1); | |
428 | 351 s1 = p2 - p1; |
826 | 352 s2 = BYTE_BUF_ZV (buf) - p2; |
353 | |
354 /* By making the regex object, regex buffer, and syntax cache arguments | |
355 to re_{search,match}{,_2}, we've removed the need to do nasty things | |
356 to deal with regex reentrancy. (See stack trace in signal.c for proof | |
357 that this can happen.) | |
358 | |
359 #### there is still a potential problem with the regex cache -- | |
360 the compiled regex could be overwritten. we'd need 20-fold | |
361 reentrancy, though. Fix this. */ | |
362 | |
363 i = re_match_2 (bufp, (char *) BYTE_BUF_BYTE_ADDRESS (buf, p1), | |
364 s1, (char *) BYTE_BUF_BYTE_ADDRESS (buf, p2), s2, | |
365 BYTE_BUF_PT (buf) - BYTE_BUF_BEGV (buf), &search_regs, | |
366 BYTE_BUF_ZV (buf) - BYTE_BUF_BEGV (buf), wrap_buffer (buf), | |
367 buf, scache); | |
428 | 368 |
369 if (i == -2) | |
370 matcher_overflow (); | |
371 | |
372 val = (0 <= i ? Qt : Qnil); | |
373 if (NILP (val)) | |
826 | 374 return Qnil; |
428 | 375 { |
376 int num_regs = search_regs.num_regs; | |
377 for (i = 0; i < num_regs; i++) | |
378 if (search_regs.start[i] >= 0) | |
379 { | |
826 | 380 search_regs.start[i] += BYTE_BUF_BEGV (buf); |
381 search_regs.end[i] += BYTE_BUF_BEGV (buf); | |
428 | 382 } |
383 } | |
793 | 384 last_thing_searched = wrap_buffer (buf); |
428 | 385 fixup_search_regs_for_buffer (buf); |
826 | 386 return val; |
428 | 387 } |
388 | |
389 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /* | |
390 Return t if text after point matches regular expression REGEXP. | |
1468 | 391 When the match is successful, this function modifies the match data |
392 that `match-beginning', `match-end' and `match-data' access; save the | |
393 match data with `match-data' and restore it with `store-match-data' if | |
394 you want to preserve them. If the match fails, the match data from the | |
395 previous success match is preserved. | |
428 | 396 |
397 Optional argument BUFFER defaults to the current buffer. | |
398 */ | |
399 (regexp, buffer)) | |
400 { | |
401 return looking_at_1 (regexp, decode_buffer (buffer, 0), 0); | |
402 } | |
403 | |
404 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /* | |
405 Return t if text after point matches regular expression REGEXP. | |
406 Find the longest match, in accord with Posix regular expression rules. | |
1468 | 407 When the match is successful, this function modifies the match data |
408 that `match-beginning', `match-end' and `match-data' access; save the | |
409 match data with `match-data' and restore it with `store-match-data' if | |
410 you want to preserve them. If the match fails, the match data from the | |
411 previous success match is preserved. | |
428 | 412 |
413 Optional argument BUFFER defaults to the current buffer. | |
414 */ | |
415 (regexp, buffer)) | |
416 { | |
826 | 417 return looking_at_1 (regexp, decode_buffer (buffer, 0), 1); |
428 | 418 } |
419 | |
420 static Lisp_Object | |
421 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, | |
2286 | 422 struct buffer *buf, int UNUSED (posix)) |
428 | 423 { |
424 Bytecount val; | |
425 Charcount s; | |
426 struct re_pattern_buffer *bufp; | |
427 | |
853 | 428 /* Some FSF junk with running_asynch_code, to preserve the match |
429 data. Not necessary because we don't call process filters | |
430 asynchronously (i.e. from within QUIT). */ | |
428 | 431 |
432 CHECK_STRING (regexp); | |
433 CHECK_STRING (string); | |
434 | |
435 if (NILP (start)) | |
436 s = 0; | |
437 else | |
438 { | |
826 | 439 Charcount len = string_char_length (string); |
428 | 440 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
441 CHECK_FIXNUM (start); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
442 s = XFIXNUM (start); |
428 | 443 if (s < 0 && -s <= len) |
444 s = len + s; | |
445 else if (0 > s || s > len) | |
446 args_out_of_range (string, start); | |
447 } | |
448 | |
449 | |
450 bufp = compile_pattern (regexp, &search_regs, | |
451 (!NILP (buf->case_fold_search) | |
446 | 452 ? XCASE_TABLE_DOWNCASE (buf->case_table) : Qnil), |
826 | 453 string, buf, 0, ERROR_ME); |
428 | 454 QUIT; |
455 { | |
793 | 456 Bytecount bis = string_index_char_to_byte (string, s); |
826 | 457 struct syntax_cache scache_struct; |
458 struct syntax_cache *scache = &scache_struct; | |
459 | |
460 /* By making the regex object, regex buffer, and syntax cache arguments | |
461 to re_{search,match}{,_2}, we've removed the need to do nasty things | |
462 to deal with regex reentrancy. (See stack trace in signal.c for proof | |
463 that this can happen.) | |
464 | |
465 #### there is still a potential problem with the regex cache -- | |
466 the compiled regex could be overwritten. we'd need 20-fold | |
467 reentrancy, though. Fix this. */ | |
468 | |
428 | 469 val = re_search (bufp, (char *) XSTRING_DATA (string), |
470 XSTRING_LENGTH (string), bis, | |
471 XSTRING_LENGTH (string) - bis, | |
826 | 472 &search_regs, string, buf, scache); |
428 | 473 } |
474 if (val == -2) | |
475 matcher_overflow (); | |
826 | 476 if (val < 0) return Qnil; |
428 | 477 last_thing_searched = Qt; |
478 fixup_search_regs_for_string (string); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
479 return make_fixnum (string_index_byte_to_char (string, val)); |
428 | 480 } |
481 | |
482 DEFUN ("string-match", Fstring_match, 2, 4, 0, /* | |
483 Return index of start of first match for REGEXP in STRING, or nil. | |
484 If third arg START is non-nil, start search at that index in STRING. | |
485 For index of first char beyond the match, do (match-end 0). | |
486 `match-end' and `match-beginning' also give indices of substrings | |
487 matched by parenthesis constructs in the pattern. | |
488 | |
826 | 489 Optional arg BUFFER controls how case folding and syntax and category |
490 lookup is done (according to the value of `case-fold-search' in that buffer | |
491 and that buffer's case tables, syntax tables, and category table). If nil | |
492 or unspecified, it defaults *NOT* to the current buffer but instead: | |
493 | |
494 -- the value of `case-fold-search' in the current buffer is still respected | |
495 because of idioms like | |
496 | |
497 (let ((case-fold-search nil)) | |
498 (string-match "^foo.*bar" string)) | |
499 | |
500 but the case, syntax, and category tables come from the standard tables, | |
1468 | 501 which are accessed through functions `default-{case,syntax,category}-table' |
502 and serve as the parents of the tables in particular buffer. | |
503 | |
504 When the match is successful, this function modifies the match data | |
505 that `match-beginning', `match-end' and `match-data' access; save the | |
506 match data with `match-data' and restore it with `store-match-data' if | |
507 you want to preserve them. If the match fails, the match data from the | |
508 previous success match is preserved. | |
428 | 509 */ |
510 (regexp, string, start, buffer)) | |
511 { | |
826 | 512 /* &&#### implement new interp for buffer arg; check code to see if it |
513 makes more sense than prev */ | |
428 | 514 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 0); |
515 } | |
516 | |
517 DEFUN ("posix-string-match", Fposix_string_match, 2, 4, 0, /* | |
518 Return index of start of first match for REGEXP in STRING, or nil. | |
519 Find the longest match, in accord with Posix regular expression rules. | |
520 If third arg START is non-nil, start search at that index in STRING. | |
521 For index of first char beyond the match, do (match-end 0). | |
522 `match-end' and `match-beginning' also give indices of substrings | |
523 matched by parenthesis constructs in the pattern. | |
524 | |
525 Optional arg BUFFER controls how case folding is done (according to | |
526 the value of `case-fold-search' in that buffer and that buffer's case | |
527 tables) and defaults to the current buffer. | |
1468 | 528 |
529 When the match is successful, this function modifies the match data | |
530 that `match-beginning', `match-end' and `match-data' access; save the | |
531 match data with `match-data' and restore it with `store-match-data' if | |
532 you want to preserve them. If the match fails, the match data from the | |
533 previous success match is preserved. | |
428 | 534 */ |
535 (regexp, string, start, buffer)) | |
536 { | |
537 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0), 1); | |
538 } | |
539 | |
540 /* Match REGEXP against STRING, searching all of STRING, | |
541 and return the index of the match, or negative on failure. | |
542 This does not clobber the match data. */ | |
543 | |
544 Bytecount | |
1347 | 545 fast_string_match (Lisp_Object regexp, const Ibyte *nonreloc, |
428 | 546 Lisp_Object reloc, Bytecount offset, |
547 Bytecount length, int case_fold_search, | |
578 | 548 Error_Behavior errb, int no_quit) |
428 | 549 { |
550 Bytecount val; | |
867 | 551 Ibyte *newnonreloc = (Ibyte *) nonreloc; |
428 | 552 struct re_pattern_buffer *bufp; |
826 | 553 struct syntax_cache scache_struct; |
554 struct syntax_cache *scache = &scache_struct; | |
428 | 555 |
556 bufp = compile_pattern (regexp, 0, | |
557 (case_fold_search | |
771 | 558 ? XCASE_TABLE_DOWNCASE (Vstandard_case_table) |
446 | 559 : Qnil), |
826 | 560 reloc, 0, 0, errb); |
428 | 561 if (!bufp) |
562 return -1; /* will only do this when errb != ERROR_ME */ | |
563 if (!no_quit) | |
564 QUIT; | |
565 else | |
566 no_quit_in_re_search = 1; | |
567 | |
568 fixup_internal_substring (nonreloc, reloc, offset, &length); | |
569 | |
771 | 570 /* Don't need to protect against GC inside of re_search() due to QUIT; |
571 QUIT is GC-inhibited. */ | |
428 | 572 if (!NILP (reloc)) |
771 | 573 newnonreloc = XSTRING_DATA (reloc); |
574 | |
826 | 575 /* By making the regex object, regex buffer, and syntax cache arguments |
576 to re_{search,match}{,_2}, we've removed the need to do nasty things | |
577 to deal with regex reentrancy. (See stack trace in signal.c for proof | |
578 that this can happen.) | |
579 | |
580 #### there is still a potential problem with the regex cache -- | |
581 the compiled regex could be overwritten. we'd need 20-fold | |
582 reentrancy, though. Fix this. */ | |
583 | |
428 | 584 val = re_search (bufp, (char *) newnonreloc + offset, length, 0, |
826 | 585 length, 0, reloc, 0, scache); |
428 | 586 |
587 no_quit_in_re_search = 0; | |
588 return val; | |
589 } | |
590 | |
591 Bytecount | |
592 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string) | |
593 { | |
594 return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0); | |
595 } | |
596 | |
597 | |
598 #ifdef REGION_CACHE_NEEDS_WORK | |
599 /* The newline cache: remembering which sections of text have no newlines. */ | |
600 | |
601 /* If the user has requested newline caching, make sure it's on. | |
602 Otherwise, make sure it's off. | |
603 This is our cheezy way of associating an action with the change of | |
604 state of a buffer-local variable. */ | |
605 static void | |
606 newline_cache_on_off (struct buffer *buf) | |
607 { | |
608 if (NILP (buf->cache_long_line_scans)) | |
609 { | |
610 /* It should be off. */ | |
611 if (buf->newline_cache) | |
612 { | |
613 free_region_cache (buf->newline_cache); | |
614 buf->newline_cache = 0; | |
615 } | |
616 } | |
617 else | |
618 { | |
619 /* It should be on. */ | |
620 if (buf->newline_cache == 0) | |
621 buf->newline_cache = new_region_cache (); | |
622 } | |
623 } | |
624 #endif | |
625 | |
626 /* Search in BUF for COUNT instances of the character TARGET between | |
627 START and END. | |
628 | |
629 If COUNT is positive, search forwards; END must be >= START. | |
630 If COUNT is negative, search backwards for the -COUNTth instance; | |
631 END must be <= START. | |
632 If COUNT is zero, do anything you please; run rogue, for all I care. | |
633 | |
634 If END is zero, use BEGV or ZV instead, as appropriate for the | |
635 direction indicated by COUNT. | |
636 | |
637 If we find COUNT instances, set *SHORTAGE to zero, and return the | |
638 position after the COUNTth match. Note that for reverse motion | |
639 this is not the same as the usual convention for Emacs motion commands. | |
640 | |
641 If we don't find COUNT instances before reaching END, set *SHORTAGE | |
642 to the number of TARGETs left unfound, and return END. | |
643 | |
644 If ALLOW_QUIT is non-zero, call QUIT periodically. */ | |
645 | |
665 | 646 static Bytebpos |
867 | 647 byte_scan_buffer (struct buffer *buf, Ichar target, Bytebpos st, Bytebpos en, |
872 | 648 EMACS_INT count, EMACS_INT *shortage, int allow_quit) |
428 | 649 { |
665 | 650 Bytebpos lim = en > 0 ? en : |
826 | 651 ((count > 0) ? BYTE_BUF_ZV (buf) : BYTE_BUF_BEGV (buf)); |
428 | 652 |
653 /* #### newline cache stuff in this function not yet ported */ | |
654 assert (count != 0); | |
655 | |
656 if (shortage) | |
657 *shortage = 0; | |
658 | |
659 if (count > 0) | |
660 { | |
661 #ifdef MULE | |
826 | 662 Internal_Format fmt = buf->text->format; |
663 /* Check for char that's unrepresentable in the buffer -- it | |
664 certainly can't be there. */ | |
867 | 665 if (!ichar_fits_in_format (target, fmt, wrap_buffer (buf))) |
428 | 666 { |
826 | 667 *shortage = count; |
668 return lim; | |
669 } | |
670 /* Due to the Mule representation of characters in a buffer, we can | |
671 simply search for characters in the range 0 - 127 directly; for | |
672 8-bit-fixed, we can do this for all characters. In other cases, | |
673 we do it the "hard" way. Note that this way works for all | |
674 characters and all formats, but the other way is faster. */ | |
675 else if (! (fmt == FORMAT_8_BIT_FIXED || | |
867 | 676 (fmt == FORMAT_DEFAULT && ichar_ascii_p (target)))) |
826 | 677 { |
867 | 678 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
428 | 679 while (st < lim && count > 0) |
680 { | |
826 | 681 if (BYTE_BUF_FETCH_CHAR_RAW (buf, st) == raw) |
428 | 682 count--; |
665 | 683 INC_BYTEBPOS (buf, st); |
428 | 684 } |
685 } | |
686 else | |
687 #endif | |
688 { | |
867 | 689 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
428 | 690 while (st < lim && count > 0) |
691 { | |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
692 Bytebpos ceiling; |
867 | 693 Ibyte *bufptr; |
428 | 694 |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
695 ceiling = BYTE_BUF_CEILING_OF (buf, st); |
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
696 ceiling = min (lim, ceiling); |
867 | 697 bufptr = (Ibyte *) memchr (BYTE_BUF_BYTE_ADDRESS (buf, st), |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
698 raw, ceiling - st); |
428 | 699 if (bufptr) |
700 { | |
701 count--; | |
826 | 702 st = BYTE_BUF_PTR_BYTE_POS (buf, bufptr) + 1; |
428 | 703 } |
704 else | |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
705 st = ceiling; |
428 | 706 } |
707 } | |
708 | |
709 if (shortage) | |
710 *shortage = count; | |
711 if (allow_quit) | |
712 QUIT; | |
713 return st; | |
714 } | |
715 else | |
716 { | |
717 #ifdef MULE | |
826 | 718 Internal_Format fmt = buf->text->format; |
719 /* Check for char that's unrepresentable in the buffer -- it | |
720 certainly can't be there. */ | |
867 | 721 if (!ichar_fits_in_format (target, fmt, wrap_buffer (buf))) |
428 | 722 { |
826 | 723 *shortage = -count; |
724 return lim; | |
725 } | |
726 else if (! (fmt == FORMAT_8_BIT_FIXED || | |
867 | 727 (fmt == FORMAT_DEFAULT && ichar_ascii_p (target)))) |
826 | 728 { |
867 | 729 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
428 | 730 while (st > lim && count < 0) |
731 { | |
665 | 732 DEC_BYTEBPOS (buf, st); |
826 | 733 if (BYTE_BUF_FETCH_CHAR_RAW (buf, st) == raw) |
428 | 734 count++; |
735 } | |
736 } | |
737 else | |
738 #endif | |
739 { | |
867 | 740 Raw_Ichar raw = ichar_to_raw (target, fmt, wrap_buffer (buf)); |
428 | 741 while (st > lim && count < 0) |
742 { | |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
743 Bytebpos floorpos; |
867 | 744 Ibyte *bufptr; |
745 Ibyte *floorptr; | |
428 | 746 |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
747 floorpos = BYTE_BUF_FLOOR_OF (buf, st); |
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
748 floorpos = max (lim, floorpos); |
428 | 749 /* No memrchr() ... */ |
826 | 750 bufptr = BYTE_BUF_BYTE_ADDRESS_BEFORE (buf, st); |
5539
4307b8e5998c
Suppress "shadowed global" warnings for floor and ceil from <math.h>.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
751 floorptr = BYTE_BUF_BYTE_ADDRESS (buf, floorpos); |
428 | 752 while (bufptr >= floorptr) |
753 { | |
754 st--; | |
755 /* At this point, both ST and BUFPTR refer to the same | |
756 character. When the loop terminates, ST will | |
757 always point to the last character we tried. */ | |
867 | 758 if (*bufptr == (Ibyte) raw) |
428 | 759 { |
760 count++; | |
761 break; | |
762 } | |
763 bufptr--; | |
764 } | |
765 } | |
766 } | |
767 | |
768 if (shortage) | |
769 *shortage = -count; | |
770 if (allow_quit) | |
771 QUIT; | |
772 if (count) | |
773 return st; | |
774 else | |
775 { | |
776 /* We found the character we were looking for; we have to return | |
777 the position *after* it due to the strange way that the return | |
778 value is defined. */ | |
665 | 779 INC_BYTEBPOS (buf, st); |
428 | 780 return st; |
781 } | |
782 } | |
783 } | |
784 | |
665 | 785 Charbpos |
867 | 786 scan_buffer (struct buffer *buf, Ichar target, Charbpos start, Charbpos end, |
428 | 787 EMACS_INT count, EMACS_INT *shortage, int allow_quit) |
788 { | |
826 | 789 Bytebpos byte_retval; |
790 Bytebpos byte_start, byte_end; | |
791 | |
792 byte_start = charbpos_to_bytebpos (buf, start); | |
428 | 793 if (end) |
826 | 794 byte_end = charbpos_to_bytebpos (buf, end); |
428 | 795 else |
826 | 796 byte_end = 0; |
797 byte_retval = byte_scan_buffer (buf, target, byte_start, byte_end, count, | |
428 | 798 shortage, allow_quit); |
826 | 799 return bytebpos_to_charbpos (buf, byte_retval); |
428 | 800 } |
801 | |
665 | 802 Bytebpos |
826 | 803 byte_find_next_newline_no_quit (struct buffer *buf, Bytebpos from, int count) |
428 | 804 { |
826 | 805 return byte_scan_buffer (buf, '\n', from, 0, count, 0, 0); |
428 | 806 } |
807 | |
665 | 808 Charbpos |
809 find_next_newline_no_quit (struct buffer *buf, Charbpos from, int count) | |
428 | 810 { |
811 return scan_buffer (buf, '\n', from, 0, count, 0, 0); | |
812 } | |
813 | |
665 | 814 Charbpos |
815 find_next_newline (struct buffer *buf, Charbpos from, int count) | |
428 | 816 { |
817 return scan_buffer (buf, '\n', from, 0, count, 0, 1); | |
818 } | |
819 | |
826 | 820 Bytecount |
867 | 821 byte_find_next_ichar_in_string (Lisp_Object str, Ichar target, Bytecount st, |
428 | 822 EMACS_INT count) |
823 { | |
793 | 824 Bytebpos lim = XSTRING_LENGTH (str) -1; |
867 | 825 Ibyte *s = XSTRING_DATA (str); |
428 | 826 |
827 assert (count >= 0); | |
828 | |
829 #ifdef MULE | |
830 /* Due to the Mule representation of characters in a buffer, | |
831 we can simply search for characters in the range 0 - 127 | |
832 directly. For other characters, we do it the "hard" way. | |
833 Note that this way works for all characters but the other | |
834 way is faster. */ | |
835 if (target >= 0200) | |
836 { | |
837 while (st < lim && count > 0) | |
838 { | |
867 | 839 if (string_ichar (str, st) == target) |
428 | 840 count--; |
826 | 841 INC_BYTECOUNT (s, st); |
428 | 842 } |
843 } | |
844 else | |
845 #endif | |
846 { | |
847 while (st < lim && count > 0) | |
848 { | |
867 | 849 Ibyte *bufptr = (Ibyte *) memchr (itext_n_addr (s, st), |
428 | 850 (int) target, lim - st); |
851 if (bufptr) | |
852 { | |
853 count--; | |
826 | 854 st = (Bytebpos) (bufptr - s) + 1; |
428 | 855 } |
856 else | |
857 st = lim; | |
858 } | |
859 } | |
860 return st; | |
861 } | |
862 | |
863 /* Like find_next_newline, but returns position before the newline, | |
864 not after, and only search up to TO. This isn't just | |
865 find_next_newline (...)-1, because you might hit TO. */ | |
665 | 866 Charbpos |
826 | 867 find_before_next_newline (struct buffer *buf, Charbpos from, Charbpos to, |
868 int count) | |
428 | 869 { |
870 EMACS_INT shortage; | |
665 | 871 Charbpos pos = scan_buffer (buf, '\n', from, to, count, &shortage, 1); |
428 | 872 |
873 if (shortage == 0) | |
874 pos--; | |
875 | |
876 return pos; | |
877 } | |
878 | |
872 | 879 /* This function synched with FSF 21.1 */ |
428 | 880 static Lisp_Object |
881 skip_chars (struct buffer *buf, int forwardp, int syntaxp, | |
882 Lisp_Object string, Lisp_Object lim) | |
883 { | |
867 | 884 REGISTER Ibyte *p, *pend; |
885 REGISTER Ichar c; | |
428 | 886 /* We store the first 256 chars in an array here and the rest in |
887 a range table. */ | |
888 unsigned char fastmap[0400]; | |
889 int negate = 0; | |
890 REGISTER int i; | |
665 | 891 Charbpos limit; |
826 | 892 struct syntax_cache *scache; |
893 | |
428 | 894 if (NILP (lim)) |
895 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf); | |
896 else | |
897 { | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
898 CHECK_FIXNUM_COERCE_MARKER (lim); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
899 limit = XFIXNUM (lim); |
428 | 900 |
901 /* In any case, don't allow scan outside bounds of buffer. */ | |
902 if (limit > BUF_ZV (buf)) limit = BUF_ZV (buf); | |
903 if (limit < BUF_BEGV (buf)) limit = BUF_BEGV (buf); | |
904 } | |
905 | |
906 CHECK_STRING (string); | |
907 p = XSTRING_DATA (string); | |
908 pend = p + XSTRING_LENGTH (string); | |
909 memset (fastmap, 0, sizeof (fastmap)); | |
910 | |
911 Fclear_range_table (Vskip_chars_range_table); | |
912 | |
913 if (p != pend && *p == '^') | |
914 { | |
915 negate = 1; | |
916 p++; | |
917 } | |
918 | |
919 /* Find the characters specified and set their elements of fastmap. | |
920 If syntaxp, each character counts as itself. | |
921 Otherwise, handle backslashes and ranges specially */ | |
922 | |
923 while (p != pend) | |
924 { | |
867 | 925 c = itext_ichar (p); |
926 INC_IBYTEPTR (p); | |
428 | 927 if (syntaxp) |
928 { | |
5542
dab422055bab
Correct array bound for syntax_code_spec.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5539
diff
changeset
|
929 if (c < 0200 && syntax_spec_code[c] < (unsigned char) Smax) |
428 | 930 fastmap[c] = 1; |
931 else | |
831 | 932 invalid_argument ("Invalid syntax designator", make_char (c)); |
428 | 933 } |
934 else | |
935 { | |
936 if (c == '\\') | |
937 { | |
938 if (p == pend) break; | |
867 | 939 c = itext_ichar (p); |
940 INC_IBYTEPTR (p); | |
428 | 941 } |
942 if (p != pend && *p == '-') | |
943 { | |
867 | 944 Ichar cend; |
428 | 945 |
872 | 946 /* Skip over the dash. */ |
428 | 947 p++; |
948 if (p == pend) break; | |
867 | 949 cend = itext_ichar (p); |
428 | 950 while (c <= cend && c < 0400) |
951 { | |
952 fastmap[c] = 1; | |
953 c++; | |
954 } | |
955 if (c <= cend) | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
956 Fput_range_table (make_fixnum (c), make_fixnum (cend), Qt, |
428 | 957 Vskip_chars_range_table); |
867 | 958 INC_IBYTEPTR (p); |
428 | 959 } |
960 else | |
961 { | |
962 if (c < 0400) | |
963 fastmap[c] = 1; | |
964 else | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
965 Fput_range_table (make_fixnum (c), make_fixnum (c), Qt, |
428 | 966 Vskip_chars_range_table); |
967 } | |
968 } | |
969 } | |
970 | |
872 | 971 /* #### Not in FSF 21.1 */ |
428 | 972 if (syntaxp && fastmap['-'] != 0) |
973 fastmap[' '] = 1; | |
974 | |
975 /* If ^ was the first character, complement the fastmap. | |
976 We don't complement the range table, however; we just use negate | |
977 in the comparisons below. */ | |
978 | |
979 if (negate) | |
647 | 980 for (i = 0; i < (int) (sizeof (fastmap)); i++) |
428 | 981 fastmap[i] ^= 1; |
982 | |
983 { | |
665 | 984 Charbpos start_point = BUF_PT (buf); |
872 | 985 Charbpos pos = start_point; |
986 Charbpos pos_byte = BYTE_BUF_PT (buf); | |
428 | 987 |
988 if (syntaxp) | |
989 { | |
872 | 990 scache = setup_buffer_syntax_cache (buf, pos, forwardp ? 1 : -1); |
428 | 991 /* All syntax designators are normal chars so nothing strange |
992 to worry about */ | |
993 if (forwardp) | |
994 { | |
872 | 995 if (pos < limit) |
996 while (fastmap[(unsigned char) | |
997 syntax_code_spec | |
998 [(int) SYNTAX_FROM_CACHE | |
999 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) | |
1000 { | |
1001 pos++; | |
1002 INC_BYTEBPOS (buf, pos_byte); | |
879 | 1003 if (pos >= limit) |
872 | 1004 break; |
1005 UPDATE_SYNTAX_CACHE_FORWARD (scache, pos); | |
1006 } | |
428 | 1007 } |
1008 else | |
1009 { | |
872 | 1010 while (pos > limit) |
460 | 1011 { |
872 | 1012 Charbpos savepos = pos_byte; |
1013 pos--; | |
1014 DEC_BYTEBPOS (buf, pos_byte); | |
1015 UPDATE_SYNTAX_CACHE_BACKWARD (scache, pos); | |
1016 if (!fastmap[(unsigned char) | |
1017 syntax_code_spec | |
1018 [(int) SYNTAX_FROM_CACHE | |
1019 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) | |
1020 { | |
1021 pos++; | |
1022 pos_byte = savepos; | |
1023 break; | |
1024 } | |
460 | 1025 } |
428 | 1026 } |
1027 } | |
1028 else | |
1029 { | |
1030 if (forwardp) | |
1031 { | |
872 | 1032 while (pos < limit) |
428 | 1033 { |
872 | 1034 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
428 | 1035 if ((ch < 0400) ? fastmap[ch] : |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1036 (NILP (Fget_range_table (make_fixnum (ch), |
428 | 1037 Vskip_chars_range_table, |
1038 Qnil)) | |
1039 == negate)) | |
872 | 1040 { |
1041 pos++; | |
1042 INC_BYTEBPOS (buf, pos_byte); | |
1043 } | |
428 | 1044 else |
1045 break; | |
1046 } | |
1047 } | |
1048 else | |
1049 { | |
872 | 1050 while (pos > limit) |
428 | 1051 { |
872 | 1052 Charbpos prev_pos_byte = pos_byte; |
1053 Ichar ch; | |
1054 | |
1055 DEC_BYTEBPOS (buf, prev_pos_byte); | |
1056 ch = BYTE_BUF_FETCH_CHAR (buf, prev_pos_byte); | |
428 | 1057 if ((ch < 0400) ? fastmap[ch] : |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1058 (NILP (Fget_range_table (make_fixnum (ch), |
428 | 1059 Vskip_chars_range_table, |
1060 Qnil)) | |
1061 == negate)) | |
872 | 1062 { |
1063 pos--; | |
1064 pos_byte = prev_pos_byte; | |
1065 } | |
428 | 1066 else |
1067 break; | |
1068 } | |
1069 } | |
1070 } | |
1071 QUIT; | |
872 | 1072 BOTH_BUF_SET_PT (buf, pos, pos_byte); |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1073 return make_fixnum (BUF_PT (buf) - start_point); |
428 | 1074 } |
1075 } | |
1076 | |
1077 DEFUN ("skip-chars-forward", Fskip_chars_forward, 1, 3, 0, /* | |
444 | 1078 Move point forward, stopping before a char not in STRING, or at pos LIMIT. |
428 | 1079 STRING is like the inside of a `[...]' in a regular expression |
1080 except that `]' is never special and `\\' quotes `^', `-' or `\\'. | |
1081 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter. | |
1082 With arg "^a-zA-Z", skips nonletters stopping before first letter. | |
1083 Returns the distance traveled, either zero or positive. | |
1084 | |
1085 Optional argument BUFFER defaults to the current buffer. | |
1086 */ | |
444 | 1087 (string, limit, buffer)) |
428 | 1088 { |
444 | 1089 return skip_chars (decode_buffer (buffer, 0), 1, 0, string, limit); |
428 | 1090 } |
1091 | |
1092 DEFUN ("skip-chars-backward", Fskip_chars_backward, 1, 3, 0, /* | |
444 | 1093 Move point backward, stopping after a char not in STRING, or at pos LIMIT. |
428 | 1094 See `skip-chars-forward' for details. |
1095 Returns the distance traveled, either zero or negative. | |
1096 | |
1097 Optional argument BUFFER defaults to the current buffer. | |
1098 */ | |
444 | 1099 (string, limit, buffer)) |
428 | 1100 { |
444 | 1101 return skip_chars (decode_buffer (buffer, 0), 0, 0, string, limit); |
428 | 1102 } |
1103 | |
1104 | |
1105 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, 1, 3, 0, /* | |
1106 Move point forward across chars in specified syntax classes. | |
1107 SYNTAX is a string of syntax code characters. | |
444 | 1108 Stop before a char whose syntax is not in SYNTAX, or at position LIMIT. |
428 | 1109 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX. |
1110 This function returns the distance traveled, either zero or positive. | |
1111 | |
1112 Optional argument BUFFER defaults to the current buffer. | |
1113 */ | |
444 | 1114 (syntax, limit, buffer)) |
428 | 1115 { |
444 | 1116 return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, limit); |
428 | 1117 } |
1118 | |
1119 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, 1, 3, 0, /* | |
1120 Move point backward across chars in specified syntax classes. | |
1121 SYNTAX is a string of syntax code characters. | |
444 | 1122 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIMIT. |
428 | 1123 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX. |
1124 This function returns the distance traveled, either zero or negative. | |
1125 | |
1126 Optional argument BUFFER defaults to the current buffer. | |
1127 */ | |
444 | 1128 (syntax, limit, buffer)) |
428 | 1129 { |
444 | 1130 return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, limit); |
428 | 1131 } |
1132 | |
1133 | |
1134 /* Subroutines of Lisp buffer search functions. */ | |
1135 | |
1136 static Lisp_Object | |
444 | 1137 search_command (Lisp_Object string, Lisp_Object limit, Lisp_Object noerror, |
428 | 1138 Lisp_Object count, Lisp_Object buffer, int direction, |
1139 int RE, int posix) | |
1140 { | |
665 | 1141 REGISTER Charbpos np; |
1142 Charbpos lim; | |
428 | 1143 EMACS_INT n = direction; |
1144 struct buffer *buf; | |
1145 | |
1146 if (!NILP (count)) | |
1147 { | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1148 CHECK_FIXNUM (count); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1149 n *= XFIXNUM (count); |
428 | 1150 } |
1151 | |
1152 buf = decode_buffer (buffer, 0); | |
1153 CHECK_STRING (string); | |
444 | 1154 if (NILP (limit)) |
428 | 1155 lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf); |
1156 else | |
1157 { | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1158 CHECK_FIXNUM_COERCE_MARKER (limit); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1159 lim = XFIXNUM (limit); |
428 | 1160 if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf)) |
563 | 1161 invalid_argument ("Invalid search limit (wrong side of point)", |
1162 Qunbound); | |
428 | 1163 if (lim > BUF_ZV (buf)) |
1164 lim = BUF_ZV (buf); | |
1165 if (lim < BUF_BEGV (buf)) | |
1166 lim = BUF_BEGV (buf); | |
1167 } | |
1168 | |
1169 np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE, | |
1170 (!NILP (buf->case_fold_search) | |
446 | 1171 ? XCASE_TABLE_CANON (buf->case_table) |
1172 : Qnil), | |
428 | 1173 (!NILP (buf->case_fold_search) |
446 | 1174 ? XCASE_TABLE_EQV (buf->case_table) |
1175 : Qnil), posix); | |
428 | 1176 |
1177 if (np <= 0) | |
1178 { | |
444 | 1179 if (NILP (noerror)) |
2268 | 1180 { |
1181 signal_failure (string); | |
1182 RETURN_NOT_REACHED (Qnil); | |
1183 } | |
444 | 1184 if (!EQ (noerror, Qt)) |
428 | 1185 { |
1186 if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf)) | |
2500 | 1187 ABORT (); |
428 | 1188 BUF_SET_PT (buf, lim); |
1189 return Qnil; | |
1190 #if 0 /* This would be clean, but maybe programs depend on | |
1191 a value of nil here. */ | |
1192 np = lim; | |
1193 #endif | |
1194 } | |
1195 else | |
1196 return Qnil; | |
1197 } | |
1198 | |
1199 if (np < BUF_BEGV (buf) || np > BUF_ZV (buf)) | |
2500 | 1200 ABORT (); |
428 | 1201 |
1202 BUF_SET_PT (buf, np); | |
1203 | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
1204 return make_fixnum (np); |
428 | 1205 } |
1206 | |
1207 static int | |
1208 trivial_regexp_p (Lisp_Object regexp) | |
1209 { | |
1210 Bytecount len = XSTRING_LENGTH (regexp); | |
867 | 1211 Ibyte *s = XSTRING_DATA (regexp); |
428 | 1212 while (--len >= 0) |
1213 { | |
1214 switch (*s++) | |
1215 { | |
1724 | 1216 /* #### howcum ']' doesn't appear here, but ... */ |
428 | 1217 case '.': case '*': case '+': case '?': case '[': case '^': case '$': |
1218 return 0; | |
1219 case '\\': | |
1220 if (--len < 0) | |
1221 return 0; | |
1222 switch (*s++) | |
1223 { | |
1724 | 1224 /* ... ')' does appear here? ('<' and '>' can appear singly.) */ |
1225 /* #### are there other constructs to check? */ | |
428 | 1226 case '|': case '(': case ')': case '`': case '\'': case 'b': |
1227 case 'B': case '<': case '>': case 'w': case 'W': case 's': | |
1724 | 1228 case 'S': case '=': case '{': case '}': |
428 | 1229 #ifdef MULE |
1230 /* 97/2/25 jhod Added for category matches */ | |
1231 case 'c': case 'C': | |
1232 #endif /* MULE */ | |
1233 case '1': case '2': case '3': case '4': case '5': | |
1234 case '6': case '7': case '8': case '9': | |
1235 return 0; | |
1236 } | |
1237 } | |
1238 } | |
1239 return 1; | |
1240 } | |
1241 | |
1242 /* Search for the n'th occurrence of STRING in BUF, | |
665 | 1243 starting at position CHARBPOS and stopping at position BUFLIM, |
428 | 1244 treating PAT as a literal string if RE is false or as |
1245 a regular expression if RE is true. | |
1246 | |
1247 If N is positive, searching is forward and BUFLIM must be greater | |
665 | 1248 than CHARBPOS. |
428 | 1249 If N is negative, searching is backward and BUFLIM must be less |
665 | 1250 than CHARBPOS. |
428 | 1251 |
1252 Returns -x if only N-x occurrences found (x > 0), | |
1253 or else the position at the beginning of the Nth occurrence | |
1254 (if searching backward) or the end (if searching forward). | |
1255 | |
1256 POSIX is nonzero if we want full backtracking (POSIX style) | |
1257 for this pattern. 0 means backtrack only enough to get a valid match. */ | |
665 | 1258 static Charbpos |
1259 search_buffer (struct buffer *buf, Lisp_Object string, Charbpos charbpos, | |
1260 Charbpos buflim, EMACS_INT n, int RE, Lisp_Object trt, | |
446 | 1261 Lisp_Object inverse_trt, int posix) |
428 | 1262 { |
1263 Bytecount len = XSTRING_LENGTH (string); | |
867 | 1264 Ibyte *base_pat = XSTRING_DATA (string); |
428 | 1265 REGISTER EMACS_INT i, j; |
665 | 1266 Bytebpos p1, p2; |
428 | 1267 Bytecount s1, s2; |
665 | 1268 Bytebpos pos, lim; |
428 | 1269 |
853 | 1270 /* Some FSF junk with running_asynch_code, to preserve the match |
1271 data. Not necessary because we don't call process filters | |
1272 asynchronously (i.e. from within QUIT). */ | |
428 | 1273 |
1425 | 1274 /* Searching 0 times means noop---don't move, don't touch registers. */ |
1275 if (n == 0) | |
1276 return charbpos; | |
1277 | |
428 | 1278 /* Null string is found at starting position. */ |
1279 if (len == 0) | |
1280 { | |
665 | 1281 set_search_regs (buf, charbpos, 0); |
1282 return charbpos; | |
428 | 1283 } |
1284 | |
665 | 1285 pos = charbpos_to_bytebpos (buf, charbpos); |
1286 lim = charbpos_to_bytebpos (buf, buflim); | |
428 | 1287 if (RE && !trivial_regexp_p (string)) |
1288 { | |
1289 struct re_pattern_buffer *bufp; | |
826 | 1290 |
1291 bufp = compile_pattern (string, &search_regs, trt, | |
1292 wrap_buffer (buf), buf, posix, ERROR_ME); | |
428 | 1293 |
1294 /* Get pointers and sizes of the two strings | |
1295 that make up the visible portion of the buffer. */ | |
1296 | |
826 | 1297 p1 = BYTE_BUF_BEGV (buf); |
1298 p2 = BYTE_BUF_CEILING_OF (buf, p1); | |
428 | 1299 s1 = p2 - p1; |
826 | 1300 s2 = BYTE_BUF_ZV (buf) - p2; |
1301 | |
1302 while (n != 0) | |
428 | 1303 { |
1304 Bytecount val; | |
826 | 1305 struct syntax_cache scache_struct; |
1306 struct syntax_cache *scache = &scache_struct; | |
1307 | |
428 | 1308 QUIT; |
826 | 1309 /* By making the regex object, regex buffer, and syntax cache |
1310 arguments to re_{search,match}{,_2}, we've removed the need to | |
1311 do nasty things to deal with regex reentrancy. (See stack | |
1312 trace in signal.c for proof that this can happen.) | |
1313 | |
1314 #### there is still a potential problem with the regex cache -- | |
1315 the compiled regex could be overwritten. we'd need 20-fold | |
1316 reentrancy, though. Fix this. */ | |
1317 | |
428 | 1318 val = re_search_2 (bufp, |
826 | 1319 (char *) BYTE_BUF_BYTE_ADDRESS (buf, p1), s1, |
1320 (char *) BYTE_BUF_BYTE_ADDRESS (buf, p2), s2, | |
1321 pos - BYTE_BUF_BEGV (buf), lim - pos, &search_regs, | |
1322 n > 0 ? lim - BYTE_BUF_BEGV (buf) : | |
1323 pos - BYTE_BUF_BEGV (buf), wrap_buffer (buf), | |
1324 buf, scache); | |
428 | 1325 |
1326 if (val == -2) | |
1327 { | |
1328 matcher_overflow (); | |
1329 } | |
1330 if (val >= 0) | |
1331 { | |
1332 int num_regs = search_regs.num_regs; | |
826 | 1333 j = BYTE_BUF_BEGV (buf); |
428 | 1334 for (i = 0; i < num_regs; i++) |
1335 if (search_regs.start[i] >= 0) | |
1336 { | |
1337 search_regs.start[i] += j; | |
1338 search_regs.end[i] += j; | |
1339 } | |
793 | 1340 last_thing_searched = wrap_buffer (buf); |
428 | 1341 /* Set pos to the new position. */ |
826 | 1342 pos = n > 0 ? search_regs.end[0] : search_regs.start[0]; |
428 | 1343 fixup_search_regs_for_buffer (buf); |
665 | 1344 /* And charbpos too. */ |
826 | 1345 charbpos = n > 0 ? search_regs.end[0] : search_regs.start[0]; |
428 | 1346 } |
1347 else | |
826 | 1348 return (n > 0 ? 0 - n : n); |
1349 if (n > 0) n--; else n++; | |
428 | 1350 } |
665 | 1351 return charbpos; |
428 | 1352 } |
1353 else /* non-RE case */ | |
1354 { | |
446 | 1355 int charset_base = -1; |
1356 int boyer_moore_ok = 1; | |
2367 | 1357 Ibyte *patbuf = alloca_ibytes (len * MAX_ICHAR_LEN); |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1358 Ibyte *pat = patbuf; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1359 |
446 | 1360 #ifdef MULE |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1361 int entirely_one_byte_p = buf->text->entirely_one_byte_p; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1362 int nothing_greater_than_0xff = |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1363 buf->text->num_8_bit_fixed_chars == BUF_Z(buf) - BUF_BEG (buf); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1364 |
446 | 1365 while (len > 0) |
1366 { | |
867 | 1367 Ibyte tmp_str[MAX_ICHAR_LEN]; |
1368 Ichar c, translated, inverse; | |
446 | 1369 Bytecount orig_bytelen, new_bytelen, inv_bytelen; |
1370 | |
1371 /* If we got here and the RE flag is set, it's because | |
1372 we're dealing with a regexp known to be trivial, so the | |
1373 backslash just quotes the next character. */ | |
1374 if (RE && *base_pat == '\\') | |
1375 { | |
1376 len--; | |
1377 base_pat++; | |
1378 } | |
867 | 1379 c = itext_ichar (base_pat); |
446 | 1380 translated = TRANSLATE (trt, c); |
1381 inverse = TRANSLATE (inverse_trt, c); | |
1382 | |
867 | 1383 orig_bytelen = itext_ichar_len (base_pat); |
1384 inv_bytelen = set_itext_ichar (tmp_str, inverse); | |
1385 new_bytelen = set_itext_ichar (tmp_str, translated); | |
446 | 1386 |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1387 if (boyer_moore_ok |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1388 /* Only do the Boyer-Moore check for characters needing |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1389 translation. */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1390 && (translated != c || inverse != c)) |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1391 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1392 Ichar starting_c = c; |
4421
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1393 int charset_base_code, checked = 0; |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1394 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1395 do |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1396 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1397 c = TRANSLATE (inverse_trt, c); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1398 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1399 /* If a character cannot occur in the buffer, ignore |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1400 it. */ |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1401 if (c > 0x7F && entirely_one_byte_p) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1402 continue; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1403 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1404 if (c > 0xFF && nothing_greater_than_0xff) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1405 continue; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1406 |
4421
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1407 checked = 1; |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1408 |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1409 if (-1 == charset_base) /* No charset yet specified. */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1410 { |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1411 /* Keep track of which charset and character set row |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1412 contains the characters that need translation. |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1413 |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1414 Zero out the bits corresponding to the last |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1415 byte. */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1416 charset_base = c & ~ICHAR_FIELD3_MASK; |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1417 } |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1418 else |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1419 { |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1420 charset_base_code = c & ~ICHAR_FIELD3_MASK; |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1421 |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1422 if (charset_base_code != charset_base) |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1423 { |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1424 /* If two different rows, or two different |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1425 charsets, appear, needing non-ASCII |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1426 translation, then we cannot use boyer_moore |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1427 search. See the comment at the head of |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1428 boyer_moore(). */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1429 boyer_moore_ok = 0; |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1430 break; |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1431 } |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1432 } |
4901
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1433 |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1434 if (ichar_len (c) > 2) |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1435 { |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1436 /* Case-equivalence plus repeated octets throws off |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1437 the construction of the stride table; avoid this. |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1438 |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1439 It should be possible to correct boyer_moore to |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1440 behave correctly even in this case--it doesn't have |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1441 problems with repeated octets when case conversion |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1442 is not involved--but this is not a critical |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1443 issue. */ |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1444 Ibyte encoded[MAX_ICHAR_LEN]; |
5016
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1445 Bytecount clen = set_itext_ichar (encoded, c); |
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1446 int a, b; |
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1447 for (a = 0; a < clen && boyer_moore_ok; ++a) |
4901
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1448 { |
5016
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1449 for (b = a + 1; b < clen && boyer_moore_ok; ++b) |
4901
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1450 { |
5016
2ade80e8c640
enable more warnings and fix them
Ben Wing <ben@xemacs.org>
parents:
4962
diff
changeset
|
1451 if (encoded[a] == encoded[b]) |
4901
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1452 { |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1453 boyer_moore_ok = 0; |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1454 } |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1455 } |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1456 } |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1457 |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1458 if (0 == boyer_moore_ok) |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1459 { |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1460 break; |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1461 } |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1462 } |
7504864a986c
Don't use Boyer-Moore if repeated octets & case-insensitive search.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4897
diff
changeset
|
1463 |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1464 } while (c != starting_c); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1465 |
4421
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1466 if (!checked) |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1467 { |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1468 #ifdef DEBUG_XEMACS |
5041 | 1469 if (debug_searches) |
4421
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1470 { |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1471 Lisp_Symbol *sym = XSYMBOL (Qsearch_algorithm_used); |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1472 sym->value = Qnil; |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1473 } |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1474 #endif |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1475 /* The "continue" clauses were used above, for every |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1476 translation of the character. As such, this character |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1477 is not to be found in the buffer and neither is the |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1478 string as a whole. Return immediately; also avoid |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1479 triggering the assertion a few lines down. */ |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1480 return n > 0 ? -n : n; |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1481 } |
69b803c646cd
Fail searches immediately if searching for non-representable characters.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4414
diff
changeset
|
1482 |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1483 if (boyer_moore_ok && charset_base != -1 && |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1484 charset_base != (translated & ~ICHAR_FIELD3_MASK)) |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1485 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1486 /* In the rare event that the CANON entry for this |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1487 character is not in the desired set, choose one |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1488 that is, from the equivalence set. It doesn't much |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1489 matter which. */ |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1490 Ichar starting_ch = translated; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1491 do |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1492 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1493 translated = TRANSLATE (inverse_trt, translated); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1494 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1495 if (charset_base == (translated & ~ICHAR_FIELD3_MASK)) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1496 break; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1497 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1498 } while (starting_ch != translated); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1499 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1500 assert (starting_ch != translated); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1501 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1502 new_bytelen = set_itext_ichar (tmp_str, translated); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1503 } |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1504 } |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1505 |
446 | 1506 memcpy (pat, tmp_str, new_bytelen); |
1507 pat += new_bytelen; | |
1508 base_pat += orig_bytelen; | |
1509 len -= orig_bytelen; | |
1510 } | |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1511 |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1512 if (-1 == charset_base) |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1513 { |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1514 charset_base = 'a' & ~ICHAR_FIELD3_MASK; /* Default to ASCII. */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1515 } |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1516 |
446 | 1517 #else /* not MULE */ |
1518 while (--len >= 0) | |
1519 { | |
1520 /* If we got here and the RE flag is set, it's because | |
1521 we're dealing with a regexp known to be trivial, so the | |
1522 backslash just quotes the next character. */ | |
1523 if (RE && *base_pat == '\\') | |
1524 { | |
1525 len--; | |
1526 base_pat++; | |
1527 } | |
1528 *pat++ = TRANSLATE (trt, *base_pat++); | |
1529 } | |
1530 #endif /* MULE */ | |
1531 len = pat - patbuf; | |
1532 pat = base_pat = patbuf; | |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1533 |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1534 #ifdef DEBUG_XEMACS |
5041 | 1535 if (debug_searches) |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1536 { |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1537 Lisp_Symbol *sym = XSYMBOL (Qsearch_algorithm_used); |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1538 sym->value = boyer_moore_ok ? Qboyer_moore : Qsimple_search; |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1539 } |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1540 #endif |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1541 |
446 | 1542 if (boyer_moore_ok) |
1543 return boyer_moore (buf, base_pat, len, pos, lim, n, | |
1544 trt, inverse_trt, charset_base); | |
1545 else | |
1546 return simple_search (buf, base_pat, len, pos, lim, n, trt); | |
1547 } | |
1548 } | |
1549 | |
826 | 1550 /* Do a simple string search N times for the string PAT, whose length is |
1551 LEN/LEN_BYTE, from buffer position POS until LIM. TRT is the | |
1552 translation table. | |
446 | 1553 |
1554 Return the character position where the match is found. | |
1555 Otherwise, if M matches remained to be found, return -M. | |
1556 | |
1557 This kind of search works regardless of what is in PAT and | |
1558 regardless of what is in TRT. It is used in cases where | |
1559 boyer_moore cannot work. */ | |
1560 | |
665 | 1561 static Charbpos |
867 | 1562 simple_search (struct buffer *buf, Ibyte *base_pat, Bytecount len, |
826 | 1563 Bytebpos pos, Bytebpos lim, EMACS_INT n, Lisp_Object trt) |
446 | 1564 { |
1565 int forward = n > 0; | |
1566 Bytecount buf_len = 0; /* Shut up compiler. */ | |
1567 | |
826 | 1568 if (lim > pos) |
446 | 1569 while (n > 0) |
428 | 1570 { |
446 | 1571 while (1) |
428 | 1572 { |
826 | 1573 Bytecount this_len = len; |
1574 Bytebpos this_pos = pos; | |
867 | 1575 Ibyte *p = base_pat; |
826 | 1576 if (pos >= lim) |
446 | 1577 goto stop; |
1578 | |
1579 while (this_len > 0) | |
1580 { | |
867 | 1581 Ichar pat_ch, buf_ch; |
446 | 1582 Bytecount pat_len; |
1583 | |
867 | 1584 pat_ch = itext_ichar (p); |
826 | 1585 buf_ch = BYTE_BUF_FETCH_CHAR (buf, this_pos); |
446 | 1586 |
1587 buf_ch = TRANSLATE (trt, buf_ch); | |
1588 | |
1589 if (buf_ch != pat_ch) | |
1590 break; | |
1591 | |
867 | 1592 pat_len = itext_ichar_len (p); |
446 | 1593 p += pat_len; |
1594 this_len -= pat_len; | |
826 | 1595 INC_BYTEBPOS (buf, this_pos); |
446 | 1596 } |
1597 if (this_len == 0) | |
428 | 1598 { |
826 | 1599 buf_len = this_pos - pos; |
1600 pos = this_pos; | |
446 | 1601 break; |
428 | 1602 } |
826 | 1603 INC_BYTEBPOS (buf, pos); |
428 | 1604 } |
446 | 1605 n--; |
1606 } | |
1607 else | |
4322
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1608 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1609 /* If lim < len, then there are too few buffer positions to hold the |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1610 pattern between the beginning of the buffer and lim. Adjust to |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1611 ensure pattern fits. If we don't do this, we can assert in the |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1612 DEC_BYTEBPOS below. */ |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1613 if (lim < len) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1614 lim = len; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1615 while (n < 0) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1616 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1617 while (1) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1618 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1619 Bytecount this_len = len; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1620 Bytebpos this_pos = pos; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1621 Ibyte *p; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1622 if (pos <= lim) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1623 goto stop; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1624 p = base_pat + len; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1625 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1626 while (this_len > 0) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1627 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1628 Ichar pat_ch, buf_ch; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1629 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1630 DEC_IBYTEPTR (p); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1631 DEC_BYTEBPOS (buf, this_pos); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1632 pat_ch = itext_ichar (p); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1633 buf_ch = BYTE_BUF_FETCH_CHAR (buf, this_pos); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1634 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1635 buf_ch = TRANSLATE (trt, buf_ch); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1636 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1637 if (buf_ch != pat_ch) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1638 break; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1639 |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1640 this_len -= itext_ichar_len (p); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1641 } |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1642 if (this_len == 0) |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1643 { |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1644 buf_len = pos - this_pos; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1645 pos = this_pos; |
446 | 1646 break; |
4322
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1647 } |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1648 DEC_BYTEBPOS (buf, pos); |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1649 } |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1650 n++; |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1651 } |
f70e56bb52a7
src/search.c (simple_search): Fix underrun in reverse search.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4199
diff
changeset
|
1652 } |
446 | 1653 stop: |
1654 if (n == 0) | |
1655 { | |
665 | 1656 Charbpos beg, end, retval; |
446 | 1657 if (forward) |
1658 { | |
826 | 1659 beg = bytebpos_to_charbpos (buf, pos - buf_len); |
1660 retval = end = bytebpos_to_charbpos (buf, pos); | |
446 | 1661 } |
1662 else | |
428 | 1663 { |
826 | 1664 retval = beg = bytebpos_to_charbpos (buf, pos); |
1665 end = bytebpos_to_charbpos (buf, pos + buf_len); | |
428 | 1666 } |
446 | 1667 set_search_regs (buf, beg, end - beg); |
1668 | |
1669 return retval; | |
1670 } | |
1671 else if (n > 0) | |
1672 return -n; | |
1673 else | |
1674 return n; | |
1675 } | |
1676 | |
1677 /* Do Boyer-Moore search N times for the string PAT, | |
1678 whose length in bytes is LEN, | |
1679 from buffer byte position POS until byte position LIM. | |
1680 The sign of N says which direction we search in. | |
1681 TRT and INVERSE_TRT are translation tables. | |
1682 | |
1683 This kind of search works if all the characters in PAT that have | |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1684 (non-ASCII) translation are the same aside from the last byte. This |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1685 makes it possible to translate just the last byte of a character, and do |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1686 so after just a simple test of the context. |
446 | 1687 |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1688 If that criterion is not satisfied, do not call this function. You will |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1689 get an assertion failure. */ |
446 | 1690 |
/* Interface summary for boyer_moore (the comment preceding this function
   gives the restrictions on when it may be called):

   BASE_PAT and LEN give the pattern bytes and their count.  POS and LIM
   are byte positions (Bytebpos) in BUF; the search proceeds from POS and
   gives up once it would pass LIM.  The sign of N selects the direction
   (N > 0 searches forward, otherwise backward) and N is stepped toward
   zero once for each match found.  TRT, when non-nil, is the translation
   table applied to characters; INVERSE_TRT is iterated to visit the
   characters that share a translation.  CHARSET_BASE is used only under
   MULE, where it is compared with a translated character whose
   ICHAR_FIELD3_MASK bits have been cleared.

   When the last requested match is found, its bounds are recorded with
   set_search_regs and search_regs.end[0] (forward) or
   search_regs.start[0] (backward) is returned.  If the search range is
   exhausted first, the value returned is N * (0 - direction).  */
665 | 1691 static Charbpos |
867 | 1692 boyer_moore (struct buffer *buf, Ibyte *base_pat, Bytecount len, |
665 | 1693 Bytebpos pos, Bytebpos lim, EMACS_INT n, Lisp_Object trt, |
2333 | 1694 Lisp_Object inverse_trt, int USED_IF_MULE (charset_base)) |
446 | 1695 { |
1696 /* #### Someone really really really needs to comment the workings | |
1697 of this junk somewhat better. | |
1698 | |
1699 BTW "BM" stands for Boyer-Moore, which is one of the standard | |
1700 string-searching algorithms. It's the best string-searching | |
1701 algorithm out there, provided that: | |
1702 | |
1703 a) You're not fazed by algorithm complexity. (Rabin-Karp, which | |
1704 uses hashing, is much much easier to code but not as fast.) | |
1705 b) You can freely move backwards in the string that you're | |
1706 searching through. | |
1707 | |
1708 As the comment below tries to explain (but garbles in typical | |
1709 programmer-ese), the idea is that you don't have to do a | |
1710 string match at every successive position in the text. For | |
1711 example, let's say the pattern is "a very long string". We | |
1712 compare the last character in the string (`g') with the | |
1713 corresponding character in the text. If it mismatches, and | |
1714 it is, say, `z', then we can skip forward by the entire | |
1715 length of the pattern because `z' does not occur anywhere | |
1716 in the pattern. If the mismatching character does occur | |
1717 in the pattern, we can usually still skip forward by more | |
1718 than one: e.g. if it is `l', then we can skip forward | |
1719 by the length of the substring "ong string" -- i.e. the | |
1720 largest end section of the pattern that does not contain | |
1721 the mismatched character. So what we do is compute, for | |
1722 each possible character, the distance we can skip forward | |
1723 (the "stride") and use it in the string matching. This | |
1724 is what the BM_tab holds. */ | |
1725 REGISTER EMACS_INT *BM_tab; | |
1726 EMACS_INT *BM_tab_base; | |
1727 REGISTER Bytecount dirlen; | |
1728 EMACS_INT infinity; | |
665 | 1729 Bytebpos limit; |
446 | 1730 Bytecount stride_for_teases = 0; |
1731 REGISTER EMACS_INT i, j; | |
867 | 1732 Ibyte *pat, *pat_end; |
1733 REGISTER Ibyte *cursor, *p_limit, *ptr2; | |
1734 Ibyte simple_translate[0400]; | |
446 | 1735 REGISTER int direction = ((n > 0) ? 1 : -1); |
1736 #ifdef MULE | |
867 | 1737 Ibyte translate_prev_byte = 0; |
1738 Ibyte translate_anteprev_byte = 0; | |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1739 /* These need to be rethought in the event that the internal format |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1740 changes, or in the event that num_8_bit_fixed_chars disappears |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1741 (entirely_one_byte_p can be trivially worked out by checking whether the |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1742 byte count is equal to the char count.) */ |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1743 int buffer_entirely_one_byte_p = buf->text->entirely_one_byte_p; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1744 int buffer_nothing_greater_than_0xff = |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1745 buf->text->num_8_bit_fixed_chars == BUF_Z(buf) - BUF_BEG (buf); |
446 | 1746 #endif |
1747 #ifdef C_ALLOCA | |
1748 EMACS_INT BM_tab_space[0400]; | |
1749 BM_tab = &BM_tab_space[0]; | |
1750 #else | |
1751 BM_tab = alloca_array (EMACS_INT, 256); | |
1752 #endif | |
1753 | |
1754 /* The general approach is that we are going to maintain that we | |
1755 know the first (closest to the present position, in whatever | |
1756 direction we're searching) character that could possibly be | |
1757 the last (furthest from present position) character of a | |
1758 valid match. We advance the state of our knowledge by | |
1759 looking at that character and seeing whether it indeed | |
1760 matches the last character of the pattern. If it does, we | |
1761 take a closer look. If it does not, we move our pointer (to | |
1762 putative last characters) as far as is logically possible. | |
1763 This amount of movement, which I call a stride, will be the | |
1764 length of the pattern if the actual character appears nowhere | |
1765 in the pattern, otherwise it will be the distance from the | |
1766 last occurrence of that character to the end of the pattern. | |
1767 As a coding trick, an enormous stride is coded into the table | |
1768 for characters that match the last character. This allows | |
1769 use of only a single test, a test for having gone past the | |
1770 end of the permissible match region, to test for both | |
1771 possible matches (when the stride goes past the end | |
1772 immediately) and failure to match (where you get nudged past | |
1773 the end one stride at a time). | |
1774 | |
1775 Here we make a "mickey mouse" BM table. The stride of the | |
1776 search is determined only by the last character of the | |
1777 putative match. If that character does not match, we will | |
1778 stride the proper distance to propose a match that | |
1779 superimposes it on the last instance of a character that | |
1780 matches it (per trt), or misses it entirely if there is | |
1781 none. */ | |
1782 | |
1783 dirlen = len * direction; | |
1784 infinity = dirlen - (lim + pos + len + len) * direction; | |
1785 /* Record position after the end of the pattern. */ | |
1786 pat_end = base_pat + len; | |
1787 if (direction < 0) | |
1788 base_pat = pat_end - 1; | |
1789 BM_tab_base = BM_tab; | |
1790 BM_tab += 0400; | |
1791 j = dirlen; /* to get it in a register */ | |
1792 /* A character that does not appear in the pattern induces a | |
1793 stride equal to the pattern length. */ | |
1794 while (BM_tab_base != BM_tab) | |
1795 { | |
/* The fill is unrolled by four; BM_tab was advanced by 0400 (256)
   entries above, which is a multiple of four.  */
1796 *--BM_tab = j; | |
1797 *--BM_tab = j; | |
1798 *--BM_tab = j; | |
1799 *--BM_tab = j; | |
1800 } | |
1801 /* We use this for translation, instead of TRT itself. We | |
1802 fill this in to handle the characters that actually occur | |
1803 in the pattern. Others don't matter anyway! */ | |
1804 xzero (simple_translate); | |
/* Start from the identity mapping: every byte translates to itself
   until the pattern scan below overrides individual entries.  */
1805 for (i = 0; i < 0400; i++) | |
867 | 1806 simple_translate[i] = (Ibyte) i; |
446 | 1807 i = 0; |
1425 | 1808 |
446 | 1809 while (i != infinity) |
1810 { | |
867 | 1811 Ibyte *ptr = base_pat + i; |
446 | 1812 i += direction; |
1813 if (i == dirlen) | |
1814 i = infinity; | |
1815 if (!NILP (trt)) | |
428 | 1816 { |
446 | 1817 #ifdef MULE |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1818 Ichar ch = -1, untranslated; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1819 Ibyte byte; |
446 | 1820 int this_translated = 1; |
1821 | |
1822 /* Is *PTR the last byte of a character? */ | |
867 | 1823 if (pat_end - ptr == 1 || ibyte_first_byte_p (ptr[1])) |
428 | 1824 { |
867 | 1825 Ibyte *charstart = ptr; |
1826 while (!ibyte_first_byte_p (*charstart)) | |
446 | 1827 charstart--; |
867 | 1828 untranslated = itext_ichar (charstart); |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1829 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1830 ch = TRANSLATE (trt, untranslated); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1831 if (!ibyte_first_byte_p (*ptr)) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1832 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1833 translate_prev_byte = ptr[-1]; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1834 if (!ibyte_first_byte_p (translate_prev_byte)) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1835 translate_anteprev_byte = ptr[-2]; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1836 } |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1837 |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1838 if (ch != untranslated && /* Was translation done? */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1839 charset_base != (ch & ~ICHAR_FIELD3_MASK)) |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1840 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1841 /* In the very rare event that the CANON entry for this |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1842 character is not in the desired set, choose one that |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1843 is, from the equivalence set. It doesn't much matter |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1844 which, since we're building our own cheesy equivalence |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1845 table instead of using that belonging to the case |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1846 table directly. |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1847 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1848 We can get here if search_buffer has worked out that |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1849 the buffer is entirely single width. */ |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1850 Ichar starting_ch = ch; |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1851 int count = 0; |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1852 do |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1853 { |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1854 ch = TRANSLATE (inverse_trt, ch); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1855 if (charset_base == (ch & ~ICHAR_FIELD3_MASK)) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1856 break; |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1857 ++count; |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1858 } while (starting_ch != ch); |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1859 |
4414
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1860 /* If starting_ch is equal to ch (and count is not one, |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1861 which means no translation is necessary), the case |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1862 table is corrupt. (Any mapping in the canon table |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1863 should be reflected in the equivalence table, and we |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1864 know from the canon table that untranslated maps to |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1865 starting_ch and that untranslated has the correct value |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1866 for charset_base.) */ |
df576f30c1d8
Correct case-insensitive search for non-case, non-ASCII chars. Add tests.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4407
diff
changeset
|
1867 assert (1 == count || starting_ch != ch); |
446 | 1868 } |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1869 { |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1870 Ibyte tmp[MAX_ICHAR_LEN]; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1871 Bytecount chlen; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1872 |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1873 chlen = set_itext_ichar (tmp, ch); |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1874 byte = tmp[chlen - 1]; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1875 } |
428 | 1876 } |
1877 else | |
1878 { | |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1879 byte = *ptr; |
446 | 1880 this_translated = 0; |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1881 ch = -1; |
446 | 1882 } |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1883 |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1884 /* BYTE = last byte of character CH when represented as text */ |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1885 j = byte; |
446 | 1886 |
1887 if (i == infinity) | |
1888 stride_for_teases = BM_tab[j]; | |
1889 BM_tab[j] = dirlen - i; | |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1890 /* A translation table is accompanied by its inverse -- see |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1891 comment in casetab.c. */ |
446 | 1892 if (this_translated) |
1893 { | |
867 | 1894 Ichar starting_ch = ch; |
446 | 1895 EMACS_INT starting_j = j; |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1896 |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1897 text_checking_assert (valid_ichar_p (ch)); |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1898 do |
446 | 1899 { |
1900 ch = TRANSLATE (inverse_trt, ch); | |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1901 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
/* Each `continue' below jumps straight to the do-while test, so no
   simple_translate/BM_tab entry is written for that character.  */
1902 if (ch > 0x7F && buffer_entirely_one_byte_p) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1903 continue; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1904 |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1905 if (ch > 0xFF && buffer_nothing_greater_than_0xff) |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1906 continue; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1907 |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1908 |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1909 /* Retrieve last byte of character CH when represented as |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1910 text */ |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1911 { |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1912 Ibyte tmp[MAX_ICHAR_LEN]; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1913 Bytecount chlen; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1914 |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1915 chlen = set_itext_ichar (tmp, ch); |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1916 j = tmp[chlen - 1]; |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1917 } |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1918 |
4407
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1919 /* For all the characters that map into CH, set up |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1920 simple_translate to map the last byte into |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1921 STARTING_J. */ |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1922 simple_translate[j] = (Ibyte) starting_j; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1923 BM_tab[j] = dirlen - i; |
4ee73bbe4f8e
Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4322
diff
changeset
|
1924 |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1925 } |
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1926 while (ch != starting_ch); |
446 | 1927 } |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1928 #else /* not MULE */ |
446 | 1929 EMACS_INT k; |
1930 j = *ptr; | |
1931 k = (j = TRANSLATE (trt, j)); | |
1932 if (i == infinity) | |
1933 stride_for_teases = BM_tab[j]; | |
1934 BM_tab[j] = dirlen - i; | |
1935 /* A translation table is accompanied by its inverse -- | |
826 | 1936 see comment in casetab.c. */ |
446 | 1937 while ((j = TRANSLATE (inverse_trt, j)) != k) |
1938 { | |
867 | 1939 simple_translate[j] = (Ibyte) k; |
428 | 1940 BM_tab[j] = dirlen - i; |
1941 } | |
4897
91a023144e72
fix longstanding search bug involving searching for Control-1 chars
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
1942 #endif /* (not) MULE */ |
446 | 1943 } |
1944 else | |
1945 { | |
1946 j = *ptr; | |
1947 | |
1948 if (i == infinity) | |
1949 stride_for_teases = BM_tab[j]; | |
1950 BM_tab[j] = dirlen - i; | |
428 | 1951 } |
446 | 1952 /* stride_for_teases tells how much to stride if we get a |
1953 match on the far character but are subsequently | |
1954 disappointed, by recording what the stride would have been | |
1955 for that character if the last character had been | |
1956 different. */ | |
1957 } | |
/* After this assignment INFINITY equals the stride that the loop above
   stored (as dirlen - i, with i equal to the old INFINITY) for bytes
   matching the far end of the pattern, so subtracting it later undoes
   such a stride.  */
1958 infinity = dirlen - infinity; | |
1959 pos += dirlen - ((direction > 0) ? direction : 0); | |
1960 /* loop invariant - pos points at where last char (first char if | |
1961 reverse) of pattern would align in a possible match. */ | |
1962 while (n != 0) | |
1963 { | |
665 | 1964 Bytebpos tail_end; |
867 | 1965 Ibyte *tail_end_ptr; |
446 | 1966 /* It's been reported that some (broken) compiler thinks |
1967 that Boolean expressions in an arithmetic context are | |
1968 unsigned. Using an explicit ?1:0 prevents this. */ | |
1969 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0) | |
1970 return n * (0 - direction); | |
1971 /* First we do the part we can by pointers (maybe | |
1972 nothing) */ | |
1973 QUIT; | |
1974 pat = base_pat; | |
1975 limit = pos - dirlen + direction; | |
1976 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF | |
1977 have changed. See buffer.h. */ | |
1978 limit = ((direction > 0) | |
826 | 1979 ? BYTE_BUF_CEILING_OF (buf, limit) - 1 |
1980 : BYTE_BUF_FLOOR_OF (buf, limit + 1)); | |
446 | 1981 /* LIMIT is now the last (not beyond-last!) value POS can |
1982 take on without hitting edge of buffer or the gap. */ | |
1983 limit = ((direction > 0) | |
1984 ? min (lim - 1, min (limit, pos + 20000)) | |
1985 : max (lim, max (limit, pos - 20000))); | |
826 | 1986 tail_end = BYTE_BUF_CEILING_OF (buf, pos); |
1987 tail_end_ptr = BYTE_BUF_BYTE_ADDRESS (buf, tail_end); | |
446 | 1988 |
1989 if ((limit - pos) * direction > 20) | |
428 | 1990 { |
826 | 1991 /* We have to be careful because the code can generate addresses |
1992 that don't point to the beginning of characters. */ | |
1993 p_limit = BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, limit); | |
1994 ptr2 = (cursor = BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos)); | |
446 | 1995 /* In this loop, pos + cursor - ptr2 is the surrogate |
1996 for pos */ | |
1997 while (1) /* use one cursor setting as long as i can */ | |
1998 { | |
1999 if (direction > 0) /* worth duplicating */ | |
2000 { | |
2001 /* Use signed comparison if appropriate to make | |
2002 cursor+infinity sure to be > p_limit. | |
2003 Assuming that the buffer lies in a range of | |
2004 addresses that are all "positive" (as ints) | |
2005 or all "negative", either kind of comparison | |
2006 will work as long as we don't step by | |
2007 infinity. So pick the kind that works when | |
2008 we do step by infinity. */ | |
2009 if ((EMACS_INT) (p_limit + infinity) > | |
2010 (EMACS_INT) p_limit) | |
2011 while ((EMACS_INT) cursor <= | |
2012 (EMACS_INT) p_limit) | |
2013 cursor += BM_tab[*cursor]; | |
2014 else | |
2015 while ((EMACS_UINT) cursor <= | |
2016 (EMACS_UINT) p_limit) | |
2017 cursor += BM_tab[*cursor]; | |
2018 } | |
2019 else | |
2020 { | |
2021 if ((EMACS_INT) (p_limit + infinity) < | |
2022 (EMACS_INT) p_limit) | |
2023 while ((EMACS_INT) cursor >= | |
2024 (EMACS_INT) p_limit) | |
2025 cursor += BM_tab[*cursor]; | |
2026 else | |
2027 while ((EMACS_UINT) cursor >= | |
2028 (EMACS_UINT) p_limit) | |
2029 cursor += BM_tab[*cursor]; | |
2030 } | |
2031 /* If you are here, cursor is beyond the end of the | |
2032 searched region. This can happen if you match on | |
2033 the far character of the pattern, because the | |
2034 "stride" of that character is infinity, a number | |
2035 able to throw you well beyond the end of the | |
2036 search. It can also happen if you fail to match | |
2037 within the permitted region and would otherwise | |
2038 try a character beyond that region */ | |
2039 if ((cursor - p_limit) * direction <= len) | |
2040 break; /* a small overrun is genuine */ | |
2041 cursor -= infinity; /* large overrun = hit */ | |
2042 i = dirlen - direction; | |
2043 if (!NILP (trt)) | |
2044 { | |
2045 while ((i -= direction) + direction != 0) | |
2046 { | |
2047 #ifdef MULE | |
867 | 2048 Ichar ch; |
446 | 2049 cursor -= direction; |
2050 /* Translate only the last byte of a character. */ | |
2051 if ((cursor == tail_end_ptr | |
867 | 2052 || ibyte_first_byte_p (cursor[1])) |
2053 && (ibyte_first_byte_p (cursor[0]) | |
446 | 2054 || (translate_prev_byte == cursor[-1] |
867 | 2055 && (ibyte_first_byte_p (translate_prev_byte) |
446 | 2056 || translate_anteprev_byte == cursor[-2])))) |
2057 ch = simple_translate[*cursor]; | |
2058 else | |
2059 ch = *cursor; | |
2060 if (pat[i] != ch) | |
2061 break; | |
2062 #else | |
2063 if (pat[i] != TRANSLATE (trt, *(cursor -= direction))) | |
2064 break; | |
2065 #endif | |
2066 } | |
2067 } | |
2068 else | |
2069 { | |
2070 while ((i -= direction) + direction != 0) | |
2071 if (pat[i] != *(cursor -= direction)) | |
2072 break; | |
2073 } | |
2074 cursor += dirlen - i - direction; /* fix cursor */ | |
2075 if (i + direction == 0) | |
2076 { | |
2077 cursor -= direction; | |
2078 | |
2079 { | |
665 | 2080 Bytebpos bytstart = (pos + cursor - ptr2 + |
446 | 2081 ((direction > 0) |
2082 ? 1 - len : 0)); | |
665 | 2083 Charbpos bufstart = bytebpos_to_charbpos (buf, bytstart); |
2084 Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len); | |
446 | 2085 |
2086 set_search_regs (buf, bufstart, bufend - bufstart); | |
2087 } | |
2088 | |
2089 if ((n -= direction) != 0) | |
2090 cursor += dirlen; /* to resume search */ | |
2091 else | |
2092 return ((direction > 0) | |
2093 ? search_regs.end[0] : search_regs.start[0]); | |
2094 } | |
2095 else | |
2096 cursor += stride_for_teases; /* <sigh> we lose - */ | |
2097 } | |
2098 pos += cursor - ptr2; | |
2099 } | |
2100 else | |
2101 /* Now we'll pick up a clump that has to be done the hard | |
2102 way because it covers a discontinuity */ | |
2103 { | |
428 | 2104 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF |
2105 have changed. See buffer.h. */ | |
2106 limit = ((direction > 0) | |
826 | 2107 ? BYTE_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1 |
2108 : BYTE_BUF_FLOOR_OF (buf, pos - dirlen)); | |
428 | 2109 limit = ((direction > 0) |
446 | 2110 ? min (limit + len, lim - 1) |
2111 : max (limit - len, lim)); | |
2112 /* LIMIT is now the last value POS can have | |
2113 and still be valid for a possible match. */ | |
2114 while (1) | |
428 | 2115 { |
446 | 2116 /* This loop can be coded for space rather than |
2117 speed because it will usually run only once. | |
2118 (the reach is at most len + 21, and typically | |
2119 does not exceed len) */ | |
2120 while ((limit - pos) * direction >= 0) | |
826 | 2121 /* *not* BYTE_BUF_FETCH_CHAR. We are working here |
446 | 2122 with bytes, not characters. */ |
826 | 2123 pos += BM_tab[*BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos)]; |
446 | 2124 /* now run the same tests to distinguish going off |
2125 the end, a match or a phony match. */ | |
2126 if ((pos - limit) * direction <= len) | |
2127 break; /* ran off the end */ | |
2128 /* Found what might be a match. | |
2129 Set POS back to last (first if reverse) char pos. */ | |
2130 pos -= infinity; | |
2131 i = dirlen - direction; | |
2132 while ((i -= direction) + direction != 0) | |
428 | 2133 { |
446 | 2134 #ifdef MULE |
867 | 2135 Ichar ch; |
2136 Ibyte *ptr; | |
446 | 2137 #endif |
2138 pos -= direction; | |
2139 #ifdef MULE | |
826 | 2140 ptr = BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos); |
446 | 2141 if ((ptr == tail_end_ptr |
867 | 2142 || ibyte_first_byte_p (ptr[1])) |
2143 && (ibyte_first_byte_p (ptr[0]) | |
446 | 2144 || (translate_prev_byte == ptr[-1] |
867 | 2145 && (ibyte_first_byte_p (translate_prev_byte) |
446 | 2146 || translate_anteprev_byte == ptr[-2])))) |
2147 ch = simple_translate[*ptr]; | |
428 | 2148 else |
446 | 2149 ch = *ptr; |
2150 if (pat[i] != ch) | |
2151 break; | |
2152 | |
2153 #else | |
826 | 2154 if (pat[i] != |
2155 TRANSLATE (trt, | |
2156 *BYTE_BUF_BYTE_ADDRESS_NO_VERIFY (buf, pos))) | |
446 | 2157 break; |
2158 #endif | |
428 | 2159 } |
446 | 2160 /* Above loop has moved POS part or all the way back |
2161 to the first char pos (last char pos if reverse). | |
2162 Set it once again at the last (first if reverse) | |
2163 char. */ | |
2164 pos += dirlen - i- direction; | |
2165 if (i + direction == 0) | |
428 | 2166 { |
446 | 2167 pos -= direction; |
2168 | |
2169 { | |
665 | 2170 Bytebpos bytstart = (pos + |
446 | 2171 ((direction > 0) |
2172 ? 1 - len : 0)); | |
665 | 2173 Charbpos bufstart = bytebpos_to_charbpos (buf, bytstart); |
2174 Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len); | |
446 | 2175 |
2176 set_search_regs (buf, bufstart, bufend - bufstart); | |
2177 } | |
2178 | |
2179 if ((n -= direction) != 0) | |
2180 pos += dirlen; /* to resume search */ | |
428 | 2181 else |
446 | 2182 return ((direction > 0) |
2183 ? search_regs.end[0] : search_regs.start[0]); | |
428 | 2184 } |
446 | 2185 else |
2186 pos += stride_for_teases; | |
2187 } | |
428 | 2188 } |
446 | 2189 /* We have done one clump. Can we continue? */ |
2190 if ((lim - pos) * direction < 0) | |
2191 return (0 - n) * direction; | |
428 | 2192 } |
/* The loop above only falls through when N is zero on entry; every
   other path returns from inside it.  */
665 | 2193 return bytebpos_to_charbpos (buf, pos); |
428 | 2194 } |
2195 | |
1024 | 2196 /* Record the whole-match data (beginning BEG and end BEG + LEN) and the |
2197 buffer for a match just found. */ | |
428 | 2198 |
2199 static void | |
665 | 2200 set_search_regs (struct buffer *buf, Charbpos beg, Charcount len) |
428 | 2201 { |
2202 /* Make sure we have registers in which to store | |
2203 the match position. */ | |
2204 if (search_regs.num_regs == 0) | |
2205 { | |
2206 search_regs.start = xnew (regoff_t); | |
2207 search_regs.end = xnew (regoff_t); | |
2208 search_regs.num_regs = 1; | |
2209 } | |
2210 | |
1468 | 2211 clear_search_regs (); |
428 | 2212 search_regs.start[0] = beg; |
2213 search_regs.end[0] = beg + len; | |
793 | 2214 last_thing_searched = wrap_buffer (buf); |
428 | 2215 } |
2216 | |
1468 | 2217 /* Clear search registers so match data will be null. */ |
1024 | 2218 |
2219 static void | |
1468 | 2220 clear_search_regs (void) |
1024 | 2221 { |
2222 /* This function has been Mule-ized. */ | |
2223 int i; | |
2224 | |
1468 | 2225 for (i = 0; i < search_regs.num_regs; i++) |
2226 search_regs.start[i] = search_regs.end[i] = -1; | |
1024 | 2227 } |
2228 | |
428 | 2229 |
2230 /* Given a string of words separated by word delimiters, | |
442 | 2231 compute a regexp that matches those exact words |
2232 separated by arbitrary punctuation. */ | |
428 | 2233 |
2234 static Lisp_Object | |
2235 wordify (Lisp_Object buffer, Lisp_Object string) | |
2236 { | |
2237 Charcount i, len; | |
2238 EMACS_INT punct_count = 0, word_count = 0; | |
2239 struct buffer *buf = decode_buffer (buffer, 0); | |
826 | 2240 Lisp_Object syntax_table = buf->mirror_syntax_table; |
428 | 2241 |
2242 CHECK_STRING (string); | |
826 | 2243 len = string_char_length (string); |
428 | 2244 |
2245 for (i = 0; i < len; i++) | |
867 | 2246 if (!WORD_SYNTAX_P (syntax_table, string_ichar (string, i))) |
428 | 2247 { |
2248 punct_count++; | |
2249 if (i > 0 && WORD_SYNTAX_P (syntax_table, | |
867 | 2250 string_ichar (string, i - 1))) |
428 | 2251 word_count++; |
2252 } | |
867 | 2253 if (WORD_SYNTAX_P (syntax_table, string_ichar (string, len - 1))) |
428 | 2254 word_count++; |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
2255 if (!word_count) return build_ascstring (""); |
428 | 2256 |
2257 { | |
2258 /* The following value is an upper bound on the amount of storage we | |
2259 need. In non-Mule, it is exact. */ | |
867 | 2260 Ibyte *storage = |
2367 | 2261 alloca_ibytes (XSTRING_LENGTH (string) - punct_count + |
428 | 2262 5 * (word_count - 1) + 4); |
867 | 2263 Ibyte *o = storage; |
428 | 2264 |
2265 *o++ = '\\'; | |
2266 *o++ = 'b'; | |
2267 | |
2268 for (i = 0; i < len; i++) | |
2269 { | |
867 | 2270 Ichar ch = string_ichar (string, i); |
428 | 2271 |
2272 if (WORD_SYNTAX_P (syntax_table, ch)) | |
867 | 2273 o += set_itext_ichar (o, ch); |
428 | 2274 else if (i > 0 |
2275 && WORD_SYNTAX_P (syntax_table, | |
867 | 2276 string_ichar (string, i - 1)) |
428 | 2277 && --word_count) |
2278 { | |
2279 *o++ = '\\'; | |
2280 *o++ = 'W'; | |
2281 *o++ = '\\'; | |
2282 *o++ = 'W'; | |
2283 *o++ = '*'; | |
2284 } | |
2285 } | |
2286 | |
2287 *o++ = '\\'; | |
2288 *o++ = 'b'; | |
2289 | |
2290 return make_string (storage, o - storage); | |
2291 } | |
2292 } | |
2293 | |
2294 DEFUN ("search-backward", Fsearch_backward, 1, 5, "sSearch backward: ", /* | |
2295 Search backward from point for STRING. | |
2296 Set point to the beginning of the occurrence found, and return point. | |
444 | 2297 |
2298 Optional second argument LIMIT bounds the search; it is a buffer | |
2299 position. The match found must not extend before that position. | |
2300 The value nil is equivalent to (point-min). | |
2301 | |
2302 Optional third argument NOERROR, if t, means just return nil (no | |
2303 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2304 and return nil. | |
2305 | |
2306 Optional fourth argument COUNT is a repeat count--search for | |
2307 successive occurrences. | |
2308 | |
428 | 2309 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2310 defaults to the current buffer. |
2311 | |
1468 | 2312 When the match is successful, this function modifies the match data |
2313 that `match-beginning', `match-end' and `match-data' access; save the | |
2314 match data with `match-data' and restore it with `store-match-data' if | |
2315 you want to preserve them. If the match fails, the match data from the | |
2316 previous success match is preserved. | |
2317 | |
2318 See also the function `replace-match'. | |
428 | 2319 */ |
444 | 2320 (string, limit, noerror, count, buffer)) |
428 | 2321 { |
444 | 2322 return search_command (string, limit, noerror, count, buffer, -1, 0, 0); |
428 | 2323 } |
2324 | |
2325 DEFUN ("search-forward", Fsearch_forward, 1, 5, "sSearch: ", /* | |
2326 Search forward from point for STRING. | |
2327 Set point to the end of the occurrence found, and return point. | |
444 | 2328 |
2329 Optional second argument LIMIT bounds the search; it is a buffer | |
2330 position. The match found must not extend after that position. The | |
2331 value nil is equivalent to (point-max). | |
2332 | |
2333 Optional third argument NOERROR, if t, means just return nil (no | |
2334 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2335 and return nil. | |
2336 | |
2337 Optional fourth argument COUNT is a repeat count--search for | |
2338 successive occurrences. | |
2339 | |
428 | 2340 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2341 defaults to the current buffer. |
2342 | |
1468 | 2343 When the match is successful, this function modifies the match data |
2344 that `match-beginning', `match-end' and `match-data' access; save the | |
2345 match data with `match-data' and restore it with `store-match-data' if | |
2346 you want to preserve them. If the match fails, the match data from the | |
2347 previous success match is preserved. | |
2348 | |
2349 See also the function `replace-match'. | |
428 | 2350 */ |
444 | 2351 (string, limit, noerror, count, buffer)) |
428 | 2352 { |
444 | 2353 return search_command (string, limit, noerror, count, buffer, 1, 0, 0); |
428 | 2354 } |
2355 | |
2356 DEFUN ("word-search-backward", Fword_search_backward, 1, 5, | |
2357 "sWord search backward: ", /* | |
2358 Search backward from point for STRING, ignoring differences in punctuation. | |
2359 Set point to the beginning of the occurrence found, and return point. | |
444 | 2360 |
2361 Optional second argument LIMIT bounds the search; it is a buffer | |
2362 position. The match found must not extend before that position. | |
2363 The value nil is equivalent to (point-min). | |
2364 | |
2365 Optional third argument NOERROR, if t, means just return nil (no | |
2366 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2367 and return nil. | |
2368 | |
2369 Optional fourth argument COUNT is a repeat count--search for | |
2370 successive occurrences. | |
2371 | |
428 | 2372 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2373 defaults to the current buffer. |
2374 | |
1468 | 2375 When the match is successful, this function modifies the match data |
2376 that `match-beginning', `match-end' and `match-data' access; save the | |
2377 match data with `match-data' and restore it with `store-match-data' if | |
2378 you want to preserve them. If the match fails, the match data from the | |
2379 previous success match is preserved. | |
2380 | |
2381 See also the function `replace-match'. | |
428 | 2382 */ |
444 | 2383 (string, limit, noerror, count, buffer)) |
428 | 2384 { |
444 | 2385 return search_command (wordify (buffer, string), limit, noerror, count, |
428 | 2386 buffer, -1, 1, 0); |
2387 } | |
2388 | |
2389 DEFUN ("word-search-forward", Fword_search_forward, 1, 5, "sWord search: ", /* | |
2390 Search forward from point for STRING, ignoring differences in punctuation. | |
2391 Set point to the end of the occurrence found, and return point. | |
444 | 2392 |
2393 Optional second argument LIMIT bounds the search; it is a buffer | |
2394 position. The match found must not extend after that position. The | |
2395 value nil is equivalent to (point-max). | |
2396 | |
2397 Optional third argument NOERROR, if t, means just return nil (no | |
2398 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2399 and return nil. | |
2400 | |
2401 Optional fourth argument COUNT is a repeat count--search for | |
2402 successive occurrences. | |
2403 | |
428 | 2404 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2405 defaults to the current buffer. |
2406 | |
1468 | 2407 When the match is successful, this function modifies the match data |
2408 that `match-beginning', `match-end' and `match-data' access; save the | |
2409 match data with `match-data' and restore it with `store-match-data' if | |
2410 you want to preserve them. If the match fails, the match data from the | |
2411 previous success match is preserved. | |
2412 | |
2413 See also the function `replace-match'. | |
428 | 2414 */ |
444 | 2415 (string, limit, noerror, count, buffer)) |
428 | 2416 { |
444 | 2417 return search_command (wordify (buffer, string), limit, noerror, count, |
428 | 2418 buffer, 1, 1, 0); |
2419 } | |
2420 | |
2421 DEFUN ("re-search-backward", Fre_search_backward, 1, 5, | |
2422 "sRE search backward: ", /* | |
2423 Search backward from point for match for regular expression REGEXP. | |
2424 Set point to the beginning of the match, and return point. | |
2425 The match found is the one starting last in the buffer | |
2426 and yet ending before the origin of the search. | |
444 | 2427 |
2428 Optional second argument LIMIT bounds the search; it is a buffer | |
2429 position. The match found must not extend before that position. | |
2430 The value nil is equivalent to (point-min). | |
2431 | |
2432 Optional third argument NOERROR, if t, means just return nil (no | |
2433 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2434 and return nil. | |
2435 | |
2436 Optional fourth argument COUNT is a repeat count--search for | |
2437 successive occurrences. | |
2438 | |
428 | 2439 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2440 defaults to the current buffer. |
2441 | |
1468 | 2442 When the match is successful, this function modifies the match data |
2443 that `match-beginning', `match-end' and `match-data' access; save the | |
2444 match data with `match-data' and restore it with `store-match-data' if | |
2445 you want to preserve them. If the match fails, the match data from the | |
2446 previous success match is preserved. | |
2447 | |
2448 See also the function `replace-match'. | |
428 | 2449 */ |
444 | 2450 (regexp, limit, noerror, count, buffer)) |
428 | 2451 { |
444 | 2452 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 0); |
428 | 2453 } |
2454 | |
2455 DEFUN ("re-search-forward", Fre_search_forward, 1, 5, "sRE search: ", /* | |
2456 Search forward from point for regular expression REGEXP. | |
2457 Set point to the end of the occurrence found, and return point. | |
444 | 2458 |
2459 Optional second argument LIMIT bounds the search; it is a buffer | |
2460 position. The match found must not extend after that position. The | |
2461 value nil is equivalent to (point-max). | |
2462 | |
2463 Optional third argument NOERROR, if t, means just return nil (no | |
2464 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2465 and return nil. | |
2466 | |
2467 Optional fourth argument COUNT is a repeat count--search for | |
2468 successive occurrences. | |
2469 | |
428 | 2470 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2471 defaults to the current buffer. |
2472 | |
1468 | 2473 When the match is successful, this function modifies the match data |
2474 that `match-beginning', `match-end' and `match-data' access; save the | |
2475 match data with `match-data' and restore it with `store-match-data' if | |
2476 you want to preserve them. If the match fails, the match data from the | |
2477 previous success match is preserved. | |
2478 | |
2479 See also the function `replace-match'. | |
428 | 2480 */ |
444 | 2481 (regexp, limit, noerror, count, buffer)) |
428 | 2482 { |
444 | 2483 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 0); |
428 | 2484 } |
2485 | |
2486 DEFUN ("posix-search-backward", Fposix_search_backward, 1, 5, | |
2487 "sPosix search backward: ", /* | |
2488 Search backward from point for match for regular expression REGEXP. | |
2489 Find the longest match in accord with Posix regular expression rules. | |
2490 Set point to the beginning of the match, and return point. | |
2491 The match found is the one starting last in the buffer | |
2492 and yet ending before the origin of the search. | |
444 | 2493 |
2494 Optional second argument LIMIT bounds the search; it is a buffer | |
2495 position. The match found must not extend before that position. | |
2496 The value nil is equivalent to (point-min). | |
2497 | |
2498 Optional third argument NOERROR, if t, means just return nil (no | |
2499 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2500 and return nil. | |
2501 | |
2502 Optional fourth argument COUNT is a repeat count--search for | |
2503 successive occurrences. | |
2504 | |
428 | 2505 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2506 defaults to the current buffer. |
2507 | |
1468 | 2508 When the match is successful, this function modifies the match data |
2509 that `match-beginning', `match-end' and `match-data' access; save the | |
2510 match data with `match-data' and restore it with `store-match-data' if | |
2511 you want to preserve them. If the match fails, the match data from the | |
2512 previous success match is preserved. | |
2513 | |
2514 See also the function `replace-match'. | |
428 | 2515 */ |
444 | 2516 (regexp, limit, noerror, count, buffer)) |
428 | 2517 { |
444 | 2518 return search_command (regexp, limit, noerror, count, buffer, -1, 1, 1); |
428 | 2519 } |
2520 | |
2521 DEFUN ("posix-search-forward", Fposix_search_forward, 1, 5, "sPosix search: ", /* | |
2522 Search forward from point for regular expression REGEXP. | |
2523 Find the longest match in accord with Posix regular expression rules. | |
2524 Set point to the end of the occurrence found, and return point. | |
444 | 2525 |
2526 Optional second argument LIMIT bounds the search; it is a buffer | |
2527 position. The match found must not extend after that position. The | |
2528 value nil is equivalent to (point-max). | |
2529 | |
2530 Optional third argument NOERROR, if t, means just return nil (no | |
2531 error) if the search fails. If neither nil nor t, set point to LIMIT | |
2532 and return nil. | |
2533 | |
2534 Optional fourth argument COUNT is a repeat count--search for | |
2535 successive occurrences. | |
2536 | |
428 | 2537 Optional fifth argument BUFFER specifies the buffer to search in and |
444 | 2538 defaults to the current buffer. |
2539 | |
1468 | 2540 When the match is successful, this function modifies the match data |
2541 that `match-beginning', `match-end' and `match-data' access; save the | |
2542 match data with `match-data' and restore it with `store-match-data' if | |
2543 you want to preserve them. If the match fails, the match data from the | |
2544 previous success match is preserved. | |
2545 | |
2546 See also the function `replace-match'. | |
428 | 2547 */ |
444 | 2548 (regexp, limit, noerror, count, buffer)) |
428 | 2549 { |
444 | 2550 return search_command (regexp, limit, noerror, count, buffer, 1, 1, 1); |
428 | 2551 } |
2552 | |
2553 | |
2554 static Lisp_Object | |
2555 free_created_dynarrs (Lisp_Object cons) | |
2556 { | |
2557 Dynarr_free (get_opaque_ptr (XCAR (cons))); | |
2558 Dynarr_free (get_opaque_ptr (XCDR (cons))); | |
2559 free_opaque_ptr (XCAR (cons)); | |
2560 free_opaque_ptr (XCDR (cons)); | |
853 | 2561 free_cons (cons); |
428 | 2562 return Qnil; |
2563 } | |
2564 | |
2565 DEFUN ("replace-match", Freplace_match, 1, 5, 0, /* | |
444 | 2566 Replace text matched by last search with REPLACEMENT. |
4199 | 2567 Leaves point at end of replacement text. |
2568 Optional boolean FIXEDCASE inhibits matching case of REPLACEMENT to source. | |
2569 Optional boolean LITERAL inhibits interpretation of escape sequences. | |
2570 Optional STRING provides the source text to replace. | |
2571 Optional STRBUFFER may be a buffer, providing match context, or an integer | |
2572 specifying the subexpression to replace. | |
2573 | |
2574 If FIXEDCASE is non-nil, do not alter case of replacement text. | |
428 | 2575 Otherwise maybe capitalize the whole text, or maybe just word initials, |
2576 based on the replaced text. | |
4199 | 2577 If the replaced text has only capital letters and has at least one |
2578 multiletter word, convert REPLACEMENT to all caps. | |
428 | 2579 If the replaced text has at least one word starting with a capital letter, |
444 | 2580 then capitalize each word in REPLACEMENT. |
428 | 2581 |
4199 | 2582 If LITERAL is non-nil, insert REPLACEMENT literally. |
428 | 2583 Otherwise treat `\\' as special: |
444 | 2584 `\\&' in REPLACEMENT means substitute original matched text. |
428 | 2585 `\\N' means substitute what matched the Nth `\\(...\\)'. |
2586 If Nth parens didn't match, substitute nothing. | |
2587 `\\\\' means insert one `\\'. | |
2588 `\\u' means upcase the next character. | |
2589 `\\l' means downcase the next character. | |
2590 `\\U' means begin upcasing all following characters. | |
2591 `\\L' means begin downcasing all following characters. | |
2592 `\\E' means terminate the effect of any `\\U' or `\\L'. | |
2593 Case changes made with `\\u', `\\l', `\\U', and `\\L' override | |
2594 all other case changes that may be made in the replaced text. | |
4199 | 2595 |
2596 If non-nil, STRING is the source string, and a new string with the specified | |
2597 replacements is created and returned. Otherwise the current buffer is the | |
2598 source text. | |
2599 | |
2600 If non-nil, STRBUFFER may be an integer, interpreted as the index of the | |
2601 subexpression to replace in the source text, or a buffer to provide the | |
2602 syntax table and case table. If nil, then the \"subexpression\" is 0, i.e., | |
2603 the whole match, and the current buffer provides the syntax and case tables. | |
2604 If STRING is nil, STRBUFFER must be nil or an integer. | |
2605 | |
2606 Specifying a subexpression is only useful after a regular expression match, | |
2607 since a fixed string search has no non-trivial subexpressions. | |
2608 | |
2609 It is not possible to specify both a buffer and a subexpression. If that is | |
2610 desired, the idiom `(with-current-buffer BUFFER (replace-match ... INTEGER))' | |
2611 may be appropriate. | |
2612 | |
2613 If STRING is nil but the last thing matched (or searched) was a string, or | |
2614 STRING is a string but the last thing matched was a buffer, an | |
2615 `invalid-argument' error will be signaled. (XEmacs does not check that the | |
2616 last thing searched is the source string, but it is not useful to use a | |
2617 different string as source.) | |
2618 | |
2619 If no match (including searches) has been successful or the requested | |
1468 | 2620 subexpression was not matched, an `args-out-of-range' error will be |
2621 signaled. (If no match has ever been conducted in this instance of | |
2622 XEmacs, an `invalid-operation' error will be signaled. This is very | |
2623 rare.) | |
428 | 2624 */ |
444 | 2625 (replacement, fixedcase, literal, string, strbuffer)) |
428 | 2626 { |
2627 /* This function can GC */ | |
2628 enum { nochange, all_caps, cap_initial } case_action; | |
665 | 2629 Charbpos pos, last; |
428 | 2630 int some_multiletter_word; |
2631 int some_lowercase; | |
2632 int some_uppercase; | |
2633 int some_nonuppercase_initial; | |
867 | 2634 Ichar c, prevc; |
428 | 2635 Charcount inslen; |
2636 struct buffer *buf; | |
826 | 2637 Lisp_Object syntax_table; |
428 | 2638 int mc_count; |
2639 Lisp_Object buffer; | |
2640 int_dynarr *ul_action_dynarr = 0; | |
2641 int_dynarr *ul_pos_dynarr = 0; | |
502 | 2642 int sub = 0; |
428 | 2643 int speccount; |
2644 | |
444 | 2645 CHECK_STRING (replacement); |
428 | 2646 |
4199 | 2647 /* Because GNU decided to be incompatible here, we support the following |
2648 baroque and bogus API for the STRING and STRBUFFER arguments: | |
2649 types interpretations | |
2650 STRING STRBUFFER STRING STRBUFFER | |
2651 nil nil none 0 = index of subexpression to replace | |
2652 nil integer none index of subexpression to replace | |
2653 nil other ***** error ***** | |
2654 string nil source current buffer provides syntax table | |
2655 subexpression = 0 (whole match) | |
2656 string buffer source buffer providing syntax table | |
2657 subexpression = 0 (whole match) | |
2658 string integer source current buffer provides syntax table | |
2659 subexpression = STRBUFFER | |
2660 string other ***** error ***** | |
2661 */ | |
2662 | |
2663 /* Do STRBUFFER first; if STRING is nil, we'll overwrite BUF and BUFFER. */ | |
2664 | |
2665 /* If the match data were abstracted into a special "match data" type | |
2666 instead of the typical half-assed "let the implementation be visible" | |
2667 form it's in, we could extend it to include the last string matched | |
2668 and the buffer used for that matching. But of course we can't change | |
2669 it as it is. | |
2670 */ | |
2671 if (NILP (strbuffer) || BUFFERP (strbuffer)) | |
2672 { | |
2673 buf = decode_buffer (strbuffer, 0); | |
2674 } | |
2675 else if (!NILP (strbuffer)) | |
2676 { | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2677 CHECK_FIXNUM (strbuffer); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2678 sub = XFIXNUM (strbuffer); |
4199 | 2679 if (sub < 0 || sub >= (int) search_regs.num_regs) |
2680 invalid_argument ("match data register invalid", strbuffer); | |
2681 if (search_regs.start[sub] < 0) | |
2682 invalid_argument ("match data register not set", strbuffer); | |
2683 buf = current_buffer; | |
2684 } | |
2685 else | |
2686 invalid_argument ("STRBUFFER must be nil, a buffer, or an integer", | |
2687 strbuffer); | |
2688 buffer = wrap_buffer (buf); | |
2689 | |
428 | 2690 if (! NILP (string)) |
2691 { | |
2692 CHECK_STRING (string); | |
2693 if (!EQ (last_thing_searched, Qt)) | |
4199 | 2694 invalid_argument ("last thing matched was not a string", Qunbound); |
428 | 2695 } |
2696 else | |
2697 { | |
2698 if (!BUFFERP (last_thing_searched)) | |
4199 | 2699 invalid_argument ("last thing matched was not a buffer", Qunbound); |
428 | 2700 buffer = last_thing_searched; |
2701 buf = XBUFFER (buffer); | |
2702 } | |
2703 | |
826 | 2704 syntax_table = buf->mirror_syntax_table; |
428 | 2705 |
2706 case_action = nochange; /* We tried an initialization */ | |
2707 /* but some C compilers blew it */ | |
2708 | |
2709 if (search_regs.num_regs == 0) | |
826 | 2710 signal_error (Qinvalid_operation, |
2711 "replace-match called before any match found", Qunbound); | |
428 | 2712 |
2713 if (NILP (string)) | |
2714 { | |
469 | 2715 if (search_regs.start[sub] < BUF_BEGV (buf) |
2716 || search_regs.start[sub] > search_regs.end[sub] | |
2717 || search_regs.end[sub] > BUF_ZV (buf)) | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2718 args_out_of_range (make_fixnum (search_regs.start[sub]), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2719 make_fixnum (search_regs.end[sub])); |
428 | 2720 } |
2721 else | |
2722 { | |
2723 if (search_regs.start[0] < 0 | |
2724 || search_regs.start[0] > search_regs.end[0] | |
826 | 2725 || search_regs.end[0] > string_char_length (string)) |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2726 args_out_of_range (make_fixnum (search_regs.start[0]), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2727 make_fixnum (search_regs.end[0])); |
428 | 2728 } |
2729 | |
2730 if (NILP (fixedcase)) | |
2731 { | |
2732 /* Decide how to casify by examining the matched text. */ | |
2733 | |
707 | 2734 last = search_regs.end[sub]; |
428 | 2735 prevc = '\n'; |
2736 case_action = all_caps; | |
2737 | |
2738 /* some_multiletter_word is set nonzero if any original word | |
2739 is more than one letter long. */ | |
2740 some_multiletter_word = 0; | |
2741 some_lowercase = 0; | |
2742 some_nonuppercase_initial = 0; | |
2743 some_uppercase = 0; | |
2744 | |
707 | 2745 for (pos = search_regs.start[sub]; pos < last; pos++) |
428 | 2746 { |
2747 if (NILP (string)) | |
2748 c = BUF_FETCH_CHAR (buf, pos); | |
2749 else | |
867 | 2750 c = string_ichar (string, pos); |
428 | 2751 |
2752 if (LOWERCASEP (buf, c)) | |
2753 { | |
2754 /* Cannot be all caps if any original char is lower case */ | |
2755 | |
2756 some_lowercase = 1; | |
2757 if (!WORD_SYNTAX_P (syntax_table, prevc)) | |
2758 some_nonuppercase_initial = 1; | |
2759 else | |
2760 some_multiletter_word = 1; | |
2761 } | |
2762 else if (!NOCASEP (buf, c)) | |
2763 { | |
2764 some_uppercase = 1; | |
2765 if (!WORD_SYNTAX_P (syntax_table, prevc)) | |
2766 ; | |
2767 else | |
2768 some_multiletter_word = 1; | |
2769 } | |
2770 else | |
2771 { | |
2772 /* If the initial is a caseless word constituent, | |
2773 treat that like a lowercase initial. */ | |
2774 if (!WORD_SYNTAX_P (syntax_table, prevc)) | |
2775 some_nonuppercase_initial = 1; | |
2776 } | |
2777 | |
2778 prevc = c; | |
2779 } | |
2780 | |
2781 /* Convert to all caps if the old text is all caps | |
2782 and has at least one multiletter word. */ | |
2783 if (! some_lowercase && some_multiletter_word) | |
2784 case_action = all_caps; | |
2785 /* Capitalize each word, if the old text has all capitalized words. */ | |
2786 else if (!some_nonuppercase_initial && some_multiletter_word) | |
2787 case_action = cap_initial; | |
2788 else if (!some_nonuppercase_initial && some_uppercase) | |
2789 /* Should x -> yz, operating on X, give Yz or YZ? | |
2790 We'll assume the latter. */ | |
2791 case_action = all_caps; | |
2792 else | |
2793 case_action = nochange; | |
2794 } | |
2795 | |
2796 /* Do replacement in a string. */ | |
2797 if (!NILP (string)) | |
2798 { | |
2799 Lisp_Object before, after; | |
2800 | |
2801 speccount = specpdl_depth (); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2802 before = Fsubseq (string, Qzero, make_fixnum (search_regs.start[sub])); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2803 after = Fsubseq (string, make_fixnum (search_regs.end[sub]), Qnil); |
428 | 2804 |
444 | 2805 /* Do case substitution into REPLACEMENT if desired. */ |
428 | 2806 if (NILP (literal)) |
2807 { | |
826 | 2808 Charcount stlen = string_char_length (replacement); |
428 | 2809 Charcount strpos; |
2810 /* XEmacs change: rewrote this loop somewhat to make it | |
2811 cleaner. Also added \U, \E, etc. */ | |
2812 Charcount literal_start = 0; | |
2813 /* We build up the substituted string in ACCUM. */ | |
2814 Lisp_Object accum; | |
2815 | |
2816 accum = Qnil; | |
2817 | |
2818 /* OK, the basic idea here is that we scan through the | |
2819 replacement string until we find a backslash, which | |
2820 represents a substring of the original string to be | |
2821 substituted. We then append onto ACCUM the literal | |
2822 text before the backslash (LASTPOS marks the | |
2823 beginning of this) followed by the substring of the | |
2824 original string that needs to be inserted. */ | |
2825 for (strpos = 0; strpos < stlen; strpos++) | |
2826 { | |
2827 /* If LITERAL_END is set, we've encountered a backslash | |
2828 (the end of literal text to be inserted). */ | |
2829 Charcount literal_end = -1; | |
2830 /* If SUBSTART is set, we need to also insert the | |
2831 text from SUBSTART to SUBEND in the original string. */ | |
2832 Charcount substart = -1; | |
2833 Charcount subend = -1; | |
2834 | |
867 | 2835 c = string_ichar (replacement, strpos); |
428 | 2836 if (c == '\\' && strpos < stlen - 1) |
2837 { | |
867 | 2838 c = string_ichar (replacement, ++strpos); |
428 | 2839 if (c == '&') |
2840 { | |
2841 literal_end = strpos - 1; | |
2842 substart = search_regs.start[0]; | |
2843 subend = search_regs.end[0]; | |
2844 } | |
4199 | 2845 /* #### This logic is totally broken, |
2846 since we can have backrefs like "\99", right? */ | |
428 | 2847 else if (c >= '1' && c <= '9' && |
2848 c <= search_regs.num_regs + '0') | |
2849 { | |
2850 if (search_regs.start[c - '0'] >= 0) | |
2851 { | |
2852 literal_end = strpos - 1; | |
2853 substart = search_regs.start[c - '0']; | |
2854 subend = search_regs.end[c - '0']; | |
2855 } | |
2856 } | |
2857 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' || | |
2858 c == 'E') | |
2859 { | |
2860 /* Keep track of all case changes requested, but don't | |
2861 make them now. Do them later so we override | |
2862 everything else. */ | |
2863 if (!ul_pos_dynarr) | |
2864 { | |
2865 ul_pos_dynarr = Dynarr_new (int); | |
2866 ul_action_dynarr = Dynarr_new (int); | |
2867 record_unwind_protect | |
2868 (free_created_dynarrs, | |
2869 noseeum_cons | |
2870 (make_opaque_ptr (ul_pos_dynarr), | |
2871 make_opaque_ptr (ul_action_dynarr))); | |
2872 } | |
2873 literal_end = strpos - 1; | |
2874 Dynarr_add (ul_pos_dynarr, | |
2875 (!NILP (accum) | |
826 | 2876 ? string_char_length (accum) |
428 | 2877 : 0) + (literal_end - literal_start)); |
2878 Dynarr_add (ul_action_dynarr, c); | |
2879 } | |
2880 else if (c == '\\') | |
2881 /* So we get just one backslash. */ | |
2882 literal_end = strpos; | |
2883 } | |
2884 if (literal_end >= 0) | |
2885 { | |
2886 Lisp_Object literal_text = Qnil; | |
2887 Lisp_Object substring = Qnil; | |
2888 if (literal_end != literal_start) | |
5089
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2889 literal_text = Fsubseq (replacement, |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2890 make_fixnum (literal_start), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2891 make_fixnum (literal_end)); |
428 | 2892 if (substart >= 0 && subend != substart) |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2893 substring = Fsubseq (string, make_fixnum (substart), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2894 make_fixnum (subend)); |
428 | 2895 if (!NILP (literal_text) || !NILP (substring)) |
2896 accum = concat3 (accum, literal_text, substring); | |
2897 literal_start = strpos + 1; | |
2898 } | |
2899 } | |
2900 | |
2901 if (strpos != literal_start) | |
2902 /* some literal text at end to be inserted */ | |
5089
99f8ebc082d9
Make #'substring an alias of #'subseq; give the latter the byte code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5041
diff
changeset
|
2903 replacement = concat2 (accum, Fsubseq (replacement, |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2904 make_fixnum (literal_start), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
2905 make_fixnum (strpos))); |
428 | 2906 else |
444 | 2907 replacement = accum; |
428 | 2908 } |
2909 | |
444 | 2910 /* replacement can be nil. */ |
2911 if (NILP (replacement)) | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4421
diff
changeset
|
2912 replacement = build_ascstring (""); |
444 | 2913 |
428 | 2914 if (case_action == all_caps) |
444 | 2915 replacement = Fupcase (replacement, buffer); |
428 | 2916 else if (case_action == cap_initial) |
444 | 2917 replacement = Fupcase_initials (replacement, buffer); |
428 | 2918 |
2919 /* Now finally, we need to process the \U's, \E's, etc. */ | |
2920 if (ul_pos_dynarr) | |
2921 { | |
2922 int i = 0; | |
2923 int cur_action = 'E'; | |
826 | 2924 Charcount stlen = string_char_length (replacement); |
428 | 2925 Charcount strpos; |
2926 | |
2927 for (strpos = 0; strpos < stlen; strpos++) | |
2928 { | |
867 | 2929 Ichar curchar = string_ichar (replacement, strpos); |
2930 Ichar newchar = -1; | |
428 | 2931 if (i < Dynarr_length (ul_pos_dynarr) && |
2932 strpos == Dynarr_at (ul_pos_dynarr, i)) | |
2933 { | |
2934 int new_action = Dynarr_at (ul_action_dynarr, i); | |
2935 i++; | |
2936 if (new_action == 'u') | |
2937 newchar = UPCASE (buf, curchar); | |
2938 else if (new_action == 'l') | |
2939 newchar = DOWNCASE (buf, curchar); | |
2940 else | |
2941 cur_action = new_action; | |
2942 } | |
2943 if (newchar == -1) | |
2944 { | |
2945 if (cur_action == 'U') | |
2946 newchar = UPCASE (buf, curchar); | |
2947 else if (cur_action == 'L') | |
2948 newchar = DOWNCASE (buf, curchar); | |
2949 else | |
2950 newchar = curchar; | |
2951 } | |
2952 if (newchar != curchar) | |
793 | 2953 set_string_char (replacement, strpos, newchar); |
428 | 2954 } |
2955 } | |
2956 | |
2957 /* frees the Dynarrs if necessary. */ | |
771 | 2958 unbind_to (speccount); |
444 | 2959 return concat3 (before, replacement, after); |
428 | 2960 } |
2961 | |
707 | 2962 mc_count = begin_multiple_change (buf, search_regs.start[sub], |
2963 search_regs.end[sub]); | |
428 | 2964 |
2965 /* begin_multiple_change() records an unwind-protect, so we need to | |
2966 record this value now. */ | |
2967 speccount = specpdl_depth (); | |
2968 | |
2969 /* We insert the replacement text before the old text, and then | |
2970 delete the original text. This means that markers at the | |
2971 beginning or end of the original will float to the corresponding | |
2972 position in the replacement. */ | |
707 | 2973 BUF_SET_PT (buf, search_regs.start[sub]); |
428 | 2974 if (!NILP (literal)) |
444 | 2975 Finsert (1, &replacement); |
428 | 2976 else |
2977 { | |
826 | 2978 Charcount stlen = string_char_length (replacement); |
428 | 2979 Charcount strpos; |
2980 struct gcpro gcpro1; | |
444 | 2981 GCPRO1 (replacement); |
428 | 2982 for (strpos = 0; strpos < stlen; strpos++) |
2983 { | |
707 | 2984 /* on the first iteration assert(offset==0), |
2985 exactly complementing BUF_SET_PT() above. | |
2986 During the loop, it keeps track of the amount inserted. | |
2987 */ | |
2988 Charcount offset = BUF_PT (buf) - search_regs.start[sub]; | |
428 | 2989 |
867 | 2990 c = string_ichar (replacement, strpos); |
428 | 2991 if (c == '\\' && strpos < stlen - 1) |
2992 { | |
707 | 2993 /* XXX FIXME: replacing just a substring non-literally |
2994 using backslash refs to the match looks dangerous. But | |
2995 <15366.18513.698042.156573@ns.caldera.de> from Torsten Duwe | |
2996 <duwe@caldera.de> claims Finsert_buffer_substring already | |
2997 handles this correctly. | |
2998 */ | |
867 | 2999 c = string_ichar (replacement, ++strpos); |
428 | 3000 if (c == '&') |
3001 Finsert_buffer_substring | |
3002 (buffer, | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3003 make_fixnum (search_regs.start[0] + offset), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3004 make_fixnum (search_regs.end[0] + offset)); |
4199 | 3005 /* #### This logic is totally broken, |
3006 since we can have backrefs like "\99", right? */ | |
428 | 3007 else if (c >= '1' && c <= '9' && |
3008 c <= search_regs.num_regs + '0') | |
3009 { | |
3010 if (search_regs.start[c - '0'] >= 1) | |
3011 Finsert_buffer_substring | |
3012 (buffer, | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3013 make_fixnum (search_regs.start[c - '0'] + offset), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3014 make_fixnum (search_regs.end[c - '0'] + offset)); |
428 | 3015 } |
3016 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' || | |
3017 c == 'E') | |
3018 { | |
3019 /* Keep track of all case changes requested, but don't | |
3020 make them now. Do them later so we override | |
3021 everything else. */ | |
3022 if (!ul_pos_dynarr) | |
3023 { | |
3024 ul_pos_dynarr = Dynarr_new (int); | |
3025 ul_action_dynarr = Dynarr_new (int); | |
3026 record_unwind_protect | |
3027 (free_created_dynarrs, | |
3028 Fcons (make_opaque_ptr (ul_pos_dynarr), | |
3029 make_opaque_ptr (ul_action_dynarr))); | |
3030 } | |
3031 Dynarr_add (ul_pos_dynarr, BUF_PT (buf)); | |
3032 Dynarr_add (ul_action_dynarr, c); | |
3033 } | |
3034 else | |
3035 buffer_insert_emacs_char (buf, c); | |
3036 } | |
3037 else | |
3038 buffer_insert_emacs_char (buf, c); | |
3039 } | |
3040 UNGCPRO; | |
3041 } | |
3042 | |
707 | 3043 inslen = BUF_PT (buf) - (search_regs.start[sub]); |
3044 buffer_delete_range (buf, search_regs.start[sub] + inslen, | |
3045 search_regs.end[sub] + inslen, 0); | |
428 | 3046 |
3047 if (case_action == all_caps) | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3048 Fupcase_region (make_fixnum (BUF_PT (buf) - inslen), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3049 make_fixnum (BUF_PT (buf)), buffer); |
428 | 3050 else if (case_action == cap_initial) |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3051 Fupcase_initials_region (make_fixnum (BUF_PT (buf) - inslen), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3052 make_fixnum (BUF_PT (buf)), buffer); |
428 | 3053 |
3054 /* Now go through and make all the case changes that were requested | |
3055 in the replacement string. */ | |
3056 if (ul_pos_dynarr) | |
3057 { | |
665 | 3058 Charbpos eend = BUF_PT (buf); |
428 | 3059 int i = 0; |
3060 int cur_action = 'E'; | |
3061 | |
3062 for (pos = BUF_PT (buf) - inslen; pos < eend; pos++) | |
3063 { | |
867 | 3064 Ichar curchar = BUF_FETCH_CHAR (buf, pos); |
3065 Ichar newchar = -1; | |
428 | 3066 if (i < Dynarr_length (ul_pos_dynarr) && |
3067 pos == Dynarr_at (ul_pos_dynarr, i)) | |
3068 { | |
3069 int new_action = Dynarr_at (ul_action_dynarr, i); | |
3070 i++; | |
3071 if (new_action == 'u') | |
3072 newchar = UPCASE (buf, curchar); | |
3073 else if (new_action == 'l') | |
3074 newchar = DOWNCASE (buf, curchar); | |
3075 else | |
3076 cur_action = new_action; | |
3077 } | |
3078 if (newchar == -1) | |
3079 { | |
3080 if (cur_action == 'U') | |
3081 newchar = UPCASE (buf, curchar); | |
3082 else if (cur_action == 'L') | |
3083 newchar = DOWNCASE (buf, curchar); | |
3084 else | |
3085 newchar = curchar; | |
3086 } | |
3087 if (newchar != curchar) | |
3088 buffer_replace_char (buf, pos, newchar, 0, 0); | |
3089 } | |
3090 } | |
3091 | |
3092 /* frees the Dynarrs if necessary. */ | |
771 | 3093 unbind_to (speccount); |
428 | 3094 end_multiple_change (buf, mc_count); |
3095 | |
3096 return Qnil; | |
3097 } | |
3098 | |
3099 static Lisp_Object | |
3100 match_limit (Lisp_Object num, int beginningp) | |
3101 { | |
3102 int n; | |
3103 | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3104 CHECK_FIXNUM (num); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3105 n = XFIXNUM (num); |
428 | 3106 if (n < 0 || n >= search_regs.num_regs) |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3107 args_out_of_range (num, make_fixnum (search_regs.num_regs)); |
428 | 3108 if (search_regs.num_regs == 0 || |
3109 search_regs.start[n] < 0) | |
3110 return Qnil; | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3111 return make_fixnum (beginningp ? search_regs.start[n] : search_regs.end[n]); |
428 | 3112 } |
3113 | |
3114 DEFUN ("match-beginning", Fmatch_beginning, 1, 1, 0, /* | |
3115 Return position of start of text matched by last regexp search. | |
3116 NUM, specifies which parenthesized expression in the last regexp. | |
3117 Value is nil if NUMth pair didn't match, or there were less than NUM pairs. | |
3118 Zero means the entire text matched by the whole regexp or whole string. | |
3119 */ | |
3120 (num)) | |
3121 { | |
3122 return match_limit (num, 1); | |
3123 } | |
3124 | |
DEFUN ("match-end", Fmatch_end, 1, 1, 0, /*
Return position of end of text matched by last regexp search.
NUM specifies which parenthesized expression in the last regexp.
Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
Zero means the entire text matched by the whole regexp or whole string.
*/
       (num))
{
  /* A zero BEGINNINGP makes match_limit report the register's end.  */
  return match_limit (num, 0);
}
3135 | |
3136 DEFUN ("match-data", Fmatch_data, 0, 2, 0, /* | |
3137 Return a list containing all info on what the last regexp search matched. | |
3138 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'. | |
3139 All the elements are markers or nil (nil if the Nth pair didn't match) | |
3140 if the last match was on a buffer; integers or nil if a string was matched. | |
3141 Use `store-match-data' to reinstate the data in this list. | |
3142 | |
3143 If INTEGERS (the optional first argument) is non-nil, always use integers | |
3144 \(rather than markers) to represent buffer positions. | |
3145 If REUSE is a list, reuse it as part of the value. If REUSE is long enough | |
3146 to hold all the values, and if INTEGERS is non-nil, no consing is done. | |
3147 */ | |
3148 (integers, reuse)) | |
3149 { | |
3150 Lisp_Object tail, prev; | |
3151 Lisp_Object *data; | |
3152 int i; | |
3153 Charcount len; | |
3154 | |
3155 if (NILP (last_thing_searched)) | |
563 | 3156 /*error ("match-data called before any match found", Qunbound);*/ |
428 | 3157 return Qnil; |
3158 | |
3159 data = alloca_array (Lisp_Object, 2 * search_regs.num_regs); | |
3160 | |
3161 len = -1; | |
3162 for (i = 0; i < search_regs.num_regs; i++) | |
3163 { | |
665 | 3164 Charbpos start = search_regs.start[i]; |
428 | 3165 if (start >= 0) |
3166 { | |
3167 if (EQ (last_thing_searched, Qt) | |
3168 || !NILP (integers)) | |
3169 { | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3170 data[2 * i] = make_fixnum (start); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3171 data[2 * i + 1] = make_fixnum (search_regs.end[i]); |
428 | 3172 } |
3173 else if (BUFFERP (last_thing_searched)) | |
3174 { | |
3175 data[2 * i] = Fmake_marker (); | |
3176 Fset_marker (data[2 * i], | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3177 make_fixnum (start), |
428 | 3178 last_thing_searched); |
3179 data[2 * i + 1] = Fmake_marker (); | |
3180 Fset_marker (data[2 * i + 1], | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3181 make_fixnum (search_regs.end[i]), |
428 | 3182 last_thing_searched); |
3183 } | |
3184 else | |
3185 /* last_thing_searched must always be Qt, a buffer, or Qnil. */ | |
2500 | 3186 ABORT (); |
428 | 3187 |
3188 len = i; | |
3189 } | |
3190 else | |
3191 data[2 * i] = data [2 * i + 1] = Qnil; | |
3192 } | |
3193 if (!CONSP (reuse)) | |
3194 return Flist (2 * len + 2, data); | |
3195 | |
3196 /* If REUSE is a list, store as many value elements as will fit | |
3197 into the elements of REUSE. */ | |
3198 for (prev = Qnil, i = 0, tail = reuse; CONSP (tail); i++, tail = XCDR (tail)) | |
3199 { | |
3200 if (i < 2 * len + 2) | |
3201 XCAR (tail) = data[i]; | |
3202 else | |
3203 XCAR (tail) = Qnil; | |
3204 prev = tail; | |
3205 } | |
3206 | |
3207 /* If we couldn't fit all value elements into REUSE, | |
3208 cons up the rest of them and add them to the end of REUSE. */ | |
3209 if (i < 2 * len + 2) | |
3210 XCDR (prev) = Flist (2 * len + 2 - i, data + i); | |
3211 | |
3212 return reuse; | |
3213 } | |
3214 | |
3215 | |
3216 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /* | |
3217 Set internal data on last search match from elements of LIST. | |
1468 | 3218 LIST should have been created by calling `match-data' previously, |
3219 or be nil, to clear the internal match data. | |
428 | 3220 */ |
3221 (list)) | |
3222 { | |
3223 REGISTER int i; | |
3224 REGISTER Lisp_Object marker; | |
3225 int num_regs; | |
3226 int length; | |
3227 | |
853 | 3228 /* Some FSF junk with running_asynch_code, to preserve the match |
3229 data. Not necessary because we don't call process filters | |
3230 asynchronously (i.e. from within QUIT). */ | |
428 | 3231 |
3232 CONCHECK_LIST (list); | |
3233 | |
3234 /* Unless we find a marker with a buffer in LIST, assume that this | |
3235 match data came from a string. */ | |
3236 last_thing_searched = Qt; | |
3237 | |
3238 /* Allocate registers if they don't already exist. */ | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3239 length = XFIXNUM (Flength (list)) / 2; |
428 | 3240 num_regs = search_regs.num_regs; |
3241 | |
3242 if (length > num_regs) | |
3243 { | |
3244 if (search_regs.num_regs == 0) | |
3245 { | |
3246 search_regs.start = xnew_array (regoff_t, length); | |
3247 search_regs.end = xnew_array (regoff_t, length); | |
3248 } | |
3249 else | |
3250 { | |
3251 XREALLOC_ARRAY (search_regs.start, regoff_t, length); | |
3252 XREALLOC_ARRAY (search_regs.end, regoff_t, length); | |
3253 } | |
3254 | |
3255 search_regs.num_regs = length; | |
3256 } | |
3257 | |
3258 for (i = 0; i < num_regs; i++) | |
3259 { | |
3260 marker = Fcar (list); | |
3261 if (NILP (marker)) | |
3262 { | |
3263 search_regs.start[i] = -1; | |
3264 list = Fcdr (list); | |
3265 } | |
3266 else | |
3267 { | |
3268 if (MARKERP (marker)) | |
3269 { | |
3270 if (XMARKER (marker)->buffer == 0) | |
3271 marker = Qzero; | |
3272 else | |
793 | 3273 last_thing_searched = wrap_buffer (XMARKER (marker)->buffer); |
428 | 3274 } |
3275 | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3276 CHECK_FIXNUM_COERCE_MARKER (marker); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3277 search_regs.start[i] = XFIXNUM (marker); |
428 | 3278 list = Fcdr (list); |
3279 | |
3280 marker = Fcar (list); | |
3281 if (MARKERP (marker) && XMARKER (marker)->buffer == 0) | |
3282 marker = Qzero; | |
3283 | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3284 CHECK_FIXNUM_COERCE_MARKER (marker); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5542
diff
changeset
|
3285 search_regs.end[i] = XFIXNUM (marker); |
428 | 3286 } |
3287 list = Fcdr (list); | |
3288 } | |
3289 | |
3290 return Qnil; | |
3291 } | |
3292 | |
3293 /* Quote a string to inactivate reg-expr chars */ | |
3294 | |
3295 DEFUN ("regexp-quote", Fregexp_quote, 1, 1, 0, /* | |
3296 Return a regexp string which matches exactly STRING and nothing else. | |
3297 */ | |
444 | 3298 (string)) |
428 | 3299 { |
867 | 3300 REGISTER Ibyte *in, *out, *end; |
3301 REGISTER Ibyte *temp; | |
428 | 3302 |
444 | 3303 CHECK_STRING (string); |
428 | 3304 |
2367 | 3305 temp = alloca_ibytes (XSTRING_LENGTH (string) * 2); |
428 | 3306 |
3307 /* Now copy the data into the new string, inserting escapes. */ | |
3308 | |
444 | 3309 in = XSTRING_DATA (string); |
3310 end = in + XSTRING_LENGTH (string); | |
428 | 3311 out = temp; |
3312 | |
3313 while (in < end) | |
3314 { | |
867 | 3315 Ichar c = itext_ichar (in); |
428 | 3316 |
3317 if (c == '[' || c == ']' | |
3318 || c == '*' || c == '.' || c == '\\' | |
3319 || c == '?' || c == '+' | |
3320 || c == '^' || c == '$') | |
3321 *out++ = '\\'; | |
867 | 3322 out += set_itext_ichar (out, c); |
3323 INC_IBYTEPTR (in); | |
428 | 3324 } |
3325 | |
3326 return make_string (temp, out - temp); | |
3327 } | |
3328 | |
DEFUN ("set-word-regexp", Fset_word_regexp, 1, 1, 0, /*
Set the regexp to be used to match a word in regular-expression searching.
#### Not yet implemented.  Currently does nothing.
#### Do not use this yet.  Its calling interface is likely to change.
*/
       (UNUSED (regexp)))
{
  /* Stub: the argument is ignored and nil is always returned.  */
  return Qnil;
}
3338 | |
3339 | |
5041 | 3340 #ifdef DEBUG_XEMACS |
3341 | |
3342 static int | |
3343 debug_regexps_changed (Lisp_Object UNUSED (sym), Lisp_Object *val, | |
3344 Lisp_Object UNUSED (in_object), | |
3345 int UNUSED (flags)) | |
3346 { | |
3347 int newval = 0; | |
3348 | |
3349 EXTERNAL_LIST_LOOP_2 (elt, *val) | |
3350 { | |
3351 CHECK_SYMBOL (elt); | |
3352 if (EQ (elt, Qcompilation)) | |
3353 newval |= RE_DEBUG_COMPILATION; | |
3354 else if (EQ (elt, Qfailure_point)) | |
3355 newval |= RE_DEBUG_FAILURE_POINT; | |
3356 else if (EQ (elt, Qmatching)) | |
3357 newval |= RE_DEBUG_MATCHING; | |
3358 else | |
3359 invalid_argument | |
3360 ("Expected `compilation', `failure-point' or `matching'", elt); | |
3361 } | |
3362 debug_regexps = newval; | |
3363 return 0; | |
3364 } | |
3365 | |
3366 #endif /* DEBUG_XEMACS */ | |
3367 | |
3368 | |
428 | 3369 /************************************************************************/ |
3370 /* initialization */ | |
3371 /************************************************************************/ | |
3372 | |
/* Declare the error symbols and Lisp-visible subrs of this file.
   NOTE(review): presumably run once during startup together with the
   other syms_of_* functions -- confirm against the caller.  */
void
syms_of_search (void)
{

  /* Error symbols, each given with its parent error.  */
  DEFERROR_STANDARD (Qsearch_failed, Qinvalid_operation);
  DEFERROR_STANDARD (Qinvalid_regexp, Qsyntax_error);
  Fput (Qinvalid_regexp, Qerror_lacks_explanatory_string, Qt);

  /* Matching at point or against a string.  */
  DEFSUBR (Flooking_at);
  DEFSUBR (Fposix_looking_at);
  DEFSUBR (Fstring_match);
  DEFSUBR (Fposix_string_match);
  /* Skipping over character or syntax classes.  */
  DEFSUBR (Fskip_chars_forward);
  DEFSUBR (Fskip_chars_backward);
  DEFSUBR (Fskip_syntax_forward);
  DEFSUBR (Fskip_syntax_backward);
  /* Plain, word, regexp and POSIX searches.  */
  DEFSUBR (Fsearch_forward);
  DEFSUBR (Fsearch_backward);
  DEFSUBR (Fword_search_forward);
  DEFSUBR (Fword_search_backward);
  DEFSUBR (Fre_search_forward);
  DEFSUBR (Fre_search_backward);
  DEFSUBR (Fposix_search_forward);
  DEFSUBR (Fposix_search_backward);
  /* Replacement and match-data access.  */
  DEFSUBR (Freplace_match);
  DEFSUBR (Fmatch_beginning);
  DEFSUBR (Fmatch_end);
  DEFSUBR (Fmatch_data);
  DEFSUBR (Fstore_match_data);
  /* Miscellaneous.  */
  DEFSUBR (Fregexp_quote);
  DEFSUBR (Fset_word_regexp);
}
3405 | |
3406 void | |
3407 reinit_vars_of_search (void) | |
3408 { | |
3409 int i; | |
3410 | |
3411 last_thing_searched = Qnil; | |
3412 staticpro_nodump (&last_thing_searched); | |
3413 | |
3414 for (i = 0; i < REGEXP_CACHE_SIZE; ++i) | |
3415 { | |
3416 searchbufs[i].buf.allocated = 100; | |
3417 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100); | |
3418 searchbufs[i].buf.fastmap = searchbufs[i].fastmap; | |
3419 searchbufs[i].regexp = Qnil; | |
3420 staticpro_nodump (&searchbufs[i].regexp); | |
3421 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]); | |
3422 } | |
3423 searchbuf_head = &searchbufs[0]; | |
3424 } | |
3425 | |
/* Declare the Lisp-visible variables of this file and give them their
   initial values.  In DEBUG_XEMACS builds, also set up the symbols and
   variables used for search and regexp debugging.  */
void
vars_of_search (void)
{
  DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /*
*Regular expression to be used in `forward-word'.
#### Not yet implemented.
*/ );
  Vforward_word_regexp = Qnil;

  DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /*
*Regular expression to be used in `backward-word'.
#### Not yet implemented.
*/ );
  Vbackward_word_regexp = Qnil;

  DEFVAR_INT ("warn-about-possibly-incompatible-back-references",
              &warn_about_possibly_incompatible_back_references /*
If true, issue warnings when new-semantics back references occur.
This is to catch places where old code might inadvertently have changed
semantics. This will occur in old code only where more than nine groups
occur and a back reference to one of them is directly followed by a digit.
*/ );
  /* The warning is enabled by default.  */
  warn_about_possibly_incompatible_back_references = 1;

  /* Not exposed through a DEFVAR here; staticpro'd instead.  */
  Vskip_chars_range_table = Fmake_range_table (Qstart_closed_end_closed);
  staticpro (&Vskip_chars_range_table);
#ifdef DEBUG_XEMACS
  /* Symbols named in the `debug-searches' documentation below.  */
  DEFSYMBOL (Qsearch_algorithm_used);
  DEFSYMBOL (Qboyer_moore);
  DEFSYMBOL (Qsimple_search);

  /* Area names accepted by debug_regexps_changed.  */
  DEFSYMBOL (Qcompilation);
  DEFSYMBOL (Qfailure_point);
  DEFSYMBOL (Qmatching);

  DEFVAR_INT ("debug-searches", &debug_searches /*
If non-zero, bind `search-algorithm-used' to `boyer-moore' or `simple-search',
depending on the algorithm used for each search. Used for testing.
*/ );
  debug_searches = 0;

  DEFVAR_LISP_MAGIC ("debug-regexps", &Vdebug_regexps, /*
List of areas to display debug info about during regexp operation.
The following areas are recognized:

`compilation'    Display the result of compiling a regexp.
`failure-point'  Display info about failure points reached.
`matching'       Display info about the process of matching a regex against
                 text.
*/ debug_regexps_changed);
  /* Start with no debug areas selected, in both the Lisp value and the
     integer mask.  */
  Vdebug_regexps = Qnil;
  debug_regexps = 0;
#endif /* DEBUG_XEMACS */
}