comparison src/search.c @ 0:376386a54a3c r19-14

Import from CVS: tag r19-14
author cvs
date Mon, 13 Aug 2007 08:45:50 +0200
parents
children 0293115a14e9
comparison
equal deleted inserted replaced
-1:000000000000 0:376386a54a3c
1 /* String search routines for XEmacs.
2 Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4
5 This file is part of XEmacs.
6
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
10 later version.
11
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 /* Synched up with: FSF 19.29, except for region-cache stuff. */
23
24 /* Hacked on for Mule by Ben Wing, December 1994 and August 1995. */
25
26 /* This file has been Mule-ized except for the TRT stuff. */
27
28 #include <config.h>
29 #include "lisp.h"
30
31 #include "buffer.h"
32 #include "commands.h"
33 #include "insdel.h"
34 #include "opaque.h"
35 #ifdef REGION_CACHE_NEEDS_WORK
36 #include "region-cache.h"
37 #endif
38 #include "syntax.h"
39
40 #include <sys/types.h>
41 #include "regex.h"
42
43
44 #define REGEXP_CACHE_SIZE 5
45
46 /* If the regexp is non-nil, then the buffer contains the compiled form
47 of that regexp, suitable for searching. */
48 struct regexp_cache {
49 struct regexp_cache *next;
50 Lisp_Object regexp;
51 struct re_pattern_buffer buf;
52 char fastmap[0400];
53 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
54 char posix;
55 };
56
57 /* The instances of that struct. */
58 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
59
60 /* The head of the linked list; points to the most recently used buffer. */
61 struct regexp_cache *searchbuf_head;
62
63
64 /* Every call to re_match, etc., must pass &search_regs as the regs
65 argument unless you can show it is unnecessary (i.e., if re_match
66 is certainly going to be called again before region-around-match
67 can be called).
68
69 Since the registers are now dynamically allocated, we need to make
70 sure not to refer to the Nth register before checking that it has
71 been allocated by checking search_regs.num_regs.
72
73 The regex code keeps track of whether it has allocated the search
74 buffer using bits in the re_pattern_buffer. This means that whenever
75 you compile a new pattern, it completely forgets whether it has
76 allocated any registers, and will allocate new registers the next
77 time you call a searching or matching function. Therefore, we need
78 to call re_set_registers after compiling a new pattern or after
79 setting the match registers, so that the regex functions will be
80 able to free or re-allocate it properly. */
81 static struct re_registers search_regs;
82
83 /* The buffer in which the last search was performed, or
84 Qt if the last search was done in a string;
85 Qnil if no searching has been done yet. */
86 static Lisp_Object last_thing_searched;
87
88 /* error condition signalled when regexp compile_pattern fails */
89
90 Lisp_Object Qinvalid_regexp;
91
92 /* Regular expressions used in forward/backward-word */
93 Lisp_Object Vforward_word_regexp, Vbackward_word_regexp;
94
95 /* range table for use with skip_chars. Only needed for Mule. */
96 Lisp_Object Vskip_chars_range_table;
97
98 static void set_search_regs (struct buffer *buf, Bufpos beg, Charcount len);
99 static void save_search_regs (void);
100 static Bufpos search_buffer (struct buffer *buf, Lisp_Object str,
101 Bufpos bufpos, Bufpos buflim, EMACS_INT n, int RE,
102 unsigned char *trt, unsigned char *inverse_trt,
103 int posix);
104
105 static void
106 matcher_overflow (void)
107 {
108 error ("Stack overflow in regexp matcher");
109 }
110
111 /* Compile a regexp and signal a Lisp error if anything goes wrong.
112 PATTERN is the pattern to compile.
113 CP is the place to put the result.
114 TRANSLATE is a translation table for ignoring case, or NULL for none.
115 REGP is the structure that says where to store the "register"
116 values that will result from matching this pattern.
117 If it is 0, we should compile the pattern not to record any
118 subexpression bounds.
119 POSIX is nonzero if we want full backtracking (POSIX style)
120 for this pattern. 0 means backtrack only enough to get a valid match. */
121
122 static int
123 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
124 char *translate, struct re_registers *regp, int posix,
125 Error_behavior errb)
126 {
127 CONST char *val;
128 reg_syntax_t old;
129
130 cp->regexp = Qnil;
131 cp->buf.translate = translate;
132 cp->posix = posix;
133 old = re_set_syntax (RE_SYNTAX_EMACS
134 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
135 val = (CONST char *)
136 re_compile_pattern ((char *) string_data (XSTRING (pattern)),
137 string_length (XSTRING (pattern)), &cp->buf);
138 re_set_syntax (old);
139 if (val)
140 {
141 maybe_signal_error (Qinvalid_regexp, list1 (build_string (val)),
142 Qsearch, errb);
143 return 0;
144 }
145
146 cp->regexp = Fcopy_sequence (pattern);
147 return 1;
148 }
149
150 /* Compile a regexp if necessary, but first check to see if there's one in
151 the cache.
152 PATTERN is the pattern to compile.
153 TRANSLATE is a translation table for ignoring case, or NULL for none.
154 REGP is the structure that says where to store the "register"
155 values that will result from matching this pattern.
156 If it is 0, we should compile the pattern not to record any
157 subexpression bounds.
158 POSIX is nonzero if we want full backtracking (POSIX style)
159 for this pattern. 0 means backtrack only enough to get a valid match. */
160
161 struct re_pattern_buffer *
162 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
163 char *translate, int posix, Error_behavior errb)
164 {
165 struct regexp_cache *cp, **cpp;
166
167 for (cpp = &searchbuf_head; ; cpp = &cp->next)
168 {
169 cp = *cpp;
170 if (!NILP (Fstring_equal (cp->regexp, pattern))
171 && cp->buf.translate == translate
172 && cp->posix == posix)
173 break;
174
175 /* If we're at the end of the cache, compile into the last cell. */
176 if (cp->next == 0)
177 {
178 if (!compile_pattern_1 (cp, pattern, translate, regp, posix,
179 errb))
180 return 0;
181 break;
182 }
183 }
184
185 /* When we get here, cp (aka *cpp) contains the compiled pattern,
186 either because we found it in the cache or because we just compiled it.
187 Move it to the front of the queue to mark it as most recently used. */
188 *cpp = cp->next;
189 cp->next = searchbuf_head;
190 searchbuf_head = cp;
191
192 /* Advise the searching functions about the space we have allocated
193 for register data. */
194 if (regp)
195 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
196
197 return &cp->buf;
198 }
199
200 /* Error condition used for failing searches */
201 Lisp_Object Qsearch_failed;
202
203 static Lisp_Object
204 signal_failure (Lisp_Object arg)
205 {
206 Fsignal (Qsearch_failed, list1 (arg));
207 return Qnil;
208 }
209
210 /* Convert the search registers from Bytinds to Bufpos's. Needs to be
211 done after each regexp match that uses the search regs.
212
213 We could get a potential speedup by not converting the search registers
214 until it's really necessary, e.g. when match-data or replace-match is
215 called. However, this complexifies the code a lot (e.g. the buffer
216 could have changed and the Bytinds stored might be invalid) and is
217 probably not a great time-saver. */
218
219 static void
220 fixup_search_regs_for_buffer (struct buffer *buf)
221 {
222 int i;
223
224 for (i = 0; i < search_regs.num_regs; i++)
225 {
226 if (search_regs.start[i] >= 0)
227 search_regs.start[i] = bytind_to_bufpos (buf, search_regs.start[i]);
228 if (search_regs.end[i] >= 0)
229 search_regs.end[i] = bytind_to_bufpos (buf, search_regs.end[i]);
230 }
231 }
232
233 /* Similar but for strings. */
234 static void
235 fixup_search_regs_for_string (Lisp_Object string)
236 {
237 int i;
238
239 /* #### bytecount_to_charcount() is not that efficient. This function
240 could be faster if it did its own conversion (using INC_CHARPTR()
241 and such), because the register ends are likely to be somewhat ordered.
242 (Even if not, you could sort them.)
243
244 Think about this if this function is a time hog, which it's probably
245 not. */
246 for (i = 0; i < search_regs.num_regs; i++)
247 {
248 if (search_regs.start[i] > 0)
249 {
250 search_regs.start[i] =
251 bytecount_to_charcount (string_data (XSTRING (string)),
252 search_regs.start[i]);
253 }
254 if (search_regs.end[i] > 0)
255 {
256 search_regs.end[i] =
257 bytecount_to_charcount (string_data (XSTRING (string)),
258 search_regs.end[i]);
259 }
260 }
261 }
262
263
264 static Lisp_Object
265 looking_at_1 (Lisp_Object string, struct buffer *buf, int posix)
266 {
267 /* This function has been Mule-ized, except for the trt table handling. */
268 Lisp_Object val;
269 Bytind p1, p2;
270 Bytecount s1, s2;
271 register int i;
272 struct re_pattern_buffer *bufp;
273
274 if (running_asynch_code)
275 save_search_regs ();
276
277 CHECK_STRING (string);
278 bufp = compile_pattern (string, &search_regs,
279 (!NILP (buf->case_fold_search)
280 ? (char *) MIRROR_DOWNCASE_TABLE_AS_STRING (buf)
281 : 0),
282 posix, ERROR_ME);
283
284 QUIT;
285
286 /* Get pointers and sizes of the two strings
287 that make up the visible portion of the buffer. */
288
289 p1 = BI_BUF_BEGV (buf);
290 p2 = BI_BUF_CEILING_OF (buf, p1);
291 s1 = p2 - p1;
292 s2 = BI_BUF_ZV (buf) - p2;
293
294 regex_emacs_buffer = buf;
295 i = re_match_2 (bufp, (char *) BI_BUF_BYTE_ADDRESS (buf, p1),
296 s1, (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
297 BI_BUF_PT (buf) - BI_BUF_BEGV (buf), &search_regs,
298 BI_BUF_ZV (buf) - BI_BUF_BEGV (buf));
299
300 if (i == -2)
301 matcher_overflow ();
302
303 val = (0 <= i ? Qt : Qnil);
304 if (NILP (val))
305 return Qnil;
306 for (i = 0; i < search_regs.num_regs; i++)
307 if (search_regs.start[i] >= 0)
308 {
309 search_regs.start[i] += BI_BUF_BEGV (buf);
310 search_regs.end[i] += BI_BUF_BEGV (buf);
311 }
312 XSETBUFFER (last_thing_searched, buf);
313 fixup_search_regs_for_buffer (buf);
314 return val;
315 }
316
317 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 2, 0 /*
318 Return t if text after point matches regular expression REGEXP.
319 This function modifies the match data that `match-beginning',
320 `match-end' and `match-data' access; save and restore the match
321 data if you want to preserve them.
322
323 Optional argument BUFFER defaults to the current buffer.
324 */ )
325 (regexp, buffer)
326 Lisp_Object regexp, buffer;
327 {
328 return looking_at_1 (regexp, decode_buffer (buffer, 0), 0);
329 }
330
331 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 2, 0 /*
332 Return t if text after point matches regular expression REGEXP.
333 Find the longest match, in accord with Posix regular expression rules.
334 This function modifies the match data that `match-beginning',
335 `match-end' and `match-data' access; save and restore the match
336 data if you want to preserve them.
337
338 Optional argument BUFFER defaults to the current buffer.
339 */ )
340 (regexp, buffer)
341 Lisp_Object regexp, buffer;
342 {
343 return looking_at_1 (regexp, decode_buffer (buffer, 0), 1);
344 }
345
346 static Lisp_Object
347 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
348 struct buffer *buf, int posix)
349 {
350 /* This function has been Mule-ized, except for the trt table handling. */
351 Bytecount val;
352 Charcount s;
353 struct re_pattern_buffer *bufp;
354
355 if (running_asynch_code)
356 save_search_regs ();
357
358 CHECK_STRING (regexp);
359 CHECK_STRING (string);
360
361 if (NILP (start))
362 s = 0;
363 else
364 {
365 Charcount len = string_char_length (XSTRING (string));
366
367 CHECK_INT (start);
368 s = XINT (start);
369 if (s < 0 && -s <= len)
370 s = len + s;
371 else if (0 > s || s > len)
372 args_out_of_range (string, start);
373 }
374
375
376 bufp = compile_pattern (regexp, &search_regs,
377 (!NILP (buf->case_fold_search)
378 ? (char *) MIRROR_DOWNCASE_TABLE_AS_STRING (buf)
379 : 0), 0, ERROR_ME);
380 QUIT;
381 {
382 Bytecount bis = charcount_to_bytecount (string_data (XSTRING (string)),
383 s);
384 regex_emacs_buffer = buf;
385 val = re_search (bufp, (char *) string_data (XSTRING (string)),
386 string_length (XSTRING (string)), bis,
387 string_length (XSTRING (string)) - bis,
388 &search_regs);
389 }
390 if (val == -2)
391 matcher_overflow ();
392 if (val < 0) return Qnil;
393 last_thing_searched = Qt;
394 fixup_search_regs_for_string (string);
395 return make_int (bytecount_to_charcount (string_data (XSTRING (string)),
396 val));
397 }
398
399 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 4, 0 /*
400 Return index of start of first match for REGEXP in STRING, or nil.
401 If third arg START is non-nil, start search at that index in STRING.
402 For index of first char beyond the match, do (match-end 0).
403 `match-end' and `match-beginning' also give indices of substrings
404 matched by parenthesis constructs in the pattern.
405
406 Optional arg BUFFER controls how case folding is done (according to
407 the value of `case-fold-search' in that buffer and that buffer's case
408 tables) and defaults to the current buffer.
409 */ )
410 (regexp, string, start, buffer)
411 Lisp_Object regexp, string, start, buffer;
412 {
413 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0),
414 0);
415 }
416
417 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 4, 0 /*
418 Return index of start of first match for REGEXP in STRING, or nil.
419 Find the longest match, in accord with Posix regular expression rules.
420 If third arg START is non-nil, start search at that index in STRING.
421 For index of first char beyond the match, do (match-end 0).
422 `match-end' and `match-beginning' also give indices of substrings
423 matched by parenthesis constructs in the pattern.
424
425 Optional arg BUFFER controls how case folding is done (according to
426 the value of `case-fold-search' in that buffer and that buffer's case
427 tables) and defaults to the current buffer.
428 */ )
429 (regexp, string, start, buffer)
430 Lisp_Object regexp, string, start, buffer;
431 {
432 return string_match_1 (regexp, string, start, decode_buffer (buffer, 0),
433 1);
434 }
435
436 /* Match REGEXP against STRING, searching all of STRING,
437 and return the index of the match, or negative on failure.
438 This does not clobber the match data. */
439
440 Bytecount
441 fast_string_match (Lisp_Object regexp, CONST Bufbyte *nonreloc,
442 Lisp_Object reloc, Bytecount offset,
443 Bytecount length, int case_fold_search,
444 Error_behavior errb, int no_quit)
445 {
446 /* This function has been Mule-ized, except for the trt table handling. */
447 Bytecount val;
448 Bufbyte *newnonreloc = (Bufbyte *) nonreloc;
449 struct re_pattern_buffer *bufp;
450
451 bufp = compile_pattern (regexp, 0,
452 (case_fold_search
453 ? (char *)
454 /* #### evil current-buffer dependency */
455 MIRROR_DOWNCASE_TABLE_AS_STRING (current_buffer)
456 : 0),
457 0, errb);
458 if (!bufp)
459 return -1; /* will only do this when errb != ERROR_ME */
460 if (!no_quit)
461 QUIT;
462 else
463 no_quit_in_re_search = 1;
464
465 fixup_internal_substring (nonreloc, reloc, offset, &length);
466
467 if (!NILP (reloc))
468 {
469 if (no_quit)
470 newnonreloc = string_data (XSTRING (reloc));
471 else
472 {
473 /* QUIT could relocate RELOC. Therefore we must alloca()
474 and copy. No way around this except some serious
475 rewriting of re_search(). */
476 newnonreloc = (Bufbyte *) alloca (length);
477 memcpy (newnonreloc, string_data (XSTRING (reloc)), length);
478 }
479 }
480
481 /* #### evil current-buffer dependency */
482 regex_emacs_buffer = current_buffer;
483 val = re_search (bufp, (char *) newnonreloc + offset, length, 0,
484 length, 0);
485
486 no_quit_in_re_search = 0;
487 return val;
488 }
489
490 Bytecount
491 fast_lisp_string_match (Lisp_Object regex, Lisp_Object string)
492 {
493 return fast_string_match (regex, 0, string, 0, -1, 0, ERROR_ME, 0);
494 }
495
496
497 #ifdef REGION_CACHE_NEEDS_WORK
498 /* The newline cache: remembering which sections of text have no newlines. */
499
500 /* If the user has requested newline caching, make sure it's on.
501 Otherwise, make sure it's off.
502 This is our cheezy way of associating an action with the change of
503 state of a buffer-local variable. */
504 static void
505 newline_cache_on_off (struct buffer *buf)
506 {
507 if (NILP (buf->cache_long_line_scans))
508 {
509 /* It should be off. */
510 if (buf->newline_cache)
511 {
512 free_region_cache (buf->newline_cache);
513 buf->newline_cache = 0;
514 }
515 }
516 else
517 {
518 /* It should be on. */
519 if (buf->newline_cache == 0)
520 buf->newline_cache = new_region_cache ();
521 }
522 }
523 #endif
524
525 /* Search in BUF for COUNT instances of the character TARGET between
526 START and END.
527
528 If COUNT is positive, search forwards; END must be >= START.
529 If COUNT is negative, search backwards for the -COUNTth instance;
530 END must be <= START.
531 If COUNT is zero, do anything you please; run rogue, for all I care.
532
533 If END is zero, use BEGV or ZV instead, as appropriate for the
534 direction indicated by COUNT.
535
536 If we find COUNT instances, set *SHORTAGE to zero, and return the
537 position after the COUNTth match. Note that for reverse motion
538 this is not the same as the usual convention for Emacs motion commands.
539
540 If we don't find COUNT instances before reaching END, set *SHORTAGE
541 to the number of TARGETs left unfound, and return END.
542
543 If ALLOW_QUIT is non-zero, call QUIT periodically. */
544
545 static Bytind
546 bi_scan_buffer (struct buffer *buf, Emchar target, Bytind st, Bytind en,
547 int count, int *shortage, int allow_quit)
548 {
549 /* This function has been Mule-ized. */
550 Bytind lim = en > 0 ? en :
551 ((count > 0) ? BI_BUF_ZV (buf) : BI_BUF_BEGV (buf));
552
553 /* #### newline cache stuff in this function not yet ported */
554
555 assert (count != 0);
556
557 if (shortage)
558 *shortage = 0;
559
560 if (count > 0)
561 {
562 {
563 while (st < lim && count > 0)
564 {
565 Bytind ceil;
566 Bufbyte *bufptr;
567
568 ceil = BI_BUF_CEILING_OF (buf, st);
569 ceil = min (lim, ceil);
570 bufptr = memchr (BI_BUF_BYTE_ADDRESS (buf, st), (int) target,
571 ceil - st);
572 if (bufptr)
573 {
574 count--;
575 st = BI_BUF_PTR_BYTE_POS (buf, bufptr) + 1;
576 }
577 else
578 st = ceil;
579 }
580 }
581
582 if (shortage)
583 *shortage = count;
584 if (allow_quit)
585 QUIT;
586 return st;
587 }
588 else
589 {
590 {
591 while (st > lim && count < 0)
592 {
593 Bytind floor;
594 Bufbyte *bufptr;
595 Bufbyte *floorptr;
596
597 floor = BI_BUF_FLOOR_OF (buf, st);
598 floor = max (lim, floor);
599 /* No memrchr() ... */
600 bufptr = BI_BUF_BYTE_ADDRESS_BEFORE (buf, st);
601 floorptr = BI_BUF_BYTE_ADDRESS (buf, floor);
602 while (bufptr >= floorptr)
603 {
604 st--;
605 /* At this point, both ST and BUFPTR refer to the same
606 character. When the loop terminates, ST will
607 always point to the last character we tried. */
608 if (* (unsigned char *) bufptr == (unsigned char) target)
609 {
610 count++;
611 break;
612 }
613 bufptr--;
614 }
615 }
616 }
617
618 if (shortage)
619 *shortage = -count;
620 if (allow_quit)
621 QUIT;
622 if (count)
623 return st;
624 else
625 {
626 /* We found the character we were looking for; we have to return
627 the position *after* it due to the strange way that the return
628 value is defined. */
629 INC_BYTIND (buf, st);
630 return st;
631 }
632 }
633 }
634
635 Bufpos
636 scan_buffer (struct buffer *buf, Emchar target, Bufpos start, Bufpos end,
637 int count, int *shortage, int allow_quit)
638 {
639 Bytind bi_retval;
640 Bytind bi_start, bi_end;
641
642 bi_start = bufpos_to_bytind (buf, start);
643 if (end)
644 bi_end = bufpos_to_bytind (buf, end);
645 else
646 bi_end = 0;
647 bi_retval = bi_scan_buffer (buf, target, bi_start, bi_end, count,
648 shortage, allow_quit);
649 return bytind_to_bufpos (buf, bi_retval);
650 }
651
652 Bytind
653 bi_find_next_newline_no_quit (struct buffer *buf, Bytind from, int cnt)
654 {
655 return bi_scan_buffer (buf, '\n', from, 0, cnt, (int *) 0, 0);
656 }
657
658 Bufpos
659 find_next_newline_no_quit (struct buffer *buf, Bufpos from, int cnt)
660 {
661 return scan_buffer (buf, '\n', from, 0, cnt, (int *) 0, 0);
662 }
663
664 Bufpos
665 find_next_newline (struct buffer *buf, Bufpos from, int cnt)
666 {
667 return scan_buffer (buf, '\n', from, 0, cnt, (int *) 0, 1);
668 }
669
670 /* Like find_next_newline, but returns position before the newline,
671 not after, and only search up to TO. This isn't just
672 find_next_newline (...)-1, because you might hit TO. */
673 Bufpos
674 find_before_next_newline (struct buffer *buf, Bufpos from, Bufpos to, int cnt)
675 {
676 int shortage;
677 Bufpos pos = scan_buffer (buf, '\n', from, to, cnt, &shortage, 1);
678
679 if (shortage == 0)
680 pos--;
681
682 return pos;
683 }
684
685 static Lisp_Object
686 skip_chars (struct buffer *buf, int forwardp, int syntaxp,
687 Lisp_Object string, Lisp_Object lim)
688 {
689 /* This function has been Mule-ized. */
690 register Bufbyte *p, *pend;
691 register Emchar c;
692 /* We store the first 256 chars in an array here and the rest in
693 a range table. */
694 unsigned char fastmap[0400];
695 int negate = 0;
696 register int i;
697 Lisp_Object syntax_table = buf->syntax_table;
698
699 CHECK_STRING (string);
700
701 if (NILP (lim))
702 XSETINT (lim, forwardp ? BUF_ZV (buf) : BUF_BEGV (buf));
703 else
704 CHECK_INT_COERCE_MARKER (lim);
705
706 /* In any case, don't allow scan outside bounds of buffer. */
707 if (XINT (lim) > BUF_ZV (buf))
708 lim = make_int (BUF_ZV (buf));
709 if (XINT (lim) < BUF_BEGV (buf))
710 lim = make_int (BUF_BEGV (buf));
711
712 p = string_data (XSTRING (string));
713 pend = p + string_length (XSTRING (string));
714 memset (fastmap, 0, sizeof (fastmap));
715
716 Fclear_range_table (Vskip_chars_range_table);
717
718 if (p != pend && *p == '^')
719 {
720 negate = 1;
721 p++;
722 }
723
724 /* Find the characters specified and set their elements of fastmap.
725 If syntaxp, each character counts as itself.
726 Otherwise, handle backslashes and ranges specially */
727
728 while (p != pend)
729 {
730 c = charptr_emchar (p);
731 INC_CHARPTR (p);
732 if (syntaxp)
733 {
734 if (c < 0400 && syntax_spec_code[c] < (unsigned char) Smax)
735 fastmap[c] = 1;
736 else
737 signal_simple_error ("Invalid syntax designator",
738 make_char (c));
739 }
740 else
741 {
742 if (c == '\\')
743 {
744 if (p == pend) break;
745 c = charptr_emchar (p);
746 INC_CHARPTR (p);
747 }
748 if (p != pend && *p == '-')
749 {
750 Emchar cend;
751
752 p++;
753 if (p == pend) break;
754 cend = charptr_emchar (p);
755 while (c <= cend && c < 0400)
756 {
757 fastmap[c] = 1;
758 c++;
759 }
760 if (c <= cend)
761 Fput_range_table (make_int (c), make_int (cend), Qt,
762 Vskip_chars_range_table);
763 INC_CHARPTR (p);
764 }
765 else
766 {
767 if (c < 0400)
768 fastmap[c] = 1;
769 else
770 Fput_range_table (make_int (c), make_int (c), Qt,
771 Vskip_chars_range_table);
772 }
773 }
774 }
775
776 if (syntaxp && fastmap['-'] != 0)
777 fastmap[' '] = 1;
778
779 /* If ^ was the first character, complement the fastmap.
780 We don't complement the range table, however; we just use negate
781 in the comparisons below. */
782
783 if (negate)
784 for (i = 0; i < sizeof fastmap; i++)
785 fastmap[i] ^= 1;
786
787 {
788 Bufpos start_point = BUF_PT (buf);
789
790 if (syntaxp)
791 {
792 /* All syntax designators are normal chars so nothing strange
793 to worry about */
794 if (forwardp)
795 {
796 while (BUF_PT (buf) < XINT (lim)
797 && fastmap[(unsigned char)
798 syntax_code_spec
799 [(int) SYNTAX (syntax_table,
800 BUF_FETCH_CHAR
801 (buf, BUF_PT (buf)))]])
802 BUF_SET_PT (buf, BUF_PT (buf) + 1);
803 }
804 else
805 {
806 while (BUF_PT (buf) > XINT (lim)
807 && fastmap[(unsigned char)
808 syntax_code_spec
809 [(int) SYNTAX (syntax_table,
810 BUF_FETCH_CHAR
811 (buf, BUF_PT (buf) - 1))]])
812 BUF_SET_PT (buf, BUF_PT (buf) - 1);
813 }
814 }
815 else
816 {
817 if (forwardp)
818 {
819 while (BUF_PT (buf) < XINT (lim))
820 {
821 Emchar ch = BUF_FETCH_CHAR (buf, BUF_PT (buf));
822 if ((ch < 0400) ? fastmap[ch] :
823 (NILP (Fget_range_table (make_int (ch),
824 Vskip_chars_range_table,
825 Qnil))
826 == negate))
827 BUF_SET_PT (buf, BUF_PT (buf) + 1);
828 else
829 break;
830 }
831 }
832 else
833 {
834 while (BUF_PT (buf) > XINT (lim))
835 {
836 Emchar ch = BUF_FETCH_CHAR (buf, BUF_PT (buf) - 1);
837 if ((ch < 0400) ? fastmap[ch] :
838 (NILP (Fget_range_table (make_int (ch),
839 Vskip_chars_range_table,
840 Qnil))
841 == negate))
842 BUF_SET_PT (buf, BUF_PT (buf) - 1);
843 else
844 break;
845 }
846 }
847 }
848 QUIT;
849 return make_int (BUF_PT (buf) - start_point);
850 }
851 }
852
853 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 3, 0 /*
854 Move point forward, stopping before a char not in STRING, or at pos LIM.
855 STRING is like the inside of a `[...]' in a regular expression
856 except that `]' is never special and `\\' quotes `^', `-' or `\\'.
857 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.
858 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.
859 Returns the distance traveled, either zero or positive.
860
861 Optional argument BUFFER defaults to the current buffer.
862 */ )
863 (string, lim, buffer)
864 Lisp_Object string, lim, buffer;
865 {
866 return skip_chars (decode_buffer (buffer, 0), 1, 0, string, lim);
867 }
868
869 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 3, 0 /*
870 Move point backward, stopping after a char not in STRING, or at pos LIM.
871 See `skip-chars-forward' for details.
872 Returns the distance traveled, either zero or negative.
873
874 Optional argument BUFFER defaults to the current buffer.
875 */ )
876 (string, lim, buffer)
877 Lisp_Object string, lim, buffer;
878 {
879 return skip_chars (decode_buffer (buffer, 0), 0, 0, string, lim);
880 }
881
882
883 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 3, 0 /*
884 Move point forward across chars in specified syntax classes.
885 SYNTAX is a string of syntax code characters.
886 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
887 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
888 This function returns the distance traveled, either zero or positive.
889
890 Optional argument BUFFER defaults to the current buffer.
891 */ )
892 (syntax, lim, buffer)
893 Lisp_Object syntax, lim, buffer;
894 {
895 return skip_chars (decode_buffer (buffer, 0), 1, 1, syntax, lim);
896 }
897
898 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 3, 0 /*
899 Move point backward across chars in specified syntax classes.
900 SYNTAX is a string of syntax code characters.
901 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
902 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
903 This function returns the distance traveled, either zero or negative.
904
905 Optional argument BUFFER defaults to the current buffer.
906 */ )
907 (syntax, lim, buffer)
908 Lisp_Object syntax, lim, buffer;
909 {
910 return skip_chars (decode_buffer (buffer, 0), 0, 1, syntax, lim);
911 }
912
913
914 /* Subroutines of Lisp buffer search functions. */
915
916 static Lisp_Object
917 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object no_error,
918 Lisp_Object count, Lisp_Object buffer, int direction,
919 int RE, int posix)
920 {
921 /* This function has been Mule-ized, except for the trt table handling. */
922 register Bufpos np;
923 Bufpos lim;
924 EMACS_INT n = direction;
925 struct buffer *buf;
926
927 if (!NILP (count))
928 {
929 CHECK_INT (count);
930 n *= XINT (count);
931 }
932
933 buf = decode_buffer (buffer, 0);
934 CHECK_STRING (string);
935 if (NILP (bound))
936 lim = n > 0 ? BUF_ZV (buf) : BUF_BEGV (buf);
937 else
938 {
939 CHECK_INT_COERCE_MARKER (bound);
940 lim = XINT (bound);
941 if (n > 0 ? lim < BUF_PT (buf) : lim > BUF_PT (buf))
942 error ("Invalid search bound (wrong side of point)");
943 if (lim > BUF_ZV (buf))
944 lim = BUF_ZV (buf);
945 if (lim < BUF_BEGV (buf))
946 lim = BUF_BEGV (buf);
947 }
948
949 np = search_buffer (buf, string, BUF_PT (buf), lim, n, RE,
950 (!NILP (buf->case_fold_search)
951 ? MIRROR_CANON_TABLE_AS_STRING (buf)
952 : 0),
953 (!NILP (buf->case_fold_search)
954 ? MIRROR_EQV_TABLE_AS_STRING (buf)
955 : 0), posix);
956
957 if (np <= 0)
958 {
959 if (NILP (no_error))
960 return signal_failure (string);
961 if (!EQ (no_error, Qt))
962 {
963 if (lim < BUF_BEGV (buf) || lim > BUF_ZV (buf))
964 abort ();
965 BUF_SET_PT (buf, lim);
966 return Qnil;
967 #if 0 /* This would be clean, but maybe programs depend on
968 a value of nil here. */
969 np = lim;
970 #endif
971 }
972 else
973 return Qnil;
974 }
975
976 if (np < BUF_BEGV (buf) || np > BUF_ZV (buf))
977 abort ();
978
979 BUF_SET_PT (buf, np);
980
981 return make_int (np);
982 }
983
984 static int
985 trivial_regexp_p (Lisp_Object regexp)
986 {
987 /* This function has been Mule-ized. */
988 Bytecount len = string_length (XSTRING (regexp));
989 Bufbyte *s = string_data (XSTRING (regexp));
990 while (--len >= 0)
991 {
992 switch (*s++)
993 {
994 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
995 return 0;
996 case '\\':
997 if (--len < 0)
998 return 0;
999 switch (*s++)
1000 {
1001 case '|': case '(': case ')': case '`': case '\'': case 'b':
1002 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1003 case 'S': case '=':
1004 case '1': case '2': case '3': case '4': case '5':
1005 case '6': case '7': case '8': case '9':
1006 return 0;
1007 }
1008 }
1009 }
1010 return 1;
1011 }
1012
1013 /* Search for the n'th occurrence of STRING in BUF,
1014 starting at position BUFPOS and stopping at position BUFLIM,
1015 treating PAT as a literal string if RE is false or as
1016 a regular expression if RE is true.
1017
1018 If N is positive, searching is forward and BUFLIM must be greater
1019 than BUFPOS.
1020 If N is negative, searching is backward and BUFLIM must be less
1021 than BUFPOS.
1022
1023 Returns -x if only N-x occurrences found (x > 0),
1024 or else the position at the beginning of the Nth occurrence
1025 (if searching backward) or the end (if searching forward).
1026
1027 POSIX is nonzero if we want full backtracking (POSIX style)
1028 for this pattern. 0 means backtrack only enough to get a valid match. */
1029
1030 static Bufpos
1031 search_buffer (struct buffer *buf, Lisp_Object string, Bufpos bufpos,
1032 Bufpos buflim, EMACS_INT n, int RE, unsigned char *trt,
1033 unsigned char *inverse_trt, int posix)
1034 {
1035 /* This function has been Mule-ized, except for the trt table handling. */
1036 Bytecount len = string_length (XSTRING (string));
1037 Bufbyte *base_pat = string_data (XSTRING (string));
1038 register EMACS_INT *BM_tab;
1039 EMACS_INT *BM_tab_base;
1040 register int direction = ((n > 0) ? 1 : -1);
1041 register Bytecount dirlen;
1042 EMACS_INT infinity;
1043 Bytind limit;
1044 EMACS_INT k;
1045 Bytecount stride_for_teases = 0;
1046 register Bufbyte *pat = 0;
1047 register Bufbyte *cursor, *p_limit, *ptr2;
1048 register EMACS_INT i, j;
1049 Bytind p1, p2;
1050 Bytecount s1, s2;
1051 Bytind pos, lim;
1052
1053 if (running_asynch_code)
1054 save_search_regs ();
1055
1056 /* Null string is found at starting position. */
1057 if (len == 0)
1058 {
1059 set_search_regs (buf, bufpos, 0);
1060 return bufpos;
1061 }
1062
1063 /* Searching 0 times means don't move. */
1064 if (n == 0)
1065 return bufpos;
1066
1067 pos = bufpos_to_bytind (buf, bufpos);
1068 lim = bufpos_to_bytind (buf, buflim);
1069 if (RE && !trivial_regexp_p (string))
1070 {
1071 struct re_pattern_buffer *bufp;
1072
1073 bufp = compile_pattern (string, &search_regs, (char *) trt, posix,
1074 ERROR_ME);
1075
1076 /* Get pointers and sizes of the two strings
1077 that make up the visible portion of the buffer. */
1078
1079 p1 = BI_BUF_BEGV (buf);
1080 p2 = BI_BUF_CEILING_OF (buf, p1);
1081 s1 = p2 - p1;
1082 s2 = BI_BUF_ZV (buf) - p2;
1083
1084 while (n < 0)
1085 {
1086 Bytecount val;
1087 QUIT;
1088 regex_emacs_buffer = buf;
1089 val = re_search_2 (bufp,
1090 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1091 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1092 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1093 pos - BI_BUF_BEGV (buf));
1094
1095 if (val == -2)
1096 {
1097 matcher_overflow ();
1098 }
1099 if (val >= 0)
1100 {
1101 j = BI_BUF_BEGV (buf);
1102 for (i = 0; i < search_regs.num_regs; i++)
1103 if (search_regs.start[i] >= 0)
1104 {
1105 search_regs.start[i] += j;
1106 search_regs.end[i] += j;
1107 }
1108 XSETBUFFER (last_thing_searched, buf);
1109 /* Set pos to the new position. */
1110 pos = search_regs.start[0];
1111 fixup_search_regs_for_buffer (buf);
1112 /* And bufpos too. */
1113 bufpos = search_regs.start[0];
1114 }
1115 else
1116 {
1117 return (n);
1118 }
1119 n++;
1120 }
1121 while (n > 0)
1122 {
1123 Bytecount val;
1124 QUIT;
1125 regex_emacs_buffer = buf;
1126 val = re_search_2 (bufp,
1127 (char *) BI_BUF_BYTE_ADDRESS (buf, p1), s1,
1128 (char *) BI_BUF_BYTE_ADDRESS (buf, p2), s2,
1129 pos - BI_BUF_BEGV (buf), lim - pos, &search_regs,
1130 lim - BI_BUF_BEGV (buf));
1131 if (val == -2)
1132 {
1133 matcher_overflow ();
1134 }
1135 if (val >= 0)
1136 {
1137 j = BI_BUF_BEGV (buf);
1138 for (i = 0; i < search_regs.num_regs; i++)
1139 if (search_regs.start[i] >= 0)
1140 {
1141 search_regs.start[i] += j;
1142 search_regs.end[i] += j;
1143 }
1144 XSETBUFFER (last_thing_searched, buf);
1145 /* Set pos to the new position. */
1146 pos = search_regs.end[0];
1147 fixup_search_regs_for_buffer (buf);
1148 /* And bufpos too. */
1149 bufpos = search_regs.end[0];
1150 }
1151 else
1152 {
1153 return (0 - n);
1154 }
1155 n--;
1156 }
1157 return (bufpos);
1158 }
1159 else /* non-RE case */
1160 /* #### Someone really really really needs to comment the workings
1161 of this junk somewhat better.
1162
1163 BTW "BM" stands for Boyer-Moore, which is one of the standard
1164 string-searching algorithms. It's the best string-searching
1165 algorithm out there provided
1166
1167 a) You're not fazed by algorithm complexity. (Rabin-Karp, which
1168 uses hashing, is much much easier to code but not as fast.)
1169 b) You can freely move backwards in the string that you're
1170 searching through.
1171
1172 As the comment below tries to explain (but garbles in typical
1173 programmer-ese), the idea is that you don't have to do a
1174 string match at every successive position in the text. For
1175 example, let's say the pattern is "a very long string". We
1176 compare the last character in the string (`g') with the
1177 corresponding character in the text. If it mismatches, and
1178 it is, say, `z', then we can skip forward by the entire
1179 length of the pattern because `z' does not occur anywhere
1180 in the pattern. If the mismatching character does occur
1181 in the pattern, we can usually still skip forward by more
1182 than one: e.g. if it is `l', then we can skip forward
1183 by the length of the substring "ong string" -- i.e. the
1184 largest end section of the pattern that does not contain
1185 the mismatched character. So what we do is compute, for
1186 each possible character, the distance we can skip forward
1187 (the "stride") and use it in the string matching. This
1188 is what the BM_tab holds. */
1189 {
1190 #ifdef C_ALLOCA
1191 EMACS_INT BM_tab_space[0400];
1192 BM_tab = &BM_tab_space[0];
1193 #else
1194 BM_tab = (EMACS_INT *) alloca (0400 * sizeof (EMACS_INT));
1195 #endif
1196 {
1197 Bufbyte *patbuf = (Bufbyte *) alloca (len);
1198 pat = patbuf;
1199 while (--len >= 0)
1200 {
1201 /* If we got here and the RE flag is set, it's because we're
1202 dealing with a regexp known to be trivial, so the backslash
1203 just quotes the next character. */
1204 if (RE && *base_pat == '\\')
1205 {
1206 len--;
1207 base_pat++;
1208 }
1209 *pat++ = (trt ? trt[*base_pat++] : *base_pat++);
1210 }
1211 len = pat - patbuf;
1212 pat = base_pat = patbuf;
1213 }
1214 /* The general approach is that we are going to maintain that we know */
1215 /* the first (closest to the present position, in whatever direction */
1216 /* we're searching) character that could possibly be the last */
1217 /* (furthest from present position) character of a valid match. We */
1218 /* advance the state of our knowledge by looking at that character */
1219 /* and seeing whether it indeed matches the last character of the */
1220 /* pattern. If it does, we take a closer look. If it does not, we */
1221 /* move our pointer (to putative last characters) as far as is */
1222 /* logically possible. This amount of movement, which I call a */
1223 /* stride, will be the length of the pattern if the actual character */
1224 /* appears nowhere in the pattern, otherwise it will be the distance */
1225 /* from the last occurrence of that character to the end of the */
1226 /* pattern. */
1227 /* As a coding trick, an enormous stride is coded into the table for */
1228 /* characters that match the last character. This allows use of only */
1229 /* a single test, a test for having gone past the end of the */
1230 /* permissible match region, to test for both possible matches (when */
1231 /* the stride goes past the end immediately) and failure to */
1232 /* match (where you get nudged past the end one stride at a time). */
1233
1234 /* Here we make a "mickey mouse" BM table. The stride of the search */
1235 /* is determined only by the last character of the putative match. */
1236 /* If that character does not match, we will stride the proper */
1237 /* distance to propose a match that superimposes it on the last */
1238 /* instance of a character that matches it (per trt), or misses */
1239 /* it entirely if there is none. */
1240
1241 dirlen = len * direction;
1242 infinity = dirlen - (lim + pos + len + len) * direction;
1243 if (direction < 0)
1244 pat = (base_pat += len - 1);
1245 BM_tab_base = BM_tab;
1246 BM_tab += 0400;
1247 j = dirlen; /* to get it in a register */
1248 /* A character that does not appear in the pattern induces a */
1249 /* stride equal to the pattern length. */
1250 while (BM_tab_base != BM_tab)
1251 {
1252 *--BM_tab = j;
1253 *--BM_tab = j;
1254 *--BM_tab = j;
1255 *--BM_tab = j;
1256 }
1257 i = 0;
1258 while (i != infinity)
1259 {
1260 j = pat[i]; i += direction;
1261 if (i == dirlen) i = infinity;
1262 if (trt != 0)
1263 {
1264 k = (j = trt[j]);
1265 if (i == infinity)
1266 stride_for_teases = BM_tab[j];
1267 BM_tab[j] = dirlen - i;
1268 /* A translation table is accompanied by its inverse -- see */
1269 /* comment following downcase_table for details */
1270
1271 while ((j = inverse_trt[j]) != k)
1272 BM_tab[j] = dirlen - i;
1273 }
1274 else
1275 {
1276 if (i == infinity)
1277 stride_for_teases = BM_tab[j];
1278 BM_tab[j] = dirlen - i;
1279 }
1280 /* stride_for_teases tells how much to stride if we get a */
1281 /* match on the far character but are subsequently */
1282 /* disappointed, by recording what the stride would have been */
1283 /* for that character if the last character had been */
1284 /* different. */
1285 }
1286 infinity = dirlen - infinity;
1287 pos += dirlen - ((direction > 0) ? direction : 0);
1288 /* loop invariant - pos points at where last char (first char if reverse)
1289 of pattern would align in a possible match. */
1290 while (n != 0)
1291 {
1292 /* It's been reported that some (broken) compiler thinks that
1293 Boolean expressions in an arithmetic context are unsigned.
1294 Using an explicit ?1:0 prevents this. */
1295 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1296 return (n * (0 - direction));
1297 /* First we do the part we can by pointers (maybe nothing) */
1298 QUIT;
1299 pat = base_pat;
1300 limit = pos - dirlen + direction;
1301 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1302 have changed. See buffer.h. */
1303 limit = ((direction > 0)
1304 ? BI_BUF_CEILING_OF (buf, limit) - 1
1305 : BI_BUF_FLOOR_OF (buf, limit + 1));
1306 /* LIMIT is now the last (not beyond-last!) value
1307 POS can take on without hitting edge of buffer or the gap. */
1308 limit = ((direction > 0)
1309 ? min (lim - 1, min (limit, pos + 20000))
1310 : max (lim, max (limit, pos - 20000)));
1311 if ((limit - pos) * direction > 20)
1312 {
1313 p_limit = BI_BUF_BYTE_ADDRESS (buf, limit);
1314 ptr2 = (cursor = BI_BUF_BYTE_ADDRESS (buf, pos));
1315 /* In this loop, pos + cursor - ptr2 is the surrogate for pos */
1316 while (1) /* use one cursor setting as long as i can */
1317 {
1318 if (direction > 0) /* worth duplicating */
1319 {
1320 /* Use signed comparison if appropriate
1321 to make cursor+infinity sure to be > p_limit.
1322 Assuming that the buffer lies in a range of addresses
1323 that are all "positive" (as ints) or all "negative",
1324 either kind of comparison will work as long
1325 as we don't step by infinity. So pick the kind
1326 that works when we do step by infinity. */
1327 if ((EMACS_INT) (p_limit + infinity) >
1328 (EMACS_INT) p_limit)
1329 while ((EMACS_INT) cursor <=
1330 (EMACS_INT) p_limit)
1331 cursor += BM_tab[*cursor];
1332 else
1333 while ((unsigned EMACS_INT) cursor <=
1334 (unsigned EMACS_INT) p_limit)
1335 cursor += BM_tab[*cursor];
1336 }
1337 else
1338 {
1339 if ((EMACS_INT) (p_limit + infinity) <
1340 (EMACS_INT) p_limit)
1341 while ((EMACS_INT) cursor >=
1342 (EMACS_INT) p_limit)
1343 cursor += BM_tab[*cursor];
1344 else
1345 while ((unsigned EMACS_INT) cursor >=
1346 (unsigned EMACS_INT) p_limit)
1347 cursor += BM_tab[*cursor];
1348 }
1349 /* If you are here, cursor is beyond the end of the searched region. */
1350 /* This can happen if you match on the far character of the pattern, */
1351 /* because the "stride" of that character is infinity, a number able */
1352 /* to throw you well beyond the end of the search. It can also */
1353 /* happen if you fail to match within the permitted region and would */
1354 /* otherwise try a character beyond that region */
1355 if ((cursor - p_limit) * direction <= len)
1356 break; /* a small overrun is genuine */
1357 cursor -= infinity; /* large overrun = hit */
1358 i = dirlen - direction;
1359 if (trt != 0)
1360 {
1361 while ((i -= direction) + direction != 0)
1362 if (pat[i] != trt[*(cursor -= direction)])
1363 break;
1364 }
1365 else
1366 {
1367 while ((i -= direction) + direction != 0)
1368 if (pat[i] != *(cursor -= direction))
1369 break;
1370 }
1371 cursor += dirlen - i - direction; /* fix cursor */
1372 if (i + direction == 0)
1373 {
1374 cursor -= direction;
1375
1376 {
1377 Bytind bytstart = (pos + cursor - ptr2 +
1378 ((direction > 0)
1379 ? 1 - len : 0));
1380 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1381 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1382
1383 set_search_regs (buf, bufstart, bufend - bufstart);
1384 }
1385
1386 if ((n -= direction) != 0)
1387 cursor += dirlen; /* to resume search */
1388 else
1389 return ((direction > 0)
1390 ? search_regs.end[0] : search_regs.start[0]);
1391 }
1392 else
1393 cursor += stride_for_teases; /* <sigh> we lose - */
1394 }
1395 pos += cursor - ptr2;
1396 }
1397 else
1398 /* Now we'll pick up a clump that has to be done the hard */
1399 /* way because it covers a discontinuity */
1400 {
1401 /* XEmacs change: definitions of CEILING_OF and FLOOR_OF
1402 have changed. See buffer.h. */
1403 limit = ((direction > 0)
1404 ? BI_BUF_CEILING_OF (buf, pos - dirlen + 1) - 1
1405 : BI_BUF_FLOOR_OF (buf, pos - dirlen));
1406 limit = ((direction > 0)
1407 ? min (limit + len, lim - 1)
1408 : max (limit - len, lim));
1409 /* LIMIT is now the last value POS can have
1410 and still be valid for a possible match. */
1411 while (1)
1412 {
1413 /* This loop can be coded for space rather than */
1414 /* speed because it will usually run only once. */
1415 /* (the reach is at most len + 21, and typically */
1416 /* does not exceed len) */
1417 while ((limit - pos) * direction >= 0)
1418 /* *not* BI_BUF_FETCH_CHAR. We are working here
1419 with bytes, not characters. */
1420 pos += BM_tab[*BI_BUF_BYTE_ADDRESS (buf, pos)];
1421 /* now run the same tests to distinguish going off the */
1422 /* end, a match or a phony match. */
1423 if ((pos - limit) * direction <= len)
1424 break; /* ran off the end */
1425 /* Found what might be a match.
1426 Set POS back to last (first if reverse) char pos. */
1427 pos -= infinity;
1428 i = dirlen - direction;
1429 while ((i -= direction) + direction != 0)
1430 {
1431 pos -= direction;
1432 if (pat[i] != (((Bufbyte *) trt)
1433 /* #### Does not handle TRT right */
1434 ? trt[*BI_BUF_BYTE_ADDRESS (buf, pos)]
1435 : *BI_BUF_BYTE_ADDRESS (buf, pos)))
1436 break;
1437 }
1438 /* Above loop has moved POS part or all the way
1439 back to the first char pos (last char pos if reverse).
1440 Set it once again at the last (first if reverse) char. */
1441 pos += dirlen - i- direction;
1442 if (i + direction == 0)
1443 {
1444 pos -= direction;
1445
1446 {
1447 Bytind bytstart = (pos +
1448 ((direction > 0)
1449 ? 1 - len : 0));
1450 Bufpos bufstart = bytind_to_bufpos (buf, bytstart);
1451 Bufpos bufend = bytind_to_bufpos (buf, bytstart + len);
1452
1453 set_search_regs (buf, bufstart, bufend - bufstart);
1454 }
1455
1456 if ((n -= direction) != 0)
1457 pos += dirlen; /* to resume search */
1458 else
1459 return ((direction > 0)
1460 ? search_regs.end[0] : search_regs.start[0]);
1461 }
1462 else
1463 pos += stride_for_teases;
1464 }
1465 }
1466 /* We have done one clump. Can we continue? */
1467 if ((lim - pos) * direction < 0)
1468 return ((0 - n) * direction);
1469 }
1470 return bytind_to_bufpos (buf, pos);
1471 }
1472 }
1473
1474 /* Record beginning BEG and end BEG + LEN
1475 for a match just found in the current buffer. */
1476
1477 static void
1478 set_search_regs (struct buffer *buf, Bufpos beg, Charcount len)
1479 {
1480 /* This function has been Mule-ized. */
1481 /* Make sure we have registers in which to store
1482 the match position. */
1483 if (search_regs.num_regs == 0)
1484 {
1485 /* #### XEmacs: the ones were twos before, which is surely broken. */
1486 search_regs.start = (regoff_t *) xmalloc (1 * sizeof (regoff_t));
1487 search_regs.end = (regoff_t *) xmalloc (1 * sizeof (regoff_t));
1488 search_regs.num_regs = 1;
1489 }
1490
1491 search_regs.start[0] = beg;
1492 search_regs.end[0] = beg + len;
1493 XSETBUFFER (last_thing_searched, buf);
1494 }
1495
1496
1497 /* Given a string of words separated by word delimiters,
1498 compute a regexp that matches those exact words
1499 separated by arbitrary punctuation. */
1500
1501 static Lisp_Object
1502 wordify (Lisp_Object buffer, Lisp_Object string)
1503 {
1504 Charcount i, len;
1505 EMACS_INT punct_count = 0, word_count = 0;
1506 struct buffer *buf = decode_buffer (buffer, 0);
1507 Lisp_Object syntax_table = buf->syntax_table;
1508
1509 CHECK_STRING (string);
1510 len = string_char_length (XSTRING (string));
1511
1512 for (i = 0; i < len; i++)
1513 if (!WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), i)))
1514 {
1515 punct_count++;
1516 if (i > 0 && WORD_SYNTAX_P (syntax_table,
1517 string_char (XSTRING (string), i - 1)))
1518 word_count++;
1519 }
1520 if (WORD_SYNTAX_P (syntax_table, string_char (XSTRING (string), len - 1)))
1521 word_count++;
1522 if (!word_count) return build_string ("");
1523
1524 {
1525 /* The following value is an upper bound on the amount of storage we
1526 need. In non-Mule, it is exact. */
1527 Bufbyte *storage =
1528 (Bufbyte *) alloca (string_length (XSTRING (string)) - punct_count +
1529 5 * (word_count - 1) + 4);
1530 Bufbyte *o = storage;
1531
1532 *o++ = '\\';
1533 *o++ = 'b';
1534
1535 for (i = 0; i < len; i++)
1536 {
1537 Emchar ch = string_char (XSTRING (string), i);
1538
1539 if (WORD_SYNTAX_P (syntax_table, ch))
1540 o += set_charptr_emchar (o, ch);
1541 else if (i > 0
1542 && WORD_SYNTAX_P (syntax_table,
1543 string_char (XSTRING (string), i - 1))
1544 && --word_count)
1545 {
1546 *o++ = '\\';
1547 *o++ = 'W';
1548 *o++ = '\\';
1549 *o++ = 'W';
1550 *o++ = '*';
1551 }
1552 }
1553
1554 *o++ = '\\';
1555 *o++ = 'b';
1556
1557 return make_string (storage, o - storage);
1558 }
1559 }
1560
1561 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 5,
1562 "sSearch backward: " /*
1563 Search backward from point for STRING.
1564 Set point to the beginning of the occurrence found, and return point.
1565 An optional second argument bounds the search; it is a buffer position.
1566 The match found must not extend before that position.
1567 Optional third argument, if t, means if fail just return nil (no error).
1568 If not nil and not t, position at limit of search and return nil.
1569 Optional fourth argument is repeat count--search for successive occurrences.
1570 Optional fifth argument BUFFER specifies the buffer to search in and
1571 defaults to the current buffer.
1572 See also the functions `match-beginning', `match-end' and `replace-match'.
1573 */ )
1574 (string, bound, no_error, count, buffer)
1575 Lisp_Object string, bound, no_error, count, buffer;
1576 {
1577 return search_command (string, bound, no_error, count, buffer, -1, 0, 0);
1578 }
1579
1580 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 5, "sSearch: " /*
1581 Search forward from point for STRING.
1582 Set point to the end of the occurrence found, and return point.
1583 An optional second argument bounds the search; it is a buffer position.
1584 The match found must not extend after that position. nil is equivalent
1585 to (point-max).
1586 Optional third argument, if t, means if fail just return nil (no error).
1587 If not nil and not t, move to limit of search and return nil.
1588 Optional fourth argument is repeat count--search for successive occurrences.
1589 Optional fifth argument BUFFER specifies the buffer to search in and
1590 defaults to the current buffer.
1591 See also the functions `match-beginning', `match-end' and `replace-match'.
1592 */ )
1593 (string, bound, no_error, count, buffer)
1594 Lisp_Object string, bound, no_error, count, buffer;
1595 {
1596 return search_command (string, bound, no_error, count, buffer, 1, 0, 0);
1597 }
1598
1599 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward,
1600 1, 5,
1601 "sWord search backward: " /*
1602 Search backward from point for STRING, ignoring differences in punctuation.
1603 Set point to the beginning of the occurrence found, and return point.
1604 An optional second argument bounds the search; it is a buffer position.
1605 The match found must not extend before that position.
1606 Optional third argument, if t, means if fail just return nil (no error).
1607 If not nil and not t, move to limit of search and return nil.
1608 Optional fourth argument is repeat count--search for successive occurrences.
1609 Optional fifth argument BUFFER specifies the buffer to search in and
1610 defaults to the current buffer.
1611 */ )
1612 (string, bound, no_error, count, buffer)
1613 Lisp_Object string, bound, no_error, count, buffer;
1614 {
1615 return search_command (wordify (buffer, string), bound, no_error, count,
1616 buffer, -1, 1, 0);
1617 }
1618
1619 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 5,
1620 "sWord search: " /*
1621 Search forward from point for STRING, ignoring differences in punctuation.
1622 Set point to the end of the occurrence found, and return point.
1623 An optional second argument bounds the search; it is a buffer position.
1624 The match found must not extend after that position.
1625 Optional third argument, if t, means if fail just return nil (no error).
1626 If not nil and not t, move to limit of search and return nil.
1627 Optional fourth argument is repeat count--search for successive occurrences.
1628 Optional fifth argument BUFFER specifies the buffer to search in and
1629 defaults to the current buffer.
1630 */ )
1631 (string, bound, no_error, count, buffer)
1632 Lisp_Object string, bound, no_error, count, buffer;
1633 {
1634 return search_command (wordify (buffer, string), bound, no_error, count,
1635 buffer, 1, 1, 0);
1636 }
1637
1638 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 5,
1639 "sRE search backward: " /*
1640 Search backward from point for match for regular expression REGEXP.
1641 Set point to the beginning of the match, and return point.
1642 The match found is the one starting last in the buffer
1643 and yet ending before the origin of the search.
1644 An optional second argument bounds the search; it is a buffer position.
1645 The match found must start at or after that position.
1646 Optional third argument, if t, means if fail just return nil (no error).
1647 If not nil and not t, move to limit of search and return nil.
1648 Optional fourth argument is repeat count--search for successive occurrences.
1649 Optional fifth argument BUFFER specifies the buffer to search in and
1650 defaults to the current buffer.
1651 See also the functions `match-beginning', `match-end' and `replace-match'.
1652 */ )
1653 (regexp, bound, no_error, count, buffer)
1654 Lisp_Object regexp, bound, no_error, count, buffer;
1655 {
1656 return search_command (regexp, bound, no_error, count, buffer, -1, 1, 0);
1657 }
1658
1659 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 5,
1660 "sRE search: " /*
1661 Search forward from point for regular expression REGEXP.
1662 Set point to the end of the occurrence found, and return point.
1663 An optional second argument bounds the search; it is a buffer position.
1664 The match found must not extend after that position.
1665 Optional third argument, if t, means if fail just return nil (no error).
1666 If not nil and not t, move to limit of search and return nil.
1667 Optional fourth argument is repeat count--search for successive occurrences.
1668 Optional fifth argument BUFFER specifies the buffer to search in and
1669 defaults to the current buffer.
1670 See also the functions `match-beginning', `match-end' and `replace-match'.
1671 */ )
1672 (regexp, bound, no_error, count, buffer)
1673 Lisp_Object regexp, bound, no_error, count, buffer;
1674 {
1675 return search_command (regexp, bound, no_error, count, buffer, 1, 1, 0);
1676 }
1677
1678 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward,
1679 1, 5,
1680 "sPosix search backward: " /*
1681 Search backward from point for match for regular expression REGEXP.
1682 Find the longest match in accord with Posix regular expression rules.
1683 Set point to the beginning of the match, and return point.
1684 The match found is the one starting last in the buffer
1685 and yet ending before the origin of the search.
1686 An optional second argument bounds the search; it is a buffer position.
1687 The match found must start at or after that position.
1688 Optional third argument, if t, means if fail just return nil (no error).
1689 If not nil and not t, move to limit of search and return nil.
1690 Optional fourth argument is repeat count--search for successive occurrences.
1691 Optional fifth argument BUFFER specifies the buffer to search in and
1692 defaults to the current buffer.
1693 See also the functions `match-beginning', `match-end' and `replace-match'.
1694 */ )
1695 (regexp, bound, no_error, count, buffer)
1696 Lisp_Object regexp, bound, no_error, count, buffer;
1697 {
1698 return search_command (regexp, bound, no_error, count, buffer, -1, 1, 1);
1699 }
1700
1701 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward,
1702 1, 5,
1703 "sPosix search: " /*
1704 Search forward from point for regular expression REGEXP.
1705 Find the longest match in accord with Posix regular expression rules.
1706 Set point to the end of the occurrence found, and return point.
1707 An optional second argument bounds the search; it is a buffer position.
1708 The match found must not extend after that position.
1709 Optional third argument, if t, means if fail just return nil (no error).
1710 If not nil and not t, move to limit of search and return nil.
1711 Optional fourth argument is repeat count--search for successive occurrences.
1712 Optional fifth argument BUFFER specifies the buffer to search in and
1713 defaults to the current buffer.
1714 See also the functions `match-beginning', `match-end' and `replace-match'.
1715 */ )
1716 (regexp, bound, no_error, count, buffer)
1717 Lisp_Object regexp, bound, no_error, count, buffer;
1718 {
1719 return search_command (regexp, bound, no_error, count, buffer, 1, 1, 1);
1720 }
1721
1722
1723 static Lisp_Object
1724 free_created_dynarrs (Lisp_Object cons)
1725 {
1726 Dynarr_free (get_opaque_ptr (XCAR (cons)));
1727 Dynarr_free (get_opaque_ptr (XCDR (cons)));
1728 free_opaque_ptr (XCAR (cons));
1729 free_opaque_ptr (XCDR (cons));
1730 free_cons (XCONS (cons));
1731 return Qnil;
1732 }
1733
1734 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0 /*
1735 Replace text matched by last search with NEWTEXT.
1736 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
1737 Otherwise maybe capitalize the whole text, or maybe just word initials,
1738 based on the replaced text.
1739 If the replaced text has only capital letters
1740 and has at least one multiletter word, convert NEWTEXT to all caps.
1741 If the replaced text has at least one word starting with a capital letter,
1742 then capitalize each word in NEWTEXT.
1743
1744 If third arg LITERAL is non-nil, insert NEWTEXT literally.
1745 Otherwise treat `\\' as special:
1746 `\\&' in NEWTEXT means substitute original matched text.
1747 `\\N' means substitute what matched the Nth `\\(...\\)'.
1748 If Nth parens didn't match, substitute nothing.
1749 `\\\\' means insert one `\\'.
1750 `\\u' means upcase the next character.
1751 `\\l' means downcase the next character.
1752 `\\U' means begin upcasing all following characters.
1753 `\\L' means begin downcasing all following characters.
1754 `\\E' means terminate the effect of any `\\U' or `\\L'.
1755 Case changes made with `\\u', `\\l', `\\U', and `\\L' override
1756 all other case changes that may be made in the replaced text.
1757 FIXEDCASE and LITERAL are optional arguments.
1758 Leaves point at end of replacement text.
1759
1760 The optional fourth argument STRING can be a string to modify.
1761 In that case, this function creates and returns a new string
1762 which is made by replacing the part of STRING that was matched.
1763 When fourth argument is a string, fifth argument STRBUFFER specifies
1764 the buffer to be used for syntax-table and case-table lookup and
1765 defaults to the current buffer. (When fourth argument is not a string,
1766 the buffer that the match occurred in has automatically been remembered
1767 and you do not need to specify it.)
1768 */ )
1769 (newtext, fixedcase, literal, string, strbuffer)
1770 Lisp_Object newtext, fixedcase, literal, string, strbuffer;
1771 {
1772 /* This function has been Mule-ized. */
1773 /* This function can GC */
1774 enum { nochange, all_caps, cap_initial } case_action;
1775 Bufpos pos, last;
1776 int some_multiletter_word;
1777 int some_lowercase;
1778 int some_uppercase;
1779 int some_nonuppercase_initial;
1780 Emchar c, prevc;
1781 Charcount inslen;
1782 struct buffer *buf;
1783 Lisp_Object syntax_table;
1784 int mc_count;
1785 Lisp_Object buffer;
1786 int_dynarr *ul_action_dynarr = 0;
1787 int_dynarr *ul_pos_dynarr = 0;
1788 int speccount;
1789
1790 CHECK_STRING (newtext);
1791
1792 if (! NILP (string))
1793 {
1794 CHECK_STRING (string);
1795 if (!EQ (last_thing_searched, Qt))
1796 error ("last thing matched was not a string");
1797 /* Damn you RMS! You are going to burn in hell for your
1798 antipathy towards data abstraction. If the match data
1799 were abstracted into a special "match data" type instead
1800 of the typical half-assed "let the implementation be
1801 visible" form it's in, we could extend it to include
1802 the last string matched and the buffer used for that
1803 matching. But of course we can't change it as it is. */
1804 buf = decode_buffer (strbuffer, 0);
1805 XSETBUFFER (buffer, buf);
1806 }
1807 else
1808 {
1809 if (!BUFFERP (last_thing_searched))
1810 error ("last thing matched was not a buffer");
1811 buffer = last_thing_searched;
1812 buf = XBUFFER (buffer);
1813 }
1814
1815 syntax_table = buf->syntax_table;
1816
1817 case_action = nochange; /* We tried an initialization */
1818 /* but some C compilers blew it */
1819
1820 if (search_regs.num_regs <= 0)
1821 error ("replace-match called before any match found");
1822
1823 if (NILP (string))
1824 {
1825 if (search_regs.start[0] < BUF_BEGV (buf)
1826 || search_regs.start[0] > search_regs.end[0]
1827 || search_regs.end[0] > BUF_ZV (buf))
1828 args_out_of_range (make_int (search_regs.start[0]),
1829 make_int (search_regs.end[0]));
1830 }
1831 else
1832 {
1833 if (search_regs.start[0] < 0
1834 || search_regs.start[0] > search_regs.end[0]
1835 || search_regs.end[0] > string_char_length (XSTRING (string)))
1836 args_out_of_range (make_int (search_regs.start[0]),
1837 make_int (search_regs.end[0]));
1838 }
1839
1840 if (NILP (fixedcase))
1841 {
1842 /* Decide how to casify by examining the matched text. */
1843
1844 last = search_regs.end[0];
1845 prevc = '\n';
1846 case_action = all_caps;
1847
1848 /* some_multiletter_word is set nonzero if any original word
1849 is more than one letter long. */
1850 some_multiletter_word = 0;
1851 some_lowercase = 0;
1852 some_nonuppercase_initial = 0;
1853 some_uppercase = 0;
1854
1855 for (pos = search_regs.start[0]; pos < last; pos++)
1856 {
1857 if (NILP (string))
1858 c = BUF_FETCH_CHAR (buf, pos);
1859 else
1860 c = string_char (XSTRING (string), pos);
1861
1862 if (LOWERCASEP (buf, c))
1863 {
1864 /* Cannot be all caps if any original char is lower case */
1865
1866 some_lowercase = 1;
1867 if (!WORD_SYNTAX_P (syntax_table, prevc))
1868 some_nonuppercase_initial = 1;
1869 else
1870 some_multiletter_word = 1;
1871 }
1872 else if (!NOCASEP (buf, c))
1873 {
1874 some_uppercase = 1;
1875 if (!WORD_SYNTAX_P (syntax_table, prevc))
1876 ;
1877 else
1878 some_multiletter_word = 1;
1879 }
1880 else
1881 {
1882 /* If the initial is a caseless word constituent,
1883 treat that like a lowercase initial. */
1884 if (!WORD_SYNTAX_P (syntax_table, prevc))
1885 some_nonuppercase_initial = 1;
1886 }
1887
1888 prevc = c;
1889 }
1890
1891 /* Convert to all caps if the old text is all caps
1892 and has at least one multiletter word. */
1893 if (! some_lowercase && some_multiletter_word)
1894 case_action = all_caps;
1895 /* Capitalize each word, if the old text has all capitalized words. */
1896 else if (!some_nonuppercase_initial && some_multiletter_word)
1897 case_action = cap_initial;
1898 else if (!some_nonuppercase_initial && some_uppercase)
1899 /* Should x -> yz, operating on X, give Yz or YZ?
1900 We'll assume the latter. */
1901 case_action = all_caps;
1902 else
1903 case_action = nochange;
1904 }
1905
1906 /* Do replacement in a string. */
1907 if (!NILP (string))
1908 {
1909 Lisp_Object before, after;
1910
1911 speccount = specpdl_depth ();
1912 before = Fsubstring (string, make_int (0),
1913 make_int (search_regs.start[0]));
1914 after = Fsubstring (string, make_int (search_regs.end[0]), Qnil);
1915
1916 /* Do case substitution into NEWTEXT if desired. */
1917 if (NILP (literal))
1918 {
1919 Charcount stlen = string_char_length (XSTRING (newtext));
1920 Charcount strpos;
1921 /* XEmacs change: rewrote this loop somewhat to make it
1922 cleaner. Also added \U, \E, etc. */
1923 Charcount literal_start = 0;
1924 /* We build up the substituted string in ACCUM. */
1925 Lisp_Object accum;
1926
1927 accum = Qnil;
1928
1929 /* OK, the basic idea here is that we scan through the
1930 replacement string until we find a backslash, which
1931 represents a substring of the original string to be
1932 substituted. We then append onto ACCUM the literal
1933 text before the backslash (LASTPOS marks the
1934 beginning of this) followed by the substring of the
1935 original string that needs to be inserted. */
1936 for (strpos = 0; strpos < stlen; strpos++)
1937 {
1938 /* If LITERAL_END is set, we've encountered a backslash
1939 (the end of literal text to be inserted). */
1940 Charcount literal_end = -1;
1941 /* If SUBSTART is set, we need to also insert the
1942 text from SUBSTART to SUBEND in the original string. */
1943 Charcount substart = -1;
1944 Charcount subend;
1945
1946 c = string_char (XSTRING (newtext), strpos);
1947 if (c == '\\')
1948 {
1949 c = string_char (XSTRING (newtext), ++strpos);
1950 if (c == '&')
1951 {
1952 literal_end = strpos - 1;
1953 substart = search_regs.start[0];
1954 subend = search_regs.end[0];
1955 }
1956 else if (c >= '1' && c <= '9' &&
1957 c <= search_regs.num_regs + '0')
1958 {
1959 if (search_regs.start[c - '0'] >= 0)
1960 {
1961 literal_end = strpos - 1;
1962 substart = search_regs.start[c - '0'];
1963 subend = search_regs.end[c - '0'];
1964 }
1965 }
1966 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
1967 c == 'E')
1968 {
1969 /* Keep track of all case changes requested, but don't
1970 make them now. Do them later so we override
1971 everything else. */
1972 if (!ul_pos_dynarr)
1973 {
1974 ul_pos_dynarr = Dynarr_new (int);
1975 ul_action_dynarr = Dynarr_new (int);
1976 record_unwind_protect
1977 (free_created_dynarrs,
1978 noseeum_cons
1979 (make_opaque_ptr (ul_pos_dynarr),
1980 make_opaque_ptr (ul_action_dynarr)));
1981 }
1982 literal_end = strpos - 1;
1983 Dynarr_add (ul_pos_dynarr,
1984 (!NILP (accum)
1985 ? string_char_length (XSTRING (accum))
1986 : 0) + (literal_end - literal_start));
1987 Dynarr_add (ul_action_dynarr, c);
1988 }
1989 else if (c == '\\')
1990 /* So we get just one backslash. */
1991 literal_end = strpos;
1992 }
1993 if (literal_end >= 0)
1994 {
1995 Lisp_Object literal_text = Qnil;
1996 Lisp_Object substring = Qnil;
1997 if (literal_end != literal_start)
1998 literal_text = Fsubstring (newtext,
1999 make_int (literal_start),
2000 make_int (literal_end));
2001 if (substart >= 0 && subend != substart)
2002 substring = Fsubstring (string,
2003 make_int (substart),
2004 make_int (subend));
2005 if (!NILP (literal_text) || !NILP (substring))
2006 accum = concat3 (accum, literal_text, substring);
2007 literal_start = strpos + 1;
2008 }
2009 }
2010
2011 if (strpos != literal_start)
2012 /* some literal text at end to be inserted */
2013 newtext = concat2 (accum, Fsubstring (newtext,
2014 make_int (literal_start),
2015 make_int (strpos)));
2016 else
2017 newtext = accum;
2018 }
2019
2020 if (case_action == all_caps)
2021 newtext = Fupcase (newtext, buffer);
2022 else if (case_action == cap_initial)
2023 newtext = Fupcase_initials (newtext, buffer);
2024
2025 /* Now finally, we need to process the \U's, \E's, etc. */
2026 if (ul_pos_dynarr)
2027 {
2028 int i = 0;
2029 int cur_action = 'E';
2030 Charcount stlen = string_char_length (XSTRING (newtext));
2031 Charcount strpos;
2032
2033 for (strpos = 0; strpos < stlen; strpos++)
2034 {
2035 Emchar curchar = string_char (XSTRING (newtext), strpos);
2036 Emchar newchar = -1;
2037 if (i < Dynarr_length (ul_pos_dynarr) &&
2038 strpos == Dynarr_at (ul_pos_dynarr, i))
2039 {
2040 int new_action = Dynarr_at (ul_action_dynarr, i);
2041 i++;
2042 if (new_action == 'u')
2043 newchar = UPCASE (buf, curchar);
2044 else if (new_action == 'l')
2045 newchar = DOWNCASE (buf, curchar);
2046 else
2047 cur_action = new_action;
2048 }
2049 if (newchar == -1)
2050 {
2051 if (cur_action == 'U')
2052 newchar = UPCASE (buf, curchar);
2053 else if (cur_action == 'L')
2054 newchar = DOWNCASE (buf, curchar);
2055 else
2056 newchar = curchar;
2057 }
2058 if (newchar != curchar)
2059 set_string_char (XSTRING (newtext), strpos, newchar);
2060 }
2061 }
2062
2063 /* frees the Dynarrs if necessary. */
2064 unbind_to (speccount, Qnil);
2065 return concat3 (before, newtext, after);
2066 }
2067
2068 mc_count = begin_multiple_change (buf, search_regs.start[0],
2069 search_regs.end[0]);
2070
2071 /* begin_multiple_change() records an unwind-protect, so we need to
2072 record this value now. */
2073 speccount = specpdl_depth ();
2074
2075 /* We insert the replacement text before the old text, and then
2076 delete the original text. This means that markers at the
2077 beginning or end of the original will float to the corresponding
2078 position in the replacement. */
2079 BUF_SET_PT (buf, search_regs.start[0]);
2080 if (!NILP (literal))
2081 Finsert (1, &newtext);
2082 else
2083 {
2084 Charcount stlen = string_char_length (XSTRING (newtext));
2085 Charcount strpos;
2086 struct gcpro gcpro1;
2087 GCPRO1 (newtext);
2088 for (strpos = 0; strpos < stlen; strpos++)
2089 {
2090 Charcount offset = BUF_PT (buf) - search_regs.start[0];
2091
2092 c = string_char (XSTRING (newtext), strpos);
2093 if (c == '\\')
2094 {
2095 c = string_char (XSTRING (newtext), ++strpos);
2096 if (c == '&')
2097 Finsert_buffer_substring
2098 (buffer,
2099 make_int (search_regs.start[0] + offset),
2100 make_int (search_regs.end[0] + offset));
2101 else if (c >= '1' && c <= '9' &&
2102 c <= search_regs.num_regs + '0')
2103 {
2104 if (search_regs.start[c - '0'] >= 1)
2105 Finsert_buffer_substring
2106 (buffer,
2107 make_int (search_regs.start[c - '0'] + offset),
2108 make_int (search_regs.end[c - '0'] + offset));
2109 }
2110 else if (c == 'U' || c == 'u' || c == 'L' || c == 'l' ||
2111 c == 'E')
2112 {
2113 /* Keep track of all case changes requested, but don't
2114 make them now. Do them later so we override
2115 everything else. */
2116 if (!ul_pos_dynarr)
2117 {
2118 ul_pos_dynarr = Dynarr_new (int);
2119 ul_action_dynarr = Dynarr_new (int);
2120 record_unwind_protect
2121 (free_created_dynarrs,
2122 Fcons (make_opaque_ptr (ul_pos_dynarr),
2123 make_opaque_ptr (ul_action_dynarr)));
2124 }
2125 Dynarr_add (ul_pos_dynarr, BUF_PT (buf));
2126 Dynarr_add (ul_action_dynarr, c);
2127 }
2128 else
2129 buffer_insert_emacs_char (buf, c);
2130 }
2131 else
2132 buffer_insert_emacs_char (buf, c);
2133 }
2134 UNGCPRO;
2135 }
2136
2137 inslen = BUF_PT (buf) - (search_regs.start[0]);
2138 buffer_delete_range (buf, search_regs.start[0] + inslen, search_regs.end[0] +
2139 inslen, 0);
2140
2141 if (case_action == all_caps)
2142 Fupcase_region (make_int (BUF_PT (buf) - inslen),
2143 make_int (BUF_PT (buf)), buffer);
2144 else if (case_action == cap_initial)
2145 Fupcase_initials_region (make_int (BUF_PT (buf) - inslen),
2146 make_int (BUF_PT (buf)), buffer);
2147
2148 /* Now go through and make all the case changes that were requested
2149 in the replacement string. */
2150 if (ul_pos_dynarr)
2151 {
2152 Bufpos eend = BUF_PT (buf);
2153 int i = 0;
2154 int cur_action = 'E';
2155
2156 for (pos = BUF_PT (buf) - inslen; pos < eend; pos++)
2157 {
2158 Emchar curchar = BUF_FETCH_CHAR (buf, pos);
2159 Emchar newchar = -1;
2160 if (i < Dynarr_length (ul_pos_dynarr) &&
2161 pos == Dynarr_at (ul_pos_dynarr, i))
2162 {
2163 int new_action = Dynarr_at (ul_action_dynarr, i);
2164 i++;
2165 if (new_action == 'u')
2166 newchar = UPCASE (buf, curchar);
2167 else if (new_action == 'l')
2168 newchar = DOWNCASE (buf, curchar);
2169 else
2170 cur_action = new_action;
2171 }
2172 if (newchar == -1)
2173 {
2174 if (cur_action == 'U')
2175 newchar = UPCASE (buf, curchar);
2176 else if (cur_action == 'L')
2177 newchar = DOWNCASE (buf, curchar);
2178 else
2179 newchar = curchar;
2180 }
2181 if (newchar != curchar)
2182 buffer_replace_char (buf, pos, newchar, 0, 0);
2183 }
2184 }
2185
2186 /* frees the Dynarrs if necessary. */
2187 unbind_to (speccount, Qnil);
2188 end_multiple_change (buf, mc_count);
2189
2190 return Qnil;
2191 }
2192
2193 static Lisp_Object
2194 match_limit (Lisp_Object num, int beginningp)
2195 {
2196 /* This function has been Mule-ized. */
2197 int n;
2198
2199 CHECK_INT (num);
2200 n = XINT (num);
2201 if (n < 0 || n >= search_regs.num_regs)
2202 args_out_of_range (num, make_int (search_regs.num_regs));
2203 if (search_regs.num_regs <= 0
2204 || search_regs.start[n] < 0)
2205 return Qnil;
2206 return (make_int ((beginningp) ? search_regs.start[n]
2207 : search_regs.end[n]));
2208 }
2209
2210 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0 /*
2211 Return position of start of text matched by last regexp search.
2212 NUM, specifies which parenthesized expression in the last regexp.
2213 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2214 Zero means the entire text matched by the whole regexp or whole string.
2215 */ )
2216 (num)
2217 Lisp_Object num;
2218 {
2219 return match_limit (num, 1);
2220 }
2221
2222 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0 /*
2223 Return position of end of text matched by last regexp search.
2224 NUM specifies which parenthesized expression in the last regexp.
2225 Value is nil if NUMth pair didn't match, or there were less than NUM pairs.
2226 Zero means the entire text matched by the whole regexp or whole string.
2227 */ )
2228 (num)
2229 Lisp_Object num;
2230 {
2231 return match_limit (num, 0);
2232 }
2233
2234 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 0, 0 /*
2235 Return a list containing all info on what the last regexp search matched.
2236 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2237 All the elements are markers or nil (nil if the Nth pair didn't match)
2238 if the last match was on a buffer; integers or nil if a string was matched.
2239 Use `store-match-data' to reinstate the data in this list.
2240 */ )
2241 ()
2242 {
2243 /* This function has been Mule-ized. */
2244 Lisp_Object *data;
2245 int i;
2246 Charcount len;
2247
2248 if (NILP (last_thing_searched))
2249 error ("match-data called before any match found");
2250
2251 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2252 * sizeof (Lisp_Object));
2253
2254 len = -1;
2255 for (i = 0; i < search_regs.num_regs; i++)
2256 {
2257 Bufpos start = search_regs.start[i];
2258 if (start >= 0)
2259 {
2260 if (EQ (last_thing_searched, Qt))
2261 {
2262 data[2 * i] = make_int (start);
2263 data[2 * i + 1] = make_int (search_regs.end[i]);
2264 }
2265 else if (BUFFERP (last_thing_searched))
2266 {
2267 data[2 * i] = Fmake_marker ();
2268 Fset_marker (data[2 * i],
2269 make_int (start),
2270 last_thing_searched);
2271 data[2 * i + 1] = Fmake_marker ();
2272 Fset_marker (data[2 * i + 1],
2273 make_int (search_regs.end[i]),
2274 last_thing_searched);
2275 }
2276 else
2277 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2278 abort ();
2279
2280 len = i;
2281 }
2282 else
2283 data[2 * i] = data [2 * i + 1] = Qnil;
2284 }
2285 return Flist (2 * len + 2, data);
2286 }
2287
2288
2289 DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0 /*
2290 Set internal data on last search match from elements of LIST.
2291 LIST should have been created by calling `match-data' previously.
2292 */ )
2293 (list)
2294 Lisp_Object list;
2295 {
2296 /* This function has been Mule-ized. */
2297 register int i;
2298 register Lisp_Object marker;
2299
2300 if (running_asynch_code)
2301 save_search_regs ();
2302
2303 if (!CONSP (list) && !NILP (list))
2304 list = wrong_type_argument (Qconsp, list);
2305
2306 /* Unless we find a marker with a buffer in LIST, assume that this
2307 match data came from a string. */
2308 last_thing_searched = Qt;
2309
2310 /* Allocate registers if they don't already exist. */
2311 {
2312 int length = XINT (Flength (list)) / 2;
2313
2314 if (length > search_regs.num_regs)
2315 {
2316 if (search_regs.num_regs == 0)
2317 {
2318 search_regs.start
2319 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2320 search_regs.end
2321 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2322 }
2323 else
2324 {
2325 search_regs.start
2326 = (regoff_t *) xrealloc (search_regs.start,
2327 length * sizeof (regoff_t));
2328 search_regs.end
2329 = (regoff_t *) xrealloc (search_regs.end,
2330 length * sizeof (regoff_t));
2331 }
2332
2333 search_regs.num_regs = length;
2334 }
2335 }
2336
2337 for (i = 0; i < search_regs.num_regs; i++)
2338 {
2339 marker = Fcar (list);
2340 if (NILP (marker))
2341 {
2342 search_regs.start[i] = -1;
2343 list = Fcdr (list);
2344 }
2345 else
2346 {
2347 if (MARKERP (marker))
2348 {
2349 if (XMARKER (marker)->buffer == 0)
2350 marker = Qzero;
2351 else
2352 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2353 }
2354
2355 CHECK_INT_COERCE_MARKER (marker);
2356 search_regs.start[i] = XINT (marker);
2357 list = Fcdr (list);
2358
2359 marker = Fcar (list);
2360 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2361 marker = Qzero;
2362
2363 CHECK_INT_COERCE_MARKER (marker);
2364 search_regs.end[i] = XINT (marker);
2365 }
2366 list = Fcdr (list);
2367 }
2368
2369 return Qnil;
2370 }
2371
2372 /* If non-zero the match data have been saved in saved_search_regs
2373 during the execution of a sentinel or filter. */
2374 static int search_regs_saved;
2375 static struct re_registers saved_search_regs;
2376
2377 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2378 if asynchronous code (filter or sentinel) is running. */
2379 static void
2380 save_search_regs (void)
2381 {
2382 if (!search_regs_saved)
2383 {
2384 saved_search_regs.num_regs = search_regs.num_regs;
2385 saved_search_regs.start = search_regs.start;
2386 saved_search_regs.end = search_regs.end;
2387 search_regs.num_regs = 0;
2388 search_regs.start = 0;
2389 search_regs.end = 0;
2390
2391 search_regs_saved = 1;
2392 }
2393 }
2394
2395 /* Called upon exit from filters and sentinels. */
2396 void
2397 restore_match_data (void)
2398 {
2399 if (search_regs_saved)
2400 {
2401 if (search_regs.num_regs > 0)
2402 {
2403 xfree (search_regs.start);
2404 xfree (search_regs.end);
2405 }
2406 search_regs.num_regs = saved_search_regs.num_regs;
2407 search_regs.start = saved_search_regs.start;
2408 search_regs.end = saved_search_regs.end;
2409
2410 search_regs_saved = 0;
2411 }
2412 }
2413
2414 /* Quote a string to inactivate reg-expr chars */
2415
2416 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0 /*
2417 Return a regexp string which matches exactly STRING and nothing else.
2418 */ )
2419 (str)
2420 Lisp_Object str;
2421 {
2422 /* This function has been Mule-ized. */
2423 register Bufbyte *in, *out, *end;
2424 register Bufbyte *temp;
2425
2426 CHECK_STRING (str);
2427
2428 temp = (Bufbyte *) alloca (string_length (XSTRING (str)) * 2);
2429
2430 /* Now copy the data into the new string, inserting escapes. */
2431
2432 in = string_data (XSTRING (str));
2433 end = in + string_length (XSTRING (str));
2434 out = temp;
2435
2436 for (; in != end; in++)
2437 {
2438 if (*in == '[' || *in == ']'
2439 || *in == '*' || *in == '.' || *in == '\\'
2440 || *in == '?' || *in == '+'
2441 || *in == '^' || *in == '$')
2442 *out++ = '\\';
2443 *out++ = *in;
2444 }
2445
2446 return make_string (temp, out - temp);
2447 }
2448
2449
2450 /************************************************************************/
2451 /* initialization */
2452 /************************************************************************/
2453
2454 void
2455 syms_of_search (void)
2456 {
2457
2458 deferror (&Qsearch_failed, "search-failed", "Search failed", Qerror);
2459 deferror (&Qinvalid_regexp, "invalid-regexp", "Invalid regexp", Qerror);
2460
2461 defsubr (&Slooking_at);
2462 defsubr (&Sposix_looking_at);
2463 defsubr (&Sstring_match);
2464 defsubr (&Sposix_string_match);
2465 defsubr (&Sskip_chars_forward);
2466 defsubr (&Sskip_chars_backward);
2467 defsubr (&Sskip_syntax_forward);
2468 defsubr (&Sskip_syntax_backward);
2469 defsubr (&Ssearch_forward);
2470 defsubr (&Ssearch_backward);
2471 defsubr (&Sword_search_forward);
2472 defsubr (&Sword_search_backward);
2473 defsubr (&Sre_search_forward);
2474 defsubr (&Sre_search_backward);
2475 defsubr (&Sposix_search_forward);
2476 defsubr (&Sposix_search_backward);
2477 defsubr (&Sreplace_match);
2478 defsubr (&Smatch_beginning);
2479 defsubr (&Smatch_end);
2480 defsubr (&Smatch_data);
2481 defsubr (&Sstore_match_data);
2482 defsubr (&Sregexp_quote);
2483 }
2484
2485 void
2486 vars_of_search (void)
2487 {
2488 register int i;
2489
2490 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2491 {
2492 searchbufs[i].buf.allocated = 100;
2493 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
2494 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2495 searchbufs[i].regexp = Qnil;
2496 staticpro (&searchbufs[i].regexp);
2497 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2498 }
2499 searchbuf_head = &searchbufs[0];
2500
2501 last_thing_searched = Qnil;
2502 staticpro (&last_thing_searched);
2503
2504 DEFVAR_LISP ("forward-word-regexp", &Vforward_word_regexp /*
2505 *Regular expression to be used in `forward-word'.
2506 #### Not yet implemented.
2507 */ );
2508 Vforward_word_regexp = Qnil;
2509
2510 DEFVAR_LISP ("backward-word-regexp", &Vbackward_word_regexp /*
2511 *Regular expression to be used in `backward-word'.
2512 #### Not yet implemented.
2513 */ );
2514 Vbackward_word_regexp = Qnil;
2515 }
2516
2517 void
2518 complex_vars_of_search (void)
2519 {
2520 Vskip_chars_range_table = Fmake_range_table ();
2521 staticpro (&Vskip_chars_range_table);
2522 }