Mercurial > hg > xemacs-beta
annotate src/regex.c @ 4614:afbfad080ddd
The URLs in our current config.guess and config.sub files are obsolete.
Update to the latest upstream release to get correct URLs, as well as fixes
and enhancements to those scripts.
| author | Jerry James <james@xemacs.org> |
|---|---|
| date | Wed, 11 Feb 2009 11:09:35 -0700 |
| parents | 8418d1ad4944 |
| children | b5f21bb36684 |
| rev | line source |
|---|---|
| 428 | 1 /* Extended regular expression matching and search library, |
| 2 version 0.12, extended for XEmacs. | |
| 3 (Implements POSIX draft P10003.2/D11.2, except for | |
| 4 internationalization features.) | |
| 5 | |
| 6 Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc. | |
| 7 Copyright (C) 1995 Sun Microsystems, Inc. | |
| 1333 | 8 Copyright (C) 1995, 2001, 2002, 2003 Ben Wing. |
| 428 | 9 |
| 10 This program is free software; you can redistribute it and/or modify | |
| 11 it under the terms of the GNU General Public License as published by | |
| 12 the Free Software Foundation; either version 2, or (at your option) | |
| 13 any later version. | |
| 14 | |
| 15 This program is distributed in the hope that it will be useful, | |
| 16 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 18 GNU General Public License for more details. | |
| 19 | |
| 20 You should have received a copy of the GNU General Public License | |
| 21 along with this program; see the file COPYING. If not, write to | |
| 22 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
| 23 Boston, MA 02111-1307, USA. */ | |
| 24 | |
| 25 /* Synched up with: FSF 19.29. */ | |
| 26 | |
| 27 #ifdef HAVE_CONFIG_H | |
| 28 #include <config.h> | |
| 29 #endif | |
| 30 | |
| 31 #ifndef _GNU_SOURCE | |
| 32 #define _GNU_SOURCE 1 | |
| 33 #endif | |
| 34 | |
| 35 /* We assume non-Mule if emacs isn't defined. */ | |
| 36 #ifndef emacs | |
| 37 #undef MULE | |
| 38 #endif | |
| 39 | |
| 771 | 40 /* XEmacs addition */ |
| 41 #ifdef REL_ALLOC | |
| 42 #define REGEX_REL_ALLOC /* may be undefined below */ | |
| 43 #endif | |
| 44 | |
| 428 | 45 /* XEmacs: define this to add in a speedup for patterns anchored at |
| 46 the beginning of a line. Keep the ifdefs so that it's easier to | |
| 47 tell where/why this code has diverged from v19. */ | |
| 48 #define REGEX_BEGLINE_CHECK | |
| 49 | |
| 50 /* XEmacs: the current mmap-based ralloc handles small blocks very | |
| 51 poorly, so we disable it here. */ | |
| 52 | |
| 771 | 53 #if defined (HAVE_MMAP) || defined (DOUG_LEA_MALLOC) |
| 54 # undef REGEX_REL_ALLOC | |
| 428 | 55 #endif |
| 56 | |
| 57 /* The `emacs' switch turns on certain matching commands | |
| 58 that make sense only in Emacs. */ | |
| 59 #ifdef emacs | |
| 60 | |
| 61 #include "lisp.h" | |
| 62 #include "buffer.h" | |
| 63 #include "syntax.h" | |
| 64 | |
| 65 #if (defined (DEBUG_XEMACS) && !defined (DEBUG)) | |
| 66 #define DEBUG | |
| 67 #endif | |
| 68 | |
| 867 | 69 #define RE_TRANSLATE_1(ch) TRT_TABLE_OF (translate, (Ichar) ch) |
| 446 | 70 #define TRANSLATE_P(tr) (!NILP (tr)) |
| 428 | 71 |
| 826 | 72 /* Converts the pointer to the char to BEG-based offset from the start. */ |
| 73 #define PTR_TO_OFFSET(d) (MATCHING_IN_FIRST_STRING \ | |
| 74 ? (d) - string1 : (d) - (string2 - size1)) | |
| 75 | |
| 428 | 76 #else /* not emacs */ |
| 77 | |
| 2367 | 78 #include <stdlib.h> |
| 79 #include <sys/types.h> | |
| 80 #include <stddef.h> /* needed for ptrdiff_t under Solaris */ | |
| 81 #include <string.h> | |
| 82 | |
| 2286 | 83 #include "compiler.h" /* Get compiler-specific definitions like UNUSED */ |
| 84 | |
| 2500 | 85 #define ABORT abort |
| 86 | |
| 428 | 87 /* If we are not linking with Emacs proper, |
| 88 we can't use the relocating allocator | |
| 89 even if config.h says that we can. */ | |
| 771 | 90 #undef REGEX_REL_ALLOC |
| 428 | 91 |
| 544 | 92 /* defined in lisp.h */ |
| 93 #ifdef REGEX_MALLOC | |
| 94 #ifndef DECLARE_NOTHING | |
| 95 #define DECLARE_NOTHING struct nosuchstruct | |
| 96 #endif | |
| 97 #endif | |
| 98 | |
| 867 | 99 #define itext_ichar(str) ((Ichar) (str)[0]) |
| 100 #define itext_ichar_fmt(str, fmt, object) ((Ichar) (str)[0]) | |
| 101 #define itext_ichar_ascii_fmt(str, fmt, object) ((Ichar) (str)[0]) | |
| 428 | 102 |
| 103 #if (LONGBITS > INTBITS) | |
| 104 # define EMACS_INT long | |
| 105 #else | |
| 106 # define EMACS_INT int | |
| 107 #endif | |
| 108 | |
| 867 | 109 typedef int Ichar; |
| 110 | |
| 111 #define INC_IBYTEPTR(p) ((p)++) | |
| 112 #define INC_IBYTEPTR_FMT(p, fmt) ((p)++) | |
| 113 #define DEC_IBYTEPTR(p) ((p)--) | |
| 114 #define DEC_IBYTEPTR_FMT(p, fmt) ((p)--) | |
| 115 #define itext_ichar_len(ptr) 1 | |
| 116 #define itext_ichar_len_fmt(ptr, fmt) 1 | |
| 428 | 117 |
| 118 /* Define the syntax stuff for \<, \>, etc. */ | |
| 119 | |
| 120 /* This must be nonzero for the wordchar and notwordchar pattern | |
| 121 commands in re_match_2. */ | |
| 122 #ifndef Sword | |
| 123 #define Sword 1 | |
| 124 #endif | |
| 125 | |
| 126 #ifdef SYNTAX_TABLE | |
| 127 | |
| 128 extern char *re_syntax_table; | |
| 129 | |
| 130 #else /* not SYNTAX_TABLE */ | |
| 131 | |
| 132 /* How many characters in the character set. */ | |
| 133 #define CHAR_SET_SIZE 256 | |
| 134 | |
| 135 static char re_syntax_table[CHAR_SET_SIZE]; | |
| 136 | |
| 137 static void | |
| 138 init_syntax_once (void) | |
| 139 { | |
| 140 static int done = 0; | |
| 141 | |
| 142 if (!done) | |
| 143 { | |
| 442 | 144 const char *word_syntax_chars = |
| 428 | 145 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_"; |
| 146 | |
| 147 memset (re_syntax_table, 0, sizeof (re_syntax_table)); | |
| 148 | |
| 149 while (*word_syntax_chars) | |
| 647 | 150 re_syntax_table[(unsigned int) (*word_syntax_chars++)] = Sword; |
| 428 | 151 |
| 152 done = 1; | |
| 153 } | |
| 154 } | |
| 155 | |
| 446 | 156 #endif /* SYNTAX_TABLE */ |
| 428 | 157 |
| 826 | 158 #define SYNTAX(ignored, c) re_syntax_table[c] |
| 460 | 159 #undef SYNTAX_FROM_CACHE |
| 826 | 160 #define SYNTAX_FROM_CACHE SYNTAX |
| 161 | |
| 162 #define RE_TRANSLATE_1(c) translate[(unsigned char) (c)] | |
| 446 | 163 #define TRANSLATE_P(tr) tr |
| 164 | |
| 165 #endif /* emacs */ | |
| 428 | 166 |
| 2201 | 167 /* This is for other GNU distributions with internationalized messages. */ |
| 168 #if defined (I18N3) && (defined (HAVE_LIBINTL_H) || defined (_LIBC)) | |
| 169 # include <libintl.h> | |
| 170 #else | |
| 171 # define gettext(msgid) (msgid) | |
| 172 #endif | |
| 173 | |
| 428 | 174 /* Under XEmacs, this is needed because we don't define it elsewhere. */ |
| 175 #ifdef SWITCH_ENUM_BUG | |
| 176 #define SWITCH_ENUM_CAST(x) ((int)(x)) | |
| 177 #else | |
| 178 #define SWITCH_ENUM_CAST(x) (x) | |
| 179 #endif | |
| 180 | |
| 181 | |
| 182 /* Get the interface, including the syntax bits. */ | |
| 183 #include "regex.h" | |
| 184 | |
| 185 /* isalpha etc. are used for the character classes. */ | |
| 186 #include <ctype.h> | |
| 187 | |
| 188 /* Jim Meyering writes: | |
| 189 | |
| 190 "... Some ctype macros are valid only for character codes that | |
| 191 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when | |
| 192 using /bin/cc or gcc but without giving an ansi option). So, all | |
| 193 ctype uses should be through macros like ISPRINT... If | |
| 194 STDC_HEADERS is defined, then autoconf has verified that the ctype | |
| 195 macros don't need to be guarded with references to isascii. ... | |
| 196 Defining isascii to 1 should let any compiler worth its salt | |
| 197 eliminate the && through constant folding." */ | |
| 198 | |
| 199 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) | |
| 200 #define ISASCII_1(c) 1 | |
| 201 #else | |
| 202 #define ISASCII_1(c) isascii(c) | |
| 203 #endif | |
| 204 | |
| 205 #ifdef MULE | |
| 206 /* The IS*() macros can be passed any character, including an extended | |
| 207 one. We need to make sure there are no crashes, which would occur | |
| 208 otherwise due to out-of-bounds array references. */ | |
| 209 #define ISASCII(c) (((EMACS_UINT) (c)) < 0x100 && ISASCII_1 (c)) | |
| 210 #else | |
| 211 #define ISASCII(c) ISASCII_1 (c) | |
| 212 #endif /* MULE */ | |
| 213 | |
| 214 #ifdef isblank | |
| 215 #define ISBLANK(c) (ISASCII (c) && isblank (c)) | |
| 216 #else | |
| 217 #define ISBLANK(c) ((c) == ' ' || (c) == '\t') | |
| 218 #endif | |
| 219 #ifdef isgraph | |
| 220 #define ISGRAPH(c) (ISASCII (c) && isgraph (c)) | |
| 221 #else | |
| 222 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) | |
| 223 #endif | |
| 224 | |
| 225 #define ISPRINT(c) (ISASCII (c) && isprint (c)) | |
| 226 #define ISDIGIT(c) (ISASCII (c) && isdigit (c)) | |
| 227 #define ISALNUM(c) (ISASCII (c) && isalnum (c)) | |
| 228 #define ISALPHA(c) (ISASCII (c) && isalpha (c)) | |
| 229 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) | |
| 230 #define ISLOWER(c) (ISASCII (c) && islower (c)) | |
| 231 #define ISPUNCT(c) (ISASCII (c) && ispunct (c)) | |
| 232 #define ISSPACE(c) (ISASCII (c) && isspace (c)) | |
| 233 #define ISUPPER(c) (ISASCII (c) && isupper (c)) | |
| 234 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) | |
| 235 | |
| 236 #ifndef NULL | |
| 237 #define NULL (void *)0 | |
| 238 #endif | |
| 239 | |
| 240 /* We remove any previous definition of `SIGN_EXTEND_CHAR', | |
| 241 since ours (we hope) works properly with all combinations of | |
| 242 machines, compilers, `char' and `unsigned char' argument types. | |
| 243 (Per Bothner suggested the basic approach.) */ | |
| 244 #undef SIGN_EXTEND_CHAR | |
| 245 #if __STDC__ | |
| 246 #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) | |
| 247 #else /* not __STDC__ */ | |
| 248 /* As in Harbison and Steele. */ | |
| 249 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) | |
| 250 #endif | |
| 251 | |
| 252 /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we | |
| 253 use `alloca' instead of `malloc'. This is because using malloc in | |
| 254 re_search* or re_match* could cause memory leaks when C-g is used in | |
| 255 Emacs; also, malloc is slower and causes storage fragmentation. On | |
| 256 the other hand, malloc is more portable, and easier to debug. | |
| 257 | |
| 258 Because we sometimes use alloca, some routines have to be macros, | |
| 259 not functions -- `alloca'-allocated space disappears at the end of the | |
| 260 function it is called in. */ | |
| 261 | |
| 1333 | 262 #ifndef emacs |
| 263 #define ALLOCA alloca | |
| 264 #define xmalloc malloc | |
| 265 #define xrealloc realloc | |
| 1726 | 266 #define xfree(x,type) free (x) |
| 1333 | 267 #endif |
| 268 | |
| 269 #ifdef emacs | |
| 270 #define ALLOCA_GARBAGE_COLLECT() \ | |
| 271 do \ | |
| 272 { \ | |
| 273 if (need_to_check_c_alloca) \ | |
| 274 xemacs_c_alloca (0); \ | |
| 275 } while (0) | |
| 276 #elif defined (C_ALLOCA) | |
| 277 #define ALLOCA_GARBAGE_COLLECT() alloca (0) | |
| 278 #else | |
| 279 #define ALLOCA_GARBAGE_COLLECT() | |
| 280 #endif | |
| 281 | |
| 282 #ifndef emacs | |
| 283 /* So that we can conditionalize on just this symbol. */ | |
| 284 #undef ERROR_CHECK_MALLOC | |
| 285 #endif | |
| 286 | |
| 287 #ifdef ERROR_CHECK_MALLOC | |
| 288 /* When REL_ALLOC, malloc() is problematic because it could potentially | |
| 289 cause all rel-alloc()ed data -- including buffer text -- to be relocated. | |
| 290 We deal with this by checking for such relocation whenever we have | |
| 291 executed a statement that may call malloc() -- or alloca(), which may | |
| 292 end up calling malloc() in some circumstances -- and recomputing all | |
| 293 of our string pointers in re_match_2_internal() and re_search_2(). | |
| 294 However, if malloc() or alloca() happens and we don't know about it, | |
| 295 we could still be screwed. So we set up a system where we indicate all | |
| 296 places where we are prepared for malloc() or alloca(), and in any | |
| 297 other circumstances, calls to those functions (from anywhere inside of | |
| 2500 | 298 XEmacs!) will ABORT(). We do this even when REL_ALLOC is not defined |
| 1333 | 299 so that we catch these problems sooner, since many developers and beta |
| 300 testers will not be running with REL_ALLOC. */ | |
| 301 int regex_malloc_disallowed; | |
| 302 #define BEGIN_REGEX_MALLOC_OK() regex_malloc_disallowed = 0 | |
| 303 #define END_REGEX_MALLOC_OK() regex_malloc_disallowed = 1 | |
| 304 #define UNBIND_REGEX_MALLOC_CHECK() unbind_to (depth) | |
| 305 #else | |
| 306 #define BEGIN_REGEX_MALLOC_OK() | |
| 307 #define END_REGEX_MALLOC_OK() | |
| 308 #define UNBIND_REGEX_MALLOC_CHECK() | |
| 309 #endif | |
| 310 | |
| 311 | |
| 428 | 312 #ifdef REGEX_MALLOC |
| 313 | |
| 1333 | 314 #define REGEX_ALLOCATE xmalloc |
| 315 #define REGEX_REALLOCATE(source, osize, nsize) xrealloc (source, nsize) | |
| 316 #define REGEX_FREE xfree | |
| 428 | 317 |
| 318 #else /* not REGEX_MALLOC */ | |
| 319 | |
| 320 /* Emacs already defines alloca, sometimes. */ | |
| 321 #ifndef alloca | |
| 322 | |
| 323 /* Make alloca work the best possible way. */ | |
| 324 #ifdef __GNUC__ | |
| 325 #define alloca __builtin_alloca | |
| 771 | 326 #elif defined (__DECC) /* XEmacs: added next 3 lines, similar to config.h.in */ |
| 327 #include <alloca.h> | |
| 328 #pragma intrinsic(alloca) | |
| 428 | 329 #else /* not __GNUC__ */ |
| 330 #if HAVE_ALLOCA_H | |
| 331 #include <alloca.h> | |
| 332 #else /* not __GNUC__ or HAVE_ALLOCA_H */ | |
| 333 #ifndef _AIX /* Already did AIX, up at the top. */ | |
| 444 | 334 void *alloca (); |
| 428 | 335 #endif /* not _AIX */ |
| 446 | 336 #endif /* HAVE_ALLOCA_H */ |
| 337 #endif /* __GNUC__ */ | |
| 428 | 338 |
| 339 #endif /* not alloca */ | |
| 340 | |
| 1333 | 341 #define REGEX_ALLOCATE ALLOCA |
| 428 | 342 |
| 2367 | 343 /* !!#### Needs review */ |
| 428 | 344 /* Assumes a `char *destination' variable. */ |
| 345 #define REGEX_REALLOCATE(source, osize, nsize) \ | |
| 1333 | 346 (destination = (char *) ALLOCA (nsize), \ |
| 428 | 347 memmove (destination, source, osize), \ |
| 348 destination) | |
| 349 | |
| 1726 | 350 /* No need to do anything to free, after alloca. |
| 351 Do nothing! But inhibit gcc warning. */ | |
| 352 #define REGEX_FREE(arg,type) ((void)0) | |
| 428 | 353 |
| 446 | 354 #endif /* REGEX_MALLOC */ |
| 428 | 355 |
| 356 /* Define how to allocate the failure stack. */ | |
| 357 | |
| 771 | 358 #ifdef REGEX_REL_ALLOC |
| 428 | 359 #define REGEX_ALLOCATE_STACK(size) \ |
| 1346 | 360 r_alloc ((unsigned char **) &failure_stack_ptr, (size)) |
| 428 | 361 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \ |
| 1346 | 362 r_re_alloc ((unsigned char **) &failure_stack_ptr, (nsize)) |
| 428 | 363 #define REGEX_FREE_STACK(ptr) \ |
| 1346 | 364 r_alloc_free ((unsigned char **) &failure_stack_ptr) |
| 428 | 365 |
| 771 | 366 #else /* not REGEX_REL_ALLOC */ |
| 428 | 367 |
| 368 #ifdef REGEX_MALLOC | |
| 369 | |
| 1333 | 370 #define REGEX_ALLOCATE_STACK xmalloc |
| 371 #define REGEX_REALLOCATE_STACK(source, osize, nsize) xrealloc (source, nsize) | |
| 1726 | 372 #define REGEX_FREE_STACK(arg) xfree (arg, fail_stack_elt_t *) |
| 428 | 373 |
| 374 #else /* not REGEX_MALLOC */ | |
| 375 | |
| 1333 | 376 #define REGEX_ALLOCATE_STACK ALLOCA |
| 428 | 377 |
| 378 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \ | |
| 379 REGEX_REALLOCATE (source, osize, nsize) | |
| 380 /* No need to explicitly free anything. */ | |
| 381 #define REGEX_FREE_STACK(arg) | |
| 382 | |
| 446 | 383 #endif /* REGEX_MALLOC */ |
| 771 | 384 #endif /* REGEX_REL_ALLOC */ |
| 428 | 385 |
| 386 | |
| 387 /* True if `size1' is nonzero and PTR is pointing anywhere inside | |
| 388 `string1' or just past its end. This works if PTR is NULL, which is | |
| 389 a good thing. */ | |
| 390 #define FIRST_STRING_P(ptr) \ | |
| 391 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) | |
| 392 | |
| 393 /* (Re)Allocate N items of type T using malloc, or fail. */ | |
| 1333 | 394 #define TALLOC(n, t) ((t *) xmalloc ((n) * sizeof (t))) |
| 395 #define RETALLOC(addr, n, t) ((addr) = (t *) xrealloc (addr, (n) * sizeof (t))) | |
| 428 | 396 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) |
| 397 | |
| 398 #define BYTEWIDTH 8 /* In bits. */ | |
| 399 | |
| 434 | 400 #define STREQ(s1, s2) (strcmp (s1, s2) == 0) |
| 428 | 401 |
| 402 #undef MAX | |
| 403 #undef MIN | |
| 404 #define MAX(a, b) ((a) > (b) ? (a) : (b)) | |
| 405 #define MIN(a, b) ((a) < (b) ? (a) : (b)) | |
| 406 | |
| 446 | 407 /* Type of source-pattern and string chars. */ |
| 408 typedef const unsigned char re_char; | |
| 409 | |
| 460 | 410 typedef char re_bool; |
| 428 | 411 #define false 0 |
| 412 #define true 1 | |
| 413 | |
| 414 | |
| 1346 | 415 #ifdef emacs |
| 416 | |
#ifdef MULE
/* Range table object created once by vars_of_regex().  */
Lisp_Object Vthe_lisp_rangetab;
#endif /* MULE */

/* Initialize the Lisp-visible state of this file.  Under MULE this
   creates the range table held in Vthe_lisp_rangetab (closed at both
   ends) and registers the variable with staticpro(); without MULE
   there is nothing to do.  */
void
vars_of_regex (void)
{
#ifdef MULE
  Vthe_lisp_rangetab = Fmake_range_table (Qstart_closed_end_closed);
  staticpro (&Vthe_lisp_rangetab);
#endif /* MULE */
}
| 436 | |
| 437 /* Convert an offset from the start of the logical text string formed by | |
| 438 concatenating the two strings together into a character position in the | |
| 439 Lisp buffer or string that the text represents. Knows that | |
| 440 when handling buffer text, the "string" we're passed in is always | |
| 441 BEGV - ZV. */ | |
| 442 | |
| 443 static Charxpos | |
| 444 offset_to_charxpos (Lisp_Object lispobj, int off) | |
| 445 { | |
| 446 if (STRINGP (lispobj)) | |
| 447 return string_index_byte_to_char (lispobj, off); | |
| 448 else if (BUFFERP (lispobj)) | |
| 449 return bytebpos_to_charbpos (XBUFFER (lispobj), | |
| 450 off + BYTE_BUF_BEGV (XBUFFER (lispobj))); | |
| 451 else | |
| 452 return 0; | |
| 453 } | |
| 454 | |
| 455 #ifdef REL_ALLOC | |
| 456 | |
| 457 /* STRING1 is the value of STRING1 given to re_match_2(). LISPOBJ is | |
| 458 the Lisp object (if any) from which the string is taken. If LISPOBJ | |
| 459 is a buffer, return a relocation offset to be added to all pointers to | |
| 460 string data so that they will be accurate again, after an allocation or | |
| 461 reallocation that potentially relocated the buffer data. | |
| 462 */ | |
| 463 static Bytecount | |
| 464 offset_post_relocation (Lisp_Object lispobj, Ibyte *orig_buftext) | |
| 465 { | |
| 466 if (!BUFFERP (lispobj)) | |
| 467 return 0; | |
| 468 return (BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), | |
| 469 BYTE_BUF_BEGV (XBUFFER (lispobj))) - | |
| 470 orig_buftext); | |
| 471 } | |
| 472 | |
| 473 #endif /* REL_ALLOC */ | |
| 474 | |
| 475 #ifdef ERROR_CHECK_MALLOC | |
| 476 | |
| 477 /* NOTE that this can run malloc() so you need to adjust afterwards. */ | |
| 478 | |
| 479 static int | |
| 480 bind_regex_malloc_disallowed (int value) | |
| 481 { | |
| 482 /* Tricky, because the act of binding can run malloc(). */ | |
| 483 int old_regex_malloc_disallowed = regex_malloc_disallowed; | |
| 484 int depth; | |
| 485 regex_malloc_disallowed = 0; | |
| 486 depth = record_unwind_protect_restoring_int (®ex_malloc_disallowed, | |
| 487 old_regex_malloc_disallowed); | |
| 488 regex_malloc_disallowed = value; | |
| 489 return depth; | |
| 490 } | |
| 491 | |
| 492 #endif /* ERROR_CHECK_MALLOC */ | |
| 493 | |
| 494 #endif /* emacs */ | |
| 495 | |
| 496 | |
/* Command codes found in compiled regular expressions.  Some opcodes
   are followed by argument bytes, and an opcode may give those bytes
   any interpretation it likes.  Zero bytes can occur inside a compiled
   regular expression.  */

typedef enum
{
  no_op = 0,

  /* Succeed at once -- no more backtracking.  */
  succeed,

  /* One byte holding N follows, then N literal bytes.  */
  exactn,

  /* Match any (more or less) character.  */
  anychar,

  /* Match any single character that belongs to the given set.  The
     first following byte is the number of bitmap bytes; the bitmap
     bytes come next and say which characters are members.  Bits within
     a byte are ordered low-bit-first, and a character is in the set
     when its bit is 1.  A character too large to have a bit in the map
     is automatically not in the set.  */
  charset,

  /* Takes the same parameters as charset, but matches any character
     that is NOT one of those specified.  */
  charset_not,

  /* Begin remembering the matched text so it can be stored in a
     register.  One byte follows with the register number, in the range
     1 to the pattern buffer's re_ngroups field, and then one byte with
     the number of groups inner to this one.  (The inner-group count is
     part of start_memory only because the on_failure_jump of
     re_match_2 needs it.)  */
  start_memory,

  /* Stop remembering the matched text and store it in a memory
     register.  One byte follows with the register number, in the range
     1 to `re_ngroups' in the pattern buffer, and one byte with the
     number of inner groups, exactly as for `start_memory'.  (The
     inner-group count is needed here because there is no easy way to
     find the corresponding start_memory from a stop_memory.)  */
  stop_memory,

  /* Match a duplicate of something remembered.  One byte follows
     holding the register number.  */
  duplicate,

  /* Fail unless at the beginning of a line.  */
  begline,

  /* Fail unless at the end of a line.  */
  endline,

  /* Succeed if at the beginning of the buffer (if emacs) or at the
     beginning of the string to be matched (if not).  */
  begbuf,

  /* The analogue of begbuf for the end of the buffer/string.  */
  endbuf,

  /* A two-byte relative address follows; jump to it.  */
  jump,

  /* Like jump, but also marks the end of an alternative.  */
  jump_past_alt,

  /* A two-byte relative address follows, giving the place to resume at
     in case of failure.  */
  on_failure_jump,

  /* Like on_failure_jump, but when executed it pushes a placeholder
     rather than the current string position.  */
  on_failure_keep_string_jump,

  /* Discard the latest failure point, then jump to the two-byte
     relative address that follows.  */
  pop_failure_jump,

  /* Becomes a pop_failure_jump if it is known that no backtracking
     will be needed to match; otherwise becomes a jump.  Used to jump
     back to the beginning of a repeat.  If what follows this jump
     clearly won't match what the repeat does, so that backtracking out
     of repetitions already matched is certain to be useless, it is
     changed to a pop_failure_jump.  A two-byte address follows.  */
  maybe_pop_jump,

  /* Jump to the two-byte address that follows and push a dummy failure
     point.  That failure point is thrown away if an attempt is made to
     use it for a failure.  A `+' construct makes this before the first
     repeat.  Also serves as an intermediary kind of jump when
     compiling an alternative.  */
  dummy_failure_jump,

  /* Push a dummy failure point and continue.  Used at the end of
     alternatives.  */
  push_dummy_failure,

  /* A two-byte relative address and a two-byte number N follow.  After
     matching N times, jump to the address upon failure.  */
  succeed_n,

  /* A two-byte relative address and a two-byte number N follow.  Jump
     to the address N times, then fail.  */
  jump_n,

  /* Set the location at the following two-byte relative address to the
     subsequent two-byte number.  The address *includes* the two bytes
     of number.  */
  set_number_at,

  wordchar,     /* Match any word-constituent character.  */
  notwordchar,  /* Match any character that is not a word-constituent.  */

  wordbeg,      /* Succeed if at the beginning of a word.  */
  wordend,      /* Succeed if at the end of a word.  */

  wordbound,    /* Succeed if at a word boundary.  */
  notwordbound  /* Succeed if not at a word boundary.  */

  /* The conditional groups below each start with a comma so that the
     enumerator list never ends in a trailing comma, whichever of them
     are compiled in.  */
#ifdef emacs
  ,before_dot,  /* Succeed if before point.  */
  at_dot,       /* Succeed if at point.  */
  after_dot,    /* Succeed if after point.  */

  /* Match any character whose syntax is the one specified.  One byte
     follows containing a syntax code, e.g., Sword.  */
  syntaxspec,

  /* Match any character whose syntax is not the one specified.  */
  notsyntaxspec

#endif /* emacs */

#ifdef MULE
  /* Extra opcodes needed to work properly with XEmacs/Mule characters
     (which may take up more than one byte).  */

  /* Match any character belonging to the specified set.  The set is
     stored in "unified range-table format"; see rangetab.c.  Unlike
     the `charset' opcode, this can handle arbitrary characters.  */
  ,charset_mule,

  /* Takes the same parameters as charset_mule, but matches any
     character that is not one of those specified.  */
  charset_mule_not,

  /* 97/2/17 jhod: The following two were merged back in from the Mule
     2.3 code to enable some language specific processing.  */
  categoryspec,     /* Match entries in the character category tables.  */
  notcategoryspec   /* The opposite of the above.  */
#endif /* MULE */

} re_opcode_t;
| 655 | |
| 656 /* Common operations on the compiled pattern. */ | |
| 657 | |
/* Write NUMBER into the two contiguous bytes that start at DESTINATION,
   low-order byte first.  */

#define STORE_NUMBER(destination, number)                               \
  do                                                                    \
    {                                                                   \
      (destination)[0] = (number) & 0xff;                               \
      (destination)[1] = (number) >> 8;                                 \
    }                                                                   \
  while (0)
| 665 | |
/* Like STORE_NUMBER, but afterwards advance DESTINATION to the byte
   just past the stored number.  DESTINATION must therefore be an
   lvalue.  */

#define STORE_NUMBER_AND_INCR(destination, number)                      \
  do                                                                    \
    {                                                                   \
      STORE_NUMBER (destination, number);                               \
      (destination) += 2;                                               \
    }                                                                   \
  while (0)
| 675 | |
/* Put into DESTINATION the number stored in the two contiguous bytes
   starting at SOURCE.  The first byte is the low-order byte; the second
   byte is sign-extended, so the result is a signed 16-bit quantity.

   The high byte is scaled by multiplication rather than by `<< 8':
   left-shifting a negative value is undefined behavior in ISO C, and
   SIGN_EXTEND_CHAR yields a negative value whenever the top bit of the
   high byte is set.  */

#define EXTRACT_NUMBER(destination, source)                             \
  do {                                                                  \
    (destination) = *(source) & 0377;                                   \
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8);     \
  } while (0)
| 684 | |
| 685 #ifdef DEBUG | |
| 686 static void | |
| 446 | 687 extract_number (int *dest, re_char *source) |
| 428 | 688 { |
| 689 int temp = SIGN_EXTEND_CHAR (*(source + 1)); | |
| 690 *dest = *source & 0377; | |
| 691 *dest += temp << 8; | |
| 692 } | |
| 693 | |
| 694 #ifndef EXTRACT_MACROS /* To debug the macros. */ | |
| 695 #undef EXTRACT_NUMBER | |
| 696 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) | |
| 697 #endif /* not EXTRACT_MACROS */ | |
| 698 | |
| 699 #endif /* DEBUG */ | |
| 700 | |
/* Like EXTRACT_NUMBER, but afterwards advance SOURCE past the two bytes
   of the number.  SOURCE must be an lvalue.  */

#define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
  do                                                                    \
    {                                                                   \
      EXTRACT_NUMBER (destination, source);                             \
      (source) += 2;                                                    \
    }                                                                   \
  while (0)
| 709 | |
| 710 #ifdef DEBUG | |
/* Function form of EXTRACT_NUMBER_AND_INCR: decode the two-byte number
   at *SOURCE into *DESTINATION with extract_number(), then move *SOURCE
   forward past those two bytes.  */
static void
extract_number_and_incr (int *destination, unsigned char **source)
{
  unsigned char *number_start = *source;

  extract_number (destination, number_start);
  *source = number_start + 2;
}
| 717 | |
| 718 #ifndef EXTRACT_MACROS | |
| 719 #undef EXTRACT_NUMBER_AND_INCR | |
| 720 #define EXTRACT_NUMBER_AND_INCR(dest, src) \ | |
| 721 extract_number_and_incr (&dest, &src) | |
| 722 #endif /* not EXTRACT_MACROS */ | |
| 723 | |
| 724 #endif /* DEBUG */ | |
| 725 | |
| 726 /* If DEBUG is defined, Regex prints many voluminous messages about what | |
| 727 it is doing (if the variable `debug' is nonzero). If linked with the | |
| 728 main program in `iregex.c', you can enter patterns and strings | |
| 729 interactively. And if linked with the main program in `main.c' and | |
| 730 the other test files, you can run the already-written tests. */ | |
| 731 | |
| 732 #if defined (DEBUG) | |
| 733 | |
| 734 /* We use standard I/O for debugging. */ | |
| 735 #include <stdio.h> | |
| 736 | |
| 737 #ifndef emacs | |
| 738 /* XEmacs provides its own version of assert() */ | |
| 739 /* It is useful to test things that ``must'' be true when debugging. */ | |
| 740 #include <assert.h> | |
| 741 #endif | |
| 742 | |
| 743 static int debug = 0; | |
| 744 | |
| 745 #define DEBUG_STATEMENT(e) e | |
| 746 #define DEBUG_PRINT1(x) if (debug) printf (x) | |
| 747 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) | |
| 748 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) | |
| 749 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) | |
| 750 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ | |
| 751 if (debug) print_partial_compiled_pattern (s, e) | |
| 752 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ | |
| 753 if (debug) print_double_string (w, s1, sz1, s2, sz2) | |
| 754 | |
| 755 | |
| 756 /* Print the fastmap in human-readable form. */ | |
| 757 | |
| 758 static void | |
| 759 print_fastmap (char *fastmap) | |
| 760 { | |
| 647 | 761 int was_a_range = 0; |
| 762 int i = 0; | |
| 428 | 763 |
| 764 while (i < (1 << BYTEWIDTH)) | |
| 765 { | |
| 766 if (fastmap[i++]) | |
| 767 { | |
| 768 was_a_range = 0; | |
| 769 putchar (i - 1); | |
| 770 while (i < (1 << BYTEWIDTH) && fastmap[i]) | |
| 771 { | |
| 772 was_a_range = 1; | |
| 773 i++; | |
| 774 } | |
| 775 if (was_a_range) | |
| 776 { | |
| 777 putchar ('-'); | |
| 778 putchar (i - 1); | |
| 779 } | |
| 780 } | |
| 781 } | |
| 782 putchar ('\n'); | |
| 783 } | |
| 784 | |
| 785 | |
| 786 /* Print a compiled pattern string in human-readable form, starting at | |
| 787 the START pointer into it and ending just before the pointer END. */ | |
| 788 | |
| 789 static void | |
| 446 | 790 print_partial_compiled_pattern (re_char *start, re_char *end) |
| 428 | 791 { |
| 792 int mcnt, mcnt2; | |
| 446 | 793 unsigned char *p = (unsigned char *) start; |
| 794 re_char *pend = end; | |
| 428 | 795 |
| 796 if (start == NULL) | |
| 797 { | |
| 798 puts ("(null)"); | |
| 799 return; | |
| 800 } | |
| 801 | |
| 802 /* Loop over pattern commands. */ | |
| 803 while (p < pend) | |
| 804 { | |
| 805 printf ("%ld:\t", (long)(p - start)); | |
| 806 | |
| 807 switch ((re_opcode_t) *p++) | |
| 808 { | |
| 809 case no_op: | |
| 810 printf ("/no_op"); | |
| 811 break; | |
| 812 | |
| 813 case exactn: | |
| 814 mcnt = *p++; | |
| 815 printf ("/exactn/%d", mcnt); | |
| 816 do | |
| 817 { | |
| 818 putchar ('/'); | |
| 819 putchar (*p++); | |
| 820 } | |
| 821 while (--mcnt); | |
| 822 break; | |
| 823 | |
| 824 case start_memory: | |
| 825 mcnt = *p++; | |
| 826 printf ("/start_memory/%d/%d", mcnt, *p++); | |
| 827 break; | |
| 828 | |
| 829 case stop_memory: | |
| 830 mcnt = *p++; | |
| 831 printf ("/stop_memory/%d/%d", mcnt, *p++); | |
| 832 break; | |
| 833 | |
| 834 case duplicate: | |
| 835 printf ("/duplicate/%d", *p++); | |
| 836 break; | |
| 837 | |
| 838 case anychar: | |
| 839 printf ("/anychar"); | |
| 840 break; | |
| 841 | |
| 842 case charset: | |
| 843 case charset_not: | |
| 844 { | |
| 845 REGISTER int c, last = -100; | |
| 846 REGISTER int in_range = 0; | |
| 847 | |
| 848 printf ("/charset [%s", | |
| 849 (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); | |
| 850 | |
| 851 assert (p + *p < pend); | |
| 852 | |
| 853 for (c = 0; c < 256; c++) | |
| 854 if (((unsigned char) (c / 8) < *p) | |
| 855 && (p[1 + (c/8)] & (1 << (c % 8)))) | |
| 856 { | |
| 857 /* Are we starting a range? */ | |
| 858 if (last + 1 == c && ! in_range) | |
| 859 { | |
| 860 putchar ('-'); | |
| 861 in_range = 1; | |
| 862 } | |
| 863 /* Have we broken a range? */ | |
| 864 else if (last + 1 != c && in_range) | |
| 865 { | |
| 866 putchar (last); | |
| 867 in_range = 0; | |
| 868 } | |
| 869 | |
| 870 if (! in_range) | |
| 871 putchar (c); | |
| 872 | |
| 873 last = c; | |
| 874 } | |
| 875 | |
| 876 if (in_range) | |
| 877 putchar (last); | |
| 878 | |
| 879 putchar (']'); | |
| 880 | |
| 881 p += 1 + *p; | |
| 882 } | |
| 883 break; | |
| 884 | |
| 885 #ifdef MULE | |
| 886 case charset_mule: | |
| 887 case charset_mule_not: | |
| 888 { | |
| 889 int nentries, i; | |
| 890 | |
| 891 printf ("/charset_mule [%s", | |
| 892 (re_opcode_t) *(p - 1) == charset_mule_not ? "^" : ""); | |
| 893 nentries = unified_range_table_nentries (p); | |
| 894 for (i = 0; i < nentries; i++) | |
| 895 { | |
| 896 EMACS_INT first, last; | |
| 897 Lisp_Object dummy_val; | |
| 898 | |
| 899 unified_range_table_get_range (p, i, &first, &last, | |
| 900 &dummy_val); | |
| 901 if (first < 0x100) | |
| 902 putchar (first); | |
| 903 else | |
| 904 printf ("(0x%lx)", (long)first); | |
| 905 if (first != last) | |
| 906 { | |
| 907 putchar ('-'); | |
| 908 if (last < 0x100) | |
| 909 putchar (last); | |
| 910 else | |
| 911 printf ("(0x%lx)", (long)last); | |
| 912 } | |
| 913 } | |
| 914 putchar (']'); | |
| 915 p += unified_range_table_bytes_used (p); | |
| 916 } | |
| 917 break; | |
| 918 #endif | |
| 919 | |
| 920 case begline: | |
| 921 printf ("/begline"); | |
| 922 break; | |
| 923 | |
| 924 case endline: | |
| 925 printf ("/endline"); | |
| 926 break; | |
| 927 | |
| 928 case on_failure_jump: | |
| 929 extract_number_and_incr (&mcnt, &p); | |
| 930 printf ("/on_failure_jump to %ld", (long)(p + mcnt - start)); | |
| 931 break; | |
| 932 | |
| 933 case on_failure_keep_string_jump: | |
| 934 extract_number_and_incr (&mcnt, &p); | |
| 935 printf ("/on_failure_keep_string_jump to %ld", (long)(p + mcnt - start)); | |
| 936 break; | |
| 937 | |
| 938 case dummy_failure_jump: | |
| 939 extract_number_and_incr (&mcnt, &p); | |
| 940 printf ("/dummy_failure_jump to %ld", (long)(p + mcnt - start)); | |
| 941 break; | |
| 942 | |
| 943 case push_dummy_failure: | |
| 944 printf ("/push_dummy_failure"); | |
| 945 break; | |
| 946 | |
| 947 case maybe_pop_jump: | |
| 948 extract_number_and_incr (&mcnt, &p); | |
| 949 printf ("/maybe_pop_jump to %ld", (long)(p + mcnt - start)); | |
| 950 break; | |
| 951 | |
| 952 case pop_failure_jump: | |
| 953 extract_number_and_incr (&mcnt, &p); | |
| 954 printf ("/pop_failure_jump to %ld", (long)(p + mcnt - start)); | |
| 955 break; | |
| 956 | |
| 957 case jump_past_alt: | |
| 958 extract_number_and_incr (&mcnt, &p); | |
| 959 printf ("/jump_past_alt to %ld", (long)(p + mcnt - start)); | |
| 960 break; | |
| 961 | |
| 962 case jump: | |
| 963 extract_number_and_incr (&mcnt, &p); | |
| 964 printf ("/jump to %ld", (long)(p + mcnt - start)); | |
| 965 break; | |
| 966 | |
| 967 case succeed_n: | |
| 968 extract_number_and_incr (&mcnt, &p); | |
| 969 extract_number_and_incr (&mcnt2, &p); | |
| 970 printf ("/succeed_n to %ld, %d times", (long)(p + mcnt - start), mcnt2); | |
| 971 break; | |
| 972 | |
| 973 case jump_n: | |
| 974 extract_number_and_incr (&mcnt, &p); | |
| 975 extract_number_and_incr (&mcnt2, &p); | |
| 976 printf ("/jump_n to %ld, %d times", (long)(p + mcnt - start), mcnt2); | |
| 977 break; | |
| 978 | |
| 979 case set_number_at: | |
| 980 extract_number_and_incr (&mcnt, &p); | |
| 981 extract_number_and_incr (&mcnt2, &p); | |
| 982 printf ("/set_number_at location %ld to %d", (long)(p + mcnt - start), mcnt2); | |
| 983 break; | |
| 984 | |
| 985 case wordbound: | |
| 986 printf ("/wordbound"); | |
| 987 break; | |
| 988 | |
| 989 case notwordbound: | |
| 990 printf ("/notwordbound"); | |
| 991 break; | |
| 992 | |
| 993 case wordbeg: | |
| 994 printf ("/wordbeg"); | |
| 995 break; | |
| 996 | |
| 997 case wordend: | |
| 998 printf ("/wordend"); | |
| 999 | |
| 1000 #ifdef emacs | |
| 1001 case before_dot: | |
| 1002 printf ("/before_dot"); | |
| 1003 break; | |
| 1004 | |
| 1005 case at_dot: | |
| 1006 printf ("/at_dot"); | |
| 1007 break; | |
| 1008 | |
| 1009 case after_dot: | |
| 1010 printf ("/after_dot"); | |
| 1011 break; | |
| 1012 | |
| 1013 case syntaxspec: | |
| 1014 printf ("/syntaxspec"); | |
| 1015 mcnt = *p++; | |
| 1016 printf ("/%d", mcnt); | |
| 1017 break; | |
| 1018 | |
| 1019 case notsyntaxspec: | |
| 1020 printf ("/notsyntaxspec"); | |
| 1021 mcnt = *p++; | |
| 1022 printf ("/%d", mcnt); | |
| 1023 break; | |
| 1024 | |
| 1025 #ifdef MULE | |
| 1026 /* 97/2/17 jhod Mule category patch */ | |
| 1027 case categoryspec: | |
| 1028 printf ("/categoryspec"); | |
| 1029 mcnt = *p++; | |
| 1030 printf ("/%d", mcnt); | |
| 1031 break; | |
| 1032 | |
| 1033 case notcategoryspec: | |
| 1034 printf ("/notcategoryspec"); | |
| 1035 mcnt = *p++; | |
| 1036 printf ("/%d", mcnt); | |
| 1037 break; | |
| 1038 /* end of category patch */ | |
| 1039 #endif /* MULE */ | |
| 1040 #endif /* emacs */ | |
| 1041 | |
| 1042 case wordchar: | |
| 1043 printf ("/wordchar"); | |
| 1044 break; | |
| 1045 | |
| 1046 case notwordchar: | |
| 1047 printf ("/notwordchar"); | |
| 1048 break; | |
| 1049 | |
| 1050 case begbuf: | |
| 1051 printf ("/begbuf"); | |
| 1052 break; | |
| 1053 | |
| 1054 case endbuf: | |
| 1055 printf ("/endbuf"); | |
| 1056 break; | |
| 1057 | |
| 1058 default: | |
| 1059 printf ("?%d", *(p-1)); | |
| 1060 } | |
| 1061 | |
| 1062 putchar ('\n'); | |
| 1063 } | |
| 1064 | |
| 1065 printf ("%ld:\tend of pattern.\n", (long)(p - start)); | |
| 1066 } | |
| 1067 | |
| 1068 | |
| 1069 static void | |
| 1070 print_compiled_pattern (struct re_pattern_buffer *bufp) | |
| 1071 { | |
| 446 | 1072 re_char *buffer = bufp->buffer; |
| 428 | 1073 |
| 1074 print_partial_compiled_pattern (buffer, buffer + bufp->used); | |
| 1075 printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used, | |
| 1076 bufp->allocated); | |
| 1077 | |
| 1078 if (bufp->fastmap_accurate && bufp->fastmap) | |
| 1079 { | |
| 1080 printf ("fastmap: "); | |
| 1081 print_fastmap (bufp->fastmap); | |
| 1082 } | |
| 1083 | |
| 1084 printf ("re_nsub: %ld\t", (long)bufp->re_nsub); | |
| 502 | 1085 printf ("re_ngroups: %ld\t", (long)bufp->re_ngroups); |
| 428 | 1086 printf ("regs_alloc: %d\t", bufp->regs_allocated); |
| 1087 printf ("can_be_null: %d\t", bufp->can_be_null); | |
| 1088 printf ("newline_anchor: %d\n", bufp->newline_anchor); | |
| 1089 printf ("no_sub: %d\t", bufp->no_sub); | |
| 1090 printf ("not_bol: %d\t", bufp->not_bol); | |
| 1091 printf ("not_eol: %d\t", bufp->not_eol); | |
| 1092 printf ("syntax: %d\n", bufp->syntax); | |
| 1093 /* Perhaps we should print the translate table? */ | |
| 1094 /* and maybe the category table? */ | |
| 502 | 1095 |
| 1096 if (bufp->external_to_internal_register) | |
| 1097 { | |
| 1098 int i; | |
| 1099 | |
| 1100 printf ("external_to_internal_register:\n"); | |
| 1101 for (i = 0; i <= bufp->re_nsub; i++) | |
| 1102 { | |
| 1103 if (i > 0) | |
| 1104 printf (", "); | |
| 1105 printf ("%d -> %d", i, bufp->external_to_internal_register[i]); | |
| 1106 } | |
| 1107 printf ("\n"); | |
| 1108 } | |
| 428 | 1109 } |
| 1110 | |
| 1111 | |
| 1112 static void | |
| 446 | 1113 print_double_string (re_char *where, re_char *string1, int size1, |
| 1114 re_char *string2, int size2) | |
| 428 | 1115 { |
| 1116 if (where == NULL) | |
| 1117 printf ("(null)"); | |
| 1118 else | |
| 1119 { | |
| 647 | 1120 int this_char; |
| 428 | 1121 |
| 1122 if (FIRST_STRING_P (where)) | |
| 1123 { | |
| 1124 for (this_char = where - string1; this_char < size1; this_char++) | |
| 1125 putchar (string1[this_char]); | |
| 1126 | |
| 1127 where = string2; | |
| 1128 } | |
| 1129 | |
| 1130 for (this_char = where - string2; this_char < size2; this_char++) | |
| 1131 putchar (string2[this_char]); | |
| 1132 } | |
| 1133 } | |
| 1134 | |
| 1135 #else /* not DEBUG */ | |
| 1136 | |
#ifndef emacs
/* Outside Emacs, replace assert with an expression that ignores its
   argument, so assertions in this file do nothing in non-DEBUG builds.  */
#undef assert
#define assert(e) ((void) (1))
#endif

/* Without DEBUG, every tracing macro expands to nothing, so calls to
   them elsewhere in the file compile away.  */
#define DEBUG_STATEMENT(e)
#define DEBUG_PRINT1(x)
#define DEBUG_PRINT2(x1, x2)
#define DEBUG_PRINT3(x1, x2, x3)
#define DEBUG_PRINT4(x1, x2, x3, x4)
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
| 1149 | |
| 446 | 1150 #endif /* DEBUG */ |
| 428 | 1151 |
| 1152 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can | |
| 1153 also be assigned to arbitrarily: each pattern buffer stores its own | |
| 1154 syntax, so it can be changed between regex compilations. */ | |
| 1155 /* This has no initializer because initialized variables in Emacs | |
| 1156 become read-only after dumping. */ | |
| 1157 reg_syntax_t re_syntax_options; | |
| 1158 | |
| 1159 | |
| 1160 /* Specify the precise syntax of regexps for compilation. This provides | |
| 1161 for compatibility for various utilities which historically have | |
| 1162 different, incompatible syntaxes. | |
| 1163 | |
| 1164 The argument SYNTAX is a bit mask comprised of the various bits | |
| 1165 defined in regex.h. We return the old syntax. */ | |
| 1166 | |
| 1167 reg_syntax_t | |
| 1168 re_set_syntax (reg_syntax_t syntax) | |
| 1169 { | |
| 1170 reg_syntax_t ret = re_syntax_options; | |
| 1171 | |
| 1172 re_syntax_options = syntax; | |
| 1173 return ret; | |
| 1174 } | |
| 1175 | |
| 1176 /* This table gives an error message for each of the error codes listed | |
| 1177 in regex.h. Obviously the order here has to be same as there. | |
| 1178 POSIX doesn't require that we do anything for REG_NOERROR, | |
| 1179 but why not be nice? */ | |
| 1180 | |
/* Message text for each regex error code, in the order given by the
   trailing comments on each entry.  The last entries exist only when
   `emacs' (and, for the final two, `MULE') is defined, so the table
   length depends on the build configuration.  */
static const char *re_error_msgid[] =
{
  "Success",					/* REG_NOERROR */
  "No match",					/* REG_NOMATCH */
  "Invalid regular expression",			/* REG_BADPAT */
  "Invalid collation character",		/* REG_ECOLLATE */
  "Invalid character class name",		/* REG_ECTYPE */
  "Trailing backslash",				/* REG_EESCAPE */
  "Invalid back reference",			/* REG_ESUBREG */
  "Unmatched [ or [^",				/* REG_EBRACK */
  "Unmatched ( or \\(",				/* REG_EPAREN */
  "Unmatched \\{",				/* REG_EBRACE */
  "Invalid content of \\{\\}",			/* REG_BADBR */
  "Invalid range end",				/* REG_ERANGE */
  "Memory exhausted",				/* REG_ESPACE */
  "Invalid preceding regular expression",	/* REG_BADRPT */
  "Premature end of regular expression",	/* REG_EEND */
  "Regular expression too big",			/* REG_ESIZE */
  "Unmatched ) or \\)",				/* REG_ERPAREN */
#ifdef emacs
  "Invalid syntax designator",			/* REG_ESYNTAX */
#endif
#ifdef MULE
  "Ranges may not span charsets",		/* REG_ERANGESPAN */
  "Invalid category designator",		/* REG_ECATEGORY */
#endif
};
| 1208 | |
| 1209 /* Avoiding alloca during matching, to placate r_alloc. */ | |
| 1210 | |
| 1333 | 1211 /* About these various flags: |
| 1212 | |
| 1213 MATCH_MAY_ALLOCATE indicates that it's OK to do allocation in the | |
| 1214 searching and matching functions. In this case, we use local variables | |
| 1215 to hold the values allocated. If not, we use *global* variables, which | |
| 1216 are pre-allocated. NOTE: XEmacs ***MUST*** run with MATCH_MAY_ALLOCATE, | |
| 1217 because the regexp routines may get called reentrantly as a result of | |
| 1218 QUIT processing (e.g. under Windows: re_match -> QUIT -> quit_p -> drain | |
| 1219 events -> process WM_INITMENU -> call filter -> re_match; see stack | |
| 1220 trace in signal.c), so we cannot have any global variables (unless we do | |
| 1221 lots of trickiness including some unwind-protects, which isn't worth it | |
| 1222 at this point). | |
| 1223 | |
| 1224 REL_ALLOC means that the relocating allocator is in use, for buffers | |
| 1225 and such. REGEX_REL_ALLOC means that we use rel-alloc to manage the | |
| 1226 fail stack, which may grow quite large. REGEX_MALLOC means we use | |
| 1227 malloc() in place of alloca() to allocate the fail stack -- only | |
| 1228 applicable if REGEX_REL_ALLOC is not defined. | |
| 1229 */ | |
| 1230 | |
| 428 | 1231 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the |
| 1232 searching and matching functions should not call alloca. On some | |
| 1233 systems, alloca is implemented in terms of malloc, and if we're | |
| 1234 using the relocating allocator routines, then malloc could cause a | |
| 1235 relocation, which might (if the strings being searched are in the | |
| 1236 ralloc heap) shift the data out from underneath the regexp | |
| 771 | 1237 routines. [To clarify: The purpose of rel-alloc is to allow data to |
| 1238 be moved in memory from one place to another so that all data | |
| 1239 blocks can be consolidated together and excess memory released back | |
| 1240 to the operating system. This requires that all the blocks that | |
| 1241 are managed by rel-alloc go at the very end of the program's heap, | |
| 1242 after all regularly malloc()ed data. malloc(), however, is used to | |
| 1243 owning the end of the heap, so that when more memory is needed, it | |
| 1244 just expands the heap using sbrk(). This is reconciled by using a | |
| 1245 malloc() (such as malloc.c, gmalloc.c, or recent versions of | |
| 1246 malloc() in libc) where the sbrk() call can be replaced with a | |
| 1247 user-specified call -- in this case, to rel-alloc's r_alloc_sbrk() | |
| 1248 routine. This routine calls the real sbrk(), but then shifts all | |
| 1249 the rel-alloc-managed blocks forward to the end of the heap again, | |
| 1250 so that malloc() gets the memory it needs in the location it needs | |
| 1251 it at. The regex routines may well have pointers to buffer data as | |
| 1252 their arguments, and buffers are managed by rel-alloc if rel-alloc | |
| 1253 has been enabled, so calling malloc() may potentially screw things | |
| 1254 up badly if it runs out of space and asks for more from the OS.] | |
| 1255 | |
| 1256 [[Here's another reason to avoid allocation: Emacs processes input | |
| 1257 from X in a signal handler; processing X input may call malloc; if | |
| 1258 input arrives while a matching routine is calling malloc, then | |
| 1259 we're scrod. But Emacs can't just block input while calling | |
| 1260 matching routines; then we don't notice interrupts when they come | |
| 1261 in. So, Emacs blocks input around all regexp calls except the | |
| 1262 matching calls, which it leaves unprotected, in the faith that they | |
| 1333 | 1263 will not malloc.]] This previous paragraph is irrelevant under XEmacs, |
| 1264 as we *do not* do anything so stupid as process input from within a | |
| 1265 signal handler. | |
| 1266 | |
| 1267 However, the regexp routines may get called reentrantly as a result of | |
| 1268 QUIT processing (e.g. under Windows: re_match -> QUIT -> quit_p -> drain | |
| 1269 events -> process WM_INITMENU -> call filter -> re_match; see stack | |
| 1270 trace in signal.c), so we cannot have any global variables (unless we do | |
| 1271 lots of trickiness including some unwind-protects, which isn't worth it | |
| 1272 at this point). Hence we MUST have MATCH_MAY_ALLOCATE defined. | |
| 1273 | |
| 1274 Also, the first paragraph does not make complete sense to me -- what | |
| 1275 about the use of rel-alloc to handle the fail stacks? Shouldn't these | |
| 1276 reallocations potentially cause buffer data to be relocated as well? I | |
| 826 | 1277 must be missing something, though -- perhaps the writer above is |
| 1278 assuming that the failure stack(s) will always be allocated after the | |
| 1279 buffer data, and thus reallocating them with rel-alloc won't move buffer | |
| 1333 | 1280 data. (In fact, a cursory glance at the code in ralloc.c seems to |
| 1281 confirm this.) --ben */ | |
| 428 | 1282 |
/* Normally, this is fine.  */
#define MATCH_MAY_ALLOCATE

/* When using GNU C, we are not REALLY using the C alloca, no matter
   what config.h may say.  So don't take precautions for it.  */
#ifdef __GNUC__
#undef C_ALLOCA
#endif

/* The match routines may not allocate if (1) they would do it with malloc
   and (2) it's not safe for them to use malloc.
   Note that if REL_ALLOC is defined, matching would not use malloc for the
   failure stack, but we would still use it for the register vectors;
   so REL_ALLOC should not affect this.  */

/* XEmacs can handle REL_ALLOC and malloc() OK, so allocation during
   matching is only ever disabled in non-emacs builds.  */
#if !defined (emacs) && (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (REL_ALLOC)
#undef MATCH_MAY_ALLOCATE
#endif

/* Sanity check: inside emacs the matcher can be re-entered, so the
   globals-based (non-allocating) mode must never be selected.  */
#if !defined (MATCH_MAY_ALLOCATE) && defined (emacs)
#error regex must handle reentrancy; MATCH_MAY_ALLOCATE must be defined
#endif
| 1306 | |
| 428 | 1307 |
| 1308 /* Failure stack declarations and macros; both re_compile_fastmap and | |
| 1309 re_match_2 use a failure stack. These have to be macros because of | |
| 1310 REGEX_ALLOCATE_STACK. */ | |
| 1311 | |
| 1312 | |
| 1313 /* Number of failure points for which to initially allocate space | |
| 1314 when matching. If this number is exceeded, we allocate more | |
| 1315 space, so it is not a hard limit. */ | |
/* Initial failure-stack capacity, in stack elements.  May be overridden
   by defining INIT_FAILURE_ALLOC before this point.  */
#ifndef INIT_FAILURE_ALLOC
#define INIT_FAILURE_ALLOC 20
#endif

/* Roughly the maximum number of failure points on the stack.  Would be
   exactly that if always used MAX_FAILURE_SPACE each time we failed.
   This is a variable only so users of regex can assign to it; we never
   change it ourselves.  DOUBLE_FAIL_STACK refuses to grow the stack
   once its size exceeds re_max_failures * MAX_FAILURE_ITEMS.  */
#if defined (MATCH_MAY_ALLOCATE)
/* 4400 was enough to cause a crash on Alpha OSF/1,
   whose default stack limit is 2mb.  */
int re_max_failures = 40000;
#else
int re_max_failures = 4000;
#endif
| 1331 | |
| 1332 union fail_stack_elt | |
| 1333 { | |
| 446 | 1334 re_char *pointer; |
| 428 | 1335 int integer; |
| 1336 }; | |
| 1337 | |
| 1338 typedef union fail_stack_elt fail_stack_elt_t; | |
| 1339 | |
| 1340 typedef struct | |
| 1341 { | |
| 1342 fail_stack_elt_t *stack; | |
| 665 | 1343 Elemcount size; |
| 1344 Elemcount avail; /* Offset of next open position. */ | |
| 428 | 1345 } fail_stack_type; |
| 1346 | |
| 1347 #define FAIL_STACK_EMPTY() (fail_stack.avail == 0) | |
| 1348 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) | |
| 1349 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) | |
| 1350 | |
| 1351 | |
/* Define macros to initialize and free the failure stack.
   Do `return -2' if the alloc fails.

   Both macros operate on a variable named `fail_stack' in the
   enclosing function.  Because INIT_FAIL_STACK can execute `return',
   it may only be used inside a function for which -2 is a valid
   return value.  */

#ifdef MATCH_MAY_ALLOCATE
/* Allocate room for INIT_FAILURE_ALLOC elements and mark the stack
   empty.  On allocation failure, undo the malloc-check binding and
   return -2 from the enclosing function.  */
#define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.stack = (fail_stack_elt_t *)				\
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC *			\
			    sizeof (fail_stack_elt_t));			\
									\
    if (fail_stack.stack == NULL)					\
      {									\
	UNBIND_REGEX_MALLOC_CHECK ();					\
	return -2;							\
      }									\
									\
    fail_stack.size = INIT_FAILURE_ALLOC;				\
    fail_stack.avail = 0;						\
  } while (0)

/* Release the storage obtained by INIT_FAIL_STACK.  */
#define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
#else
/* No allocation happens here in this configuration: initializing only
   empties the stack, and resetting does nothing.  */
#define INIT_FAIL_STACK()						\
  do {									\
    fail_stack.avail = 0;						\
  } while (0)

#define RESET_FAIL_STACK()
#endif
| 1381 | |
| 1382 | |
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.

   Return 1 if succeeds, and 0 if either ran out of memory
   allocating space for it or it was already too large.

   REGEX_REALLOCATE_STACK requires `destination' be declared.

   This is an expression, not a statement.  The limit test runs before
   any reallocation, so a stack already larger than
   re_max_failures * MAX_FAILURE_ITEMS is left untouched.  Note that the
   reallocation result is assigned to (fail_stack).stack before it is
   tested, so on allocation failure that member is NULL afterwards; the
   size member is only doubled on success.  */

#define DOUBLE_FAIL_STACK(fail_stack)					\
  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
   ? 0									\
   : ((fail_stack).stack = (fail_stack_elt_t *)				\
        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
          (fail_stack).size * sizeof (fail_stack_elt_t),		\
          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
									\
      (fail_stack).stack == NULL					\
      ? 0								\
      : ((fail_stack).size <<= 1, 					\
         1)))
| 1402 | |
/* After an operation that may have caused rel-alloc to move buffer
   text, these macros shift every pointer the matcher (or searcher)
   holds into that text by the distance reported by
   offset_post_relocation (lispobj, orig_buftext).  They use local
   variables of the enclosing function by name.  They expand to nothing
   unless both `emacs' and REL_ALLOC are defined.  */

#if !defined (emacs) || !defined (REL_ALLOC)
#define RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS()
#else
/* Add rmdp_offset to VAL unless VAL is NULL (don't change NULL
   pointers).  Wrapped in do/while (0) so it is one statement and
   cannot capture a following `else'.  */
#define ADD_IF_NZ(val)							\
  do { if (val) (val) += rmdp_offset; } while (0)
#define RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS()			\
do									\
{									\
  Bytecount rmdp_offset = offset_post_relocation (lispobj, orig_buftext); \
									\
  if (rmdp_offset)							\
    {									\
      int i;								\
									\
      ADD_IF_NZ (string1);						\
      ADD_IF_NZ (string2);						\
      ADD_IF_NZ (d);							\
      ADD_IF_NZ (dend);							\
      ADD_IF_NZ (end1);							\
      ADD_IF_NZ (end2);							\
      ADD_IF_NZ (end_match_1);						\
      ADD_IF_NZ (end_match_2);						\
									\
      /* Register arrays are only adjusted when there are groups.  */	\
      if (bufp->re_ngroups)						\
	{								\
	  for (i = 0; i < num_regs; i++)				\
	    {								\
	      ADD_IF_NZ (regstart[i]);					\
	      ADD_IF_NZ (regend[i]);					\
	      ADD_IF_NZ (old_regstart[i]);				\
	      ADD_IF_NZ (old_regend[i]);				\
	      ADD_IF_NZ (best_regstart[i]);				\
	      ADD_IF_NZ (best_regend[i]);				\
	      ADD_IF_NZ (reg_dummy[i]);					\
	    }								\
	}								\
									\
      ADD_IF_NZ (match_end);						\
    }									\
} while (0)
#endif /* !defined (emacs) || !defined (REL_ALLOC) */

#if !defined (emacs) || !defined (REL_ALLOC)
#define RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS()
#else
#define RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS()			\
do									\
{									\
  Bytecount rmdp_offset = offset_post_relocation (lispobj, orig_buftext); \
									\
  if (rmdp_offset)							\
    {									\
      ADD_IF_NZ (str1);							\
      ADD_IF_NZ (str2);							\
      ADD_IF_NZ (string1);						\
      ADD_IF_NZ (string2);						\
      ADD_IF_NZ (d);							\
    }									\
} while (0)

#endif /* !defined (emacs) || !defined (REL_ALLOC) */
| 428 | 1464 |
/* Push pointer POINTER on FAIL_STACK.
   Return 1 if was able to do so and 0 if ran out of memory allocating
   space to do so.

   The fullness test is made on the FAIL_STACK argument itself, so the
   macro behaves consistently whatever the stack variable is called;
   if the stack is full, DOUBLE_FAIL_STACK is tried first.  POINTER is
   parenthesized so any argument expression is stored intact.  */
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  ((((FAIL_STACK).avail == (FAIL_STACK).size)				\
    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   ? 0									\
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER),	\
      1))
| 1474 | |
/* Push a pointer value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.
   No capacity check is made here; the caller must already have
   ensured there is room.  */
#define PUSH_FAILURE_POINTER(item)					\
  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)

/* This pushes an integer-valued item onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
#define PUSH_FAILURE_INT(item)					\
  fail_stack.stack[fail_stack.avail++].integer = (item)

/* Push a fail_stack_elt_t value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
#define PUSH_FAILURE_ELT(item)					\
  fail_stack.stack[fail_stack.avail++] = (item)

/* These three POP... operations complement the three PUSH... operations.
   All assume that `fail_stack' is nonempty.  Each decrements `avail'
   and yields the slot that was on top.  */
#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]

/* Used to omit pushing failure point id's when we're not debugging.
   With DEBUG, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP stores the
   popped integer through ITEM_ADDR; otherwise both expand to nothing.  */
#ifdef DEBUG
#define DEBUG_PUSH PUSH_FAILURE_INT
#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
#else
#define DEBUG_PUSH(item)
#define DEBUG_POP(item_addr)
#endif
| 1507 | |
| 1508 | |
| 1509 /* Push the information about the state we will need | |
| 1510 if we ever fail back to it. | |
| 1511 | |
| 1512 Requires variables fail_stack, regstart, regend, reg_info, and | |
| 1513 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be | |
| 1514 declared. | |
| 1515 | |
| 1516 Does `return FAILURE_CODE' if runs out of memory. */ | |
| 1517 | |
/* DECLARE_DESTINATION supplies the local `destination' variable that
   REGEX_REALLOCATE_STACK requires.  It is declared only when neither
   REGEX_MALLOC nor REGEX_REL_ALLOC is defined; in the other
   configurations the macro is DECLARE_NOTHING.  */
#if !defined (REGEX_MALLOC) && !defined (REGEX_REL_ALLOC)
#define DECLARE_DESTINATION char *destination
#else
#define DECLARE_DESTINATION DECLARE_NOTHING
#endif
| 1523 | |
/* Save a failure point: the state needed to resume matching at
   PATTERN_PLACE / STRING_PLACE if the current path fails.

   Pushed, in order: for each register from lowest_active_reg to
   highest_active_reg its start, end and info word; then
   lowest_active_reg, highest_active_reg, PATTERN_PLACE, STRING_PLACE
   and (DEBUG only) the failure id.

   The stack is first grown by doubling until the items fit.  If it
   cannot be grown, the enclosing function returns FAILURE_CODE.  After
   each successful doubling the matcher's pointers into relocatable
   data are re-based.

   Uses the enclosing function's fail_stack, regstart, regend,
   reg_info, lowest_active_reg and highest_active_reg.  */
#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
do {									\
  DECLARE_DESTINATION;							\
  /* Must be int, so when we don't save any registers, the arithmetic	\
     of 0 + -1 isn't done as unsigned.  */				\
  int this_reg;								\
									\
  DEBUG_STATEMENT (failure_id++);					\
  DEBUG_STATEMENT (nfailure_points_pushed++);				\
  DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%d:\n", failure_id);		\
  DEBUG_PRINT2 (" Before push, next avail: %ld\n",			\
		(long) (fail_stack).avail);				\
  DEBUG_PRINT2 (" size: %ld\n",						\
		(long) (fail_stack).size);				\
									\
  DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS);		\
  DEBUG_PRINT2 (" available: %ld\n",					\
		(long) REMAINING_AVAIL_SLOTS);				\
									\
  /* Ensure we have enough space allocated for what we will push.  */	\
  while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
    {									\
      BEGIN_REGEX_MALLOC_OK ();						\
      if (!DOUBLE_FAIL_STACK (fail_stack))				\
	{								\
	  END_REGEX_MALLOC_OK ();					\
	  UNBIND_REGEX_MALLOC_CHECK ();					\
	  return failure_code;						\
	}								\
      END_REGEX_MALLOC_OK ();						\
      DEBUG_PRINT2 ("\n Doubled stack; size now: %ld\n",		\
		    (long) (fail_stack).size);				\
      DEBUG_PRINT2 (" slots available: %ld\n",				\
		    (long) REMAINING_AVAIL_SLOTS);			\
									\
      RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS ();			\
    }									\
									\
  /* Push the info, starting with the registers.  */			\
  DEBUG_PRINT1 ("\n");							\
									\
  for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;	\
       this_reg++)							\
    {									\
      DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg);			\
      DEBUG_STATEMENT (num_regs_pushed++);				\
									\
      DEBUG_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]);	\
      PUSH_FAILURE_POINTER (regstart[this_reg]);			\
									\
      DEBUG_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]);		\
      PUSH_FAILURE_POINTER (regend[this_reg]);				\
									\
      DEBUG_PRINT2 (" info: 0x%lx\n ",					\
		    * (long *) (&reg_info[this_reg]));			\
      DEBUG_PRINT2 (" match_null=%d",					\
		    REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
      DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
      DEBUG_PRINT2 (" matched_something=%d",				\
		    MATCHED_SOMETHING (reg_info[this_reg]));		\
      DEBUG_PRINT2 (" ever_matched_something=%d",			\
		    EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
      DEBUG_PRINT1 ("\n");						\
      PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
    }									\
									\
  DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);	\
  PUSH_FAILURE_INT (lowest_active_reg);					\
									\
  DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);	\
  PUSH_FAILURE_INT (highest_active_reg);				\
									\
  DEBUG_PRINT2 (" Pushing pattern 0x%lx: \n", (long) pattern_place);	\
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
  PUSH_FAILURE_POINTER (pattern_place);					\
									\
  DEBUG_PRINT2 (" Pushing string 0x%lx: `", (long) string_place);	\
  DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,	\
			     size2);					\
  DEBUG_PRINT1 ("'\n");							\
  PUSH_FAILURE_POINTER (string_place);					\
									\
  DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id);		\
  DEBUG_PUSH (failure_id);						\
} while (0)
| 428 | 1609 |
/* This is the number of items that are pushed and popped on the stack
   for each register: its start pointer, its end pointer and its info
   word.  */
#define NUM_REG_ITEMS  3

/* Individual items aside from the registers: the lowest and highest
   active register numbers, the pattern position and the string
   position, plus the failure id when debugging.  */
#ifdef DEBUG
#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
#else
#define NUM_NONREG_ITEMS 4
#endif

/* We push at most this many items on the stack.  */
/* We used to use (num_regs - 1), which is the number of registers
   this regexp will save; but that was changed to 5
   to avoid stack overflow for a regexp with lots of parens.  */
#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)

/* We actually push this many items.  Depends on the enclosing
   function's highest_active_reg and lowest_active_reg.  */
#define NUM_FAILURE_ITEMS						\
  ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
    + NUM_NONREG_ITEMS)

/* How many items can still be added to the stack without overflowing it.  */
#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
| 1634 | |
| 1635 | |
| 1636 /* Pops what PUSH_FAILURE_POINT pushes. | |
| 1637 | |
| 1638 We restore into the parameters, all of which should be lvalues: | |
| 1639 STR -- the saved data position. | |
| 1640 PAT -- the saved pattern position. | |
| 1641 LOW_REG, HIGH_REG -- the lowest and highest active registers. | |
| 1642 REGSTART, REGEND -- arrays of string positions. | |
| 1643 REG_INFO -- array of information about each subexpression. | |
| 1644 | |
| 1645 Also assumes the variables `fail_stack' and (if debugging), `bufp', | |
| 1646 `pend', `string1', `size1', `string2', and `size2'. */ | |
| 1647 | |
| 456 | 1648 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, \ |
| 1649 regstart, regend, reg_info) \ | |
| 1650 do { \ | |
| 428 | 1651 DEBUG_STATEMENT (fail_stack_elt_t ffailure_id;) \ |
| 1652 int this_reg; \ | |
| 442 | 1653 const unsigned char *string_temp; \ |
| 428 | 1654 \ |
| 1655 assert (!FAIL_STACK_EMPTY ()); \ | |
| 1656 \ | |
| 1657 /* Remove failure points and point to how many regs pushed. */ \ | |
| 1658 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ | |
| 647 | 1659 DEBUG_PRINT2 (" Before pop, next avail: %ld\n", \ |
| 1660 (long) fail_stack.avail); \ | |
| 1661 DEBUG_PRINT2 (" size: %ld\n", \ | |
| 1662 (long) fail_stack.size); \ | |
| 428 | 1663 \ |
| 1664 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ | |
| 1665 \ | |
| 1666 DEBUG_POP (&ffailure_id.integer); \ | |
| 647 | 1667 DEBUG_PRINT2 (" Popping failure id: %d\n", \ |
| 1668 * (int *) &ffailure_id); \ | |
| 428 | 1669 \ |
| 1670 /* If the saved string location is NULL, it came from an \ | |
| 1671 on_failure_keep_string_jump opcode, and we want to throw away the \ | |
| 1672 saved NULL, thus retaining our current position in the string. */ \ | |
| 1673 string_temp = POP_FAILURE_POINTER (); \ | |
| 1674 if (string_temp != NULL) \ | |
| 446 | 1675 str = string_temp; \ |
| 428 | 1676 \ |
| 1677 DEBUG_PRINT2 (" Popping string 0x%lx: `", (long) str); \ | |
| 1678 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ | |
| 1679 DEBUG_PRINT1 ("'\n"); \ | |
| 1680 \ | |
| 1681 pat = (unsigned char *) POP_FAILURE_POINTER (); \ | |
| 1682 DEBUG_PRINT2 (" Popping pattern 0x%lx: ", (long) pat); \ | |
| 1683 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ | |
| 1684 \ | |
| 1685 /* Restore register info. */ \ | |
| 647 | 1686 high_reg = POP_FAILURE_INT (); \ |
| 428 | 1687 DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ |
| 1688 \ | |
| 647 | 1689 low_reg = POP_FAILURE_INT (); \ |
| 428 | 1690 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ |
| 1691 \ | |
| 1692 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ | |
| 1693 { \ | |
| 1694 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ | |
| 1695 \ | |
| 1696 reg_info[this_reg].word = POP_FAILURE_ELT (); \ | |
| 1697 DEBUG_PRINT2 (" info: 0x%lx\n", \ | |
| 1698 * (long *) ®_info[this_reg]); \ | |
| 1699 \ | |
| 446 | 1700 regend[this_reg] = POP_FAILURE_POINTER (); \ |
| 428 | 1701 DEBUG_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \ |
| 1702 \ | |
| 446 | 1703 regstart[this_reg] = POP_FAILURE_POINTER (); \ |
| 428 | 1704 DEBUG_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \ |
| 1705 } \ | |
| 1706 \ | |
| 1707 set_regs_matched_done = 0; \ | |
| 1708 DEBUG_STATEMENT (nfailure_points_popped++); \ | |
| 456 | 1709 } while (0) /* POP_FAILURE_POINT */ |
| 428 | 1710 |
| 1711 | |
| 1712 | |
| 1713 /* Structure for per-register (a.k.a. per-group) information. | |
| 1714 Other register information, such as the | |
| 1715 starting and ending positions (which are addresses), and the list of | |
| 1716 inner groups (which is a bit list), is maintained in separate | |
| 1717 variables. | |
| 1718 | |
| 1719 We are making a (strictly speaking) nonportable assumption here: that | |
| 1720 the compiler will pack our bit fields into something that fits into | |
| 1721 the type of `word', i.e., into one item on the failure stack. The | |
| `word' member is what is saved on and restored from the failure | |
| 1722 stack; the `bits' member is how the flags are read and written. */ | |
| 1723 | |
| 1724 typedef union | |
| 1725 { | |
| 1726 fail_stack_elt_t word; | |
| 1727 struct | |
| 1728 { | |
| 1729 /* This field is one if this group can match the empty string, | |
| 1730 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE' | |
| (which is why the field needs two bits). */ | |
| 1731 #define MATCH_NULL_UNSET_VALUE 3 | |
| 647 | 1732 unsigned int match_null_string_p : 2; |
| 1733 unsigned int is_active : 1; | |
| 1734 unsigned int matched_something : 1; | |
| 1735 unsigned int ever_matched_something : 1; | |
| 428 | 1736 } bits; |
| 1737 } register_info_type; | |
| 1738 | |
| 1739 #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) | |
| 1740 #define IS_ACTIVE(R) ((R).bits.is_active) | |
| 1741 #define MATCHED_SOMETHING(R) ((R).bits.matched_something) | |
| 1742 #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) | |
| 1743 | |
| 1744 | |
| 1745 /* Call this when we have matched a real character; it sets the | |
| `matched_something' and `ever_matched_something' flags of every | |
| register from `lowest_active_reg' through `highest_active_reg', | |
| 1746 i.e. the subexpressions which we are currently inside. | |
| The work is done only while `set_regs_matched_done' is zero; the | |
| macro then sets that flag to 1, so repeated calls do nothing until | |
| 1747 something clears the flag again. */ | |
| 1748 #define SET_REGS_MATCHED() \ | |
| 1749 do \ | |
| 1750 { \ | |
| 1751 if (!set_regs_matched_done) \ | |
| 1752 { \ | |
| 647 | 1753 int r; \ |
| 428 | 1754 set_regs_matched_done = 1; \ |
| 1755 for (r = lowest_active_reg; r <= highest_active_reg; r++) \ | |
| 1756 { \ | |
| 1757 MATCHED_SOMETHING (reg_info[r]) \ | |
| 1758 = EVER_MATCHED_SOMETHING (reg_info[r]) \ | |
| 1759 = 1; \ | |
| 1760 } \ | |
| 1761 } \ | |
| 1762 } \ | |
| 1763 while (0) | |
| 1764 | |
/* Registers are set to a sentinel when they haven't yet matched.  The
   sentinel is the address of a dummy byte private to this file, so it is
   a valid non-NULL pointer that is only ever compared, never
   dereferenced, by the macros below.  */
static unsigned char reg_unset_dummy;
#define REG_UNSET_VALUE (&reg_unset_dummy)

/* Nonzero if the register position E still holds the sentinel.  */
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
| 1769 | |
| 1770 /* Subroutine declarations and macros for regex_compile. */ | |
| 1771 | |
| 1772 /* Fetch the next character in the uncompiled pattern into C, | |
| translating it with RE_TRANSLATE. C must be an lvalue. Like | |
| PATFETCH_RAW, this returns REG_EEND from the enclosing function when | |
| 826 | 1773 the pattern is already exhausted. */ |
| 428 | 1774 #define PATFETCH(c) \ |
| 446 | 1775 do { \ |
| 1776 PATFETCH_RAW (c); \ | |
| 826 | 1777 c = RE_TRANSLATE (c); \ |
| 428 | 1778 } while (0) |
| 1779 | |
| 1780 /* Fetch the next character in the uncompiled pattern, with no | |
| translation, and advance `p' past it. If `p' has already reached | |
| `pend', the enclosing function returns REG_EEND instead. Uses `p' | |
| 1781 and `pend' from the enclosing scope. */ | |
| 1782 #define PATFETCH_RAW(c) \ | |
| 1783 do {if (p == pend) return REG_EEND; \ | |
| 1784 assert (p < pend); \ | |
| 867 | 1785 c = itext_ichar (p); \ |
| 1786 INC_IBYTEPTR (p); \ | |
| 428 | 1787 } while (0) |
| 1788 | |
| 1789 /* Go backwards one character in the pattern. */ | |
| 867 | 1790 #define PATUNFETCH DEC_IBYTEPTR (p) |
| 428 | 1791 |
| 1792 /* If TRANSLATE_P (translate) holds, return RE_TRANSLATE_1 (D), else | |
| just D. `translate' is taken from the enclosing scope. | |
| NOTE(review): the remarks below predate the TRANSLATE_P / | |
| RE_TRANSLATE_1 form and describe a plain translate[D] lookup -- | |
| confirm whether they still apply. We | |
| 1793 cast the subscript to translate because some data is declared as | |
| 1794 `char *', to avoid warnings when a string constant is passed. But | |
| 1795 when we use a character as a subscript we must make it unsigned. */ | |
| 826 | 1796 #define RE_TRANSLATE(d) \ |
| 1797 (TRANSLATE_P (translate) ? RE_TRANSLATE_1 (d) : (d)) | |
| 428 | 1798 |
| 1799 /* Macros for outputting the compiled pattern into `buffer'. They all | |
| use `bufp' and `buf_end' (the append position) from the enclosing | |
| scope. */ | |
| 1800 | |
| 1801 /* If the buffer isn't allocated when it comes in, use this. */ | |
| 1802 #define INIT_BUF_SIZE 32 | |
| 1803 | |
| 1804 /* Make sure we have at least N more bytes of space in buffer, calling | |
| EXTEND_BUFFER until that is the case. EXTEND_BUFFER may return an | |
| error code from the enclosing function. Note that this expands to a | |
| bare `while' statement, not a do/while block. */ | |
| 1805 #define GET_BUFFER_SPACE(n) \ | |
| 647 | 1806 while (buf_end - bufp->buffer + (n) > (ptrdiff_t) bufp->allocated) \ |
| 428 | 1807 EXTEND_BUFFER () |
| 1808 | |
| 1809 /* Make sure we have one more byte of buffer space and then append C, | |
| converted to unsigned char, at `buf_end'. */ | |
| 1810 #define BUF_PUSH(c) \ | |
| 1811 do { \ | |
| 1812 GET_BUFFER_SPACE (1); \ | |
| 446 | 1813 *buf_end++ = (unsigned char) (c); \ |
| 428 | 1814 } while (0) |
| 1815 | |
| 1816 | |
| 1817 /* Ensure we have two more bytes of buffer space and then append C1 and C2, | |
| in that order. */ | |
| 1818 #define BUF_PUSH_2(c1, c2) \ | |
| 1819 do { \ | |
| 1820 GET_BUFFER_SPACE (2); \ | |
| 446 | 1821 *buf_end++ = (unsigned char) (c1); \ |
| 1822 *buf_end++ = (unsigned char) (c2); \ | |
| 428 | 1823 } while (0) |
| 1824 | |
| 1825 | |
| 1826 /* As with BUF_PUSH_2, except for three bytes: C1, C2, then C3. */ | |
| 1827 #define BUF_PUSH_3(c1, c2, c3) \ | |
| 1828 do { \ | |
| 1829 GET_BUFFER_SPACE (3); \ | |
| 446 | 1830 *buf_end++ = (unsigned char) (c1); \ |
| 1831 *buf_end++ = (unsigned char) (c2); \ | |
| 1832 *buf_end++ = (unsigned char) (c3); \ | |
| 428 | 1833 } while (0) |
| 1834 | |
| 1835 | |
| 1836 /* Store a jump with opcode OP at LOC to location TO. We store a | |
| relative address, computed as TO - LOC - 3: the offset is reduced | |
| 1837 by the three bytes the jump itself occupies. */ | |
| 1838 #define STORE_JUMP(op, loc, to) \ | |
| 1839 store_op1 (op, loc, (to) - (loc) - 3) | |
| 1840 | |
| 1841 /* Likewise, for a two-argument jump; ARG is passed through as the | |
| second argument. */ | |
| 1842 #define STORE_JUMP2(op, loc, to, arg) \ | |
| 1843 store_op2 (op, loc, (to) - (loc) - 3, arg) | |
| 1844 | |
| 446 | 1845 /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the |
| buffer end. The macro itself does not advance `buf_end'; the uses | |
| in regex_compile add 3 to it after each insertion. | |
| 1846 */ | |
| 428 | 1847 #define INSERT_JUMP(op, loc, to) \ |
| 446 | 1848 insert_op1 (op, loc, (to) - (loc) - 3, buf_end) |
| 1849 | |
| 1850 /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the | |
| 1851 buffer end. */ | |
| 428 | 1852 #define INSERT_JUMP2(op, loc, to, arg) \ |
| 446 | 1853 insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) |
| 428 | 1854 |
| 1855 | |
| 1856 /* This is not an arbitrary limit: the arguments which represent offsets | |
| 1857 into the pattern are two bytes long. So if 2^16 bytes turns out to | |
| 1858 be too small, many things would have to change. */ | |
| 1859 #define MAX_BUF_SIZE (1L << 16) | |
| 1860 | |
| 1861 | |
| 1862 /* Double the size of the pattern buffer via xrealloc and | |
| 1863 reset the pointers that pointed into the old block to point to the | |
| 1864 correct places in the new one. The new size is clamped to | |
| MAX_BUF_SIZE. If the buffer is already MAX_BUF_SIZE bytes, the | |
| enclosing function returns REG_ESIZE; if xrealloc yields NULL, it | |
| returns REG_ESPACE. | |
| The pointers fixed up are `buf_end' and `begalt' (always) and | |
| `fixup_alt_jump', `laststart' and `pending_exact' (when non-null), | |
| 1865 all taken from the enclosing scope. */ | |
| 1333 | 1866 #define EXTEND_BUFFER() \ |
| 1867 do { \ | |
| 1868 re_char *old_buffer = bufp->buffer; \ | |
| 1869 if (bufp->allocated == MAX_BUF_SIZE) \ | |
| 1870 return REG_ESIZE; \ | |
| 1871 bufp->allocated <<= 1; \ | |
| 1872 if (bufp->allocated > MAX_BUF_SIZE) \ | |
| 1873 bufp->allocated = MAX_BUF_SIZE; \ | |
| 1874 bufp->buffer = \ | |
| 1875 (unsigned char *) xrealloc (bufp->buffer, bufp->allocated); \ | |
| 1876 if (bufp->buffer == NULL) \ | |
| 1877 return REG_ESPACE; \ | |
| 1878 /* If the buffer moved, move all the pointers into it. */ \ | |
| 1879 if (old_buffer != bufp->buffer) \ | |
| 1880 { \ | |
| 1881 buf_end = (buf_end - old_buffer) + bufp->buffer; \ | |
| 1882 begalt = (begalt - old_buffer) + bufp->buffer; \ | |
| 1883 if (fixup_alt_jump) \ | |
| 1884 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \ | |
| 1885 if (laststart) \ | |
| 1886 laststart = (laststart - old_buffer) + bufp->buffer; \ | |
| 1887 if (pending_exact) \ | |
| 1888 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ | |
| 1889 } \ | |
| 428 | 1890 } while (0) |
| 1891 | |
| 1892 | |
| 1893 /* Since we have one byte reserved for the register number argument to | |
| 1894 {start,stop}_memory, the maximum number of groups we can report | |
| 1895 things about is what fits in that byte. */ | |
| 1896 #define MAX_REGNUM 255 | |
| 1897 | |
| 1898 /* But patterns can have more than `MAX_REGNUM' registers. We just | |
| 502 | 1899 ignore the excess. |
| 1900 #### not true! groups past this will fail in lots of ways, if we | |
| 1901 ever have to backtrack. | |
| 1902 */ | |
| 647 | 1903 typedef int regnum_t; |
| 428 | 1904 |
| 502 | 1905 #define INIT_REG_TRANSLATE_SIZE 5 |
| 428 | 1906 |
| 1907 /* Types and macros for the compile stack, which regex_compile uses to | |
| keep track of unclosed groups. The macros operate on a variable | |
| named `compile_stack' in the enclosing scope. */ | |
| 1908 | |
| 1909 /* Since offsets can go either forwards or backwards, this type needs to | |
| 1910 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ | |
| 1911 typedef int pattern_offset_t; | |
| 1912 | |
| 1913 typedef struct | |
| 1914 { | |
| 1915 pattern_offset_t begalt_offset; | |
| 1916 pattern_offset_t fixup_alt_jump; | |
| 1917 pattern_offset_t inner_group_offset; | |
| 1918 pattern_offset_t laststart_offset; | |
| 1919 regnum_t regnum; | |
| 1920 } compile_stack_elt_t; | |
| 1921 | |
| 1922 | |
| 1923 typedef struct | |
| 1924 { | |
| 1925 compile_stack_elt_t *stack; | |
| 647 | 1926 int size; |
| 1927 int avail; /* Offset of next open position; 0 when the stack is empty. */ | |
| 428 | 1928 } compile_stack_type; |
| 1929 | |
| 1930 | |
| 1931 #define INIT_COMPILE_STACK_SIZE 32 | |
| 1932 | |
| 1933 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) | |
| 1934 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) | |
| 1935 | |
| 1936 /* The next available element, i.e. the slot at index `avail' -- one | |
| past the most recently used element, not that element itself. */ | |
| 1937 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) | |
| 1938 | |
| 1939 | |
/* Set the bit for character C in the bit vector that starts at `buf_end'
   (taken from the enclosing scope).  C is converted to unsigned char
   before it is split into a byte index and a bit position; both uses of
   the argument are parenthesized so that an expression argument is
   converted as a whole.  C is evaluated twice.  */
#define SET_LIST_BIT(c) \
  (buf_end[((unsigned char) (c)) / BYTEWIDTH] \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
| 1944 | |
| 1945 #ifdef MULE | |
| 1946 | |
| 1947 /* Set the "bit" for character C in a range table: calls | |
| put_range_table on `rtab' (from the enclosing scope) with C as both | |
| range ends and Qt as the value. C is evaluated twice. */ | |
| 1948 #define SET_RANGETAB_BIT(c) put_range_table (rtab, c, c, Qt) | |
| 1949 | |
| 1950 /* Set the "bit" for character c in the appropriate table: the range | |
| table when `has_extended_chars' (from the enclosing scope) is true, | |
| the bit vector otherwise. */ | |
| 1951 #define SET_EITHER_BIT(c) \ | |
| 1952 do { \ | |
| 1953 if (has_extended_chars) \ | |
| 1954 SET_RANGETAB_BIT (c); \ | |
| 1955 else \ | |
| 1956 SET_LIST_BIT (c); \ | |
| 1957 } while (0) | |
| 1958 | |
| 1959 #else /* not MULE */ | |
| 1960 | |
| 1961 #define SET_EITHER_BIT(c) SET_LIST_BIT (c) | |
| 1962 | |
| 1963 #endif | |
| 1964 | |
| 1965 | |
| 1966 /* Get the next unsigned number in the uncompiled pattern. | |
| Reads decimal digits starting at `p' and accumulates them into NUM, | |
| which must be an lvalue. A negative NUM is reset to 0 when the first | |
| digit is seen; if no digit is found (or the pattern is already | |
| exhausted) NUM is left unchanged. Uses `c', `p' and `pend' from the | |
| enclosing scope: on exit `c' holds the last character fetched, which | |
| is the first non-digit unless the pattern ended on a digit. | |
| No overflow check is made on NUM. Note that this expands to a plain | |
| brace block rather than a do/while statement. */ | |
| 1967 #define GET_UNSIGNED_NUMBER(num) \ | |
| 1968 { if (p != pend) \ | |
| 1969 { \ | |
| 1970 PATFETCH (c); \ | |
| 1971 while (ISDIGIT (c)) \ | |
| 1972 { \ | |
| 1973 if (num < 0) \ | |
| 1974 num = 0; \ | |
| 1975 num = num * 10 + c - '0'; \ | |
| 1976 if (p == pend) \ | |
| 1977 break; \ | |
| 1978 PATFETCH (c); \ | |
| 1979 } \ | |
| 1980 } \ | |
| 1981 } | |
| 1982 | |
| 1983 #define CHAR_CLASS_MAX_LENGTH 6 /* Length of the longest class name accepted by IS_CHAR_CLASS. Namely, `xdigit'. */ | |
| 1984 | |
| 1985 #define IS_CHAR_CLASS(string) \ | |
| 1986 (STREQ (string, "alpha") || STREQ (string, "upper") \ | |
| 1987 || STREQ (string, "lower") || STREQ (string, "digit") \ | |
| 1988 || STREQ (string, "alnum") || STREQ (string, "xdigit") \ | |
| 1989 || STREQ (string, "space") || STREQ (string, "print") \ | |
| 1990 || STREQ (string, "punct") || STREQ (string, "graph") \ | |
| 1991 || STREQ (string, "cntrl") || STREQ (string, "blank")) | |
| 1992 | |
| 1993 static void store_op1 (re_opcode_t op, unsigned char *loc, int arg); | |
| 1994 static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); | |
| 1995 static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, | |
| 1996 unsigned char *end); | |
| 1997 static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, | |
| 1998 unsigned char *end); | |
| 460 | 1999 static re_bool at_begline_loc_p (re_char *pattern, re_char *p, |
| 428 | 2000 reg_syntax_t syntax); |
| 460 | 2001 static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax); |
| 2002 static re_bool group_in_compile_stack (compile_stack_type compile_stack, | |
| 428 | 2003 regnum_t regnum); |
| 446 | 2004 static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, |
| 2005 RE_TRANSLATE_TYPE translate, | |
| 2006 reg_syntax_t syntax, | |
| 428 | 2007 unsigned char *b); |
| 2008 #ifdef MULE | |
| 446 | 2009 static reg_errcode_t compile_extended_range (re_char **p_ptr, |
| 2010 re_char *pend, | |
| 2011 RE_TRANSLATE_TYPE translate, | |
| 428 | 2012 reg_syntax_t syntax, |
| 2013 Lisp_Object rtab); | |
| 2014 #endif /* MULE */ | |
| 460 | 2015 static re_bool group_match_null_string_p (unsigned char **p, |
| 428 | 2016 unsigned char *end, |
| 2017 register_info_type *reg_info); | |
| 460 | 2018 static re_bool alt_match_null_string_p (unsigned char *p, unsigned char *end, |
| 428 | 2019 register_info_type *reg_info); |
| 460 | 2020 static re_bool common_op_match_null_string_p (unsigned char **p, |
| 428 | 2021 unsigned char *end, |
| 2022 register_info_type *reg_info); | |
| 826 | 2023 static int bcmp_translate (re_char *s1, re_char *s2, |
| 2024 REGISTER int len, RE_TRANSLATE_TYPE translate | |
| 2025 #ifdef emacs | |
| 2026 , Internal_Format fmt, Lisp_Object lispobj | |
| 2027 #endif | |
| 2028 ); | |
| 428 | 2029 static int re_match_2_internal (struct re_pattern_buffer *bufp, |
| 446 | 2030 re_char *string1, int size1, |
| 2031 re_char *string2, int size2, int pos, | |
| 826 | 2032 struct re_registers *regs, int stop |
| 2033 RE_LISP_CONTEXT_ARGS_DECL); | |
| 428 | 2034 |
| 2035 #ifndef MATCH_MAY_ALLOCATE | |
| 2036 | |
| 2037 /* If we cannot allocate large objects within re_match_2_internal, | |
| 2038 we make the fail stack and register vectors global. | |
| 2039 The fail stack is grown to the maximum size when a regexp | |
| 2040 is compiled. | |
| 2041 The register vectors are adjusted in size each time we | |
| 2042 compile a regexp, according to the number of registers it needs. */ | |
| 2043 | |
| 2044 static fail_stack_type fail_stack; | |
| 2045 | |
| 2046 /* Number of registers for which the following vectors are currently | |
| allocated. | |
| 2047 That is so we can make them bigger as needed, | |
| 2048 but never make them smaller. */ | |
| 2049 static int regs_allocated_size; | |
| 2050 | |
| 446 | 2051 static re_char ** regstart, ** regend; |
| 2052 static re_char ** old_regstart, ** old_regend; | |
| 2053 static re_char **best_regstart, **best_regend; | |
| 428 | 2054 static register_info_type *reg_info; |
| 446 | 2055 static re_char **reg_dummy; |
| 428 | 2056 static register_info_type *reg_info_dummy; |
| 2057 | |
| 2058 /* Make the register vectors big enough for NUM_REGS registers, | |
| 2059 but don't make them smaller. */ | |
| 2060 | |
| 2061 static | |
| 2062 regex_grow_registers (int num_regs) | |
| 2063 { | |
| 2064 if (num_regs > regs_allocated_size) | |
| 2065 { | |
| 551 | 2066 RETALLOC (regstart, num_regs, re_char *); |
| 2067 RETALLOC (regend, num_regs, re_char *); | |
| 2068 RETALLOC (old_regstart, num_regs, re_char *); | |
| 2069 RETALLOC (old_regend, num_regs, re_char *); | |
| 2070 RETALLOC (best_regstart, num_regs, re_char *); | |
| 2071 RETALLOC (best_regend, num_regs, re_char *); | |
| 2072 RETALLOC (reg_info, num_regs, register_info_type); | |
| 2073 RETALLOC (reg_dummy, num_regs, re_char *); | |
| 2074 RETALLOC (reg_info_dummy, num_regs, register_info_type); | |
| 428 | 2075 |
| 2076 regs_allocated_size = num_regs; | |
| 2077 } | |
| 2078 } | |
| 2079 | |
| 2080 #endif /* not MATCH_MAY_ALLOCATE */ | |
| 2081 | |
| 2082 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. | |
| 2083 Returns one of the error codes defined in `regex.h', or zero for success. | |
| 2084 | |
| 2085 Assumes the `allocated' (and perhaps `buffer') and `translate' | |
| 2086 fields are set in BUFP on entry. | |
| 2087 | |
| 2088 If it succeeds, results are put in BUFP (if it returns an error, the | |
| 2089 contents of BUFP are undefined): | |
| 2090 `buffer' is the compiled pattern; | |
| 2091 `syntax' is set to SYNTAX; | |
| 2092 `used' is set to the length of the compiled pattern; | |
| 2093 `fastmap_accurate' is zero; | |
| 502 | 2094 `re_ngroups' is the number of groups/subexpressions (including shy |
| 2095 groups) in PATTERN; | |
| 2096 `re_nsub' is the number of non-shy groups in PATTERN; | |
| 428 | 2097 `not_bol' and `not_eol' are zero; |
| 2098 | |
| 2099 The `fastmap' and `newline_anchor' fields are neither | |
| 2100 examined nor set. */ | |
| 2101 | |
| 2102 /* Return VALUE from the enclosing function, first freeing the storage | |
| we allocated for `compile_stack.stack'. Uses the variable | |
| `compile_stack' from the enclosing scope. */ | |
| 1726 | 2103 #define FREE_STACK_RETURN(value) \ |
| 2104 do \ | |
| 2105 { \ | |
| 2106 xfree (compile_stack.stack, compile_stack_elt_t *); \ | |
| 2107 return value; \ | |
| 1333 | 2108 } while (0) |
| 428 | 2109 |
| 2110 static reg_errcode_t | |
| 446 | 2111 regex_compile (re_char *pattern, int size, reg_syntax_t syntax, |
| 428 | 2112 struct re_pattern_buffer *bufp) |
| 2113 { | |
| 2114 /* We fetch characters from PATTERN here. We declare these as int | |
| 2115 (or possibly long) so that chars above 127 can be used as | |
| 2116 array indices. The macros that fetch a character from the pattern | |
| 2117 make sure to coerce to unsigned char before assigning, so we won't | |
| 2118 get bitten by negative numbers here. */ | |
| 2119 /* XEmacs change: used to be unsigned char. */ | |
| 2120 REGISTER EMACS_INT c, c1; | |
| 2121 | |
| 2122 /* A random temporary spot in PATTERN. */ | |
| 446 | 2123 re_char *p1; |
| 428 | 2124 |
| 2125 /* Points to the end of the buffer, where we should append. */ | |
| 446 | 2126 REGISTER unsigned char *buf_end; |
| 428 | 2127 |
| 2128 /* Keeps track of unclosed groups. */ | |
| 2129 compile_stack_type compile_stack; | |
| 2130 | |
| 2131 /* Points to the current (ending) position in the pattern. */ | |
| 446 | 2132 re_char *p = pattern; |
| 2133 re_char *pend = pattern + size; | |
| 428 | 2134 |
| 2135 /* How to translate the characters in the pattern. */ | |
| 446 | 2136 RE_TRANSLATE_TYPE translate = bufp->translate; |
| 428 | 2137 |
| 2138 /* Address of the count-byte of the most recently inserted `exactn' | |
| 2139 command. This makes it possible to tell if a new exact-match | |
| 2140 character can be added to that command or if the character requires | |
| 2141 a new `exactn' command. */ | |
| 2142 unsigned char *pending_exact = 0; | |
| 2143 | |
| 2144 /* Address of start of the most recently finished expression. | |
| 2145 This tells, e.g., postfix * where to find the start of its | |
| 2146 operand. Reset at the beginning of groups and alternatives. */ | |
| 2147 unsigned char *laststart = 0; | |
| 2148 | |
| 2149 /* Address of beginning of regexp, or inside of last group. */ | |
| 2150 unsigned char *begalt; | |
| 2151 | |
| 2152 /* Place in the uncompiled pattern (i.e., the {) to | |
| 2153 which to go back if the interval is invalid. */ | |
| 446 | 2154 re_char *beg_interval; |
| 428 | 2155 |
| 2156 /* Address of the place where a forward jump should go to the end of | |
| 2157 the containing expression. Each alternative of an `or' -- except the | |
| 2158 last -- ends with a forward jump of this sort. */ | |
| 2159 unsigned char *fixup_alt_jump = 0; | |
| 2160 | |
| 2161 /* Counts open-groups as they are encountered. Remembered for the | |
| 2162 matching close-group on the compile stack, so the same register | |
| 2163 number is put in the stop_memory as the start_memory. */ | |
| 2164 regnum_t regnum = 0; | |
| 2165 | |
| 2166 #ifdef DEBUG | |
| 2167 DEBUG_PRINT1 ("\nCompiling pattern: "); | |
| 2168 if (debug) | |
| 2169 { | |
| 647 | 2170 int debug_count; |
| 428 | 2171 |
| 2172 for (debug_count = 0; debug_count < size; debug_count++) | |
| 2173 putchar (pattern[debug_count]); | |
| 2174 putchar ('\n'); | |
| 2175 } | |
| 2176 #endif /* DEBUG */ | |
| 2177 | |
| 2178 /* Initialize the compile stack. */ | |
| 2179 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); | |
| 2180 if (compile_stack.stack == NULL) | |
| 2181 return REG_ESPACE; | |
| 2182 | |
| 2183 compile_stack.size = INIT_COMPILE_STACK_SIZE; | |
| 2184 compile_stack.avail = 0; | |
| 2185 | |
| 2186 /* Initialize the pattern buffer. */ | |
| 2187 bufp->syntax = syntax; | |
| 2188 bufp->fastmap_accurate = 0; | |
| 2189 bufp->not_bol = bufp->not_eol = 0; | |
| 2190 | |
| 2191 /* Set `used' to zero, so that if we return an error, the pattern | |
| 2192 printer (for debugging) will think there's no pattern. We reset it | |
| 2193 at the end. */ | |
| 2194 bufp->used = 0; | |
| 2195 | |
| 2196 /* Always count groups, whether or not bufp->no_sub is set. */ | |
| 2197 bufp->re_nsub = 0; | |
| 502 | 2198 bufp->re_ngroups = 0; |
| 2199 | |
| 2200 bufp->warned_about_incompatible_back_references = 0; | |
| 2201 | |
| 2202 if (bufp->external_to_internal_register == 0) | |
| 2203 { | |
| 2204 bufp->external_to_internal_register_size = INIT_REG_TRANSLATE_SIZE; | |
| 2205 RETALLOC (bufp->external_to_internal_register, | |
| 2206 bufp->external_to_internal_register_size, | |
| 2207 int); | |
| 2208 } | |
| 2209 | |
| 2210 { | |
| 2211 int i; | |
| 2212 | |
| 2213 bufp->external_to_internal_register[0] = 0; | |
| 2214 for (i = 1; i < bufp->external_to_internal_register_size; i++) | |
| 2215 bufp->external_to_internal_register[i] = (int) 0xDEADBEEF; | |
| 2216 } | |
| 428 | 2217 |
| 2218 #if !defined (emacs) && !defined (SYNTAX_TABLE) | |
| 2219 /* Initialize the syntax table. */ | |
| 2220 init_syntax_once (); | |
| 2221 #endif | |
| 2222 | |
| 2223 if (bufp->allocated == 0) | |
| 2224 { | |
| 2225 if (bufp->buffer) | |
| 2226 { /* If zero allocated, but buffer is non-null, try to realloc | |
| 2227 enough space. This loses if buffer's address is bogus, but | |
| 2228 that is the user's responsibility. */ | |
| 2229 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); | |
| 2230 } | |
| 2231 else | |
| 2232 { /* Caller did not allocate a buffer. Do it for them. */ | |
| 2233 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); | |
| 2234 } | |
| 2235 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); | |
| 2236 | |
| 2237 bufp->allocated = INIT_BUF_SIZE; | |
| 2238 } | |
| 2239 | |
| 446 | 2240 begalt = buf_end = bufp->buffer; |
| 428 | 2241 |
| 2242 /* Loop through the uncompiled pattern until we're at the end. */ | |
| 2243 while (p != pend) | |
| 2244 { | |
| 2245 PATFETCH (c); | |
| 2246 | |
| 2247 switch (c) | |
| 2248 { | |
| 2249 case '^': | |
| 2250 { | |
| 2251 if ( /* If at start of pattern, it's an operator. */ | |
| 2252 p == pattern + 1 | |
| 2253 /* If context independent, it's an operator. */ | |
| 2254 || syntax & RE_CONTEXT_INDEP_ANCHORS | |
| 2255 /* Otherwise, depends on what's come before. */ | |
| 2256 || at_begline_loc_p (pattern, p, syntax)) | |
| 2257 BUF_PUSH (begline); | |
| 2258 else | |
| 2259 goto normal_char; | |
| 2260 } | |
| 2261 break; | |
| 2262 | |
| 2263 | |
| 2264 case '$': | |
| 2265 { | |
| 2266 if ( /* If at end of pattern, it's an operator. */ | |
| 2267 p == pend | |
| 2268 /* If context independent, it's an operator. */ | |
| 2269 || syntax & RE_CONTEXT_INDEP_ANCHORS | |
| 2270 /* Otherwise, depends on what's next. */ | |
| 2271 || at_endline_loc_p (p, pend, syntax)) | |
| 2272 BUF_PUSH (endline); | |
| 2273 else | |
| 2274 goto normal_char; | |
| 2275 } | |
| 2276 break; | |
| 2277 | |
| 2278 | |
| 2279 case '+': | |
| 2280 case '?': | |
| 2281 if ((syntax & RE_BK_PLUS_QM) | |
| 2282 || (syntax & RE_LIMITED_OPS)) | |
| 2283 goto normal_char; | |
| 2284 handle_plus: | |
| 2285 case '*': | |
| 2286 /* If there is no previous pattern... */ | |
| 2287 if (!laststart) | |
| 2288 { | |
| 2289 if (syntax & RE_CONTEXT_INVALID_OPS) | |
| 2290 FREE_STACK_RETURN (REG_BADRPT); | |
| 2291 else if (!(syntax & RE_CONTEXT_INDEP_OPS)) | |
| 2292 goto normal_char; | |
| 2293 } | |
| 2294 | |
| 2295 { | |
| 2296 /* true means zero/many matches are allowed. */ | |
| 460 | 2297 re_bool zero_times_ok = c != '+'; |
| 2298 re_bool many_times_ok = c != '?'; | |
| 428 | 2299 |
| 2300 /* true means match shortest string possible. */ | |
| 460 | 2301 re_bool minimal = false; |
| 428 | 2302 |
| 2303 /* If there is a sequence of repetition chars, collapse it | |
| 2304 down to just one (the right one). We can't combine | |
| 2305 interval operators with these because of, e.g., `a{2}*', | |
| 2306 which should only match an even number of `a's. */ | |
| 2307 while (p != pend) | |
| 2308 { | |
| 2309 PATFETCH (c); | |
| 2310 | |
| 2311 if (c == '*' || (!(syntax & RE_BK_PLUS_QM) | |
| 2312 && (c == '+' || c == '?'))) | |
| 2313 ; | |
| 2314 | |
| 2315 else if (syntax & RE_BK_PLUS_QM && c == '\\') | |
| 2316 { | |
| 2317 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); | |
| 2318 | |
| 2319 PATFETCH (c1); | |
| 2320 if (!(c1 == '+' || c1 == '?')) | |
| 2321 { | |
| 2322 PATUNFETCH; | |
| 2323 PATUNFETCH; | |
| 2324 break; | |
| 2325 } | |
| 2326 | |
| 2327 c = c1; | |
| 2328 } | |
| 2329 else | |
| 2330 { | |
| 2331 PATUNFETCH; | |
| 2332 break; | |
| 2333 } | |
| 2334 | |
| 2335 /* If we get here, we found another repeat character. */ | |
| 2336 if (!(syntax & RE_NO_MINIMAL_MATCHING)) | |
| 2337 { | |
| 440 | 2338 /* "*?" and "+?" and "??" are okay (and mean match |
| 2339 minimally), but other sequences (such as "*??" and | |
| 2340 "+++") are rejected (reserved for future use). */ | |
| 428 | 2341 if (minimal || c != '?') |
| 2342 FREE_STACK_RETURN (REG_BADRPT); | |
| 2343 minimal = true; | |
| 2344 } | |
| 2345 else | |
| 2346 { | |
| 2347 zero_times_ok |= c != '+'; | |
| 2348 many_times_ok |= c != '?'; | |
| 2349 } | |
| 2350 } | |
| 2351 | |
| 2352 /* Star, etc. applied to an empty pattern is equivalent | |
| 2353 to an empty pattern. */ | |
| 2354 if (!laststart) | |
| 2355 break; | |
| 2356 | |
| 2357 /* Now we know whether zero matches is allowed | |
| 2358 and whether two or more matches is allowed | |
| 2359 and whether we want minimal or maximal matching. */ | |
| 2360 if (minimal) | |
| 2361 { | |
| 2362 if (!many_times_ok) | |
| 2363 { | |
| 2364 /* "a??" becomes: | |
| 2365 0: /on_failure_jump to 6 | |
| 2366 3: /jump to 9 | |
| 2367 6: /exactn/1/A | |
| 2368 9: end of pattern. | |
| 2369 */ | |
| 2370 GET_BUFFER_SPACE (6); | |
| 446 | 2371 INSERT_JUMP (jump, laststart, buf_end + 3); |
| 2372 buf_end += 3; | |
| 428 | 2373 INSERT_JUMP (on_failure_jump, laststart, laststart + 6); |
| 446 | 2374 buf_end += 3; |
| 428 | 2375 } |
| 2376 else if (zero_times_ok) | |
| 2377 { | |
| 2378 /* "a*?" becomes: | |
| 2379 0: /jump to 6 | |
| 2380 3: /exactn/1/A | |
| 2381 6: /on_failure_jump to 3 | |
| 2382 9: end of pattern. | |
| 2383 */ | |
| 2384 GET_BUFFER_SPACE (6); | |
| 446 | 2385 INSERT_JUMP (jump, laststart, buf_end + 3); |
| 2386 buf_end += 3; | |
| 2387 STORE_JUMP (on_failure_jump, buf_end, laststart + 3); | |
| 2388 buf_end += 3; | |
| 428 | 2389 } |
| 2390 else | |
| 2391 { | |
| 2392 /* "a+?" becomes: | |
| 2393 0: /exactn/1/A | |
| 2394 3: /on_failure_jump to 0 | |
| 2395 6: end of pattern. | |
| 2396 */ | |
| 2397 GET_BUFFER_SPACE (3); | |
| 446 | 2398 STORE_JUMP (on_failure_jump, buf_end, laststart); |
| 2399 buf_end += 3; | |
| 428 | 2400 } |
| 2401 } | |
| 2402 else | |
| 2403 { | |
| 2404 /* Are we optimizing this jump? */ | |
| 460 | 2405 re_bool keep_string_p = false; |
| 428 | 2406 |
| 2407 if (many_times_ok) | |
| 446 | 2408 { /* More than one repetition is allowed, so put in |
| 2409 at the end a backward relative jump from | |
| 2410 `buf_end' to before the next jump we're going | |
| 2411 to put in below (which jumps from laststart to | |
| 2412 after this jump). | |
| 428 | 2413 |
| 2414 But if we are at the `*' in the exact sequence `.*\n', | |
| 2415 insert an unconditional jump backwards to the ., | |
| 2416 instead of the beginning of the loop. This way we only | |
| 2417 push a failure point once, instead of every time | |
| 2418 through the loop. */ | |
| 2419 assert (p - 1 > pattern); | |
| 2420 | |
| 2421 /* Allocate the space for the jump. */ | |
| 2422 GET_BUFFER_SPACE (3); | |
| 2423 | |
| 2424 /* We know we are not at the first character of the | |
| 2425 pattern, because laststart was nonzero. And we've | |
| 2426 already incremented `p', by the way, to be the | |
| 2427 character after the `*'. Do we have to do something | |
| 2428 analogous here for null bytes, because of | |
| 2429 RE_DOT_NOT_NULL? */ | |
| 446 | 2430 if (*(p - 2) == '.' |
| 428 | 2431 && zero_times_ok |
| 446 | 2432 && p < pend && *p == '\n' |
| 428 | 2433 && !(syntax & RE_DOT_NEWLINE)) |
| 2434 { /* We have .*\n. */ | |
| 446 | 2435 STORE_JUMP (jump, buf_end, laststart); |
| 428 | 2436 keep_string_p = true; |
| 2437 } | |
| 2438 else | |
| 2439 /* Anything else. */ | |
| 446 | 2440 STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); |
| 428 | 2441 |
| 2442 /* We've added more stuff to the buffer. */ | |
| 446 | 2443 buf_end += 3; |
| 428 | 2444 } |
| 2445 | |
| 446 | 2446 /* On failure, jump from laststart to buf_end + 3, |
| 2447 which will be the end of the buffer after this jump | |
| 2448 is inserted. */ | |
| 428 | 2449 GET_BUFFER_SPACE (3); |
| 2450 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump | |
| 2451 : on_failure_jump, | |
| 446 | 2452 laststart, buf_end + 3); |
| 2453 buf_end += 3; | |
| 428 | 2454 |
| 2455 if (!zero_times_ok) | |
| 2456 { | |
| 2457 /* At least one repetition is required, so insert a | |
| 2458 `dummy_failure_jump' before the initial | |
| 2459 `on_failure_jump' instruction of the loop. This | |
| 2460 effects a skip over that instruction the first time | |
| 2461 we hit that loop. */ | |
| 2462 GET_BUFFER_SPACE (3); | |
| 2463 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); | |
| 446 | 2464 buf_end += 3; |
| 428 | 2465 } |
| 2466 } | |
| 2467 pending_exact = 0; | |
| 2468 } | |
| 2469 break; | |
| 2470 | |
| 2471 | |
| 2472 case '.': | |
| 446 | 2473 laststart = buf_end; |
| 428 | 2474 BUF_PUSH (anychar); |
| 2475 break; | |
| 2476 | |
| 2477 | |
| 2478 case '[': | |
| 2479 { | |
| 2480 /* XEmacs change: this whole section */ | |
| 460 | 2481 re_bool had_char_class = false; |
| 428 | 2482 #ifdef MULE |
| 460 | 2483 re_bool has_extended_chars = false; |
| 428 | 2484 REGISTER Lisp_Object rtab = Qnil; |
| 2485 #endif | |
| 2486 | |
| 2487 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | |
| 2488 | |
| 2489 /* Ensure that we have enough space to push a charset: the | |
| 2490 opcode, the length count, and the bitset; 34 bytes in all. */ | |
| 2491 GET_BUFFER_SPACE (34); | |
| 2492 | |
| 446 | 2493 laststart = buf_end; |
| 428 | 2494 |
| 2495 /* We test `*p == '^' twice, instead of using an if | |
| 2496 statement, so we only need one BUF_PUSH. */ | |
| 2497 BUF_PUSH (*p == '^' ? charset_not : charset); | |
| 2498 if (*p == '^') | |
| 2499 p++; | |
| 2500 | |
| 2501 /* Remember the first position in the bracket expression. */ | |
| 2502 p1 = p; | |
| 2503 | |
| 2504 /* Push the number of bytes in the bitmap. */ | |
| 2505 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); | |
| 2506 | |
| 2507 /* Clear the whole map. */ | |
| 446 | 2508 memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); |
| 428 | 2509 |
| 2510 /* charset_not matches newline according to a syntax bit. */ | |
| 446 | 2511 if ((re_opcode_t) buf_end[-2] == charset_not |
| 428 | 2512 && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) |
| 2513 SET_LIST_BIT ('\n'); | |
| 2514 | |
| 2515 #ifdef MULE | |
| 2516 start_over_with_extended: | |
| 2517 if (has_extended_chars) | |
| 2518 { | |
| 2519 /* There are extended chars here, which means we need to start | |
| 2520 over and shift to unified range-table format. */ | |
| 446 | 2521 if (buf_end[-2] == charset) |
| 2522 buf_end[-2] = charset_mule; | |
| 428 | 2523 else |
| 446 | 2524 buf_end[-2] = charset_mule_not; |
| 2525 buf_end--; | |
| 428 | 2526 p = p1; /* go back to the beginning of the charset, after |
| 2527 a possible ^. */ | |
| 2528 rtab = Vthe_lisp_rangetab; | |
| 2529 Fclear_range_table (rtab); | |
| 2530 | |
| 2531 /* charset_not matches newline according to a syntax bit. */ | |
| 446 | 2532 if ((re_opcode_t) buf_end[-1] == charset_mule_not |
| 428 | 2533 && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) |
| 2534 SET_EITHER_BIT ('\n'); | |
| 2535 } | |
| 2536 #endif /* MULE */ | |
| 2537 | |
| 2538 /* Read in characters and ranges, setting map bits. */ | |
| 2539 for (;;) | |
| 2540 { | |
| 2541 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | |
| 2542 | |
| 446 | 2543 PATFETCH (c); |
| 428 | 2544 |
| 2545 #ifdef MULE | |
| 2546 if (c >= 0x80 && !has_extended_chars) | |
| 2547 { | |
| 2548 has_extended_chars = 1; | |
| 2549 /* Frumble-bumble, we've found some extended chars. | |
| 2550 Need to start over, process everything using | |
| 2551 the general extended-char mechanism, and need | |
| 2552 to use charset_mule and charset_mule_not instead | |
| 2553 of charset and charset_not. */ | |
| 2554 goto start_over_with_extended; | |
| 2555 } | |
| 2556 #endif /* MULE */ | |
| 2557 /* \ might escape characters inside [...] and [^...]. */ | |
| 2558 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') | |
| 2559 { | |
| 2560 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); | |
| 2561 | |
| 446 | 2562 PATFETCH (c1); |
| 428 | 2563 #ifdef MULE |
| 2564 if (c1 >= 0x80 && !has_extended_chars) | |
| 2565 { | |
| 2566 has_extended_chars = 1; | |
| 2567 goto start_over_with_extended; | |
| 2568 } | |
| 2569 #endif /* MULE */ | |
| 2570 SET_EITHER_BIT (c1); | |
| 2571 continue; | |
| 2572 } | |
| 2573 | |
| 2574 /* Could be the end of the bracket expression. If it's | |
| 2575 not (i.e., when the bracket expression is `[]' so | |
| 2576 far), the ']' character bit gets set way below. */ | |
| 2577 if (c == ']' && p != p1 + 1) | |
| 2578 break; | |
| 2579 | |
| 2580 /* Look ahead to see if it's a range when the last thing | |
| 2581 was a character class. */ | |
| 2582 if (had_char_class && c == '-' && *p != ']') | |
| 2583 FREE_STACK_RETURN (REG_ERANGE); | |
| 2584 | |
| 2585 /* Look ahead to see if it's a range when the last thing | |
| 2586 was a character: if this is a hyphen not at the | |
| 2587 beginning or the end of a list, then it's the range | |
| 2588 operator. */ | |
| 2589 if (c == '-' | |
| 2590 && !(p - 2 >= pattern && p[-2] == '[') | |
| 446 | 2591 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') |
| 428 | 2592 && *p != ']') |
| 2593 { | |
| 2594 reg_errcode_t ret; | |
| 2595 | |
| 2596 #ifdef MULE | |
| 2597 if (* (unsigned char *) p >= 0x80 && !has_extended_chars) | |
| 2598 { | |
| 2599 has_extended_chars = 1; | |
| 2600 goto start_over_with_extended; | |
| 2601 } | |
| 2602 if (has_extended_chars) | |
| 2603 ret = compile_extended_range (&p, pend, translate, | |
| 2604 syntax, rtab); | |
| 2605 else | |
| 2606 #endif /* MULE */ | |
| 446 | 2607 ret = compile_range (&p, pend, translate, syntax, buf_end); |
| 428 | 2608 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); |
| 2609 } | |
| 2610 | |
| 2611 else if (p[0] == '-' && p[1] != ']') | |
| 2612 { /* This handles ranges made up of characters only. */ | |
| 2613 reg_errcode_t ret; | |
| 2614 | |
| 2615 /* Move past the `-'. */ | |
| 2616 PATFETCH (c1); | |
| 2617 | |
| 2618 #ifdef MULE | |
| 2619 if (* (unsigned char *) p >= 0x80 && !has_extended_chars) | |
| 2620 { | |
| 2621 has_extended_chars = 1; | |
| 2622 goto start_over_with_extended; | |
| 2623 } | |
| 2624 if (has_extended_chars) | |
| 2625 ret = compile_extended_range (&p, pend, translate, | |
| 2626 syntax, rtab); | |
| 2627 else | |
| 2628 #endif /* MULE */ | |
| 446 | 2629 ret = compile_range (&p, pend, translate, syntax, buf_end); |
| 428 | 2630 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); |
| 2631 } | |
| 2632 | |
| 2633 /* See if we're at the beginning of a possible character | |
| 2634 class. */ | |
| 2635 | |
| 2636 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') | |
| 2637 { /* Leave room for the null. */ | |
| 2638 char str[CHAR_CLASS_MAX_LENGTH + 1]; | |
| 2639 | |
| 2640 PATFETCH (c); | |
| 2641 c1 = 0; | |
| 2642 | |
| 2643 /* If pattern is `[[:'. */ | |
| 2644 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | |
| 2645 | |
| 2646 for (;;) | |
| 2647 { | |
| 446 | 2648 /* #### This code is unused. |
| 2649 Correctness is not checked after TRT | |
| 2650 table change. */ | |
| 428 | 2651 PATFETCH (c); |
| 2652 if (c == ':' || c == ']' || p == pend | |
| 2653 || c1 == CHAR_CLASS_MAX_LENGTH) | |
| 2654 break; | |
| 442 | 2655 str[c1++] = (char) c; |
| 428 | 2656 } |
| 2657 str[c1] = '\0'; | |
| 2658 | |
| 446 | 2659 /* If it isn't a word bracketed by `[:' and `:]': |
| 428 | 2660 undo the ending character, the letters, and leave |
| 2661 the leading `:' and `[' (but set bits for them). */ | |
| 2662 if (c == ':' && *p == ']') | |
| 2663 { | |
| 2664 int ch; | |
| 460 | 2665 re_bool is_alnum = STREQ (str, "alnum"); |
| 2666 re_bool is_alpha = STREQ (str, "alpha"); | |
| 2667 re_bool is_blank = STREQ (str, "blank"); | |
| 2668 re_bool is_cntrl = STREQ (str, "cntrl"); | |
| 2669 re_bool is_digit = STREQ (str, "digit"); | |
| 2670 re_bool is_graph = STREQ (str, "graph"); | |
| 2671 re_bool is_lower = STREQ (str, "lower"); | |
| 2672 re_bool is_print = STREQ (str, "print"); | |
| 2673 re_bool is_punct = STREQ (str, "punct"); | |
| 2674 re_bool is_space = STREQ (str, "space"); | |
| 2675 re_bool is_upper = STREQ (str, "upper"); | |
| 2676 re_bool is_xdigit = STREQ (str, "xdigit"); | |
| 428 | 2677 |
| 2678 if (!IS_CHAR_CLASS (str)) | |
| 2679 FREE_STACK_RETURN (REG_ECTYPE); | |
| 2680 | |
| 2681 /* Throw away the ] at the end of the character | |
| 2682 class. */ | |
| 2683 PATFETCH (c); | |
| 2684 | |
| 2685 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | |
| 2686 | |
| 2687 for (ch = 0; ch < 1 << BYTEWIDTH; ch++) | |
| 2688 { | |
| 2689 /* This was split into 3 if's to | |
| 2690 avoid an arbitrary limit in some compiler. */ | |
| 2691 if ( (is_alnum && ISALNUM (ch)) | |
| 2692 || (is_alpha && ISALPHA (ch)) | |
| 2693 || (is_blank && ISBLANK (ch)) | |
| 2694 || (is_cntrl && ISCNTRL (ch))) | |
| 2695 SET_EITHER_BIT (ch); | |
| 2696 if ( (is_digit && ISDIGIT (ch)) | |
| 2697 || (is_graph && ISGRAPH (ch)) | |
| 2698 || (is_lower && ISLOWER (ch)) | |
| 2699 || (is_print && ISPRINT (ch))) | |
| 2700 SET_EITHER_BIT (ch); | |
| 2701 if ( (is_punct && ISPUNCT (ch)) | |
| 2702 || (is_space && ISSPACE (ch)) | |
| 2703 || (is_upper && ISUPPER (ch)) | |
| 2704 || (is_xdigit && ISXDIGIT (ch))) | |
| 2705 SET_EITHER_BIT (ch); | |
| 2706 } | |
| 2707 had_char_class = true; | |
| 2708 } | |
| 2709 else | |
| 2710 { | |
| 2711 c1++; | |
| 2712 while (c1--) | |
| 2713 PATUNFETCH; | |
| 2714 SET_EITHER_BIT ('['); | |
| 2715 SET_EITHER_BIT (':'); | |
| 2716 had_char_class = false; | |
| 2717 } | |
| 2718 } | |
| 2719 else | |
| 2720 { | |
| 2721 had_char_class = false; | |
| 2722 SET_EITHER_BIT (c); | |
| 2723 } | |
| 2724 } | |
| 2725 | |
| 2726 #ifdef MULE | |
| 2727 if (has_extended_chars) | |
| 2728 { | |
| 2729 /* We have a range table, not a bit vector. */ | |
| 2730 int bytes_needed = | |
| 2731 unified_range_table_bytes_needed (rtab); | |
| 2732 GET_BUFFER_SPACE (bytes_needed); | |
| 446 | 2733 unified_range_table_copy_data (rtab, buf_end); |
| 2734 buf_end += unified_range_table_bytes_used (buf_end); | |
| 428 | 2735 break; |
| 2736 } | |
| 2737 #endif /* MULE */ | |
| 2738 /* Discard any (non)matching list bytes that are all 0 at the | |
| 2739 end of the map. Decrease the map-length byte too. */ | |
| 446 | 2740 while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) |
| 2741 buf_end[-1]--; | |
| 2742 buf_end += buf_end[-1]; | |
| 428 | 2743 } |
| 2744 break; | |
| 2745 | |
| 2746 | |
| 2747 case '(': | |
| 2748 if (syntax & RE_NO_BK_PARENS) | |
| 2749 goto handle_open; | |
| 2750 else | |
| 2751 goto normal_char; | |
| 2752 | |
| 2753 | |
| 2754 case ')': | |
| 2755 if (syntax & RE_NO_BK_PARENS) | |
| 2756 goto handle_close; | |
| 2757 else | |
| 2758 goto normal_char; | |
| 2759 | |
| 2760 | |
| 2761 case '\n': | |
| 2762 if (syntax & RE_NEWLINE_ALT) | |
| 2763 goto handle_alt; | |
| 2764 else | |
| 2765 goto normal_char; | |
| 2766 | |
| 2767 | |
| 2768 case '|': | |
| 2769 if (syntax & RE_NO_BK_VBAR) | |
| 2770 goto handle_alt; | |
| 2771 else | |
| 2772 goto normal_char; | |
| 2773 | |
| 2774 | |
| 2775 case '{': | |
| 2776 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) | |
| 2777 goto handle_interval; | |
| 2778 else | |
| 2779 goto normal_char; | |
| 2780 | |
| 2781 | |
| 2782 case '\\': | |
| 2783 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); | |
| 2784 | |
| 2785 /* Do not translate the character after the \, so that we can | |
| 2786 distinguish, e.g., \B from \b, even if we normally would | |
| 2787 translate, e.g., B to b. */ | |
| 2788 PATFETCH_RAW (c); | |
| 2789 | |
| 2790 switch (c) | |
| 2791 { | |
| 2792 case '(': | |
| 2793 if (syntax & RE_NO_BK_PARENS) | |
| 2794 goto normal_backslash; | |
| 2795 | |
| 2796 handle_open: | |
| 2797 { | |
| 2798 regnum_t r; | |
| 502 | 2799 int shy = 0; |
| 428 | 2800 |
| 2801 if (!(syntax & RE_NO_SHY_GROUPS) | |
| 2802 && p != pend | |
| 446 | 2803 && *p == '?') |
| 428 | 2804 { |
| 2805 p++; | |
| 446 | 2806 PATFETCH (c); |
| 428 | 2807 switch (c) |
| 2808 { | |
| 2809 case ':': /* shy groups */ | |
| 502 | 2810 shy = 1; |
| 428 | 2811 break; |
| 2812 | |
| 2813 /* All others are reserved for future constructs. */ | |
| 2814 default: | |
| 2815 FREE_STACK_RETURN (REG_BADPAT); | |
| 2816 } | |
| 2817 } | |
| 502 | 2818 |
| 2819 r = ++regnum; | |
| 2820 bufp->re_ngroups++; | |
| 2821 if (!shy) | |
| 2822 { | |
| 2823 bufp->re_nsub++; | |
| 2824 while (bufp->external_to_internal_register_size <= | |
| 2825 bufp->re_nsub) | |
| 2826 { | |
| 2827 int i; | |
| 2828 int old_size = | |
| 2829 bufp->external_to_internal_register_size; | |
| 2830 bufp->external_to_internal_register_size += 5; | |
| 2831 RETALLOC (bufp->external_to_internal_register, | |
| 2832 bufp->external_to_internal_register_size, | |
| 2833 int); | |
| 2834 /* debugging */ | |
| 2835 for (i = old_size; | |
| 2836 i < bufp->external_to_internal_register_size; i++) | |
| 2837 bufp->external_to_internal_register[i] = | |
| 2838 (int) 0xDEADBEEF; | |
| 2839 } | |
| 2840 | |
| 2841 bufp->external_to_internal_register[bufp->re_nsub] = | |
| 2842 bufp->re_ngroups; | |
| 2843 } | |
| 428 | 2844 |
| 2845 if (COMPILE_STACK_FULL) | |
| 2846 { | |
| 2847 RETALLOC (compile_stack.stack, compile_stack.size << 1, | |
| 2848 compile_stack_elt_t); | |
| 2849 if (compile_stack.stack == NULL) return REG_ESPACE; | |
| 2850 | |
| 2851 compile_stack.size <<= 1; | |
| 2852 } | |
| 2853 | |
| 2854 /* These are the values to restore when we hit end of this | |
| 2855 group. They are all relative offsets, so that if the | |
| 2856 whole pattern moves because of realloc, they will still | |
| 2857 be valid. */ | |
| 2858 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; | |
| 2859 COMPILE_STACK_TOP.fixup_alt_jump | |
| 2860 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; | |
| 446 | 2861 COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; |
| 428 | 2862 COMPILE_STACK_TOP.regnum = r; |
| 2863 | |
| 2864 /* We will eventually replace the 0 with the number of | |
| 2865 groups inner to this one. But do not push a | |
| 2866 start_memory for groups beyond the last one we can | |
| 502 | 2867 represent in the compiled pattern. |
| 2868 #### bad bad bad. this will fail in lots of ways, if we | |
| 2869 ever have to backtrack for these groups. | |
| 2870 */ | |
| 428 | 2871 if (r <= MAX_REGNUM) |
| 2872 { | |
| 2873 COMPILE_STACK_TOP.inner_group_offset | |
| 446 | 2874 = buf_end - bufp->buffer + 2; |
| 428 | 2875 BUF_PUSH_3 (start_memory, r, 0); |
| 2876 } | |
| 2877 | |
| 2878 compile_stack.avail++; | |
| 2879 | |
| 2880 fixup_alt_jump = 0; | |
| 2881 laststart = 0; | |
| 446 | 2882 begalt = buf_end; |
| 428 | 2883 /* If we've reached MAX_REGNUM groups, then this open |
| 2884 won't actually generate any code, so we'll have to | |
| 2885 clear pending_exact explicitly. */ | |
| 2886 pending_exact = 0; | |
| 2887 } | |
| 2888 break; | |
| 2889 | |
| 2890 | |
| 2891 case ')': | |
| 2892 if (syntax & RE_NO_BK_PARENS) goto normal_backslash; | |
| 2893 | |
| 2894 if (COMPILE_STACK_EMPTY) { | |
| 2895 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) | |
| 2896 goto normal_backslash; | |
| 2897 else | |
| 2898 FREE_STACK_RETURN (REG_ERPAREN); | |
| 2899 } | |
| 2900 | |
| 2901 handle_close: | |
| 2902 if (fixup_alt_jump) | |
| 2903 { /* Push a dummy failure point at the end of the | |
| 2904 alternative for a possible future | |
| 2905 `pop_failure_jump' to pop. See comments at | |
| 2906 `push_dummy_failure' in `re_match_2'. */ | |
| 2907 BUF_PUSH (push_dummy_failure); | |
| 2908 | |
| 2909 /* We allocated space for this jump when we assigned | |
| 2910 to `fixup_alt_jump', in the `handle_alt' case below. */ | |
| 446 | 2911 STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); |
| 428 | 2912 } |
| 2913 | |
| 2914 /* See similar code for backslashed left paren above. */ | |
| 2915 if (COMPILE_STACK_EMPTY) { | |
| 2916 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) | |
| 2917 goto normal_char; | |
| 2918 else | |
| 2919 FREE_STACK_RETURN (REG_ERPAREN); | |
| 2920 } | |
| 2921 | |
| 2922 /* Since we just checked for an empty stack above, this | |
| 2923 ``can't happen''. */ | |
| 2924 assert (compile_stack.avail != 0); | |
| 2925 { | |
| 2926 /* We don't just want to restore into `regnum', because | |
| 2927 later groups should continue to be numbered higher, | |
| 2928 as in `(ab)c(de)' -- the second group is #2. */ | |
| 2929 regnum_t this_group_regnum; | |
| 2930 | |
| 2931 compile_stack.avail--; | |
| 2932 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; | |
| 2933 fixup_alt_jump | |
| 2934 = COMPILE_STACK_TOP.fixup_alt_jump | |
| 2935 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 | |
| 2936 : 0; | |
| 2937 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; | |
| 2938 this_group_regnum = COMPILE_STACK_TOP.regnum; | |
| 2939 /* If we've reached MAX_REGNUM groups, then this open | |
| 2940 won't actually generate any code, so we'll have to | |
| 2941 clear pending_exact explicitly. */ | |
| 2942 pending_exact = 0; | |
| 2943 | |
| 2944 /* We're at the end of the group, so now we know how many | |
| 2945 groups were inside this one. */ | |
| 2946 if (this_group_regnum <= MAX_REGNUM) | |
| 2947 { | |
| 2948 unsigned char *inner_group_loc | |
| 2949 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; | |
| 2950 | |
| 2951 *inner_group_loc = regnum - this_group_regnum; | |
| 2952 BUF_PUSH_3 (stop_memory, this_group_regnum, | |
| 2953 regnum - this_group_regnum); | |
| 2954 } | |
| 2955 } | |
| 2956 break; | |
| 2957 | |
| 2958 | |
| 2959 case '|': /* `\|'. */ | |
| 2960 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) | |
| 2961 goto normal_backslash; | |
| 2962 handle_alt: | |
| 2963 if (syntax & RE_LIMITED_OPS) | |
| 2964 goto normal_char; | |
| 2965 | |
| 2966 /* Insert before the previous alternative a jump which | |
| 2967 jumps to this alternative if the former fails. */ | |
| 2968 GET_BUFFER_SPACE (3); | |
| 446 | 2969 INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); |
| 428 | 2970 pending_exact = 0; |
| 446 | 2971 buf_end += 3; |
| 428 | 2972 |
| 2973 /* The alternative before this one has a jump after it | |
| 2974 which gets executed if it gets matched. Adjust that | |
| 2975 jump so it will jump to this alternative's analogous | |
| 2976 jump (put in below, which in turn will jump to the next | |
| 2977 (if any) alternative's such jump, etc.). The last such | |
| 2978 jump jumps to the correct final destination. A picture: | |
| 2979 _____ _____ | |
| 2980 | | | | | |
| 2981 | v | v | |
| 2982 a | b | c | |
| 2983 | |
| 2984 If we are at `b', then fixup_alt_jump right now points to a | |
| 2985 three-byte space after `a'. We'll put in the jump, set | |
| 2986 fixup_alt_jump to right after `b', and leave behind three | |
| 2987 bytes which we'll fill in when we get to after `c'. */ | |
| 2988 | |
| 2989 if (fixup_alt_jump) | |
| 446 | 2990 STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); |
| 428 | 2991 |
| 2992 /* Mark and leave space for a jump after this alternative, | |
| 2993 to be filled in later either by next alternative or | |
| 2994 when we know we're at the end of a series of alternatives. */ | |
| 446 | 2995 fixup_alt_jump = buf_end; |
| 428 | 2996 GET_BUFFER_SPACE (3); |
| 446 | 2997 buf_end += 3; |
| 428 | 2998 |
| 2999 laststart = 0; | |
| 446 | 3000 begalt = buf_end; |
| 428 | 3001 break; |
| 3002 | |
| 3003 | |
| 3004 case '{': | |
| 3005 /* If \{ is a literal. */ | |
| 3006 if (!(syntax & RE_INTERVALS) | |
| 3007 /* If we're at `\{' and it's not the open-interval | |
| 3008 operator. */ | |
| 3009 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) | |
| 3010 || (p - 2 == pattern && p == pend)) | |
| 3011 goto normal_backslash; | |
| 3012 | |
| 3013 handle_interval: | |
| 3014 { | |
| 3015 /* If got here, then the syntax allows intervals. */ | |
| 3016 | |
| 3017 /* At least (most) this many matches must be made. */ | |
| 3018 int lower_bound = -1, upper_bound = -1; | |
| 3019 | |
| 3020 beg_interval = p - 1; | |
| 3021 | |
| 3022 if (p == pend) | |
| 3023 { | |
| 3024 if (syntax & RE_NO_BK_BRACES) | |
| 3025 goto unfetch_interval; | |
| 3026 else | |
| 3027 FREE_STACK_RETURN (REG_EBRACE); | |
| 3028 } | |
| 3029 | |
| 3030 GET_UNSIGNED_NUMBER (lower_bound); | |
| 3031 | |
| 3032 if (c == ',') | |
| 3033 { | |
| 3034 GET_UNSIGNED_NUMBER (upper_bound); | |
| 3035 if (upper_bound < 0) upper_bound = RE_DUP_MAX; | |
| 3036 } | |
| 3037 else | |
| 3038 /* Interval such as `{1}' => match exactly once. */ | |
| 3039 upper_bound = lower_bound; | |
| 3040 | |
| 3041 if (lower_bound < 0 || upper_bound > RE_DUP_MAX | |
| 3042 || lower_bound > upper_bound) | |
| 3043 { | |
| 3044 if (syntax & RE_NO_BK_BRACES) | |
| 3045 goto unfetch_interval; | |
| 3046 else | |
| 3047 FREE_STACK_RETURN (REG_BADBR); | |
| 3048 } | |
| 3049 | |
| 3050 if (!(syntax & RE_NO_BK_BRACES)) | |
| 3051 { | |
| 3052 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); | |
| 3053 | |
| 3054 PATFETCH (c); | |
| 3055 } | |
| 3056 | |
| 3057 if (c != '}') | |
| 3058 { | |
| 3059 if (syntax & RE_NO_BK_BRACES) | |
| 3060 goto unfetch_interval; | |
| 3061 else | |
| 3062 FREE_STACK_RETURN (REG_BADBR); | |
| 3063 } | |
| 3064 | |
| 3065 /* We just parsed a valid interval. */ | |
| 3066 | |
| 3067 /* If it's invalid to have no preceding re. */ | |
| 3068 if (!laststart) | |
| 3069 { | |
| 3070 if (syntax & RE_CONTEXT_INVALID_OPS) | |
| 3071 FREE_STACK_RETURN (REG_BADRPT); | |
| 3072 else if (syntax & RE_CONTEXT_INDEP_OPS) | |
| 446 | 3073 laststart = buf_end; |
| 428 | 3074 else |
| 3075 goto unfetch_interval; | |
| 3076 } | |
| 3077 | |
| 3078 /* If the upper bound is zero, don't want to succeed at | |
| 3079 all; jump from `laststart' to `buf_end + 3', which will be | |
| 3080 the end of the buffer after we insert the jump. */ | |
| 3081 if (upper_bound == 0) | |
| 3082 { | |
| 3083 GET_BUFFER_SPACE (3); | |
| 446 | 3084 INSERT_JUMP (jump, laststart, buf_end + 3); |
| 3085 buf_end += 3; | |
| 428 | 3086 } |
| 3087 | |
| 3088 /* Otherwise, we have a nontrivial interval. When | |
| 3089 we're all done, the pattern will look like: | |
| 3090 set_number_at <jump count> <upper bound> | |
| 3091 set_number_at <succeed_n count> <lower bound> | |
| 3092 succeed_n <after jump addr> <succeed_n count> | |
| 3093 <body of loop> | |
| 3094 jump_n <succeed_n addr> <jump count> | |
| 3095 (The upper bound and `jump_n' are omitted if | |
| 3096 `upper_bound' is 1, though.) */ | |
| 3097 else | |
| 3098 { /* If the upper bound is > 1, we need to insert | |
| 3099 more at the end of the loop. */ | |
| 647 | 3100 int nbytes = 10 + (upper_bound > 1) * 10; |
| 428 | 3101 |
| 3102 GET_BUFFER_SPACE (nbytes); | |
| 3103 | |
| 3104 /* Initialize lower bound of the `succeed_n', even | |
| 3105 though it will be set during matching by its | |
| 3106 attendant `set_number_at' (inserted next), | |
| 3107 because `re_compile_fastmap' needs to know. | |
| 3108 Jump to the `jump_n' we might insert below. */ | |
| 3109 INSERT_JUMP2 (succeed_n, laststart, | |
| 446 | 3110 buf_end + 5 + (upper_bound > 1) * 5, |
| 428 | 3111 lower_bound); |
| 446 | 3112 buf_end += 5; |
| 428 | 3113 |
| 3114 /* Code to initialize the lower bound. Insert | |
| 3115 before the `succeed_n'. The `5' is the last two | |
| 3116 bytes of this `set_number_at', plus 3 bytes of | |
| 3117 the following `succeed_n'. */ | |
| 446 | 3118 insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); |
| 3119 buf_end += 5; | |
| 428 | 3120 |
| 3121 if (upper_bound > 1) | |
| 3122 { /* More than one repetition is allowed, so | |
| 3123 append a backward jump to the `succeed_n' | |
| 3124 that starts this interval. | |
| 3125 | |
| 3126 When we've reached this during matching, | |
| 3127 we'll have matched the interval once, so | |
| 3128 jump back only `upper_bound - 1' times. */ | |
| 446 | 3129 STORE_JUMP2 (jump_n, buf_end, laststart + 5, |
| 428 | 3130 upper_bound - 1); |
| 446 | 3131 buf_end += 5; |
| 428 | 3132 |
| 3133 /* The location we want to set is the second | |
| 3134 parameter of the `jump_n'; that is `buf_end-2' as | |
| 3135 an absolute address. `laststart' will be | |
| 3136 the `set_number_at' we're about to insert; | |
| 3137 `laststart+3' the number to set, the source | |
| 3138 for the relative address. But we are | |
| 3139 inserting into the middle of the pattern -- | |
| 3140 so everything is getting moved up by 5. | |
| 3141 Conclusion: (buf_end - 2) - (laststart + 3) + 5, | |
| 3142 i.e., buf_end - laststart. | |
| 3143 | |
| 3144 We insert this at the beginning of the loop | |
| 3145 so that if we fail during matching, we'll | |
| 3146 reinitialize the bounds. */ | |
| 446 | 3147 insert_op2 (set_number_at, laststart, |
| 3148 buf_end - laststart, | |
| 3149 upper_bound - 1, buf_end); | |
| 3150 buf_end += 5; | |
| 428 | 3151 } |
| 3152 } | |
| 3153 pending_exact = 0; | |
| 3154 beg_interval = NULL; | |
| 3155 } | |
| 3156 break; | |
| 3157 | |
| 3158 unfetch_interval: | |
| 3159 /* If an invalid interval, match the characters as literals. */ | |
| 3160 assert (beg_interval); | |
| 3161 p = beg_interval; | |
| 3162 beg_interval = NULL; | |
| 3163 | |
| 3164 /* normal_char and normal_backslash need `c'. */ | |
| 3165 PATFETCH (c); | |
| 3166 | |
| 3167 if (!(syntax & RE_NO_BK_BRACES)) | |
| 3168 { | |
| 3169 if (p > pattern && p[-1] == '\\') | |
| 3170 goto normal_backslash; | |
| 3171 } | |
| 3172 goto normal_char; | |
| 3173 | |
| 3174 #ifdef emacs | |
| 3175 /* There is no way to specify the before_dot and after_dot | |
| 3176 operators. rms says this is ok. --karl */ | |
| 3177 case '=': | |
| 3178 BUF_PUSH (at_dot); | |
| 3179 break; | |
| 3180 | |
| 3181 case 's': | |
| 446 | 3182 laststart = buf_end; |
| 428 | 3183 PATFETCH (c); |
| 3184 /* XEmacs addition */ | |
| 3185 if (c >= 0x80 || syntax_spec_code[c] == 0377) | |
| 3186 FREE_STACK_RETURN (REG_ESYNTAX); | |
| 3187 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); | |
| 3188 break; | |
| 3189 | |
| 3190 case 'S': | |
| 446 | 3191 laststart = buf_end; |
| 428 | 3192 PATFETCH (c); |
| 3193 /* XEmacs addition */ | |
| 3194 if (c >= 0x80 || syntax_spec_code[c] == 0377) | |
| 3195 FREE_STACK_RETURN (REG_ESYNTAX); | |
| 3196 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); | |
| 3197 break; | |
| 3198 | |
| 3199 #ifdef MULE | |
| 3200 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ | |
| 3201 case 'c': | |
| 446 | 3202 laststart = buf_end; |
| 428 | 3203 PATFETCH_RAW (c); |
| 3204 if (c < 32 || c > 127) | |
| 3205 FREE_STACK_RETURN (REG_ECATEGORY); | |
| 3206 BUF_PUSH_2 (categoryspec, c); | |
| 3207 break; | |
| 3208 | |
| 3209 case 'C': | |
| 446 | 3210 laststart = buf_end; |
| 428 | 3211 PATFETCH_RAW (c); |
| 3212 if (c < 32 || c > 127) | |
| 3213 FREE_STACK_RETURN (REG_ECATEGORY); | |
| 3214 BUF_PUSH_2 (notcategoryspec, c); | |
| 3215 break; | |
| 3216 /* end of category patch */ | |
| 3217 #endif /* MULE */ | |
| 3218 #endif /* emacs */ | |
| 3219 | |
| 3220 | |
| 3221 case 'w': | |
| 446 | 3222 laststart = buf_end; |
| 428 | 3223 BUF_PUSH (wordchar); |
| 3224 break; | |
| 3225 | |
| 3226 | |
| 3227 case 'W': | |
| 446 | 3228 laststart = buf_end; |
| 428 | 3229 BUF_PUSH (notwordchar); |
| 3230 break; | |
| 3231 | |
| 3232 | |
| 3233 case '<': | |
| 3234 BUF_PUSH (wordbeg); | |
| 3235 break; | |
| 3236 | |
| 3237 case '>': | |
| 3238 BUF_PUSH (wordend); | |
| 3239 break; | |
| 3240 | |
| 3241 case 'b': | |
| 3242 BUF_PUSH (wordbound); | |
| 3243 break; | |
| 3244 | |
| 3245 case 'B': | |
| 3246 BUF_PUSH (notwordbound); | |
| 3247 break; | |
| 3248 | |
| 3249 case '`': | |
| 3250 BUF_PUSH (begbuf); | |
| 3251 break; | |
| 3252 | |
| 3253 case '\'': | |
| 3254 BUF_PUSH (endbuf); | |
| 3255 break; | |
| 3256 | |
| 3257 case '1': case '2': case '3': case '4': case '5': | |
| 3258 case '6': case '7': case '8': case '9': | |
| 446 | 3259 { |
| 502 | 3260 regnum_t reg, regint; |
| 3261 int may_need_to_unfetch = 0; | |
| 446 | 3262 if (syntax & RE_NO_BK_REFS) |
| 3263 goto normal_char; | |
| 3264 | |
| 502 | 3265 /* This only goes up to 99. It could be extended to work |
| 3266 up to 255 (the maximum number of registers that can be | |
| 3267 handled by the current regexp engine, because it stores | |
| 3268 its register numbers in the compiled pattern as one byte, | |
| 3269 ugh). Doing that's a bit trickier, because you might | |
| 3270 have the case where \25 is a back-ref but \255 is not, ... */ | |
| 446 | 3271 reg = c - '0'; |
| 502 | 3272 if (p < pend) |
| 3273 { | |
| 3274 PATFETCH (c); | |
| 3275 if (c >= '0' && c <= '9') | |
| 3276 { | |
| 3277 regnum_t new_reg = reg * 10 + c - '0'; | |
| 3278 if (new_reg <= bufp->re_nsub) | |
| 3279 { | |
| 3280 reg = new_reg; | |
| 3281 may_need_to_unfetch = 1; | |
| 3282 } | |
| 3283 else | |
| 3284 PATUNFETCH; | |
| 3285 } | |
| 523 | 3286 else |
| 3287 PATUNFETCH; | |
| 502 | 3288 } |
| 3289 | |
| 3290 if (reg > bufp->re_nsub) | |
| 446 | 3291 FREE_STACK_RETURN (REG_ESUBREG); |
| 3292 | |
| 502 | 3293 regint = bufp->external_to_internal_register[reg]; |
| 446 | 3294 /* Can't back reference to a subexpression if inside of it. */ |
| 502 | 3295 if (group_in_compile_stack (compile_stack, regint)) |
| 3296 { | |
| 3297 if (may_need_to_unfetch) | |
| 3298 PATUNFETCH; | |
| 3299 goto normal_char; | |
| 3300 } | |
| 3301 | |
| 3302 #ifdef emacs | |
| 3303 if (reg > 9 && | |
| 3304 bufp->warned_about_incompatible_back_references == 0) | |
| 3305 { | |
| 3306 bufp->warned_about_incompatible_back_references = 1; | |
| 3307 warn_when_safe (intern ("regex"), Qinfo, | |
| 3308 "Back reference \\%d now has new " | |
| 3309 "semantics in %s", reg, pattern); | |
| 3310 } | |
| 3311 #endif | |
| 446 | 3312 |
| 3313 laststart = buf_end; | |
| 502 | 3314 BUF_PUSH_2 (duplicate, regint); |
| 446 | 3315 } |
| 428 | 3316 break; |
| 3317 | |
| 3318 | |
| 3319 case '+': | |
| 3320 case '?': | |
| 3321 if (syntax & RE_BK_PLUS_QM) | |
| 3322 goto handle_plus; | |
| 3323 else | |
| 3324 goto normal_backslash; | |
| 3325 | |
| 3326 default: | |
| 3327 normal_backslash: | |
| 3328 /* You might think it would be useful for \ to mean | |
| 3329 not to translate; but if we don't translate it, | |
| 3330 it will never match anything. */ | |
| 826 | 3331 c = RE_TRANSLATE (c); |
| 428 | 3332 goto normal_char; |
| 3333 } | |
| 3334 break; | |
| 3335 | |
| 3336 | |
| 3337 default: | |
| 3338 /* Expects the character in `c'. */ | |
| 3339 /* `p' points to the location after where `c' came from. */ | |
| 3340 normal_char: | |
| 3341 { | |
| 3342 /* XEmacs: modifications here for Mule. */ | |
| 3343 /* `q' points to the beginning of the next char. */ | |
| 446 | 3344 re_char *q = p; |
| 428 | 3345 |
| 3346 /* If no exactn currently being built. */ | |
| 3347 if (!pending_exact | |
| 3348 | |
| 3349 /* If last exactn not at current position. */ | |
| 446 | 3350 || pending_exact + *pending_exact + 1 != buf_end |
| 428 | 3351 |
| 3352 /* We have only one byte following the exactn for the count. */ | |
| 3353 || ((unsigned int) (*pending_exact + (q - p)) >= | |
| 3354 ((unsigned int) (1 << BYTEWIDTH) - 1)) | |
| 3355 | |
| 3356 /* If followed by a repetition operator. */ | |
| 3357 || *q == '*' || *q == '^' | |
| 3358 || ((syntax & RE_BK_PLUS_QM) | |
| 3359 ? *q == '\\' && (q[1] == '+' || q[1] == '?') | |
| 3360 : (*q == '+' || *q == '?')) | |
| 3361 || ((syntax & RE_INTERVALS) | |
| 3362 && ((syntax & RE_NO_BK_BRACES) | |
| 3363 ? *q == '{' | |
| 3364 : (q[0] == '\\' && q[1] == '{')))) | |
| 3365 { | |
| 3366 /* Start building a new exactn. */ | |
| 3367 | |
| 446 | 3368 laststart = buf_end; |
| 428 | 3369 |
| 3370 BUF_PUSH_2 (exactn, 0); | |
| 446 | 3371 pending_exact = buf_end - 1; |
| 428 | 3372 } |
| 3373 | |
| 446 | 3374 #ifndef MULE |
| 428 | 3375 BUF_PUSH (c); |
| 3376 (*pending_exact)++; | |
| 446 | 3377 #else |
| 3378 { | |
| 3379 Bytecount bt_count; | |
| 867 | 3380 Ibyte tmp_buf[MAX_ICHAR_LEN]; |
| 446 | 3381 int i; |
| 3382 | |
| 867 | 3383 bt_count = set_itext_ichar (tmp_buf, c); |
| 446 | 3384 |
| 3385 for (i = 0; i < bt_count; i++) | |
| 3386 { | |
| 3387 BUF_PUSH (tmp_buf[i]); | |
| 3388 (*pending_exact)++; | |
| 3389 } | |
| 3390 } | |
| 3391 #endif | |
| 428 | 3392 break; |
| 3393 } | |
| 3394 } /* switch (c) */ | |
| 3395 } /* while p != pend */ | |
| 3396 | |
| 3397 | |
| 3398 /* Through the pattern now. */ | |
| 3399 | |
| 3400 if (fixup_alt_jump) | |
| 446 | 3401 STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); |
| 428 | 3402 |
| 3403 if (!COMPILE_STACK_EMPTY) | |
| 3404 FREE_STACK_RETURN (REG_EPAREN); | |
| 3405 | |
| 3406 /* If we don't want backtracking, force success | |
| 3407 the first time we reach the end of the compiled pattern. */ | |
| 3408 if (syntax & RE_NO_POSIX_BACKTRACKING) | |
| 3409 BUF_PUSH (succeed); | |
| 3410 | |
| 1726 | 3411 xfree (compile_stack.stack, compile_stack_elt_t *); |
| 428 | 3412 |
| 3413 /* We have succeeded; set the length of the buffer. */ | |
| 446 | 3414 bufp->used = buf_end - bufp->buffer; |
| 428 | 3415 |
| 3416 #ifdef DEBUG | |
| 3417 if (debug) | |
| 3418 { | |
| 3419 DEBUG_PRINT1 ("\nCompiled pattern: \n"); | |
| 3420 print_compiled_pattern (bufp); | |
| 3421 } | |
| 3422 #endif /* DEBUG */ | |
| 3423 | |
| 3424 #ifndef MATCH_MAY_ALLOCATE | |
| 3425 /* Initialize the failure stack to the largest possible stack. This | |
| 3426 isn't necessary unless we're trying to avoid calling alloca in | |
| 3427 the search and match routines. */ | |
| 3428 { | |
| 502 | 3429 int num_regs = bufp->re_ngroups + 1; |
| 428 | 3430 |
| 3431 /* Since DOUBLE_FAIL_STACK refuses to double only if the current size | |
| 3432 is strictly greater than re_max_failures, the largest possible stack | |
| 3433 is 2 * re_max_failures failure points. */ | |
| 3434 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) | |
| 3435 { | |
| 3436 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); | |
| 3437 | |
| 3438 if (! fail_stack.stack) | |
| 3439 fail_stack.stack | |
| 3440 = (fail_stack_elt_t *) xmalloc (fail_stack.size | |
| 3441 * sizeof (fail_stack_elt_t)); | |
| 3442 else | |
| 3443 fail_stack.stack | |
| 3444 = (fail_stack_elt_t *) xrealloc (fail_stack.stack, | |
| 3445 (fail_stack.size | |
| 3446 * sizeof (fail_stack_elt_t))); | |
| 3447 } | |
| 3448 | |
| 3449 regex_grow_registers (num_regs); | |
| 3450 } | |
| 3451 #endif /* not MATCH_MAY_ALLOCATE */ | |
| 3452 | |
| 3453 return REG_NOERROR; | |
| 3454 } /* regex_compile */ | |
| 3455 | |
| 3456 /* Subroutines for `regex_compile'. */ | |
| 3457 | |
| 3458 /* Store OP at LOC followed by two-byte integer parameter ARG. */ | |
| 3459 | |
| 3460 static void | |
| 3461 store_op1 (re_opcode_t op, unsigned char *loc, int arg) | |
| 3462 { | |
| 3463 *loc = (unsigned char) op; | |
| 3464 STORE_NUMBER (loc + 1, arg); | |
| 3465 } | |
| 3466 | |
| 3467 | |
| 3468 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ | |
| 3469 | |
| 3470 static void | |
| 3471 store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2) | |
| 3472 { | |
| 3473 *loc = (unsigned char) op; | |
| 3474 STORE_NUMBER (loc + 1, arg1); | |
| 3475 STORE_NUMBER (loc + 3, arg2); | |
| 3476 } | |
| 3477 | |
| 3478 | |
| 3479 /* Copy the bytes from LOC to END to open up three bytes of space at LOC | |
| 3480 for OP followed by two-byte integer parameter ARG. */ | |
| 3481 | |
| 3482 static void | |
| 3483 insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) | |
| 3484 { | |
| 3485 REGISTER unsigned char *pfrom = end; | |
| 3486 REGISTER unsigned char *pto = end + 3; | |
| 3487 | |
| 3488 while (pfrom != loc) | |
| 3489 *--pto = *--pfrom; | |
| 3490 | |
| 3491 store_op1 (op, loc, arg); | |
| 3492 } | |
| 3493 | |
| 3494 | |
| 3495 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ | |
| 3496 | |
| 3497 static void | |
| 3498 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, | |
| 3499 unsigned char *end) | |
| 3500 { | |
| 3501 REGISTER unsigned char *pfrom = end; | |
| 3502 REGISTER unsigned char *pto = end + 5; | |
| 3503 | |
| 3504 while (pfrom != loc) | |
| 3505 *--pto = *--pfrom; | |
| 3506 | |
| 3507 store_op2 (op, loc, arg1, arg2); | |
| 3508 } | |
| 3509 | |
| 3510 | |
| 3511 /* P points to just after a ^ in PATTERN. Return true if that ^ comes | |
| 3512 after an alternative or a begin-subexpression. We assume there is at | |
| 3513 least one character before the ^. */ | |
| 3514 | |
| 460 | 3515 static re_bool |
| 446 | 3516 at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) |
| 428 | 3517 { |
| 446 | 3518 re_char *prev = p - 2; |
| 460 | 3519 re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\'; |
| 428 | 3520 |
| 3521 return | |
| 3522 /* After a subexpression? */ | |
| 3523 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) | |
| 3524 /* After an alternative? */ | |
| 3525 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); | |
| 3526 } | |
| 3527 | |
| 3528 | |
| 3529 /* The dual of at_begline_loc_p. This one is for $. We assume there is | |
| 3530 at least one character after the $, i.e., `P < PEND'. */ | |
| 3531 | |
| 460 | 3532 static re_bool |
| 446 | 3533 at_endline_loc_p (re_char *p, re_char *pend, int syntax) |
| 428 | 3534 { |
| 446 | 3535 re_char *next = p; |
| 460 | 3536 re_bool next_backslash = *next == '\\'; |
| 446 | 3537 re_char *next_next = p + 1 < pend ? p + 1 : 0; |
| 428 | 3538 |
| 3539 return | |
| 3540 /* Before a subexpression? */ | |
| 3541 (syntax & RE_NO_BK_PARENS ? *next == ')' | |
| 3542 : next_backslash && next_next && *next_next == ')') | |
| 3543 /* Before an alternative? */ | |
| 3544 || (syntax & RE_NO_BK_VBAR ? *next == '|' | |
| 3545 : next_backslash && next_next && *next_next == '|'); | |
| 3546 } | |
| 3547 | |
| 3548 | |
| 3549 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and | |
| 3550 false if it's not. */ | |
| 3551 | |
| 460 | 3552 static re_bool |
| 428 | 3553 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) |
| 3554 { | |
| 3555 int this_element; | |
| 3556 | |
| 3557 for (this_element = compile_stack.avail - 1; | |
| 3558 this_element >= 0; | |
| 3559 this_element--) | |
| 3560 if (compile_stack.stack[this_element].regnum == regnum) | |
| 3561 return true; | |
| 3562 | |
| 3563 return false; | |
| 3564 } | |
| 3565 | |
| 3566 | |
| 3567 /* Read the ending character of a range (in a bracket expression) from the | |
| 3568 uncompiled pattern *P_PTR (which ends at PEND). We assume the | |
| 3569 starting character is in `P[-2]'. (`P[-1]' is the character `-'.) | |
| 3570 Then we set the translation of all bits between the starting and | |
| 3571 ending characters (inclusive) in the compiled pattern B. | |
| 3572 | |
| 3573 Return an error code. | |
| 3574 | |
| 3575 We use these short variable names so we can use the same macros as | |
| 826 | 3576 `regex_compile' itself. |
| 3577 | |
| 3578 Under Mule, this is only called when both chars of the range are | |
| 3579 ASCII. */ | |
| 428 | 3580 |
| 3581 static reg_errcode_t | |
| 446 | 3582 compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, |
| 3583 reg_syntax_t syntax, unsigned char *buf_end) | |
| 428 | 3584 { |
| 867 | 3585 Ichar this_char; |
| 428 | 3586 |
| 446 | 3587 re_char *p = *p_ptr; |
| 428 | 3588 int range_start, range_end; |
| 3589 | |
| 3590 if (p == pend) | |
| 3591 return REG_ERANGE; | |
| 3592 | |
| 3593 /* Even though the pattern is a signed `char *', we need to fetch | |
| 3594 with unsigned char *'s; if the high bit of the pattern character | |
| 3595 is set, the range endpoints will be negative if we fetch using a | |
| 3596 signed char *. | |
| 3597 | |
| 3598 We also want to fetch the endpoints without translating them; the | |
| 3599 appropriate translation is done in the bit-setting loop below. */ | |
| 442 | 3600 /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ |
| 3601 range_start = ((const unsigned char *) p)[-2]; | |
| 3602 range_end = ((const unsigned char *) p)[0]; | |
| 428 | 3603 |
| 3604 /* Have to increment the pointer into the pattern string, so the | |
| 3605 caller isn't still at the ending character. */ | |
| 3606 (*p_ptr)++; | |
| 3607 | |
| 3608 /* If the start is after the end, the range is empty. */ | |
| 3609 if (range_start > range_end) | |
| 3610 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; | |
| 3611 | |
| 3612 /* Here we see why `this_char' has to be larger than an `unsigned | |
| 3613 char' -- the range is inclusive, so if `range_end' == 0xff | |
| 3614 (assuming 8-bit characters), we would otherwise go into an infinite | |
| 3615 loop, since all characters <= 0xff. */ | |
| 3616 for (this_char = range_start; this_char <= range_end; this_char++) | |
| 3617 { | |
| 826 | 3618 SET_LIST_BIT (RE_TRANSLATE (this_char)); |
| 428 | 3619 } |
| 3620 | |
| 3621 return REG_NOERROR; | |
| 3622 } | |
| 3623 | |
| 3624 #ifdef MULE | |
| 3625 | |
| 3626 static reg_errcode_t | |
| 446 | 3627 compile_extended_range (re_char **p_ptr, re_char *pend, |
| 3628 RE_TRANSLATE_TYPE translate, | |
| 428 | 3629 reg_syntax_t syntax, Lisp_Object rtab) |
| 3630 { | |
| 867 | 3631 Ichar this_char, range_start, range_end; |
| 3632 const Ibyte *p; | |
| 428 | 3633 |
| 3634 if (*p_ptr == pend) | |
| 3635 return REG_ERANGE; | |
| 3636 | |
| 867 | 3637 p = (const Ibyte *) *p_ptr; |
| 3638 range_end = itext_ichar (p); | |
| 428 | 3639 p--; /* back to '-' */ |
| 867 | 3640 DEC_IBYTEPTR (p); /* back to start of range */ |
| 428 | 3641 /* We also want to fetch the endpoints without translating them; the |
| 3642 appropriate translation is done in the bit-setting loop below. */ | |
| 867 | 3643 range_start = itext_ichar (p); |
| 3644 INC_IBYTEPTR (*p_ptr); | |
| 428 | 3645 |
| 3646 /* If the start is after the end, the range is empty. */ | |
| 3647 if (range_start > range_end) | |
| 3648 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; | |
| 3649 | |
| 3650 /* Can't have ranges spanning different charsets, except maybe for | |
| 3651 ranges entirely within the first 256 chars. */ | |
| 3652 | |
| 3653 if ((range_start >= 0x100 || range_end >= 0x100) | |
| 867 | 3654 && ichar_leading_byte (range_start) != |
| 3655 ichar_leading_byte (range_end)) | |
| 428 | 3656 return REG_ERANGESPAN; |
| 3657 | |
| 826 | 3658 /* #### This might be way inefficient if the range encompasses 10,000 |
| 3659 chars or something. To be efficient, you'd have to do something like | |
| 3660 this: | |
| 428 | 3661 |
| 3662 range_table a; | |
| 3663 range_table b; | |
| 3664 map over translation table in [range_start, range_end] of | |
| 3665 (put the mapped range in a; | |
| 3666 put the translation in b) | |
| 3667 invert the range in a and truncate to [range_start, range_end] | |
| 3668 compute the union of a, b | |
| 3669 union the result into rtab | |
| 3670 */ | |
| 826 | 3671 for (this_char = range_start; this_char <= range_end; this_char++) |
| 428 | 3672 { |
| 826 | 3673 SET_RANGETAB_BIT (RE_TRANSLATE (this_char)); |
| 428 | 3674 } |
| 3675 | |
| 3676 if (this_char <= range_end) | |
| 3677 put_range_table (rtab, this_char, range_end, Qt); | |
| 3678 | |
| 3679 return REG_NOERROR; | |
| 3680 } | |
| 3681 | |
| 3682 #endif /* MULE */ | |
| 3683 | |
| 3684 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in | |
| 3685 BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible | |
| 3686 characters can start a string that matches the pattern. This fastmap | |
| 3687 is used by re_search to skip quickly over impossible starting points. | |
| 3688 | |
| 3689 The caller must supply the address of a (1 << BYTEWIDTH)-byte data | |
| 3690 area as BUFP->fastmap. | |
| 3691 | |
| 3692 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in | |
| 3693 the pattern buffer. | |
| 3694 | |
| 3695 Returns 0 if we succeed, -2 if an internal error. */ | |
| 3696 | |
| 3697 int | |
| 826 | 3698 re_compile_fastmap (struct re_pattern_buffer *bufp |
| 3699 RE_LISP_SHORT_CONTEXT_ARGS_DECL) | |
| 428 | 3700 { |
| 3701 int j, k; | |
| 3702 #ifdef MATCH_MAY_ALLOCATE | |
| 3703 fail_stack_type fail_stack; | |
| 3704 #endif | |
| 456 | 3705 DECLARE_DESTINATION; |
| 428 | 3706 /* We don't push any register information onto the failure stack. */ |
| 3707 | |
| 826 | 3708 /* &&#### this should be changed for 8-bit-fixed, for efficiency. see |
| 3709 comment marked with &&#### in re_search_2. */ | |
| 3710 | |
| 428 | 3711 REGISTER char *fastmap = bufp->fastmap; |
| 3712 unsigned char *pattern = bufp->buffer; | |
| 647 | 3713 long size = bufp->used; |
| 428 | 3714 unsigned char *p = pattern; |
| 3715 REGISTER unsigned char *pend = pattern + size; | |
| 3716 | |
| 771 | 3717 #ifdef REGEX_REL_ALLOC |
| 428 | 3718 /* This holds the pointer to the failure stack, when |
| 3719 it is allocated relocatably. */ | |
| 3720 fail_stack_elt_t *failure_stack_ptr; | |
| 3721 #endif | |
| 3722 | |
| 3723 /* Assume that each path through the pattern can be null until | |
| 3724 proven otherwise. We set this false at the bottom of switch | |
| 3725 statement, to which we get only if a particular path doesn't | |
| 3726 match the empty string. */ | |
| 460 | 3727 re_bool path_can_be_null = true; |
| 428 | 3728 |
| 3729 /* We aren't doing a `succeed_n' to begin with. */ | |
| 460 | 3730 re_bool succeed_n_p = false; |
| 428 | 3731 |
| 1333 | 3732 #ifdef ERROR_CHECK_MALLOC |
| 3733 /* The pattern comes from string data, not buffer data. We don't access | |
| 3734 any buffer data, so we don't have to worry about malloc() (but the | |
| 3735 disallowed flag may have been set by a caller). */ | |
| 3736 int depth = bind_regex_malloc_disallowed (0); | |
| 3737 #endif | |
| 3738 | |
| 428 | 3739 assert (fastmap != NULL && p != NULL); |
| 3740 | |
| 3741 INIT_FAIL_STACK (); | |
| 3742 memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ | |
| 3743 bufp->fastmap_accurate = 1; /* It will be when we're done. */ | |
| 3744 bufp->can_be_null = 0; | |
| 3745 | |
| 3746 while (1) | |
| 3747 { | |
| 3748 if (p == pend || *p == succeed) | |
| 3749 { | |
| 3750 /* We have reached the (effective) end of pattern. */ | |
| 3751 if (!FAIL_STACK_EMPTY ()) | |
| 3752 { | |
| 3753 bufp->can_be_null |= path_can_be_null; | |
| 3754 | |
| 3755 /* Reset for next path. */ | |
| 3756 path_can_be_null = true; | |
| 3757 | |
| 446 | 3758 p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; |
| 428 | 3759 |
| 3760 continue; | |
| 3761 } | |
| 3762 else | |
| 3763 break; | |
| 3764 } | |
| 3765 | |
| 3766 /* We should never be about to go beyond the end of the pattern. */ | |
| 3767 assert (p < pend); | |
| 3768 | |
| 3769 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) | |
| 3770 { | |
| 3771 | |
| 3772 /* I guess the idea here is to simply not bother with a fastmap | |
| 3773 if a backreference is used, since it's too hard to figure out | |
| 3774 the fastmap for the corresponding group. Setting | |
| 3775 `can_be_null' stops `re_search_2' from using the fastmap, so | |
| 3776 that is all we do. */ | |
| 3777 case duplicate: | |
| 3778 bufp->can_be_null = 1; | |
| 3779 goto done; | |
| 3780 | |
| 3781 | |
| 3782 /* Following are the cases which match a character. These end | |
| 3783 with `break'. */ | |
| 3784 | |
| 3785 case exactn: | |
| 3786 fastmap[p[1]] = 1; | |
| 3787 break; | |
| 3788 | |
| 3789 | |
| 3790 case charset: | |
| 3791 /* XEmacs: Under Mule, these bit vectors will | |
| 3792 only contain values for characters below 0x80. */ | |
| 3793 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) | |
| 3794 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) | |
| 3795 fastmap[j] = 1; | |
| 3796 break; | |
| 3797 | |
| 3798 | |
| 3799 case charset_not: | |
| 3800 /* Chars beyond end of map must be allowed. */ | |
| 3801 #ifdef MULE | |
| 3802 for (j = *p * BYTEWIDTH; j < 0x80; j++) | |
| 3803 fastmap[j] = 1; | |
| 3804 /* And all extended characters must be allowed, too. */ | |
| 3805 for (j = 0x80; j < 0xA0; j++) | |
| 3806 fastmap[j] = 1; | |
| 446 | 3807 #else /* not MULE */ |
| 428 | 3808 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) |
| 3809 fastmap[j] = 1; | |
| 446 | 3810 #endif /* MULE */ |
| 428 | 3811 |
| 3812 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) | |
| 3813 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) | |
| 3814 fastmap[j] = 1; | |
| 3815 break; | |
| 3816 | |
| 3817 #ifdef MULE | |
| 3818 case charset_mule: | |
| 3819 { | |
| 3820 int nentries; | |
| 3821 int i; | |
| 3822 | |
| 3823 nentries = unified_range_table_nentries (p); | |
| 3824 for (i = 0; i < nentries; i++) | |
| 3825 { | |
| 3826 EMACS_INT first, last; | |
| 3827 Lisp_Object dummy_val; | |
| 3828 int jj; | |
| 867 | 3829 Ibyte strr[MAX_ICHAR_LEN]; |
| 428 | 3830 |
| 3831 unified_range_table_get_range (p, i, &first, &last, | |
| 3832 &dummy_val); | |
| 3833 for (jj = first; jj <= last && jj < 0x80; jj++) | |
| 3834 fastmap[jj] = 1; | |
| 3835 /* Ranges below 0x100 can span charsets, but there | |
| 3836 are only two (Control-1 and Latin-1), and | |
| 3837 either first or last has to be in them. */ | |
| 867 | 3838 set_itext_ichar (strr, first); |
| 428 | 3839 fastmap[*strr] = 1; |
| 3840 if (last < 0x100) | |
| 3841 { | |
| 867 | 3842 set_itext_ichar (strr, last); |
| 428 | 3843 fastmap[*strr] = 1; |
| 3844 } | |
| 3845 } | |
| 3846 } | |
| 3847 break; | |
| 3848 | |
| 3849 case charset_mule_not: | |
| 3850 { | |
| 3851 int nentries; | |
| 3852 int i; | |
| 3853 | |
| 3854 nentries = unified_range_table_nentries (p); | |
| 3855 for (i = 0; i < nentries; i++) | |
| 3856 { | |
| 3857 EMACS_INT first, last; | |
| 3858 Lisp_Object dummy_val; | |
| 3859 int jj; | |
| 3860 int smallest_prev = 0; | |
| 3861 | |
| 3862 unified_range_table_get_range (p, i, &first, &last, | |
| 3863 &dummy_val); | |
| 3864 for (jj = smallest_prev; jj < first && jj < 0x80; jj++) | |
| 3865 fastmap[jj] = 1; | |
| 3866 smallest_prev = last + 1; | |
| 3867 if (smallest_prev >= 0x80) | |
| 3868 break; | |
| 3869 } | |
| 3870 /* Calculating which leading bytes are actually allowed | |
| 3871 here is rather difficult, so we just punt and allow | |
| 3872 all of them. */ | |
| 3873 for (i = 0x80; i < 0xA0; i++) | |
| 3874 fastmap[i] = 1; | |
| 3875 } | |
| 3876 break; | |
| 3877 #endif /* MULE */ | |
| 3878 | |
| 3879 | |
| 3880 case anychar: | |
| 3881 { | |
| 3882 int fastmap_newline = fastmap['\n']; | |
| 3883 | |
| 3884 /* `.' matches anything ... */ | |
| 3885 #ifdef MULE | |
| 3886 /* "anything" only includes bytes that can be the | |
| 3887 first byte of a character. */ | |
| 3888 for (j = 0; j < 0xA0; j++) | |
| 3889 fastmap[j] = 1; | |
| 3890 #else | |
| 3891 for (j = 0; j < (1 << BYTEWIDTH); j++) | |
| 3892 fastmap[j] = 1; | |
| 3893 #endif | |
| 3894 | |
| 3895 /* ... except perhaps newline. */ | |
| 3896 if (!(bufp->syntax & RE_DOT_NEWLINE)) | |
| 3897 fastmap['\n'] = fastmap_newline; | |
| 3898 | |
| 3899 /* Return if we have already set `can_be_null'; if we have, | |
| 3900 then the fastmap is irrelevant. Something's wrong here. */ | |
| 3901 else if (bufp->can_be_null) | |
| 3902 goto done; | |
| 3903 | |
| 3904 /* Otherwise, have to check alternative paths. */ | |
| 3905 break; | |
| 3906 } | |
| 3907 | |
| 826 | 3908 #ifndef emacs |
| 3909 case wordchar: | |
| 3910 for (j = 0; j < (1 << BYTEWIDTH); j++) | |
| 3911 if (SYNTAX (ignored, j) == Sword) | |
| 3912 fastmap[j] = 1; | |
| 3913 break; | |
| 3914 | |
| 3915 case notwordchar: | |
| 3916 for (j = 0; j < (1 << BYTEWIDTH); j++) | |
| 3917 if (SYNTAX (ignored, j) != Sword) | |
| 3918 fastmap[j] = 1; | |
| 3919 break; | |
| 3920 #else /* emacs */ | |
| 3921 case wordchar: | |
| 3922 case notwordchar: | |
| 460 | 3923 case wordbound: |
| 3924 case notwordbound: | |
| 3925 case wordbeg: | |
| 3926 case wordend: | |
| 3927 case notsyntaxspec: | |
| 3928 case syntaxspec: | |
| 3929 /* This match depends on text properties. These end with | |
| 3930 aborting optimizations. */ | |
| 3931 bufp->can_be_null = 1; | |
| 3932 goto done; | |
| 826 | 3933 #if 0 /* all of the following code is unused now that the `syntax-table' |
| 3934 property exists -- it's trickier to do this than just look in | |
| 3935 the buffer. &&#### but we could just use the syntax-cache stuff | |
| 3936 instead; why don't we? --ben */ | |
| 3937 case wordchar: | |
| 3938 k = (int) Sword; | |
| 3939 goto matchsyntax; | |
| 3940 | |
| 3941 case notwordchar: | |
| 3942 k = (int) Sword; | |
| 3943 goto matchnotsyntax; | |
| 3944 | |
| 428 | 3945 case syntaxspec: |
| 3946 k = *p++; | |
| 826 | 3947 matchsyntax: |
| 428 | 3948 #ifdef MULE |
| 3949 for (j = 0; j < 0x80; j++) | |
| 826 | 3950 if (SYNTAX |
| 3951 (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == | |
| 428 | 3952 (enum syntaxcode) k) |
| 3953 fastmap[j] = 1; | |
| 3954 for (j = 0x80; j < 0xA0; j++) | |
| 3955 { | |
| 826 | 3956 if (leading_byte_prefix_p ((unsigned char) j)) |
| 428 | 3957 /* too complicated to calculate this right */ |
| 3958 fastmap[j] = 1; | |
| 3959 else | |
| 3960 { | |
| 3961 int multi_p; | |
| 3962 Lisp_Object cset; | |
| 3963 | |
| 826 | 3964 cset = charset_by_leading_byte (j); |
| 428 | 3965 if (CHARSETP (cset)) |
| 3966 { | |
| 826 | 3967 if (charset_syntax (lispbuf, cset, &multi_p) |
| 428 | 3968 == Sword || multi_p) |
| 3969 fastmap[j] = 1; | |
| 3970 } | |
| 3971 } | |
| 3972 } | |
| 446 | 3973 #else /* not MULE */ |
| 428 | 3974 for (j = 0; j < (1 << BYTEWIDTH); j++) |
| 826 | 3975 if (SYNTAX |
| 3976 (XCHAR_TABLE (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) == | |
| 428 | 3977 (enum syntaxcode) k) |
| 3978 fastmap[j] = 1; | |
| 446 | 3979 #endif /* MULE */ |
| 428 | 3980 break; |
| 3981 | |
| 3982 | |
| 3983 case notsyntaxspec: | |
| 3984 k = *p++; | |
| 826 | 3985 matchnotsyntax: |
| 428 | 3986 #ifdef MULE |
| 3987 for (j = 0; j < 0x80; j++) | |
| 826 | 3988 if (SYNTAX |
| 428 | 3989 (XCHAR_TABLE |
| 826 | 3990 (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != |
| 428 | 3991 (enum syntaxcode) k) |
| 3992 fastmap[j] = 1; | |
| 3993 for (j = 0x80; j < 0xA0; j++) | |
| 3994 { | |
| 826 | 3995 if (leading_byte_prefix_p ((unsigned char) j)) |
| 428 | 3996 /* too complicated to calculate this right */ |
| 3997 fastmap[j] = 1; | |
| 3998 else | |
| 3999 { | |
| 4000 int multi_p; | |
| 4001 Lisp_Object cset; | |
| 4002 | |
| 826 | 4003 cset = charset_by_leading_byte (j); |
| 428 | 4004 if (CHARSETP (cset)) |
| 4005 { | |
| 826 | 4006 if (charset_syntax (lispbuf, cset, &multi_p) |
| 428 | 4007 != Sword || multi_p) |
| 4008 fastmap[j] = 1; | |
| 4009 } | |
| 4010 } | |
| 4011 } | |
| 446 | 4012 #else /* not MULE */ |
| 428 | 4013 for (j = 0; j < (1 << BYTEWIDTH); j++) |
| 826 | 4014 if (SYNTAX |
| 428 | 4015 (XCHAR_TABLE |
| 826 | 4016 (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf)), j) != |
| 428 | 4017 (enum syntaxcode) k) |
| 4018 fastmap[j] = 1; | |
| 446 | 4019 #endif /* MULE */ |
| 428 | 4020 break; |
| 826 | 4021 #endif /* 0 */ |
| 428 | 4022 |
| 4023 #ifdef MULE | |
| 4024 /* 97/2/17 jhod category patch */ | |
| 4025 case categoryspec: | |
| 4026 case notcategoryspec: | |
| 4027 bufp->can_be_null = 1; | |
| 1333 | 4028 UNBIND_REGEX_MALLOC_CHECK (); |
| 428 | 4029 return 0; |
| 4030 /* end if category patch */ | |
| 4031 #endif /* MULE */ | |
| 4032 | |
| 4033 /* All cases after this match the empty string. These end with | |
| 4034 `continue'. */ | |
| 4035 case before_dot: | |
| 4036 case at_dot: | |
| 4037 case after_dot: | |
| 4038 continue; | |
| 826 | 4039 #endif /* emacs */ |
| 428 | 4040 |
| 4041 | |
| 4042 case no_op: | |
| 4043 case begline: | |
| 4044 case endline: | |
| 4045 case begbuf: | |
| 4046 case endbuf: | |
| 460 | 4047 #ifndef emacs |
| 428 | 4048 case wordbound: |
| 4049 case notwordbound: | |
| 4050 case wordbeg: | |
| 4051 case wordend: | |
| 460 | 4052 #endif |
| 428 | 4053 case push_dummy_failure: |
| 4054 continue; | |
| 4055 | |
| 4056 | |
| 4057 case jump_n: | |
| 4058 case pop_failure_jump: | |
| 4059 case maybe_pop_jump: | |
| 4060 case jump: | |
| 4061 case jump_past_alt: | |
| 4062 case dummy_failure_jump: | |
| 4063 EXTRACT_NUMBER_AND_INCR (j, p); | |
| 4064 p += j; | |
| 4065 if (j > 0) | |
| 4066 continue; | |
| 4067 | |
| 4068 /* Jump backward implies we just went through the body of a | |
| 4069 loop and matched nothing. Opcode jumped to should be | |
| 4070 `on_failure_jump' or `succeed_n'. Just treat it like an | |
| 4071 ordinary jump. For a * loop, it has pushed its failure | |
| 4072 point already; if so, discard that as redundant. */ | |
| 4073 if ((re_opcode_t) *p != on_failure_jump | |
| 4074 && (re_opcode_t) *p != succeed_n) | |
| 4075 continue; | |
| 4076 | |
| 4077 p++; | |
| 4078 EXTRACT_NUMBER_AND_INCR (j, p); | |
| 4079 p += j; | |
| 4080 | |
| 4081 /* If what's on the stack is where we are now, pop it. */ | |
| 4082 if (!FAIL_STACK_EMPTY () | |
| 4083 && fail_stack.stack[fail_stack.avail - 1].pointer == p) | |
| 4084 fail_stack.avail--; | |
| 4085 | |
| 4086 continue; | |
| 4087 | |
| 4088 | |
| 4089 case on_failure_jump: | |
| 4090 case on_failure_keep_string_jump: | |
| 4091 handle_on_failure_jump: | |
| 4092 EXTRACT_NUMBER_AND_INCR (j, p); | |
| 4093 | |
| 4094 /* For some patterns, e.g., `(a?)?', `p+j' here points to the | |
| 4095 end of the pattern. We don't want to push such a point, | |
| 4096 since when we restore it above, entering the switch will | |
| 4097 increment `p' past the end of the pattern. We don't need | |
| 4098 to push such a point since we obviously won't find any more | |
| 4099 fastmap entries beyond `pend'. Such a pattern can match | |
| 4100 the null string, though. */ | |
| 4101 if (p + j < pend) | |
| 4102 { | |
| 4103 if (!PUSH_PATTERN_OP (p + j, fail_stack)) | |
| 4104 { | |
| 4105 RESET_FAIL_STACK (); | |
| 1333 | 4106 UNBIND_REGEX_MALLOC_CHECK (); |
| 428 | 4107 return -2; |
| 4108 } | |
| 4109 } | |
| 4110 else | |
| 4111 bufp->can_be_null = 1; | |
| 4112 | |
| 4113 if (succeed_n_p) | |
| 4114 { | |
| 4115 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ | |
| 4116 succeed_n_p = false; | |
| 4117 } | |
| 4118 | |
| 4119 continue; | |
| 4120 | |
| 4121 | |
| 4122 case succeed_n: | |
| 4123 /* Get to the number of times to succeed. */ | |
| 4124 p += 2; | |
| 4125 | |
| 4126 /* Increment p past the n for when k != 0. */ | |
| 4127 EXTRACT_NUMBER_AND_INCR (k, p); | |
| 4128 if (k == 0) | |
| 4129 { | |
| 4130 p -= 4; | |
| 4131 succeed_n_p = true; /* Spaghetti code alert. */ | |
| 4132 goto handle_on_failure_jump; | |
| 4133 } | |
| 4134 continue; | |
| 4135 | |
| 4136 | |
| 4137 case set_number_at: | |
| 4138 p += 4; | |
| 4139 continue; | |
| 4140 | |
| 4141 | |
| 4142 case start_memory: | |
| 4143 case stop_memory: | |
| 4144 p += 2; | |
| 4145 continue; | |
| 4146 | |
| 4147 | |
| 4148 default: | |
| 2500 | 4149 ABORT (); /* We have listed all the cases. */ |
| 428 | 4150 } /* switch *p++ */ |
| 4151 | |
| 4152 /* Getting here means we have found the possible starting | |
| 4153 characters for one path of the pattern -- and that the empty | |
| 4154 string does not match. We need not follow this path further. | |
| 4155 Instead, look at the next alternative (remembered on the | |
| 4156 stack), or quit if no more. The test at the top of the loop | |
| 4157 does these things. */ | |
| 4158 path_can_be_null = false; | |
| 4159 p = pend; | |
| 4160 } /* while p */ | |
| 4161 | |
| 4162 /* Set `can_be_null' for the last path (also the first path, if the | |
| 4163 pattern is empty). */ | |
| 4164 bufp->can_be_null |= path_can_be_null; | |
| 4165 | |
| 4166 done: | |
| 4167 RESET_FAIL_STACK (); | |
| 1333 | 4168 UNBIND_REGEX_MALLOC_CHECK (); |
| 428 | 4169 return 0; |
| 4170 } /* re_compile_fastmap */ | |
| 4171 | |
| 4172 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and | |
| 4173 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use | |
| 4174 this memory for recording register information. STARTS and ENDS | |
| 4175 must be allocated using the malloc library routine, and must each | |
| 4176 be at least NUM_REGS * sizeof (regoff_t) bytes long. | |
| 4177 | |
| 4178 If NUM_REGS == 0, then subsequent matches should allocate their own | |
| 4179 register data. | |
| 4180 | |
| 4181 Unless this function is called, the first search or match using | |
| 4182 PATTERN_BUFFER will allocate its own register data, without | |
| 4183 freeing the old data. */ | |
| 4184 | |
| 4185 void | |
| 4186 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, | |
| 647 | 4187 int num_regs, regoff_t *starts, regoff_t *ends) |
| 428 | 4188 { |
| 4189 if (num_regs) | |
| 4190 { | |
| 4191 bufp->regs_allocated = REGS_REALLOCATE; | |
| 4192 regs->num_regs = num_regs; | |
| 4193 regs->start = starts; | |
| 4194 regs->end = ends; | |
| 4195 } | |
| 4196 else | |
| 4197 { | |
| 4198 bufp->regs_allocated = REGS_UNALLOCATED; | |
| 4199 regs->num_regs = 0; | |
| 4200 regs->start = regs->end = (regoff_t *) 0; | |
| 4201 } | |
| 4202 } | |
| 4203 | |
| 4204 /* Searching routines. */ | |
| 4205 | |
| 4206 /* Like re_search_2, below, but only one string is specified, and | |
| 4207 doesn't let you say where to stop matching. */ | |
| 4208 | |
| 4209 int | |
| 442 | 4210 re_search (struct re_pattern_buffer *bufp, const char *string, int size, |
| 826 | 4211 int startpos, int range, struct re_registers *regs |
| 4212 RE_LISP_CONTEXT_ARGS_DECL) | |
| 428 | 4213 { |
| 4214 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, | |
| 826 | 4215 regs, size RE_LISP_CONTEXT_ARGS); |
| 428 | 4216 } |
| 4217 | |
| 4218 /* Using the compiled pattern in BUFP->buffer, first tries to match the | |
| 4219 virtual concatenation of STRING1 and STRING2, starting first at index | |
| 4220 STARTPOS, then at STARTPOS + 1, and so on. | |
| 4221 | |
| 4222 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. | |
| 4223 | |
| 4224 RANGE is how far to scan while trying to match. RANGE = 0 means try | |
| 4225 only at STARTPOS; in general, the last start tried is STARTPOS + | |
| 4226 RANGE. | |
| 4227 | |
| 826 | 4228 All sizes and positions refer to bytes (not chars); under Mule, the code |
| 4229 knows about the format of the text and will only check at positions | |
| 4230 where a character starts. | |
| 4231 | |
| 428 | 4232 With MULE, RANGE is a byte position, not a char position. The last |
| 4233 start tried is the character starting <= STARTPOS + RANGE. | |
| 4234 | |
| 4235 In REGS, return the indices of the virtual concatenation of STRING1 | |
| 4236 and STRING2 that matched the entire BUFP->buffer and its contained | |
| 4237 subexpressions. | |
| 4238 | |
| 4239 Do not consider matching one past the index STOP in the virtual | |
| 4240 concatenation of STRING1 and STRING2. | |
| 4241 | |
| 4242 We return either the position in the strings at which the match was | |
| 4243 found, -1 if no match, or -2 if error (such as failure | |
| 4244 stack overflow). */ | |
| 4245 | |
/* Parameter notes for re_search_2: STR1/SIZE1 and STR2/SIZE2 are the two
   halves of the text being searched; STARTPOS is the byte offset (within
   their virtual concatenation) of the first candidate start; RANGE says
   how far the candidate start may move from STARTPOS -- positive to
   search forwards, negative to search backwards; REGS and STOP are handed
   unchanged to re_match_2_internal for every attempt.  */
| 4246 int | |
| 446 | 4247 re_search_2 (struct re_pattern_buffer *bufp, const char *str1, |
| 4248 int size1, const char *str2, int size2, int startpos, | |
| 826 | 4249 int range, struct re_registers *regs, int stop |
| 4250 RE_LISP_CONTEXT_ARGS_DECL) | |
| 428 | 4251 { |
| 4252 int val; | |
| 446 | 4253 re_char *string1 = (re_char *) str1; |
| 4254 re_char *string2 = (re_char *) str2; | |
| 428 | 4255 REGISTER char *fastmap = bufp->fastmap; |
| 446 | 4256 REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; |
| 428 | 4257 int total_size = size1 + size2; |
| 4258 int endpos = startpos + range; | |
| 4259 #ifdef REGEX_BEGLINE_CHECK | |
| 4260 int anchored_at_begline = 0; | |
| 4261 #endif | |
| 446 | 4262 re_char *d; |
| 826 | 4263 #ifdef emacs |
| 4264 Internal_Format fmt = buffer_or_other_internal_format (lispobj); | |
/* Address of the buffer text on entry (0 when LISPOBJ is not a buffer).
   NOTE(review): presumably consulted by
   RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS to notice that the text has
   moved -- confirm against that macro's definition.  */
| 1346 | 4265 #ifdef REL_ALLOC |
| 4266 Ibyte *orig_buftext = | |
| 4267 BUFFERP (lispobj) ? | |
| 4268 BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), | |
| 4269 BYTE_BUF_BEGV (XBUFFER (lispobj))) : | |
| 4270 0; | |
| 4271 #endif | |
| 1333 | 4272 #ifdef ERROR_CHECK_MALLOC |
| 4273 int depth; | |
| 4274 #endif | |
| 826 | 4275 #endif /* emacs */ |
/* Remembers whether the (clamped) search direction was forwards; it is
   read only by the assert()s below, which check that a forward search
   never drives RANGE negative.  */
| 4276 #if 1 | |
| 4277 int forward_search_p; | |
| 4278 #endif | |
| 428 | 4279 |
| 4280 /* Check for out-of-range STARTPOS. */ | |
| 4281 if (startpos < 0 || startpos > total_size) | |
| 4282 return -1; | |
| 4283 | |
| 4284 /* Fix up RANGE if it might eventually take us outside | |
| 4285 the virtual concatenation of STRING1 and STRING2. */ | |
| 4286 if (endpos < 0) | |
| 4287 range = 0 - startpos; | |
| 4288 else if (endpos > total_size) | |
| 4289 range = total_size - startpos; | |
| 4290 | |
| 826 | 4291 #if 1 |
| 4292 forward_search_p = range > 0; | |
| 4293 #endif | |
| 4294 | |
| 428 | 4295 /* If the search isn't to be a backwards one, don't waste time in a |
| 4296 search for a pattern that must be anchored. */ | |
| 4297 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) | |
| 4298 { | |
| 4299 if (startpos > 0) | |
| 4300 return -1; | |
| 4301 else | |
| 4302 { | |
/* Shrink RANGE to the byte length of the character at STARTPOS, so the
   main loop makes one attempt at STARTPOS and then stops.  */
| 442 | 4303 d = ((const unsigned char *) |
| 428 | 4304 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
| 867 | 4305 range = itext_ichar_len_fmt (d, fmt); |
| 428 | 4306 } |
| 4307 } | |
| 4308 | |
| 460 | 4309 #ifdef emacs |
| 4310 /* In a forward search for something that starts with \=, | |
| 4311 don't keep searching past point. */ | |
| 4312 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) | |
| 4313 { | |
/* A pattern starting with at_dot is reported as "no match" when LISPOBJ
   is not a buffer; otherwise RANGE is cut down to the distance from
   STARTPOS to point (measured from the start of the visible region).  */
| 826 | 4314 if (!BUFFERP (lispobj)) |
| 4315 return -1; | |

4527
8418d1ad4944
Fix at_dot regex under Mule. <87hc6rv53v.fsf@uwakimon.sk.tsukuba.ac.jp>
Stephen J. Turnbull <stephen@xemacs.org>
parents:
3300
diff
changeset
|
4316 range = (BYTE_BUF_PT (XBUFFER (lispobj)) |
|
8418d1ad4944
Fix at_dot regex under Mule. <87hc6rv53v.fsf@uwakimon.sk.tsukuba.ac.jp>
Stephen J. Turnbull <stephen@xemacs.org>
parents:
3300
diff
changeset
|
4317 - BYTE_BUF_BEGV (XBUFFER (lispobj)) - startpos); |
| 460 | 4318 if (range < 0) |
| 4319 return -1; | |
| 4320 } | |
| 4321 #endif /* emacs */ | |
| 4322 | |
| 1333 | 4323 #ifdef ERROR_CHECK_MALLOC |
| 4324 /* Do this after the above return()s. */ | |
| 4325 depth = bind_regex_malloc_disallowed (1); | |
| 4326 #endif | |
| 4327 | |
/* From here on, every return path goes through
   UNBIND_REGEX_MALLOC_CHECK () first.  */
| 428 | 4328 /* Update the fastmap now if not correct already. */ |
| 1333 | 4329 BEGIN_REGEX_MALLOC_OK (); |
| 428 | 4330 if (fastmap && !bufp->fastmap_accurate) |
| 826 | 4331 if (re_compile_fastmap (bufp RE_LISP_SHORT_CONTEXT_ARGS) == -2) |
| 1333 | 4332 { |
| 4333 END_REGEX_MALLOC_OK (); | |
| 4334 UNBIND_REGEX_MALLOC_CHECK (); | |
| 4335 return -2; | |
| 4336 } | |
| 4337 | |
| 4338 END_REGEX_MALLOC_OK (); | |
| 4339 RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
| 428 | 4340 |
| 4341 #ifdef REGEX_BEGLINE_CHECK | |
/* Decide whether the pattern is anchored with begline: step over any
   leading start_memory/stop_memory opcodes, two bytes at a time, and test
   the first opcode after them.
   NOTE(review): the two-byte stride assumes each of those opcodes
   occupies exactly two bytes of the compiled pattern -- confirm against
   the opcode layout.  */
| 4342 { | |
| 647 | 4343 long i = 0; |
| 428 | 4344 |
| 4345 while (i < bufp->used) | |
| 4346 { | |
| 4347 if (bufp->buffer[i] == start_memory || | |
| 4348 bufp->buffer[i] == stop_memory) | |
| 4349 i += 2; | |
| 4350 else | |
| 4351 break; | |
| 4352 } | |
| 4353 anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; | |
| 4354 } | |
| 4355 #endif | |
| 4356 | |
| 460 | 4357 #ifdef emacs |
/* Set up the syntax cache for the character position corresponding to
   STARTPOS.  The call is bracketed so that malloc is permitted during it,
   and the relocation macro is run afterwards, as after every such
   bracket in this function.  */
| 1333 | 4358 BEGIN_REGEX_MALLOC_OK (); |
| 826 | 4359 scache = setup_syntax_cache (scache, lispobj, lispbuf, |
| 4360 offset_to_charxpos (lispobj, startpos), | |
| 4361 1); | |
| 1333 | 4362 END_REGEX_MALLOC_OK (); |
| 4363 RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
| 460 | 4364 #endif |
| 4365 | |
| 428 | 4366 /* Loop through the string, looking for a place to start matching. */ |
| 4367 for (;;) | |
| 4368 { | |
| 4369 #ifdef REGEX_BEGLINE_CHECK | |
| 826 | 4370 /* If the regex is anchored at the beginning of a line (i.e. with a |
| 4371 ^), then we can speed things up by skipping to the next | |
| 4372 beginning-of-line. However, to determine "beginning of line" we | |
| 4373 need to look at the previous char, so can't do this check if at | |
| 4374 beginning of either string. (Well, we could if at the beginning of | |
| 4375 the second string, but it would require additional code, and this | |
| 4376 is just an optimization.) */ | |
| 4377 if (anchored_at_begline && startpos > 0 && startpos != size1) | |
| 428 | 4378 { |
| 826 | 4379 if (range > 0) |
| 4380 { | |
| 4381 /* whose stupid idea was it anyway to make this | |
| 4382 function take two strings to match?? */ | |
| 4383 int lim = 0; | |
| 4384 re_char *orig_d; | |
| 4385 re_char *stop_d; | |
| 4386 | |
| 4387 /* Compute limit as below in fastmap code, so we are guaranteed | |
| 4388 to remain within a single string. */ | |
| 4389 if (startpos < size1 && startpos + range >= size1) | |
| 4390 lim = range - (size1 - startpos); | |
| 4391 | |
| 4392 d = ((const unsigned char *) | |
| 4393 (startpos >= size1 ? string2 - size1 : string1) + startpos); | |
| 4394 orig_d = d; | |
| 4395 stop_d = d + range - lim; | |
| 4396 | |
| 4397 /* We want to find the next location (including the current | |
| 4398 one) where the previous char is a newline, so back up one | |
| 4399 and search forward for a newline. */ | |
| 867 | 4400 DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ |
| 826 | 4401 |
| 4402 /* Written out as an if-else to avoid testing `translate' | |
| 4403 inside the loop. */ | |
| 4404 if (TRANSLATE_P (translate)) | |
| 4405 while (d < stop_d && | |
| 867 | 4406 RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) |
| 826 | 4407 != '\n') |
| 867 | 4408 INC_IBYTEPTR_FMT (d, fmt); |
| 826 | 4409 else |
| 4410 while (d < stop_d && | |
| 867 | 4411 itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') |
| 4412 INC_IBYTEPTR_FMT (d, fmt); | |
| 826 | 4413 |
| 4414 /* If we were stopped by a newline, skip forward over it. | |
| 4415 Otherwise we will get in an infloop when our start position | |
| 4416 was at begline. */ | |
| 4417 if (d < stop_d) | |
| 867 | 4418 INC_IBYTEPTR_FMT (d, fmt); |
/* Account for the bytes skipped: RANGE shrinks and STARTPOS grows by the
   distance D moved from its original position.  */
| 826 | 4419 range -= d - orig_d; |
| 4420 startpos += d - orig_d; | |
| 4421 #if 1 | |
| 4422 assert (!forward_search_p || range >= 0); | |
| 4423 #endif | |
| 4424 } | |
| 4425 else if (range < 0) | |
| 4426 { | |
| 4427 /* We're lazy, like in the fastmap code below */ | |
| 867 | 4428 Ichar c; |
| 826 | 4429 |
| 4430 d = ((const unsigned char *) | |
| 4431 (startpos >= size1 ? string2 - size1 : string1) + startpos); | |
| 867 | 4432 DEC_IBYTEPTR_FMT (d, fmt); |
| 4433 c = itext_ichar_fmt (d, fmt, lispobj); | |
| 826 | 4434 c = RE_TRANSLATE (c); |
| 4435 if (c != '\n') | |
| 4436 goto advance; | |
| 4437 } | |
| 428 | 4438 } |
| 4439 #endif /* REGEX_BEGLINE_CHECK */ | |
| 4440 | |
| 4441 /* If a fastmap is supplied, skip quickly over characters that | |
| 4442 cannot be the start of a match. If the pattern can match the | |
| 4443 null string, however, we don't need to skip characters; we want | |
| 4444 the first null string. */ | |
| 4445 if (fastmap && startpos < total_size && !bufp->can_be_null) | |
| 4446 { | |
| 826 | 4447 /* For the moment, fastmap always works as if buffer |
| 4448 is in default format, so convert chars in the search strings | |
| 4449 into default format as we go along, if necessary. | |
| 4450 | |
| 4451 &&#### fastmap needs rethinking for 8-bit-fixed so | |
| 4452 it's faster. We need it to reflect the raw | |
| 4453 8-bit-fixed values. That isn't so hard if we assume | |
| 4454 that the top 96 bytes represent a single 1-byte | |
| 4455 charset. For 16-bit/32-bit stuff it's probably not | |
| 4456 worth it to make the fastmap represent the raw, due to | |
| 4457 its nature -- we'd have to use the LSB for the | |
| 4458 fastmap, and that causes lots of problems with Mule | |
| 4459 chars, where it essentially wipes out the usefulness | |
| 4460 of the fastmap entirely. */ | |
| 428 | 4461 if (range > 0) /* Searching forwards. */ |
| 4462 { | |
/* LIM is the part of RANGE that lies beyond the end of string1, so the
   scans below stop at the string boundary; IRANGE keeps the starting
   RANGE so STARTPOS can be advanced by the amount consumed.  */
| 4463 int lim = 0; | |
| 4464 int irange = range; | |
| 4465 | |
| 4466 if (startpos < size1 && startpos + range >= size1) | |
| 4467 lim = range - (size1 - startpos); | |
| 4468 | |
| 442 | 4469 d = ((const unsigned char *) |
| 428 | 4470 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
| 4471 | |
| 4472 /* Written out as an if-else to avoid testing `translate' | |
| 4473 inside the loop. */ | |
| 446 | 4474 if (TRANSLATE_P (translate)) |
| 826 | 4475 { |
| 4476 while (range > lim) | |
| 4477 { | |
| 4478 re_char *old_d = d; | |
| 428 | 4479 #ifdef MULE |
| 867 | 4480 Ibyte tempch[MAX_ICHAR_LEN]; |
| 4481 Ichar buf_ch = | |
| 4482 RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); | |
| 4483 set_itext_ichar (tempch, buf_ch); | |
| 826 | 4484 if (fastmap[*tempch]) |
| 4485 break; | |
| 446 | 4486 #else |
| 826 | 4487 if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) |
| 4488 break; | |
| 446 | 4489 #endif /* MULE */ |
| 867 | 4490 INC_IBYTEPTR_FMT (d, fmt); |
| 826 | 4491 range -= (d - old_d); |
| 4492 #if 1 | |
| 1333 | 4493 assert (!forward_search_p || range >= 0); |
| 826 | 4494 #endif |
| 4495 } | |
| 4496 } | |
| 4497 #ifdef MULE | |
| 4498 else if (fmt != FORMAT_DEFAULT) | |
| 4499 { | |
| 4500 while (range > lim) | |
| 4501 { | |
| 4502 re_char *old_d = d; | |
| 867 | 4503 Ibyte tempch[MAX_ICHAR_LEN]; |
| 4504 Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); | |
| 4505 set_itext_ichar (tempch, buf_ch); | |
| 826 | 4506 if (fastmap[*tempch]) |
| 4507 break; | |
| 867 | 4508 INC_IBYTEPTR_FMT (d, fmt); |
| 826 | 4509 range -= (d - old_d); |
| 4510 #if 1 | |
| 1333 | 4511 assert (!forward_search_p || range >= 0); |
| 826 | 4512 #endif |
| 4513 } | |
| 4514 } | |
| 4515 #endif /* MULE */ | |
| 428 | 4516 else |
| 826 | 4517 { |
| 4518 while (range > lim && !fastmap[*d]) | |
| 4519 { | |
| 4520 re_char *old_d = d; | |
| 867 | 4521 INC_IBYTEPTR (d); |
| 826 | 4522 range -= (d - old_d); |
| 4523 #if 1 | |
| 4524 assert (!forward_search_p || range >= 0); | |
| 4525 #endif | |
| 4526 } | |
| 4527 } | |
| 428 | 4528 |
| 4529 startpos += irange - range; | |
| 4530 } | |
| 4531 else /* Searching backwards. */ | |
| 4532 { | |
| 826 | 4533 /* #### It's not clear why we don't just write a loop, like |
| 4534 for the moving-forward case. Perhaps the writer got lazy, | |
| 4535 since backward searches aren't so common. */ | |
| 4536 d = ((const unsigned char *) | |
| 4537 (startpos >= size1 ? string2 - size1 : string1) + startpos); | |
| 428 | 4538 #ifdef MULE |
| 826 | 4539 { |
| 867 | 4540 Ibyte tempch[MAX_ICHAR_LEN]; |
| 4541 Ichar buf_ch = | |
| 4542 RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); | |
| 4543 set_itext_ichar (tempch, buf_ch); | |
| 826 | 4544 if (!fastmap[*tempch]) |
| 4545 goto advance; | |
| 4546 } | |
| 428 | 4547 #else |
| 826 | 4548 if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) |
| 446 | 4549 goto advance; |
| 826 | 4550 #endif /* MULE */ |
| 428 | 4551 } |
| 4552 } | |
| 4553 | |
| 4554 /* If can't match the null string, and that's all we have left, fail. */ | |
| 4555 if (range >= 0 && startpos == total_size && fastmap | |
| 4556 && !bufp->can_be_null) | |
| 1333 | 4557 { |
| 4558 UNBIND_REGEX_MALLOC_CHECK (); | |
| 4559 return -1; | |
| 4560 } | |
| 428 | 4561 |
| 4562 #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ | |
| 4563 if (!no_quit_in_re_search) | |
| 1333 | 4564 { |
| 4565 BEGIN_REGEX_MALLOC_OK (); | |
| 4566 QUIT; | |
| 4567 END_REGEX_MALLOC_OK (); | |
| 4568 RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
| 4569 } | |
| 4570 | |
| 428 | 4571 #endif |
/* Attempt a full match with the candidate start at STARTPOS.  */
| 1333 | 4572 BEGIN_REGEX_MALLOC_OK (); |
| 428 | 4573 val = re_match_2_internal (bufp, string1, size1, string2, size2, |
| 826 | 4574 startpos, regs, stop |
| 4575 RE_LISP_CONTEXT_ARGS); | |
| 428 | 4576 #ifndef REGEX_MALLOC |
| 1333 | 4577 ALLOCA_GARBAGE_COLLECT (); |
| 428 | 4578 #endif |
| 1333 | 4579 END_REGEX_MALLOC_OK (); |
| 4580 RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
| 428 | 4581 |
/* A non-negative VAL means a match: the caller gets the position where
   it starts, not VAL itself.  */
| 4582 if (val >= 0) | |
| 1333 | 4583 { |
| 4584 UNBIND_REGEX_MALLOC_CHECK (); | |
| 4585 return startpos; | |
| 4586 } | |
| 428 | 4587 |
/* -2 from the matcher is passed straight through to the caller.  */
| 4588 if (val == -2) | |
| 1333 | 4589 { |
| 4590 UNBIND_REGEX_MALLOC_CHECK (); | |
| 4591 return -2; | |
| 4592 } | |
| 4593 | |
| 4594 RE_SEARCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
/* No match at STARTPOS: stop if RANGE is used up, otherwise move STARTPOS
   by one character in the search direction and move RANGE toward zero by
   that character's byte length.  */
| 428 | 4595 advance: |
| 4596 if (!range) | |
| 4597 break; | |
| 4598 else if (range > 0) | |
| 4599 { | |
| 826 | 4600 Bytecount d_size; |
| 442 | 4601 d = ((const unsigned char *) |
| 428 | 4602 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
| 867 | 4603 d_size = itext_ichar_len_fmt (d, fmt); |
| 428 | 4604 range -= d_size; |
| 826 | 4605 #if 1 |
| 4606 assert (!forward_search_p || range >= 0); | |
| 4607 #endif | |
| 428 | 4608 startpos += d_size; |
| 4609 } | |
| 4610 else | |
| 4611 { | |
| 826 | 4612 Bytecount d_size; |
| 428 | 4613 /* Note startpos > size1 not >=. If we are on the |
| 4614 string1/string2 boundary, we want to backup into string1. */ | |
| 442 | 4615 d = ((const unsigned char *) |
| 428 | 4616 (startpos > size1 ? string2 - size1 : string1) + startpos); |
| 867 | 4617 DEC_IBYTEPTR_FMT (d, fmt); |
| 4618 d_size = itext_ichar_len_fmt (d, fmt); | |
| 428 | 4619 range += d_size; |
| 826 | 4620 #if 1 |
| 4621 assert (!forward_search_p || range >= 0); | |
| 4622 #endif | |
| 428 | 4623 startpos -= d_size; |
| 4624 } | |
| 4625 } | |
/* RANGE was exhausted without finding a match.  */
| 1333 | 4626 UNBIND_REGEX_MALLOC_CHECK (); |
| 428 | 4627 return -1; |
| 4628 } /* re_search_2 */ | |
| 826 | 4629 |
| 428 | 4630 |
| 4631 /* Declarations and macros for re_match_2. */ | |
| 4632 | |
/* Turn PTR, which points somewhere inside `string1' or `string2', into an
   offset: pointers into `string1' are measured from `string1', pointers
   into `string2' are measured from `string2' and then biased by `size1'.  */
#define POINTER_TO_OFFSET(ptr)                                          \
  ((regoff_t) (FIRST_STRING_P (ptr)                                     \
               ? (ptr) - string1                                        \
               : (ptr) - string2 + size1))
| 4639 | |
| 4640 /* Macros for dealing with the split strings in re_match_2. */ | |
| 4641 | |
/* True while the current end-of-data pointer `dend' is the stopping
   point chosen within string1 (`end_match_1').  */
#define MATCHING_IN_FIRST_STRING (end_match_1 == dend)
| 4643 | |
| 4644 /* Call before fetching a character with *d. This switches over to | |
| 4645 string2 if necessary. */ | |
/* Expands in place, using the enclosing function's `d', `dend',
   `string2' and `end_match_2'.  While `d' has reached `dend': if `dend'
   is already `end_match_2' there is no input left and control jumps to
   the enclosing `fail' label; otherwise `d' and `dend' are moved to
   `string2' and `end_match_2'.  */
| 826 | 4646 #define REGEX_PREFETCH() \ |
| 428 | 4647 while (d == dend) \ |
| 4648 { \ | |
| 4649 /* End of string2 => fail. */ \ | |
| 4650 if (dend == end_match_2) \ | |
| 4651 goto fail; \ | |
| 4652 /* End of string1 => advance to string2. */ \ | |
| 4653 d = string2; \ | |
| 4654 dend = end_match_2; \ | |
| 4655 } | |
| 4656 | |
| 4657 | |
/* AT_STRINGS_BEG (D): nonzero when D is the first position of the virtual
   concatenation of `string1' and `string2' -- that is `string1' when
   `size1' is nonzero and `string2' otherwise -- or when `size2' is zero.
   AT_STRINGS_END (D): nonzero when D is `end2'.  */
#define AT_STRINGS_BEG(d) \
  ((d) == ((size1 != 0) ? string1 : string2) || size2 == 0)
#define AT_STRINGS_END(d) (end2 == (d))
| 4662 | |
/* XEmacs change:
   Normalize a position that sits on the boundary between the two strings.
   POS_BEFORE_GAP_UNSAFE maps `string2' to `end1'; POS_AFTER_GAP_UNSAFE
   maps `end1' to `string2'.  Any other position is returned unchanged.
   The argument D is evaluated more than once.  */
#define POS_BEFORE_GAP_UNSAFE(d) ((d) != string2 ? (d) : end1)
#define POS_AFTER_GAP_UNSAFE(d) ((d) != end1 ? (d) : string2)
| 4669 | |
/* Nonzero when the syntax class of CH, looked up in the mirror syntax
   table of `lispbuf', is Sword.  (XEmacs change) */
#define WORDCHAR_P(ch)                                                  \
  (Sword == SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), ch))
| 428 | 4673 |
| 4674 /* Free everything we malloc. */ | |
| 4675 #ifdef MATCH_MAY_ALLOCATE | |
/* Release VAR (if it is non-null) with REGEX_FREE and reset it to NULL.
   Wrapped in do { } while (0) so that the expansion is a single
   statement: the reset stays attached to the release under an unbraced
   `if', and `if (...) FREE_VAR (...); else ...' parses as written.  */
#define FREE_VAR(var,type)                                              \
  do {                                                                  \
    if (var)                                                            \
      REGEX_FREE (var, type);                                           \
    var = NULL;                                                         \
  } while (0)

/* Undo everything the matcher set up: drop the malloc-check binding,
   free the failure stack, then free and null every register vector.  */
#define FREE_VARIABLES()                                                \
  do {                                                                  \
    UNBIND_REGEX_MALLOC_CHECK ();                                       \
    REGEX_FREE_STACK (fail_stack.stack);                                \
    FREE_VAR (regstart, re_char **);                                    \
    FREE_VAR (regend, re_char **);                                      \
    FREE_VAR (old_regstart, re_char **);                                \
    FREE_VAR (old_regend, re_char **);                                  \
    FREE_VAR (best_regstart, re_char **);                               \
    FREE_VAR (best_regend, re_char **);                                 \
    FREE_VAR (reg_info, register_info_type *);                          \
    FREE_VAR (reg_dummy, re_char **);                                   \
    FREE_VAR (reg_info_dummy, register_info_type *);                    \
  } while (0)
| 446 | 4691 #else /* not MATCH_MAY_ALLOCATE */ |
/* Variant used when MATCH_MAY_ALLOCATE is not defined: the only cleanup
   performed is dropping the malloc-check binding.  */
| 1333 | 4692 #define FREE_VARIABLES() \ |
| 4693 do { \ | |
| 4694 UNBIND_REGEX_MALLOC_CHECK (); \ | |
| 4695 } while (0) | |
| 446 | 4696 #endif /* MATCH_MAY_ALLOCATE */ |
| 428 | 4697 |
/* Sentinels meaning "no register is currently active".  Constraints:
   - neither may be a valid register number; register numbers fit in one
     pattern byte (at most 255 registers), so anything above 255 works;
   - the two must differ by exactly 1, because of NUM_FAILURE_ITEMS above;
   - the "lowest" sentinel must exceed the "highest" one, so that no
     registers are saved when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (1 + NO_HIGHEST_ACTIVE_REG)
| 4707 | |
| 4708 /* Matching routines. */ | |
| 4709 | |
| 826 | 4710 #ifndef emacs /* XEmacs never uses this. */ |
| 428 | 4711 /* re_match is like re_match_2 except it takes only a single string. */ |
| 4712 | |
| 4713 int | |
| 442 | 4714 re_match (struct re_pattern_buffer *bufp, const char *string, int size, |
| 826 | 4715 int pos, struct re_registers *regs |
| 4716 RE_LISP_CONTEXT_ARGS_DECL) | |
| 428 | 4717 { |
| 446 | 4718 int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, |
| 826 | 4719 pos, regs, size |
| 4720 RE_LISP_CONTEXT_ARGS); | |
| 1333 | 4721 ALLOCA_GARBAGE_COLLECT (); |
| 428 | 4722 return result; |
| 4723 } | |
| 4724 #endif /* not emacs */ | |
| 4725 | |
| 4726 /* re_match_2 matches the compiled pattern in BUFP against the | |
| 4727 (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and | |
| 4728 SIZE2, respectively). We start matching at POS, and stop matching | |
| 4729 at STOP. | |
| 4730 | |
| 4731 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we | |
| 4732 store offsets for the substring each group matched in REGS. See the | |
| 4733 documentation for exactly how many groups we fill. | |
| 4734 | |
| 4735 We return -1 if no match, -2 if an internal error (such as the | |
| 4736 failure stack overflowing). Otherwise, we return the length of the | |
| 4737 matched substring. */ | |
| 4738 | |
| 4739 int | |
| 442 | 4740 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, |
| 4741 int size1, const char *string2, int size2, int pos, | |
| 826 | 4742 struct re_registers *regs, int stop |
| 4743 RE_LISP_CONTEXT_ARGS_DECL) | |
| 428 | 4744 { |
| 460 | 4745 int result; |
| 4746 | |
| 4747 #ifdef emacs | |
| 826 | 4748 scache = setup_syntax_cache (scache, lispobj, lispbuf, |
| 4749 offset_to_charxpos (lispobj, pos), | |
| 4750 1); | |
| 460 | 4751 #endif |
| 4752 | |
| 4753 result = re_match_2_internal (bufp, (re_char *) string1, size1, | |
| 4754 (re_char *) string2, size2, | |
| 826 | 4755 pos, regs, stop |
| 4756 RE_LISP_CONTEXT_ARGS); | |
| 460 | 4757 |
| 1333 | 4758 ALLOCA_GARBAGE_COLLECT (); |
| 428 | 4759 return result; |
| 4760 } | |
| 4761 | |
| 4762 /* This is a separate function so that we can force an alloca cleanup | |
| 4763 afterwards. */ | |
| 4764 static int | |
| 446 | 4765 re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, |
| 4766 int size1, re_char *string2, int size2, int pos, | |
| 826 | 4767 struct re_registers *regs, int stop |
| 2333 | 4768 RE_LISP_CONTEXT_ARGS_MULE_DECL) |
| 428 | 4769 { |
| 4770 /* General temporaries. */ | |
| 4771 int mcnt; | |
| 4772 unsigned char *p1; | |
| 4773 int should_succeed; /* XEmacs change */ | |
| 4774 | |
| 4775 /* Just past the end of the corresponding string. */ | |
| 446 | 4776 re_char *end1, *end2; |
| 428 | 4777 |
| 4778 /* Pointers into string1 and string2, just past the last characters in | |
| 4779 each to consider matching. */ | |
| 446 | 4780 re_char *end_match_1, *end_match_2; |
| 428 | 4781 |
| 4782 /* Where we are in the data, and the end of the current string. */ | |
| 446 | 4783 re_char *d, *dend; |
| 428 | 4784 |
| 4785 /* Where we are in the pattern, and the end of the pattern. */ | |
| 4786 unsigned char *p = bufp->buffer; | |
| 4787 REGISTER unsigned char *pend = p + bufp->used; | |
| 4788 | |
| 4789 /* Mark the opcode just after a start_memory, so we can test for an | |
| 4790 empty subpattern when we get to the stop_memory. */ | |
| 446 | 4791 re_char *just_past_start_mem = 0; |
| 428 | 4792 |
| 4793 /* We use this to map every character in the string. */ | |
| 446 | 4794 RE_TRANSLATE_TYPE translate = bufp->translate; |
| 428 | 4795 |
| 4796 /* Failure point stack. Each place that can handle a failure further | |
| 4797 down the line pushes a failure point on this stack. It consists of | |
| 4798 restart, regend, and reg_info for all registers corresponding to | |
| 4799 the subexpressions we're currently inside, plus the number of such | |
| 4800 registers, and, finally, two char *'s. The first char * is where | |
| 4801 to resume scanning the pattern; the second one is where to resume | |
| 4802 scanning the strings. If the latter is zero, the failure point is | |
| 4803 a ``dummy''; if a failure happens and the failure point is a dummy, | |
| 4804 it gets discarded and the next one is tried. */ | |
| 4805 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ | |
| 4806 fail_stack_type fail_stack; | |
| 4807 #endif | |
| 4808 #ifdef DEBUG | |
| 647 | 4809 static int failure_id; |
| 4810 int nfailure_points_pushed = 0, nfailure_points_popped = 0; | |
| 428 | 4811 #endif |
| 4812 | |
| 771 | 4813 #ifdef REGEX_REL_ALLOC |
| 428 | 4814 /* This holds the pointer to the failure stack, when |
| 4815 it is allocated relocatably. */ | |
| 4816 fail_stack_elt_t *failure_stack_ptr; | |
| 4817 #endif | |
| 4818 | |
| 4819 /* We fill all the registers internally, independent of what we | |
| 4820 return, for use in backreferences. The number here includes | |
| 4821 an element for register zero. */ | |
| 647 | 4822 int num_regs = bufp->re_ngroups + 1; |
| 428 | 4823 |
| 4824 /* The currently active registers. */ | |
| 647 | 4825 int lowest_active_reg = NO_LOWEST_ACTIVE_REG; |
| 4826 int highest_active_reg = NO_HIGHEST_ACTIVE_REG; | |
| 428 | 4827 |
| 4828 /* Information on the contents of registers. These are pointers into | |
| 4829 the input strings; they record just what was matched (on this | |
| 4830 attempt) by a subexpression part of the pattern, that is, the | |
| 4831 regnum-th regstart pointer points to where in the pattern we began | |
| 4832 matching and the regnum-th regend points to right after where we | |
| 4833 stopped matching the regnum-th subexpression. (The zeroth register | |
| 4834 keeps track of what the whole pattern matches.) */ | |
| 4835 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | |
| 446 | 4836 re_char **regstart, **regend; |
| 428 | 4837 #endif |
| 4838 | |
| 4839 /* If a group that's operated upon by a repetition operator fails to | |
| 4840 match anything, then the register for its start will need to be | |
| 4841 restored because it will have been set to wherever in the string we | |
| 4842 are when we last see its open-group operator. Similarly for a | |
| 4843 register's end. */ | |
| 4844 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | |
| 446 | 4845 re_char **old_regstart, **old_regend; |
| 428 | 4846 #endif |
| 4847 | |
| 4848 /* The is_active field of reg_info helps us keep track of which (possibly | |
| 4849 nested) subexpressions we are currently in. The matched_something | |
| 4850 field of reg_info[reg_num] helps us tell whether or not we have | |
| 4851 matched any of the pattern so far this time through the reg_num-th | |
| 4852 subexpression. These two fields get reset each time through any | |
| 4853 loop their register is in. */ | |
| 4854 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ | |
| 4855 register_info_type *reg_info; | |
| 4856 #endif | |
| 4857 | |
| 4858 /* The following record the register info as found in the above | |
| 4859 variables when we find a match better than any we've seen before. | |
| 4860 This happens as we backtrack through the failure points, which in | |
| 4861 turn happens only if we have not yet matched the entire string. */ | |
| 647 | 4862 int best_regs_set = false; |
| 428 | 4863 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ |
| 446 | 4864 re_char **best_regstart, **best_regend; |
| 428 | 4865 #endif |
| 4866 | |
| 4867 /* Logically, this is `best_regend[0]'. But we don't want to have to | |
| 4868 allocate space for that if we're not allocating space for anything | |
| 4869 else (see below). Also, we never need info about register 0 for | |
| 4870 any of the other register vectors, and it seems rather a kludge to | |
| 4871 treat `best_regend' differently than the rest. So we keep track of | |
| 4872 the end of the best match so far in a separate variable. We | |
| 4873 initialize this to NULL so that when we backtrack the first time | |
| 4874 and need to test it, it's not garbage. */ | |
| 446 | 4875 re_char *match_end = NULL; |
| 428 | 4876 |
| 4877 /* This helps SET_REGS_MATCHED avoid doing redundant work. */ | |
| 4878 int set_regs_matched_done = 0; | |
| 4879 | |
| 4880 /* Used when we pop values we don't care about. */ | |
| 4881 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | |
| 446 | 4882 re_char **reg_dummy; |
| 428 | 4883 register_info_type *reg_info_dummy; |
| 4884 #endif | |
| 4885 | |
| 4886 #ifdef DEBUG | |
| 4887 /* Counts the total number of registers pushed. */ | |
| 647 | 4888 int num_regs_pushed = 0; |
| 428 | 4889 #endif |
| 4890 | |
| 4891 /* 1 if this match ends in the same string (string1 or string2) | |
| 4892 as the best previous match. */ | |
| 460 | 4893 re_bool same_str_p; |
| 428 | 4894 |
| 4895 /* 1 if this match is the best seen so far. */ | |
| 460 | 4896 re_bool best_match_p; |
| 428 | 4897 |
| 826 | 4898 #ifdef emacs |
| 4899 Internal_Format fmt = buffer_or_other_internal_format (lispobj); | |
| 1346 | 4900 #ifdef REL_ALLOC |
| 4901 Ibyte *orig_buftext = | |
| 4902 BUFFERP (lispobj) ? | |
| 4903 BYTE_BUF_BYTE_ADDRESS (XBUFFER (lispobj), | |
| 4904 BYTE_BUF_BEGV (XBUFFER (lispobj))) : | |
| 4905 0; | |
| 4906 #endif | |
| 4907 | |
| 1333 | 4908 #ifdef ERROR_CHECK_MALLOC |
| 4909 int depth = bind_regex_malloc_disallowed (1); | |
| 4910 #endif | |
| 826 | 4911 #endif /* emacs */ |
| 771 | 4912 |
| 428 | 4913 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); |
| 4914 | |
| 1333 | 4915 BEGIN_REGEX_MALLOC_OK (); |
| 428 | 4916 INIT_FAIL_STACK (); |
| 1333 | 4917 END_REGEX_MALLOC_OK (); |
| 428 | 4918 |
| 4919 #ifdef MATCH_MAY_ALLOCATE | |
| 4920 /* Do not bother to initialize all the register variables if there are | |
| 4921 no groups in the pattern, as it takes a fair amount of time. If | |
| 4922 there are groups, we include space for register 0 (the whole | |
| 4923 pattern), even though we never use it, since it simplifies the | |
| 4924 array indexing. We should fix this. */ | |
| 502 | 4925 if (bufp->re_ngroups) |
| 428 | 4926 { |
| 1333 | 4927 BEGIN_REGEX_MALLOC_OK (); |
| 446 | 4928 regstart = REGEX_TALLOC (num_regs, re_char *); |
| 4929 regend = REGEX_TALLOC (num_regs, re_char *); | |
| 4930 old_regstart = REGEX_TALLOC (num_regs, re_char *); | |
| 4931 old_regend = REGEX_TALLOC (num_regs, re_char *); | |
| 4932 best_regstart = REGEX_TALLOC (num_regs, re_char *); | |
| 4933 best_regend = REGEX_TALLOC (num_regs, re_char *); | |
| 428 | 4934 reg_info = REGEX_TALLOC (num_regs, register_info_type); |
| 446 | 4935 reg_dummy = REGEX_TALLOC (num_regs, re_char *); |
| 428 | 4936 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); |
| 1333 | 4937 END_REGEX_MALLOC_OK (); |
| 428 | 4938 |
| 4939 if (!(regstart && regend && old_regstart && old_regend && reg_info | |
| 4940 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) | |
| 4941 { | |
| 4942 FREE_VARIABLES (); | |
| 4943 return -2; | |
| 4944 } | |
| 4945 } | |
| 4946 else | |
| 4947 { | |
| 4948 /* We must initialize all our variables to NULL, so that | |
| 4949 `FREE_VARIABLES' doesn't try to free them. */ | |
| 4950 regstart = regend = old_regstart = old_regend = best_regstart | |
| 4951 = best_regend = reg_dummy = NULL; | |
| 4952 reg_info = reg_info_dummy = (register_info_type *) NULL; | |
| 4953 } | |
| 4954 #endif /* MATCH_MAY_ALLOCATE */ | |
| 4955 | |
| 1333 | 4956 #if defined (emacs) && defined (REL_ALLOC) |
| 4957 { | |
| 4958 /* If the allocations above (or the call to setup_syntax_cache() in | |
| 4959 re_match_2) caused a rel-alloc relocation, then fix up the data | |
| 4960 pointers */ | |
| 1346 | 4961 Bytecount offset = offset_post_relocation (lispobj, orig_buftext); |
| 1333 | 4962 if (offset) |
| 4963 { | |
| 4964 string1 += offset; | |
| 4965 string2 += offset; | |
| 4966 } | |
| 4967 } | |
| 4968 #endif /* defined (emacs) && defined (REL_ALLOC) */ | |
| 4969 | |
| 428 | 4970 /* The starting position is bogus. */ |
| 4971 if (pos < 0 || pos > size1 + size2) | |
| 4972 { | |
| 4973 FREE_VARIABLES (); | |
| 4974 return -1; | |
| 4975 } | |
| 4976 | |
| 4977 /* Initialize subexpression text positions to -1 to mark ones that no | |
| 4978 start_memory/stop_memory has been seen for. Also initialize the | |
| 4979 register information struct. */ | |
| 4980 for (mcnt = 1; mcnt < num_regs; mcnt++) | |
| 4981 { | |
| 4982 regstart[mcnt] = regend[mcnt] | |
| 4983 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; | |
| 4984 | |
| 4985 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; | |
| 4986 IS_ACTIVE (reg_info[mcnt]) = 0; | |
| 4987 MATCHED_SOMETHING (reg_info[mcnt]) = 0; | |
| 4988 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; | |
| 4989 } | |
| 4990 /* We move `string1' into `string2' if the latter's empty -- but not if | |
| 4991 `string1' is null. */ | |
| 4992 if (size2 == 0 && string1 != NULL) | |
| 4993 { | |
| 4994 string2 = string1; | |
| 4995 size2 = size1; | |
| 4996 string1 = 0; | |
| 4997 size1 = 0; | |
| 4998 } | |
| 4999 end1 = string1 + size1; | |
| 5000 end2 = string2 + size2; | |
| 5001 | |
| 5002 /* Compute where to stop matching, within the two strings. */ | |
| 5003 if (stop <= size1) | |
| 5004 { | |
| 5005 end_match_1 = string1 + stop; | |
| 5006 end_match_2 = string2; | |
| 5007 } | |
| 5008 else | |
| 5009 { | |
| 5010 end_match_1 = end1; | |
| 5011 end_match_2 = string2 + stop - size1; | |
| 5012 } | |
| 5013 | |
| 5014 /* `p' scans through the pattern as `d' scans through the data. | |
| 5015 `dend' is the end of the input string that `d' points within. `d' | |
| 5016 is advanced into the following input string whenever necessary, but | |
| 5017 this happens before fetching; therefore, at the beginning of the | |
| 5018 loop, `d' can be pointing at the end of a string, but it cannot | |
| 5019 equal `string2'. */ | |
| 5020 if (size1 > 0 && pos <= size1) | |
| 5021 { | |
| 5022 d = string1 + pos; | |
| 5023 dend = end_match_1; | |
| 5024 } | |
| 5025 else | |
| 5026 { | |
| 5027 d = string2 + pos - size1; | |
| 5028 dend = end_match_2; | |
| 5029 } | |
| 5030 | |
| 446 | 5031 DEBUG_PRINT1 ("The compiled pattern is: \n"); |
| 428 | 5032 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); |
| 5033 DEBUG_PRINT1 ("The string to match is: `"); | |
| 5034 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); | |
| 5035 DEBUG_PRINT1 ("'\n"); | |
| 5036 | |
| 5037 /* This loops over pattern commands. It exits by returning from the | |
| 5038 function if the match is complete, or it drops through if the match | |
| 5039 fails at this starting point in the input data. */ | |
| 5040 for (;;) | |
| 5041 { | |
| 5042 DEBUG_PRINT2 ("\n0x%lx: ", (long) p); | |
| 5043 #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ | |
| 5044 if (!no_quit_in_re_search) | |
| 1333 | 5045 { |
| 5046 BEGIN_REGEX_MALLOC_OK (); | |
| 5047 QUIT; | |
| 5048 END_REGEX_MALLOC_OK (); | |
| 1346 | 5049 RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); |
| 1333 | 5050 } |
| 428 | 5051 #endif |
| 5052 | |
| 5053 if (p == pend) | |
| 5054 { /* End of pattern means we might have succeeded. */ | |
| 5055 DEBUG_PRINT1 ("end of pattern ... "); | |
| 5056 | |
| 5057 /* If we haven't matched the entire string, and we want the | |
| 5058 longest match, try backtracking. */ | |
| 5059 if (d != end_match_2) | |
| 5060 { | |
| 5061 same_str_p = (FIRST_STRING_P (match_end) | |
| 5062 == MATCHING_IN_FIRST_STRING); | |
| 5063 | |
| 5064 /* AIX compiler got confused when this was combined | |
| 5065 with the previous declaration. */ | |
| 5066 if (same_str_p) | |
| 5067 best_match_p = d > match_end; | |
| 5068 else | |
| 5069 best_match_p = !MATCHING_IN_FIRST_STRING; | |
| 5070 | |
| 5071 DEBUG_PRINT1 ("backtracking.\n"); | |
| 5072 | |
| 5073 if (!FAIL_STACK_EMPTY ()) | |
| 5074 { /* More failure points to try. */ | |
| 5075 | |
| 5076 /* If exceeds best match so far, save it. */ | |
| 5077 if (!best_regs_set || best_match_p) | |
| 5078 { | |
| 5079 best_regs_set = true; | |
| 5080 match_end = d; | |
| 5081 | |
| 5082 DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); | |
| 5083 | |
| 5084 for (mcnt = 1; mcnt < num_regs; mcnt++) | |
| 5085 { | |
| 5086 best_regstart[mcnt] = regstart[mcnt]; | |
| 5087 best_regend[mcnt] = regend[mcnt]; | |
| 5088 } | |
| 5089 } | |
| 5090 goto fail; | |
| 5091 } | |
| 5092 | |
| 5093 /* If no failure points, don't restore garbage. And if | |
| 5094 last match is real best match, don't restore second | |
| 5095 best one. */ | |
| 5096 else if (best_regs_set && !best_match_p) | |
| 5097 { | |
| 5098 restore_best_regs: | |
| 5099 /* Restore best match. It may happen that `dend == | |
| 5100 end_match_1' while the restored d is in string2. | |
| 5101 For example, the pattern `x.*y.*z' against the | |
| 5102 strings `x-' and `y-z-', if the two strings are | |
| 5103 not consecutive in memory. */ | |
| 5104 DEBUG_PRINT1 ("Restoring best registers.\n"); | |
| 5105 | |
| 5106 d = match_end; | |
| 5107 dend = ((d >= string1 && d <= end1) | |
| 5108 ? end_match_1 : end_match_2); | |
| 5109 | |
| 5110 for (mcnt = 1; mcnt < num_regs; mcnt++) | |
| 5111 { | |
| 5112 regstart[mcnt] = best_regstart[mcnt]; | |
| 5113 regend[mcnt] = best_regend[mcnt]; | |
| 5114 } | |
| 5115 } | |
| 5116 } /* d != end_match_2 */ | |
| 5117 | |
| 5118 succeed_label: | |
| 5119 DEBUG_PRINT1 ("Accepting match.\n"); | |
| 5120 | |
| 5121 /* If caller wants register contents data back, do it. */ | |
| 1028 | 5122 { |
| 5123 int num_nonshy_regs = bufp->re_nsub + 1; | |
| 5124 if (regs && !bufp->no_sub) | |
| 5125 { | |
| 5126 /* Have the register data arrays been allocated? */ | |
| 5127 if (bufp->regs_allocated == REGS_UNALLOCATED) | |
| 5128 { /* No. So allocate them with malloc. We need one | |
| 5129 extra element beyond `num_regs' for the `-1' marker | |
| 5130 GNU code uses. */ | |
| 5131 regs->num_regs = MAX (RE_NREGS, num_nonshy_regs + 1); | |
| 1333 | 5132 BEGIN_REGEX_MALLOC_OK (); |
| 1028 | 5133 regs->start = TALLOC (regs->num_regs, regoff_t); |
| 5134 regs->end = TALLOC (regs->num_regs, regoff_t); | |
| 1333 | 5135 END_REGEX_MALLOC_OK (); |
| 5136 RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
| 1028 | 5137 if (regs->start == NULL || regs->end == NULL) |
| 5138 { | |
| 5139 FREE_VARIABLES (); | |
| 5140 return -2; | |
| 5141 } | |
| 5142 bufp->regs_allocated = REGS_REALLOCATE; | |
| 5143 } | |
| 5144 else if (bufp->regs_allocated == REGS_REALLOCATE) | |
| 5145 { /* Yes. If we need more elements than were already | |
| 5146 allocated, reallocate them. If we need fewer, just | |
| 5147 leave it alone. */ | |
| 5148 if (regs->num_regs < num_nonshy_regs + 1) | |
| 5149 { | |
| 5150 regs->num_regs = num_nonshy_regs + 1; | |
| 1333 | 5151 BEGIN_REGEX_MALLOC_OK (); |
| 1028 | 5152 RETALLOC (regs->start, regs->num_regs, regoff_t); |
| 5153 RETALLOC (regs->end, regs->num_regs, regoff_t); | |
| 1333 | 5154 END_REGEX_MALLOC_OK (); |
| 5155 RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
| 1028 | 5156 if (regs->start == NULL || regs->end == NULL) |
| 5157 { | |
| 5158 FREE_VARIABLES (); | |
| 5159 return -2; | |
| 5160 } | |
| 5161 } | |
| 5162 } | |
| 5163 else | |
| 5164 { | |
| 5165 /* The braces fend off an "empty body in an else-statement" | |
| 5166 warning under GCC when assert expands to nothing. */ | |
| 5167 assert (bufp->regs_allocated == REGS_FIXED); | |
| 5168 } | |
| 5169 | |
| 5170 /* Convert the pointer data in `regstart' and `regend' to | |
| 5171 indices. Register zero has to be set differently, | |
| 5172 since we haven't kept track of any info for it. */ | |
| 5173 if (regs->num_regs > 0) | |
| 5174 { | |
| 5175 regs->start[0] = pos; | |
| 5176 regs->end[0] = (MATCHING_IN_FIRST_STRING | |
| 5177 ? ((regoff_t) (d - string1)) | |
| 5178 : ((regoff_t) (d - string2 + size1))); | |
| 5179 } | |
| 5180 | |
| 2639 | 5181 /* Map over the NUM_NONSHY_REGS non-shy internal registers. |
| 5182 Copy each into the corresponding external register. | |
| 5183 MCNT indexes external registers. */ | |
| 1028 | 5184 for (mcnt = 1; mcnt < MIN (num_nonshy_regs, regs->num_regs); |
| 5185 mcnt++) | |
| 5186 { | |
| 5187 int internal_reg = bufp->external_to_internal_register[mcnt]; | |
| 5188 if (REG_UNSET (regstart[internal_reg]) || | |
| 5189 REG_UNSET (regend[internal_reg])) | |
| 5190 regs->start[mcnt] = regs->end[mcnt] = -1; | |
| 5191 else | |
| 5192 { | |
| 5193 regs->start[mcnt] = | |
| 5194 (regoff_t) POINTER_TO_OFFSET (regstart[internal_reg]); | |
| 5195 regs->end[mcnt] = | |
| 5196 (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); | |
| 5197 } | |
| 5198 } | |
| 5199 } /* regs && !bufp->no_sub */ | |
| 5200 | |
| 5201 /* If we have regs and the regs structure has more elements than | |
| 2639 | 5202 were in the pattern, set the extra elements starting with |
| 5203 NUM_NONSHY_REGS to -1. If we (re)allocated the registers, | |
| 5204 this is the case, because we always allocate enough to have | |
| 5205 at least one -1 at the end. | |
| 1028 | 5206 |
| 5207 We do this even when no_sub is set because some applications | |
| 5208 (XEmacs) reuse register structures which may contain stale | |
| 5209 information, and permit attempts to access those registers. | |
| 5210 | |
| 5211 It would be possible to require the caller to do this, but we'd | |
| 5212 have to change the API for this function to reflect that, and | |
| 1425 | 5213 audit all callers. Note: as of 2003-04-17 callers in XEmacs |
| 5214 do clear the registers, but it's safer to leave this code in | |
| 5215 because of reallocation. | |
| 5216 */ | |
| 1028 | 5217 if (regs && regs->num_regs > 0) |
| 5218 for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) | |
| 5219 regs->start[mcnt] = regs->end[mcnt] = -1; | |
| 5220 } | |
| 428 | 5221 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", |
| 5222 nfailure_points_pushed, nfailure_points_popped, | |
| 5223 nfailure_points_pushed - nfailure_points_popped); | |
| 5224 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); | |
| 5225 | |
| 5226 mcnt = d - pos - (MATCHING_IN_FIRST_STRING | |
| 5227 ? string1 | |
| 5228 : string2 - size1); | |
| 5229 | |
| 5230 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); | |
| 5231 | |
| 5232 FREE_VARIABLES (); | |
| 5233 return mcnt; | |
| 5234 } | |
| 5235 | |
| 5236 /* Otherwise match next pattern command. */ | |
| 5237 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) | |
| 5238 { | |
| 5239 /* Ignore these. Used to ignore the n of succeed_n's which | |
| 5240 currently have n == 0. */ | |
| 5241 case no_op: | |
| 5242 DEBUG_PRINT1 ("EXECUTING no_op.\n"); | |
| 5243 break; | |
| 5244 | |
| 5245 case succeed: | |
| 5246 DEBUG_PRINT1 ("EXECUTING succeed.\n"); | |
| 5247 goto succeed_label; | |
| 5248 | |
| 826 | 5249 /* Match exactly a string of length n in the pattern. The |
| 5250 following byte in the pattern defines n, and the n bytes after | |
| 5251 that make up the string to match. (Under Mule, this will be in | |
| 5252 the default internal format.) */ | |
| 428 | 5253 case exactn: |
| 5254 mcnt = *p++; | |
| 5255 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); | |
| 5256 | |
| 5257 /* This is written out as an if-else so we don't waste time | |
| 5258 testing `translate' inside the loop. */ | |
| 446 | 5259 if (TRANSLATE_P (translate)) |
| 428 | 5260 { |
| 5261 do | |
| 5262 { | |
| 446 | 5263 #ifdef MULE |
| 5264 Bytecount pat_len; | |
| 5265 | |
| 450 | 5266 REGEX_PREFETCH (); |
| 867 | 5267 if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) |
| 5268 != itext_ichar (p)) | |
| 428 | 5269 goto fail; |
| 446 | 5270 |
| 867 | 5271 pat_len = itext_ichar_len (p); |
| 446 | 5272 p += pat_len; |
| 867 | 5273 INC_IBYTEPTR_FMT (d, fmt); |
| 446 | 5274 |
| 5275 mcnt -= pat_len; | |
| 5276 #else /* not MULE */ | |
| 450 | 5277 REGEX_PREFETCH (); |
| 826 | 5278 if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) |
| 446 | 5279 goto fail; |
| 5280 mcnt--; | |
| 5281 #endif | |
| 428 | 5282 } |
| 446 | 5283 while (mcnt > 0); |
| 428 | 5284 } |
| 5285 else | |
| 5286 { | |
| 826 | 5287 #ifdef MULE |
| 5288 /* If buffer format is default, then we can shortcut and just | |
| 5289 compare the text directly, byte by byte. Otherwise, we | |
| 5290 need to go character by character. */ | |
| 5291 if (fmt != FORMAT_DEFAULT) | |
| 428 | 5292 { |
| 826 | 5293 do |
| 5294 { | |
| 5295 Bytecount pat_len; | |
| 5296 | |
| 5297 REGEX_PREFETCH (); | |
| 867 | 5298 if (itext_ichar_fmt (d, fmt, lispobj) != |
| 5299 itext_ichar (p)) | |
| 826 | 5300 goto fail; |
| 5301 | |
| 867 | 5302 pat_len = itext_ichar_len (p); |
| 826 | 5303 p += pat_len; |
| 867 | 5304 INC_IBYTEPTR_FMT (d, fmt); |
| 826 | 5305 |
| 5306 mcnt -= pat_len; | |
| 5307 } | |
| 5308 while (mcnt > 0); | |
| 428 | 5309 } |
| 826 | 5310 else |
| 5311 #endif | |
| 5312 { | |
| 5313 do | |
| 5314 { | |
| 5315 REGEX_PREFETCH (); | |
| 5316 if (*d++ != *p++) goto fail; | |
| 5317 mcnt--; | |
| 5318 } | |
| 5319 while (mcnt > 0); | |
| 5320 } | |
| 428 | 5321 } |
| 5322 SET_REGS_MATCHED (); | |
| 5323 break; | |
| 5324 | |
| 5325 | |
| 5326 /* Match any character except possibly a newline or a null. */ | |
| 5327 case anychar: | |
| 5328 DEBUG_PRINT1 ("EXECUTING anychar.\n"); | |
| 5329 | |
| 450 | 5330 REGEX_PREFETCH (); |
| 428 | 5331 |
| 826 | 5332 if ((!(bufp->syntax & RE_DOT_NEWLINE) && |
| 867 | 5333 RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') |
| 826 | 5334 || (bufp->syntax & RE_DOT_NOT_NULL && |
| 867 | 5335 RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == |
| 826 | 5336 '\000')) |
| 428 | 5337 goto fail; |
| 5338 | |
| 5339 SET_REGS_MATCHED (); | |
| 5340 DEBUG_PRINT2 (" Matched `%d'.\n", *d); | |
| 867 | 5341 INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ |
| 428 | 5342 break; |
| 5343 | |
| 5344 | |
| 5345 case charset: | |
| 5346 case charset_not: | |
| 5347 { | |
| 1414 | 5348 REGISTER Ichar c; |
| 460 | 5349 re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; |
| 458 | 5350 |
| 5351 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); | |
| 428 | 5352 |
| 450 | 5353 REGEX_PREFETCH (); |
| 867 | 5354 c = itext_ichar_fmt (d, fmt, lispobj); |
| 826 | 5355 c = RE_TRANSLATE (c); /* The character to match. */ |
| 428 | 5356 |
| 647 | 5357 /* Cast to `unsigned int' instead of `unsigned char' in case the |
| 428 | 5358 bit list is a full 32 bytes long. */ |
| 1414 | 5359 if ((unsigned int)c < (unsigned int) (*p * BYTEWIDTH) |
| 428 | 5360 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) |
| 458 | 5361 not_p = !not_p; |
| 428 | 5362 |
| 5363 p += 1 + *p; | |
| 5364 | |
| 458 | 5365 if (!not_p) goto fail; |
| 428 | 5366 |
| 5367 SET_REGS_MATCHED (); | |
| 867 | 5368 INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ |
| 428 | 5369 break; |
| 5370 } | |
| 5371 | |
| 5372 #ifdef MULE | |
| 5373 case charset_mule: | |
| 5374 case charset_mule_not: | |
| 5375 { | |
| 867 | 5376 REGISTER Ichar c; |
| 460 | 5377 re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; |
| 458 | 5378 |
| 5379 DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : ""); | |
| 428 | 5380 |
| 450 | 5381 REGEX_PREFETCH (); |
| 867 | 5382 c = itext_ichar_fmt (d, fmt, lispobj); |
| 826 | 5383 c = RE_TRANSLATE (c); /* The character to match. */ |
| 428 | 5384 |
| 5385 if (EQ (Qt, unified_range_table_lookup (p, c, Qnil))) | |
| 458 | 5386 not_p = !not_p; |
| 428 | 5387 |
| 5388 p += unified_range_table_bytes_used (p); | |
| 5389 | |
| 458 | 5390 if (!not_p) goto fail; |
| 428 | 5391 |
| 5392 SET_REGS_MATCHED (); | |
| 867 | 5393 INC_IBYTEPTR_FMT (d, fmt); |
| 428 | 5394 break; |
| 5395 } | |
| 5396 #endif /* MULE */ | |
| 5397 | |
| 5398 | |
| 5399 /* The beginning of a group is represented by start_memory. | |
| 5400 The arguments are the register number in the next byte, and the | |
| 5401 number of groups inner to this one in the next. The text | |
| 5402 matched within the group is recorded (in the internal | |
| 5403 registers data structure) under the register number. */ | |
| 5404 case start_memory: | |
| 5405 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); | |
| 5406 | |
| 5407 /* Find out if this group can match the empty string. */ | |
| 5408 p1 = p; /* To send to group_match_null_string_p. */ | |
| 5409 | |
| 5410 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) | |
| 2639 | 5411 REG_MATCH_NULL_STRING_P (reg_info[*p]) |
| 5412 = group_match_null_string_p (&p1, pend, reg_info); | |
| 5413 | |
| 5414 DEBUG_PRINT2 (" group CAN%s match null string\n", | |
| 5415 REG_MATCH_NULL_STRING_P (reg_info[*p]) ? "NOT" : ""); | |
| 428 | 5416 |
| 5417 /* Save the position in the string where we were the last time | |
| 5418 we were at this open-group operator in case the group is | |
| 5419 operated upon by a repetition operator, e.g., with `(a*)*b' | |
| 5420 against `ab'; then we want to ignore where we are now in | |
| 5421 the string in case this attempt to match fails. */ | |
| 5422 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) | |
| 5423 ? REG_UNSET (regstart[*p]) ? d : regstart[*p] | |
| 5424 : regstart[*p]; | |
| 5425 DEBUG_PRINT2 (" old_regstart: %d\n", | |
| 5426 POINTER_TO_OFFSET (old_regstart[*p])); | |
| 5427 | |
| 5428 regstart[*p] = d; | |
| 5429 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); | |
| 5430 | |
| 5431 IS_ACTIVE (reg_info[*p]) = 1; | |
| 5432 MATCHED_SOMETHING (reg_info[*p]) = 0; | |
| 5433 | |
| 5434 /* Clear this whenever we change the register activity status. */ | |
| 5435 set_regs_matched_done = 0; | |
| 5436 | |
| 5437 /* This is the new highest active register. */ | |
| 5438 highest_active_reg = *p; | |
| 5439 | |
| 5440 /* If nothing was active before, this is the new lowest active | |
| 5441 register. */ | |
| 5442 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) | |
| 5443 lowest_active_reg = *p; | |
| 5444 | |
| 5445 /* Move past the register number and inner group count. */ | |
| 5446 p += 2; | |
| 5447 just_past_start_mem = p; | |
| 5448 | |
| 5449 break; | |
| 5450 | |
| 5451 | |
| 5452 /* The stop_memory opcode represents the end of a group. Its | |
| 5453 arguments are the same as start_memory's: the register | |
| 5454 number, and the number of inner groups. */ | |
| 5455 case stop_memory: | |
| 5456 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); | |
| 5457 | |
| 5458 /* We need to save the string position the last time we were at | |
| 5459 this close-group operator in case the group is operated | |
| 5460 upon by a repetition operator, e.g., with `((a*)*(b*)*)*' | |
| 5461 against `aba'; then we want to ignore where we are now in | |
| 5462 the string in case this attempt to match fails. */ | |
| 5463 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) | |
| 5464 ? REG_UNSET (regend[*p]) ? d : regend[*p] | |
| 5465 : regend[*p]; | |
| 5466 DEBUG_PRINT2 (" old_regend: %d\n", | |
| 5467 POINTER_TO_OFFSET (old_regend[*p])); | |
| 5468 | |
| 5469 regend[*p] = d; | |
| 5470 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); | |
| 5471 | |
| 5472 /* This register isn't active anymore. */ | |
| 5473 IS_ACTIVE (reg_info[*p]) = 0; | |
| 5474 | |
| 5475 /* Clear this whenever we change the register activity status. */ | |
| 5476 set_regs_matched_done = 0; | |
| 5477 | |
| 5478 /* If this was the only register active, nothing is active | |
| 5479 anymore. */ | |
| 5480 if (lowest_active_reg == highest_active_reg) | |
| 5481 { | |
| 5482 lowest_active_reg = NO_LOWEST_ACTIVE_REG; | |
| 5483 highest_active_reg = NO_HIGHEST_ACTIVE_REG; | |
| 5484 } | |
| 5485 else | |
| 5486 { /* We must scan for the new highest active register, since | |
| 5487 it isn't necessarily one less than now: consider | |
| 5488 (a(b)c(d(e)f)g). When group 3 ends, after the f), the | |
| 5489 new highest active register is 1. */ | |
| 5490 unsigned char r = *p - 1; | |
| 5491 while (r > 0 && !IS_ACTIVE (reg_info[r])) | |
| 5492 r--; | |
| 5493 | |
| 5494 /* If we end up at register zero, that means that we saved | |
| 5495 the registers as the result of an `on_failure_jump', not | |
| 5496 a `start_memory', and we jumped to past the innermost | |
| 5497 `stop_memory'. For example, in ((.)*) we save | |
| 5498 registers 1 and 2 as a result of the *, but when we pop | |
| 5499 back to the second ), we are at the stop_memory 1. | |
| 5500 Thus, nothing is active. */ | |
| 5501 if (r == 0) | |
| 5502 { | |
| 5503 lowest_active_reg = NO_LOWEST_ACTIVE_REG; | |
| 5504 highest_active_reg = NO_HIGHEST_ACTIVE_REG; | |
| 5505 } | |
| 5506 else | |
| 5507 { | |
| 5508 highest_active_reg = r; | |
| 5509 | |
| 5510 /* 98/9/21 jhod: We've also gotta set lowest_active_reg, don't we? */ | |
| 5511 r = 1; | |
| 5512 while (r < highest_active_reg && !IS_ACTIVE(reg_info[r])) | |
| 5513 r++; | |
| 5514 lowest_active_reg = r; | |
| 5515 } | |
| 5516 } | |
| 5517 | |
| 5518 /* If just failed to match something this time around with a | |
| 5519 group that's operated on by a repetition operator, try to | |
| 5520 force exit from the ``loop'', and restore the register | |
| 5521 information for this group that we had before trying this | |
| 5522 last match. */ | |
| 5523 if ((!MATCHED_SOMETHING (reg_info[*p]) | |
| 5524 || just_past_start_mem == p - 1) | |
| 5525 && (p + 2) < pend) | |
| 5526 { | |
| 460 | 5527 re_bool is_a_jump_n = false; |
| 428 | 5528 |
| 5529 p1 = p + 2; | |
| 5530 mcnt = 0; | |
| 5531 switch ((re_opcode_t) *p1++) | |
| 5532 { | |
| 5533 case jump_n: | |
| 5534 is_a_jump_n = true; | |
| 5535 case pop_failure_jump: | |
| 5536 case maybe_pop_jump: | |
| 5537 case jump: | |
| 5538 case dummy_failure_jump: | |
| 5539 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | |
| 5540 if (is_a_jump_n) | |
| 5541 p1 += 2; | |
| 5542 break; | |
| 5543 | |
| 5544 default: | |
| 5545 /* do nothing */ ; | |
| 5546 } | |
| 5547 p1 += mcnt; | |
| 5548 | |
| 5549 /* If the next operation is a jump backwards in the pattern | |
| 5550 to an on_failure_jump right before the start_memory | |
| 5551 corresponding to this stop_memory, exit from the loop | |
| 5552 by forcing a failure after pushing on the stack the | |
| 5553 on_failure_jump's jump in the pattern, and d. */ | |
| 5554 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump | |
| 5555 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) | |
| 5556 { | |
| 5557 /* If this group ever matched anything, then restore | |
| 5558 what its registers were before trying this last | |
| 5559 failed match, e.g., with `(a*)*b' against `ab' for | |
| 5560 regstart[1], and, e.g., with `((a*)*(b*)*)*' | |
| 5561 against `aba' for regend[3]. | |
| 5562 | |
| 5563 Also restore the registers for inner groups for, | |
| 5564 e.g., `((a*)(b*))*' against `aba' (register 3 would | |
| 5565 otherwise get trashed). */ | |
| 5566 | |
| 5567 if (EVER_MATCHED_SOMETHING (reg_info[*p])) | |
| 5568 { | |
| 647 | 5569 int r; |
| 428 | 5570 |
| 5571 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; | |
| 5572 | |
| 5573 /* Restore this and inner groups' (if any) registers. */ | |
| 5574 for (r = *p; r < *p + *(p + 1); r++) | |
| 5575 { | |
| 5576 regstart[r] = old_regstart[r]; | |
| 5577 | |
| 5578 /* xx why this test? */ | |
| 5579 if (old_regend[r] >= regstart[r]) | |
| 5580 regend[r] = old_regend[r]; | |
| 5581 } | |
| 5582 } | |
| 5583 p1++; | |
| 5584 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | |
| 5585 PUSH_FAILURE_POINT (p1 + mcnt, d, -2); | |
| 5586 | |
| 5587 goto fail; | |
| 5588 } | |
| 5589 } | |
| 5590 | |
| 5591 /* Move past the register number and the inner group count. */ | |
| 5592 p += 2; | |
| 5593 break; | |
| 5594 | |
| 5595 | |
| 5596 /* \<digit> has been turned into a `duplicate' command which is | |
| 502 | 5597 followed by the numeric value of <digit> as the register number. |
| 5598 (Already passed through external-to-internal-register mapping, | |
| 5599 so it refers to the actual group number, not the non-shy-only | |
| 5600 numbering used in the external world.) */ | |
| 428 | 5601 case duplicate: |
| 5602 { | |
| 446 | 5603 REGISTER re_char *d2, *dend2; |
| 502 | 5604 /* Get which register to match against. */ |
| 5605 int regno = *p++; | |
| 428 | 5606 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); |
| 5607 | |
| 5608 /* Can't back reference a group which we've never matched. */ | |
| 5609 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) | |
| 5610 goto fail; | |
| 5611 | |
| 5612 /* Where in input to try to start matching. */ | |
| 5613 d2 = regstart[regno]; | |
| 5614 | |
| 5615 /* Where to stop matching; if both the place to start and | |
| 5616 the place to stop matching are in the same string, then | |
| 5617 set to the place to stop, otherwise, for now have to use | |
| 5618 the end of the first string. */ | |
| 5619 | |
| 5620 dend2 = ((FIRST_STRING_P (regstart[regno]) | |
| 5621 == FIRST_STRING_P (regend[regno])) | |
| 5622 ? regend[regno] : end_match_1); | |
| 5623 for (;;) | |
| 5624 { | |
| 5625 /* If necessary, advance to next segment in register | |
| 5626 contents. */ | |
| 5627 while (d2 == dend2) | |
| 5628 { | |
| 5629 if (dend2 == end_match_2) break; | |
| 5630 if (dend2 == regend[regno]) break; | |
| 5631 | |
| 5632 /* End of string1 => advance to string2. */ | |
| 5633 d2 = string2; | |
| 5634 dend2 = regend[regno]; | |
| 5635 } | |
| 5636 /* At end of register contents => success */ | |
| 5637 if (d2 == dend2) break; | |
| 5638 | |
| 5639 /* If necessary, advance to next segment in data. */ | |
| 450 | 5640 REGEX_PREFETCH (); |
| 428 | 5641 |
| 5642 /* How many characters left in this segment to match. */ | |
| 5643 mcnt = dend - d; | |
| 5644 | |
| 5645 /* Want how many consecutive characters we can match in | |
| 5646 one shot, so, if necessary, adjust the count. */ | |
| 5647 if (mcnt > dend2 - d2) | |
| 5648 mcnt = dend2 - d2; | |
| 5649 | |
| 5650 /* Compare that many; failure if mismatch, else move | |
| 5651 past them. */ | |
| 446 | 5652 if (TRANSLATE_P (translate) |
| 826 | 5653 ? bcmp_translate (d, d2, mcnt, translate |
| 5654 #ifdef emacs | |
| 5655 , fmt, lispobj | |
| 5656 #endif | |
| 5657 ) | |
| 428 | 5658 : memcmp (d, d2, mcnt)) |
| 5659 goto fail; | |
| 5660 d += mcnt, d2 += mcnt; | |
| 5661 | |
| 5662 /* Do this because we've matched some characters. */ | |
| 5663 SET_REGS_MATCHED (); | |
| 5664 } | |
| 5665 } | |
| 5666 break; | |
| 5667 | |
| 5668 | |
| 5669 /* begline matches the empty string at the beginning of the string | |
| 5670 (unless `not_bol' is set in `bufp'), and, if | |
| 5671 `newline_anchor' is set, after newlines. */ | |
| 5672 case begline: | |
| 5673 DEBUG_PRINT1 ("EXECUTING begline.\n"); | |
| 5674 | |
| 5675 if (AT_STRINGS_BEG (d)) | |
| 5676 { | |
| 5677 if (!bufp->not_bol) break; | |
| 5678 } | |
| 826 | 5679 else |
| 5680 { | |
| 5681 re_char *d2 = d; | |
| 867 | 5682 DEC_IBYTEPTR (d2); |
| 5683 if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && | |
| 826 | 5684 bufp->newline_anchor) |
| 5685 break; | |
| 5686 } | |
| 428 | 5687 /* In all other cases, we fail. */ |
| 5688 goto fail; | |
| 5689 | |
| 5690 | |
| 5691 /* endline is the dual of begline. */ | |
| 5692 case endline: | |
| 5693 DEBUG_PRINT1 ("EXECUTING endline.\n"); | |
| 5694 | |
| 5695 if (AT_STRINGS_END (d)) | |
| 5696 { | |
| 5697 if (!bufp->not_eol) break; | |
| 5698 } | |
| 5699 | |
| 5700 /* We have to ``prefetch'' the next character. */ | |
| 826 | 5701 else if ((d == end1 ? |
| 867 | 5702 itext_ichar_ascii_fmt (string2, fmt, lispobj) : |
| 5703 itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' | |
| 428 | 5704 && bufp->newline_anchor) |
| 5705 { | |
| 5706 break; | |
| 5707 } | |
| 5708 goto fail; | |
| 5709 | |
| 5710 | |
| 5711 /* Match at the very beginning of the data. */ | |
| 5712 case begbuf: | |
| 5713 DEBUG_PRINT1 ("EXECUTING begbuf.\n"); | |
| 5714 if (AT_STRINGS_BEG (d)) | |
| 5715 break; | |
| 5716 goto fail; | |
| 5717 | |
| 5718 | |
| 5719 /* Match at the very end of the data. */ | |
| 5720 case endbuf: | |
| 5721 DEBUG_PRINT1 ("EXECUTING endbuf.\n"); | |
| 5722 if (AT_STRINGS_END (d)) | |
| 5723 break; | |
| 5724 goto fail; | |
| 5725 | |
| 5726 | |
| 5727 /* on_failure_keep_string_jump is used to optimize `.*\n'. It | |
| 5728 pushes NULL as the value for the string on the stack. Then | |
| 5729 `pop_failure_point' will keep the current value for the | |
| 5730 string, instead of restoring it. To see why, consider | |
| 5731 matching `foo\nbar' against `.*\n'. The .* matches the foo; | |
| 5732 then the . fails against the \n. But the next thing we want | |
| 5733 to do is match the \n against the \n; if we restored the | |
| 5734 string value, we would be back at the foo. | |
| 5735 | |
| 5736 Because this is used only in specific cases, we don't need to | |
| 5737 check all the things that `on_failure_jump' does, to make | |
| 5738 sure the right things get saved on the stack. Hence we don't | |
| 5739 share its code. The only reason to push anything on the | |
| 5740 stack at all is that otherwise we would have to change | |
| 5741 `anychar's code to do something besides goto fail in this | |
| 5742 case; that seems worse than this. */ | |
| 5743 case on_failure_keep_string_jump: | |
| 5744 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); | |
| 5745 | |
| 5746 EXTRACT_NUMBER_AND_INCR (mcnt, p); | |
| 5747 DEBUG_PRINT3 (" %d (to 0x%lx):\n", mcnt, (long) (p + mcnt)); | |
| 5748 | |
| 446 | 5749 PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); |
| 428 | 5750 break; |
| 5751 | |
| 5752 | |
| 5753 /* Uses of on_failure_jump: | |
| 5754 | |
| 5755 Each alternative starts with an on_failure_jump that points | |
| 5756 to the beginning of the next alternative. Each alternative | |
| 5757 except the last ends with a jump that in effect jumps past | |
| 5758 the rest of the alternatives. (They really jump to the | |
| 5759 ending jump of the following alternative, because tensioning | |
| 5760 these jumps is a hassle.) | |
| 5761 | |
| 5762 Repeats start with an on_failure_jump that points past both | |
| 5763 the repetition text and either the following jump or | |
| 5764 pop_failure_jump back to this on_failure_jump. */ | |
| 5765 case on_failure_jump: | |
| 5766 on_failure: | |
| 5767 DEBUG_PRINT1 ("EXECUTING on_failure_jump"); | |
| 5768 | |
| 5769 EXTRACT_NUMBER_AND_INCR (mcnt, p); | |
| 5770 DEBUG_PRINT3 (" %d (to 0x%lx)", mcnt, (long) (p + mcnt)); | |
| 5771 | |
| 5772 /* If this on_failure_jump comes right before a group (i.e., | |
| 5773 the original * applied to a group), save the information | |
| 5774 for that group and all inner ones, so that if we fail back | |
| 5775 to this point, the group's information will be correct. | |
| 5776 For example, in \(a*\)*\1, we need the preceding group, | |
| 5777 and in \(\(a*\)b*\)\2, we need the inner group. */ | |
| 5778 | |
| 5779 /* We can't use `p' to check ahead because we push | |
| 5780 a failure point to `p + mcnt' after we do this. */ | |
| 5781 p1 = p; | |
| 5782 | |
| 5783 /* We need to skip no_op's before we look for the | |
| 5784 start_memory in case this on_failure_jump is happening as | |
| 5785 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 | |
| 5786 against aba. */ | |
| 5787 while (p1 < pend && (re_opcode_t) *p1 == no_op) | |
| 5788 p1++; | |
| 5789 | |
| 5790 if (p1 < pend && (re_opcode_t) *p1 == start_memory) | |
| 5791 { | |
| 5792 /* We have a new highest active register now. This will | |
| 5793 get reset at the start_memory we are about to get to, | |
| 5794 but we will have saved all the registers relevant to | |
| 5795 this repetition op, as described above. */ | |
| 5796 highest_active_reg = *(p1 + 1) + *(p1 + 2); | |
| 5797 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) | |
| 5798 lowest_active_reg = *(p1 + 1); | |
| 5799 } | |
| 5800 | |
| 5801 DEBUG_PRINT1 (":\n"); | |
| 5802 PUSH_FAILURE_POINT (p + mcnt, d, -2); | |
| 5803 break; | |
| 5804 | |
| 5805 | |
| 5806 /* A smart repeat ends with `maybe_pop_jump'. | |
| 5807 We change it to either `pop_failure_jump' or `jump'. */ | |
| 5808 case maybe_pop_jump: | |
| 5809 EXTRACT_NUMBER_AND_INCR (mcnt, p); | |
| 5810 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); | |
| 5811 { | |
| 5812 REGISTER unsigned char *p2 = p; | |
| 5813 | |
| 5814 /* Compare the beginning of the repeat with what in the | |
| 5815 pattern follows its end. If we can establish that there | |
| 5816 is nothing that they would both match, i.e., nothing that | |
| 5817 would force us to backtrack (as in, e.g., `a*a'), | |
| 5818 then we can change to pop_failure_jump, because we'll | |
| 5819 never have to backtrack. | |
| 5820 | |
| 5821 This is not true in the case of alternatives: in | |
| 5822 `(a|ab)*' we do need to backtrack to the `ab' alternative | |
| 5823 (e.g., if the string was `ab'). But instead of trying to | |
| 5824 detect that here, the alternative has put on a dummy | |
| 5825 failure point which is what we will end up popping. */ | |
| 5826 | |
| 5827 /* Skip over open/close-group commands. | |
| 5828 If what follows this loop is a ...+ construct, | |
| 5829 look at what begins its body, since we will have to | |
| 5830 match at least one of that. */ | |
| 5831 while (1) | |
| 5832 { | |
| 5833 if (p2 + 2 < pend | |
| 5834 && ((re_opcode_t) *p2 == stop_memory | |
| 5835 || (re_opcode_t) *p2 == start_memory)) | |
| 5836 p2 += 3; | |
| 5837 else if (p2 + 6 < pend | |
| 5838 && (re_opcode_t) *p2 == dummy_failure_jump) | |
| 5839 p2 += 6; | |
| 5840 else | |
| 5841 break; | |
| 5842 } | |
| 5843 | |
| 5844 p1 = p + mcnt; | |
| 5845 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding | |
| 5846 to the `maybe_pop_jump' of this case. Examine what | |
| 5847 follows. */ | |
| 5848 | |
| 5849 /* If we're at the end of the pattern, we can change. */ | |
| 5850 if (p2 == pend) | |
| 5851 { | |
| 5852 /* Consider what happens when matching ":\(.*\)" | |
| 5853 against ":/". I don't really understand this code | |
| 5854 yet. */ | |
| 5855 p[-3] = (unsigned char) pop_failure_jump; | |
| 5856 DEBUG_PRINT1 | |
| 5857 (" End of pattern: change to `pop_failure_jump'.\n"); | |
| 5858 } | |
| 5859 | |
| 5860 else if ((re_opcode_t) *p2 == exactn | |
| 5861 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) | |
| 5862 { | |
| 5863 REGISTER unsigned char c | |
| 5864 = *p2 == (unsigned char) endline ? '\n' : p2[2]; | |
| 5865 | |
| 5866 if ((re_opcode_t) p1[3] == exactn && p1[5] != c) | |
| 5867 { | |
| 5868 p[-3] = (unsigned char) pop_failure_jump; | |
| 5869 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", | |
| 5870 c, p1[5]); | |
| 5871 } | |
| 5872 | |
| 5873 else if ((re_opcode_t) p1[3] == charset | |
| 5874 || (re_opcode_t) p1[3] == charset_not) | |
| 5875 { | |
| 458 | 5876 int not_p = (re_opcode_t) p1[3] == charset_not; |
| 428 | 5877 |
| 5878 if (c < (unsigned char) (p1[4] * BYTEWIDTH) | |
| 5879 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) | |
| 458 | 5880 not_p = !not_p; |
| 5881 | |
| 5882 /* `not_p' is equal to 1 if c would match, which means | |
| 428 | 5883 that we can't change to pop_failure_jump. */ |
| 458 | 5884 if (!not_p) |
| 428 | 5885 { |
| 5886 p[-3] = (unsigned char) pop_failure_jump; | |
| 5887 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); | |
| 5888 } | |
| 5889 } | |
| 5890 } | |
| 5891 else if ((re_opcode_t) *p2 == charset) | |
| 5892 { | |
| 5893 #ifdef DEBUG | |
| 5894 REGISTER unsigned char c | |
| 5895 = *p2 == (unsigned char) endline ? '\n' : p2[2]; | |
| 5896 #endif | |
| 5897 | |
| 5898 if ((re_opcode_t) p1[3] == exactn | |
| 5899 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] | |
| 5900 && (p2[2 + p1[5] / BYTEWIDTH] | |
| 5901 & (1 << (p1[5] % BYTEWIDTH))))) | |
| 5902 { | |
| 5903 p[-3] = (unsigned char) pop_failure_jump; | |
| 5904 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", | |
| 5905 c, p1[5]); | |
| 5906 } | |
| 5907 | |
| 5908 else if ((re_opcode_t) p1[3] == charset_not) | |
| 5909 { | |
| 5910 int idx; | |
| 5911 /* We win if the charset_not inside the loop | |
| 5912 lists every character listed in the charset after. */ | |
| 5913 for (idx = 0; idx < (int) p2[1]; idx++) | |
| 5914 if (! (p2[2 + idx] == 0 | |
| 5915 || (idx < (int) p1[4] | |
| 5916 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) | |
| 5917 break; | |
| 5918 | |
| 5919 if (idx == p2[1]) | |
| 5920 { | |
| 5921 p[-3] = (unsigned char) pop_failure_jump; | |
| 5922 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); | |
| 5923 } | |
| 5924 } | |
| 5925 else if ((re_opcode_t) p1[3] == charset) | |
| 5926 { | |
| 5927 int idx; | |
| 5928 /* We win if the charset inside the loop | |
| 5929 has no overlap with the one after the loop. */ | |
| 5930 for (idx = 0; | |
| 5931 idx < (int) p2[1] && idx < (int) p1[4]; | |
| 5932 idx++) | |
| 5933 if ((p2[2 + idx] & p1[5 + idx]) != 0) | |
| 5934 break; | |
| 5935 | |
| 5936 if (idx == p2[1] || idx == p1[4]) | |
| 5937 { | |
| 5938 p[-3] = (unsigned char) pop_failure_jump; | |
| 5939 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); | |
| 5940 } | |
| 5941 } | |
| 5942 } | |
| 5943 } | |
| 5944 p -= 2; /* Point at relative address again. */ | |
| 5945 if ((re_opcode_t) p[-1] != pop_failure_jump) | |
| 5946 { | |
| 5947 p[-1] = (unsigned char) jump; | |
| 5948 DEBUG_PRINT1 (" Match => jump.\n"); | |
| 5949 goto unconditional_jump; | |
| 5950 } | |
| 5951 /* Note fall through. */ | |
| 5952 | |
| 5953 | |
| 5954 /* The end of a simple repeat has a pop_failure_jump back to | |
| 5955 its matching on_failure_jump, where the latter will push a | |
| 5956 failure point. The pop_failure_jump takes off failure | |
| 5957 points put on by this pop_failure_jump's matching | |
| 5958 on_failure_jump; we got through the pattern to here from the | |
| 5959 matching on_failure_jump, so didn't fail. */ | |
| 5960 case pop_failure_jump: | |
| 5961 { | |
| 5962 /* We need to pass separate storage for the lowest and | |
| 5963 highest registers, even though we don't care about the | |
| 5964 actual values. Otherwise, we will restore only one | |
| 5965 register from the stack, since lowest will == highest in | |
| 5966 `pop_failure_point'. */ | |
| 647 | 5967 int dummy_low_reg, dummy_high_reg; |
| 428 | 5968 unsigned char *pdummy; |
| 446 | 5969 re_char *sdummy = NULL; |
| 428 | 5970 |
| 5971 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); | |
| 5972 POP_FAILURE_POINT (sdummy, pdummy, | |
| 5973 dummy_low_reg, dummy_high_reg, | |
| 5974 reg_dummy, reg_dummy, reg_info_dummy); | |
| 5975 } | |
| 5976 /* Note fall through. */ | |
| 5977 | |
| 5978 | |
| 5979 /* Unconditionally jump (without popping any failure points). */ | |
| 5980 case jump: | |
| 5981 unconditional_jump: | |
| 5982 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ | |
| 5983 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); | |
| 5984 p += mcnt; /* Do the jump. */ | |
| 5985 DEBUG_PRINT2 ("(to 0x%lx).\n", (long) p); | |
| 5986 break; | |
| 5987 | |
| 5988 | |
| 5989 /* We need this opcode so we can detect where alternatives end | |
| 5990 in `group_match_null_string_p' et al. */ | |
| 5991 case jump_past_alt: | |
| 5992 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); | |
| 5993 goto unconditional_jump; | |
| 5994 | |
| 5995 | |
| 5996 /* Normally, the on_failure_jump pushes a failure point, which | |
| 5997 then gets popped at pop_failure_jump. We will end up at | |
| 5998 pop_failure_jump, also, and with a pattern of, say, `a+', we | |
| 5999 are skipping over the on_failure_jump, so we have to push | |
| 6000 something meaningless for pop_failure_jump to pop. */ | |
| 6001 case dummy_failure_jump: | |
| 6002 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); | |
| 6003 /* It doesn't matter what we push for the string here. What | |
| 6004 the code at `fail' tests is the value for the pattern. */ | |
| 446 | 6005 PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); |
| 428 | 6006 goto unconditional_jump; |
| 6007 | |
| 6008 | |
| 6009 /* At the end of an alternative, we need to push a dummy failure | |
| 6010 point in case we are followed by a `pop_failure_jump', because | |
| 6011 we don't want the failure point for the alternative to be | |
| 6012 popped. For example, matching `(a|ab)*' against `aab' | |
| 6013 requires that we match the `ab' alternative. */ | |
| 6014 case push_dummy_failure: | |
| 6015 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); | |
| 6016 /* See comments just above at `dummy_failure_jump' about the | |
| 6017 two zeroes. */ | |
| 446 | 6018 PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); |
| 428 | 6019 break; |
| 6020 | |
| 6021 /* Have to succeed matching what follows at least n times. | |
| 6022 After that, handle like `on_failure_jump'. */ | |
| 6023 case succeed_n: | |
| 6024 EXTRACT_NUMBER (mcnt, p + 2); | |
| 6025 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); | |
| 6026 | |
| 6027 assert (mcnt >= 0); | |
| 6028 /* Originally, this is how many times we HAVE to succeed. */ | |
| 6029 if (mcnt > 0) | |
| 6030 { | |
| 6031 mcnt--; | |
| 6032 p += 2; | |
| 6033 STORE_NUMBER_AND_INCR (p, mcnt); | |
| 6034 DEBUG_PRINT3 (" Setting 0x%lx to %d.\n", (long) p, mcnt); | |
| 6035 } | |
| 6036 else if (mcnt == 0) | |
| 6037 { | |
| 6038 DEBUG_PRINT2 (" Setting two bytes from 0x%lx to no_op.\n", | |
| 6039 (long) (p+2)); | |
| 6040 p[2] = (unsigned char) no_op; | |
| 6041 p[3] = (unsigned char) no_op; | |
| 6042 goto on_failure; | |
| 6043 } | |
| 6044 break; | |
| 6045 | |
| 6046 case jump_n: | |
| 6047 EXTRACT_NUMBER (mcnt, p + 2); | |
| 6048 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); | |
| 6049 | |
| 6050 /* Originally, this is how many times we CAN jump. */ | |
| 6051 if (mcnt) | |
| 6052 { | |
| 6053 mcnt--; | |
| 6054 STORE_NUMBER (p + 2, mcnt); | |
| 6055 goto unconditional_jump; | |
| 6056 } | |
| 6057 /* If don't have to jump any more, skip over the rest of command. */ | |
| 6058 else | |
| 6059 p += 4; | |
| 6060 break; | |
| 6061 | |
| 6062 case set_number_at: | |
| 6063 { | |
| 6064 DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); | |
| 6065 | |
| 6066 EXTRACT_NUMBER_AND_INCR (mcnt, p); | |
| 6067 p1 = p + mcnt; | |
| 6068 EXTRACT_NUMBER_AND_INCR (mcnt, p); | |
| 6069 DEBUG_PRINT3 (" Setting 0x%lx to %d.\n", (long) p1, mcnt); | |
| 6070 STORE_NUMBER (p1, mcnt); | |
| 6071 break; | |
| 6072 } | |
| 6073 | |
| 6074 case wordbound: | |
| 6075 DEBUG_PRINT1 ("EXECUTING wordbound.\n"); | |
| 6076 should_succeed = 1; | |
| 6077 matchwordbound: | |
| 6078 { | |
| 6079 /* XEmacs change */ | |
| 1377 | 6080 /* Straightforward and (I hope) correct implementation. |
| 6081 Probably should be optimized by arranging to compute | |
| 1497 | 6082 charpos only once. */ |
| 1377 | 6083 /* emch1 is the character before d, syn1 is the syntax of |
| 6084 emch1, emch2 is the character at d, and syn2 is the | |
| 6085 syntax of emch2. */ | |
| 6086 Ichar emch1, emch2; | |
| 1468 | 6087 int syn1 = 0, |
| 6088 syn2 = 0; | |
| 1377 | 6089 re_char *d_before, *d_after; |
| 6090 int result, | |
| 6091 at_beg = AT_STRINGS_BEG (d), | |
| 6092 at_end = AT_STRINGS_END (d); | |
| 6093 #ifdef emacs | |
| 1497 | 6094 Charxpos charpos; |
| 1377 | 6095 #endif |
| 6096 | |
| 6097 if (at_beg && at_end) | |
| 6098 { | |
| 6099 result = 0; | |
| 6100 } | |
| 428 | 6101 else |
| 6102 { | |
| 1377 | 6103 if (!at_beg) |
| 6104 { | |
| 6105 d_before = POS_BEFORE_GAP_UNSAFE (d); | |
| 6106 DEC_IBYTEPTR_FMT (d_before, fmt); | |
| 6107 emch1 = itext_ichar_fmt (d_before, fmt, lispobj); | |
| 460 | 6108 #ifdef emacs |
| 1497 | 6109 charpos = offset_to_charxpos (lispobj, |
| 6110 PTR_TO_OFFSET (d)) - 1; | |
| 1377 | 6111 BEGIN_REGEX_MALLOC_OK (); |
| 1497 | 6112 UPDATE_SYNTAX_CACHE (scache, charpos); |
| 460 | 6113 #endif |
| 1377 | 6114 syn1 = SYNTAX_FROM_CACHE (scache, emch1); |
| 6115 END_REGEX_MALLOC_OK (); | |
| 6116 } | |
| 6117 if (!at_end) | |
| 6118 { | |
| 6119 d_after = POS_AFTER_GAP_UNSAFE (d); | |
| 6120 emch2 = itext_ichar_fmt (d_after, fmt, lispobj); | |
| 460 | 6121 #ifdef emacs |
| 1497 | 6122 charpos = offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)); |
| 1377 | 6123 BEGIN_REGEX_MALLOC_OK (); |
| 1497 | 6124 UPDATE_SYNTAX_CACHE_FORWARD (scache, charpos); |
| 460 | 6125 #endif |
| 1377 | 6126 syn2 = SYNTAX_FROM_CACHE (scache, emch2); |
| 6127 END_REGEX_MALLOC_OK (); | |
| 6128 } | |
| 1333 | 6129 RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); |
| 1377 | 6130 |
| 6131 if (at_beg) | |
| 6132 result = (syn2 == Sword); | |
| 6133 else if (at_end) | |
| 6134 result = (syn1 == Sword); | |
| 6135 else | |
| 6136 result = ((syn1 == Sword) != (syn2 == Sword)); | |
| 428 | 6137 } |
| 1377 | 6138 |
| 428 | 6139 if (result == should_succeed) |
| 6140 break; | |
| 6141 goto fail; | |
| 6142 } | |
| 6143 | |
| 6144 case notwordbound: | |
| 6145 DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); | |
| 6146 should_succeed = 0; | |
| 6147 goto matchwordbound; | |
| 6148 | |
| 6149 case wordbeg: | |
| 6150 DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); | |
| 460 | 6151 if (AT_STRINGS_END (d)) |
| 6152 goto fail; | |
| 428 | 6153 { |
| 6154 /* XEmacs: this originally read: | |
| 6155 | |
| 6156 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) | |
| 6157 break; | |
| 6158 | |
| 6159 */ | |
| 460 | 6160 re_char *dtmp = POS_AFTER_GAP_UNSAFE (d); |
| 867 | 6161 Ichar emch = itext_ichar_fmt (dtmp, fmt, lispobj); |
| 1333 | 6162 int tempres; |
| 1347 | 6163 #ifdef emacs |
| 6164 Charxpos charpos = offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)); | |
| 6165 #endif | |
| 1333 | 6166 BEGIN_REGEX_MALLOC_OK (); |
| 460 | 6167 #ifdef emacs |
| 826 | 6168 UPDATE_SYNTAX_CACHE (scache, charpos); |
| 460 | 6169 #endif |
| 1333 | 6170 tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); |
| 6171 END_REGEX_MALLOC_OK (); | |
| 6172 RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
| 6173 if (tempres) | |
| 428 | 6174 goto fail; |
| 6175 if (AT_STRINGS_BEG (d)) | |
| 6176 break; | |
| 460 | 6177 dtmp = POS_BEFORE_GAP_UNSAFE (d); |
| 867 | 6178 DEC_IBYTEPTR_FMT (dtmp, fmt); |
| 6179 emch = itext_ichar_fmt (dtmp, fmt, lispobj); | |
| 1333 | 6180 BEGIN_REGEX_MALLOC_OK (); |
| 460 | 6181 #ifdef emacs |
| 826 | 6182 UPDATE_SYNTAX_CACHE_BACKWARD (scache, charpos - 1); |
| 460 | 6183 #endif |
| 1333 | 6184 tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); |
| 6185 END_REGEX_MALLOC_OK (); | |
| 6186 RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
| 6187 if (tempres) | |
| 428 | 6188 break; |
| 6189 goto fail; | |
| 6190 } | |
| 6191 | |
| 6192 case wordend: | |
| 6193 DEBUG_PRINT1 ("EXECUTING wordend.\n"); | |
| 460 | 6194 if (AT_STRINGS_BEG (d)) |
| 6195 goto fail; | |
| 428 | 6196 { |
| 6197 /* XEmacs: this originally read: | |
| 6198 | |
| 6199 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) | |
| 6200 && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) | |
| 6201 break; | |
| 6202 | |
| 6203 The or condition is incorrect (reversed). | |
| 6204 */ | |
| 460 | 6205 re_char *dtmp; |
| 867 | 6206 Ichar emch; |
| 1333 | 6207 int tempres; |
| 460 | 6208 #ifdef emacs |
| 826 | 6209 Charxpos charpos = offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)); |
| 1347 | 6210 BEGIN_REGEX_MALLOC_OK (); |
| 826 | 6211 UPDATE_SYNTAX_CACHE (scache, charpos); |
| 1333 | 6212 END_REGEX_MALLOC_OK (); |
| 6213 RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
| 1347 | 6214 #endif |
| 460 | 6215 dtmp = POS_BEFORE_GAP_UNSAFE (d); |
| 867 | 6216 DEC_IBYTEPTR_FMT (dtmp, fmt); |
| 6217 emch = itext_ichar_fmt (dtmp, fmt, lispobj); | |
| 1333 | 6218 BEGIN_REGEX_MALLOC_OK (); |
| 6219 tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); | |
| 6220 END_REGEX_MALLOC_OK (); | |
| 6221 RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
| 6222 if (tempres) | |
| 428 | 6223 goto fail; |
| 6224 if (AT_STRINGS_END (d)) | |
| 6225 break; | |
| 460 | 6226 dtmp = POS_AFTER_GAP_UNSAFE (d); |
| 867 | 6227 emch = itext_ichar_fmt (dtmp, fmt, lispobj); |
| 1333 | 6228 BEGIN_REGEX_MALLOC_OK (); |
| 460 | 6229 #ifdef emacs |
| 826 | 6230 UPDATE_SYNTAX_CACHE_FORWARD (scache, charpos + 1); |
| 460 | 6231 #endif |
| 1333 | 6232 tempres = (SYNTAX_FROM_CACHE (scache, emch) != Sword); |
| 6233 END_REGEX_MALLOC_OK (); | |
| 6234 RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
| 6235 if (tempres) | |
| 428 | 6236 break; |
| 6237 goto fail; | |
| 6238 } | |
| 6239 | |
| 6240 #ifdef emacs | |
| 6241 case before_dot: | |
| 6242 DEBUG_PRINT1 ("EXECUTING before_dot.\n"); | |
| 826 | 6243 if (!BUFFERP (lispobj) |
| 6244 || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) | |
| 6245 >= BUF_PT (XBUFFER (lispobj)))) | |
| 428 | 6246 goto fail; |
| 6247 break; | |
| 6248 | |
| 6249 case at_dot: | |
| 6250 DEBUG_PRINT1 ("EXECUTING at_dot.\n"); | |
| 826 | 6251 if (!BUFFERP (lispobj) |
| 6252 || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) | |
| 6253 != BUF_PT (XBUFFER (lispobj)))) | |
| 428 | 6254 goto fail; |
| 6255 break; | |
| 6256 | |
| 6257 case after_dot: | |
| 6258 DEBUG_PRINT1 ("EXECUTING after_dot.\n"); | |
| 826 | 6259 if (!BUFFERP (lispobj) |
| 6260 || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) | |
| 6261 <= BUF_PT (XBUFFER (lispobj)))) | |
| 428 | 6262 goto fail; |
| 6263 break; | |
| 6264 | |
| 6265 case syntaxspec: | |
| 6266 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); | |
| 6267 mcnt = *p++; | |
| 6268 goto matchsyntax; | |
| 6269 | |
| 6270 case wordchar: | |
| 6271 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); | |
| 6272 mcnt = (int) Sword; | |
| 6273 matchsyntax: | |
| 6274 should_succeed = 1; | |
| 6275 matchornotsyntax: | |
| 6276 { | |
| 6277 int matches; | |
| 867 | 6278 Ichar emch; |
| 428 | 6279 |
| 450 | 6280 REGEX_PREFETCH (); |
| 1333 | 6281 BEGIN_REGEX_MALLOC_OK (); |
| 826 | 6282 UPDATE_SYNTAX_CACHE |
| 6283 (scache, offset_to_charxpos (lispobj, PTR_TO_OFFSET (d))); | |
| 1333 | 6284 END_REGEX_MALLOC_OK (); |
| 6285 RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
| 826 | 6286 |
| 867 | 6287 emch = itext_ichar_fmt (d, fmt, lispobj); |
| 1333 | 6288 BEGIN_REGEX_MALLOC_OK (); |
| 826 | 6289 matches = (SYNTAX_FROM_CACHE (scache, emch) == |
| 6290 (enum syntaxcode) mcnt); | |
| 1333 | 6291 END_REGEX_MALLOC_OK (); |
| 6292 RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); | |
| 867 | 6293 INC_IBYTEPTR_FMT (d, fmt); |
| 428 | 6294 if (matches != should_succeed) |
| 6295 goto fail; | |
| 6296 SET_REGS_MATCHED (); | |
| 6297 } | |
| 6298 break; | |
| 6299 | |
| 6300 case notsyntaxspec: | |
| 6301 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); | |
| 6302 mcnt = *p++; | |
| 6303 goto matchnotsyntax; | |
| 6304 | |
| 6305 case notwordchar: | |
| 6306 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); | |
| 6307 mcnt = (int) Sword; | |
| 6308 matchnotsyntax: | |
| 6309 should_succeed = 0; | |
| 6310 goto matchornotsyntax; | |
| 6311 | |
| 6312 #ifdef MULE | |
| 6313 /* 97/2/17 jhod Mule category code patch */ | |
| 6314 case categoryspec: | |
| 6315 should_succeed = 1; | |
| 6316 matchornotcategory: | |
| 6317 { | |
| 867 | 6318 Ichar emch; |
| 428 | 6319 |
| 6320 mcnt = *p++; | |
| 450 | 6321 REGEX_PREFETCH (); |
| 867 | 6322 emch = itext_ichar_fmt (d, fmt, lispobj); |
| 6323 INC_IBYTEPTR_FMT (d, fmt); | |
| 826 | 6324 if (check_category_char (emch, BUFFER_CATEGORY_TABLE (lispbuf), |
| 6325 mcnt, should_succeed)) | |
| 428 | 6326 goto fail; |
| 6327 SET_REGS_MATCHED (); | |
| 6328 } | |
| 6329 break; | |
| 6330 | |
| 6331 case notcategoryspec: | |
| 6332 should_succeed = 0; | |
| 6333 goto matchornotcategory; | |
| 6334 /* end of category patch */ | |
| 6335 #endif /* MULE */ | |
| 6336 #else /* not emacs */ | |
| 6337 case wordchar: | |
| 6338 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); | |
| 450 | 6339 REGEX_PREFETCH (); |
| 826 | 6340 if (!WORDCHAR_P ((int) (*d))) |
| 428 | 6341 goto fail; |
| 6342 SET_REGS_MATCHED (); | |
| 6343 d++; | |
| 6344 break; | |
| 6345 | |
| 6346 case notwordchar: | |
| 6347 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); | |
| 450 | 6348 REGEX_PREFETCH (); |
| 826 | 6349 if (!WORDCHAR_P ((int) (*d))) |
| 428 | 6350 goto fail; |
| 6351 SET_REGS_MATCHED (); | |
| 6352 d++; | |
| 6353 break; | |
| 446 | 6354 #endif /* emacs */ |
| 428 | 6355 |
| 6356 default: | |
| 2500 | 6357 ABORT (); |
| 428 | 6358 } |
| 6359 continue; /* Successfully executed one pattern command; keep going. */ | |
| 6360 | |
| 6361 | |
| 6362 /* We goto here if a matching operation fails. */ | |
| 6363 fail: | |
| 6364 if (!FAIL_STACK_EMPTY ()) | |
| 6365 { /* A restart point is known. Restore to that state. */ | |
| 6366 DEBUG_PRINT1 ("\nFAIL:\n"); | |
| 6367 POP_FAILURE_POINT (d, p, | |
| 6368 lowest_active_reg, highest_active_reg, | |
| 6369 regstart, regend, reg_info); | |
| 6370 | |
| 6371 /* If this failure point is a dummy, try the next one. */ | |
| 6372 if (!p) | |
| 6373 goto fail; | |
| 6374 | |
| 6375 /* If we failed to the end of the pattern, don't examine *p. */ | |
| 6376 assert (p <= pend); | |
| 6377 if (p < pend) | |
| 6378 { | |
| 460 | 6379 re_bool is_a_jump_n = false; |
| 428 | 6380 |
| 6381 /* If failed to a backwards jump that's part of a repetition | |
| 6382 loop, need to pop this failure point and use the next one. */ | |
| 6383 switch ((re_opcode_t) *p) | |
| 6384 { | |
| 6385 case jump_n: | |
| 6386 is_a_jump_n = true; | |
| 6387 case maybe_pop_jump: | |
| 6388 case pop_failure_jump: | |
| 6389 case jump: | |
| 6390 p1 = p + 1; | |
| 6391 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | |
| 6392 p1 += mcnt; | |
| 6393 | |
| 6394 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) | |
| 6395 || (!is_a_jump_n | |
| 6396 && (re_opcode_t) *p1 == on_failure_jump)) | |
| 6397 goto fail; | |
| 6398 break; | |
| 6399 default: | |
| 6400 /* do nothing */ ; | |
| 6401 } | |
| 6402 } | |
| 6403 | |
| 6404 if (d >= string1 && d <= end1) | |
| 6405 dend = end_match_1; | |
| 6406 } | |
| 6407 else | |
| 6408 break; /* Matching at this starting point really fails. */ | |
| 6409 } /* for (;;) */ | |
| 6410 | |
| 6411 if (best_regs_set) | |
| 6412 goto restore_best_regs; | |
| 6413 | |
| 6414 FREE_VARIABLES (); | |
| 6415 | |
| 6416 return -1; /* Failure to match. */ | |
| 1333 | 6417 } /* re_match_2_internal */ |
| 428 | 6418 |
| 6419 /* Subroutine definitions for re_match_2. */ | |
| 6420 | |
| 6421 | |
| 6422 /* We are passed P pointing to a register number after a start_memory. | |
| 6423 | |
| 6424 Return true if the pattern up to the corresponding stop_memory can | |
| 6425 match the empty string, and false otherwise. | |
| 6426 | |
| 6427 If we find the matching stop_memory, sets P to point to one past its number. | |
| 6428 Otherwise, sets P to an undefined byte less than or equal to END. | |
| 6429 | |
| 6430 We don't handle duplicates properly (yet). */ | |
| 6431 | |
| 460 | 6432 static re_bool |
| 428 | 6433 group_match_null_string_p (unsigned char **p, unsigned char *end, |
| 6434 register_info_type *reg_info) | |
| 6435 { | |
| 6436 int mcnt; | |
| 6437 /* Point to after the args to the start_memory. */ | |
| 6438 unsigned char *p1 = *p + 2; | |
| 6439 | |
| 6440 while (p1 < end) | |
| 6441 { | |
| 6442 /* Skip over opcodes that can match nothing, and return true or | |
| 6443 false, as appropriate, when we get to one that can't, or to the | |
| 6444 matching stop_memory. */ | |
| 6445 | |
| 6446 switch ((re_opcode_t) *p1) | |
| 6447 { | |
| 6448 /* Could be either a loop or a series of alternatives. */ | |
| 6449 case on_failure_jump: | |
| 6450 p1++; | |
| 6451 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | |
| 6452 | |
| 6453 /* If the next operation is not a jump backwards in the | |
| 6454 pattern. */ | |
| 6455 | |
| 6456 if (mcnt >= 0) | |
| 6457 { | |
| 6458 /* Go through the on_failure_jumps of the alternatives, | |
| 6459 seeing if any of the alternatives cannot match nothing. | |
| 6460 The last alternative starts with only a jump, | |
| 6461 whereas the rest start with on_failure_jump and end | |
| 6462 with a jump, e.g., here is the pattern for `a|b|c': | |
| 6463 | |
| 6464 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 | |
| 6465 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 | |
| 6466 /exactn/1/c | |
| 6467 | |
| 6468 So, we have to first go through the first (n-1) | |
| 6469 alternatives and then deal with the last one separately. */ | |
| 6470 | |
| 6471 | |
| 6472 /* Deal with the first (n-1) alternatives, which start | |
| 6473 with an on_failure_jump (see above) that jumps to right | |
| 6474 past a jump_past_alt. */ | |
| 6475 | |
| 6476 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) | |
| 6477 { | |
| 6478 /* `mcnt' holds how many bytes long the alternative | |
| 6479 is, including the ending `jump_past_alt' and | |
| 6480 its number. */ | |
| 6481 | |
| 6482 if (!alt_match_null_string_p (p1, p1 + mcnt - 3, | |
| 6483 reg_info)) | |
| 6484 return false; | |
| 6485 | |
| 6486 /* Move to right after this alternative, including the | |
| 6487 jump_past_alt. */ | |
| 6488 p1 += mcnt; | |
| 6489 | |
| 6490 /* Break if it's the beginning of an n-th alternative | |
| 6491 that doesn't begin with an on_failure_jump. */ | |
| 6492 if ((re_opcode_t) *p1 != on_failure_jump) | |
| 6493 break; | |
| 6494 | |
| 6495 /* Still have to check that it's not an n-th | |
| 6496 alternative that starts with an on_failure_jump. */ | |
| 6497 p1++; | |
| 6498 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | |
| 6499 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) | |
| 6500 { | |
| 6501 /* Get to the beginning of the n-th alternative. */ | |
| 6502 p1 -= 3; | |
| 6503 break; | |
| 6504 } | |
| 6505 } | |
| 6506 | |
| 6507 /* Deal with the last alternative: go back and get number | |
| 6508 of the `jump_past_alt' just before it. `mcnt' contains | |
| 6509 the length of the alternative. */ | |
| 6510 EXTRACT_NUMBER (mcnt, p1 - 2); | |
| 6511 | |
| 6512 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) | |
| 6513 return false; | |
| 6514 | |
| 6515 p1 += mcnt; /* Get past the n-th alternative. */ | |
| 6516 } /* if mcnt > 0 */ | |
| 6517 break; | |
| 6518 | |
| 6519 | |
| 6520 case stop_memory: | |
| 6521 assert (p1[1] == **p); | |
| 6522 *p = p1 + 2; | |
| 6523 return true; | |
| 6524 | |
| 6525 | |
| 6526 default: | |
| 6527 if (!common_op_match_null_string_p (&p1, end, reg_info)) | |
| 6528 return false; | |
| 6529 } | |
| 6530 } /* while p1 < end */ | |
| 6531 | |
| 6532 return false; | |
| 6533 } /* group_match_null_string_p */ | |
| 6534 | |
| 6535 | |
| 6536 /* Similar to group_match_null_string_p, but doesn't deal with alternatives: | |
| 6537 It expects P to be the first byte of a single alternative and END one | |
| 6538 byte past the last. The alternative can contain groups. */ | |
| 6539 | |
| 460 | 6540 static re_bool |
| 428 | 6541 alt_match_null_string_p (unsigned char *p, unsigned char *end, |
| 6542 register_info_type *reg_info) | |
| 6543 { | |
| 6544 int mcnt; | |
| 6545 unsigned char *p1 = p; | |
| 6546 | |
| 6547 while (p1 < end) | |
| 6548 { | |
| 6549 /* Skip over opcodes that can match nothing, and break when we get | |
| 6550 to one that can't. */ | |
| 6551 | |
| 6552 switch ((re_opcode_t) *p1) | |
| 6553 { | |
| 6554 /* It's a loop. */ | |
| 6555 case on_failure_jump: | |
| 6556 p1++; | |
| 6557 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | |
| 6558 p1 += mcnt; | |
| 6559 break; | |
| 6560 | |
| 6561 default: | |
| 6562 if (!common_op_match_null_string_p (&p1, end, reg_info)) | |
| 6563 return false; | |
| 6564 } | |
| 6565 } /* while p1 < end */ | |
| 6566 | |
| 6567 return true; | |
| 6568 } /* alt_match_null_string_p */ | |
| 6569 | |
| 6570 | |
| 6571 /* Deals with the ops common to group_match_null_string_p and | |
| 6572 alt_match_null_string_p. | |
| 6573 | |
| 6574 Sets P to one after the op and its arguments, if any. */ | |
| 6575 | |
| 460 | 6576 static re_bool |
| 428 | 6577 common_op_match_null_string_p (unsigned char **p, unsigned char *end, |
| 6578 register_info_type *reg_info) | |
| 6579 { | |
| 6580 int mcnt; | |
| 460 | 6581 re_bool ret; |
| 428 | 6582 int reg_no; |
| 6583 unsigned char *p1 = *p; | |
| 6584 | |
| 6585 switch ((re_opcode_t) *p1++) | |
| 6586 { | |
| 6587 case no_op: | |
| 6588 case begline: | |
| 6589 case endline: | |
| 6590 case begbuf: | |
| 6591 case endbuf: | |
| 6592 case wordbeg: | |
| 6593 case wordend: | |
| 6594 case wordbound: | |
| 6595 case notwordbound: | |
| 6596 #ifdef emacs | |
| 6597 case before_dot: | |
| 6598 case at_dot: | |
| 6599 case after_dot: | |
| 6600 #endif | |
| 6601 break; | |
| 6602 | |
| 6603 case start_memory: | |
| 6604 reg_no = *p1; | |
| 6605 assert (reg_no > 0 && reg_no <= MAX_REGNUM); | |
| 6606 ret = group_match_null_string_p (&p1, end, reg_info); | |
| 6607 | |
| 6608 /* Have to set this here in case we're checking a group which | |
| 6609 contains a group and a back reference to it. */ | |
| 6610 | |
| 6611 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) | |
| 6612 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; | |
| 6613 | |
| 6614 if (!ret) | |
| 6615 return false; | |
| 6616 break; | |
| 6617 | |
| 6618 /* If this is an optimized succeed_n for zero times, make the jump. */ | |
| 6619 case jump: | |
| 6620 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | |
| 6621 if (mcnt >= 0) | |
| 6622 p1 += mcnt; | |
| 6623 else | |
| 6624 return false; | |
| 6625 break; | |
| 6626 | |
| 6627 case succeed_n: | |
| 6628 /* Get to the number of times to succeed. */ | |
| 6629 p1 += 2; | |
| 6630 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | |
| 6631 | |
| 6632 if (mcnt == 0) | |
| 6633 { | |
| 6634 p1 -= 4; | |
| 6635 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | |
| 6636 p1 += mcnt; | |
| 6637 } | |
| 6638 else | |
| 6639 return false; | |
| 6640 break; | |
| 6641 | |
| 6642 case duplicate: | |
| 6643 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) | |
| 6644 return false; | |
| 6645 break; | |
| 6646 | |
| 6647 case set_number_at: | |
| 6648 p1 += 4; | |
| 6649 | |
| 6650 default: | |
| 6651 /* All other opcodes mean we cannot match the empty string. */ | |
| 6652 return false; | |
| 6653 } | |
| 6654 | |
| 6655 *p = p1; | |
| 6656 return true; | |
| 6657 } /* common_op_match_null_string_p */ | |
| 6658 | |
| 6659 | |
| 6660 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN | |
| 6661 bytes; nonzero otherwise. */ | |
| 6662 | |
| 6663 static int | |
| 446 | 6664 bcmp_translate (re_char *s1, re_char *s2, |
| 826 | 6665 REGISTER int len, RE_TRANSLATE_TYPE translate |
| 6666 #ifdef emacs | |
| 2333 | 6667 , Internal_Format USED_IF_MULE (fmt), |
| 6668 Lisp_Object USED_IF_MULE (lispobj) | |
| 826 | 6669 #endif |
| 6670 ) | |
| 428 | 6671 { |
| 826 | 6672 REGISTER re_char *p1 = s1, *p2 = s2; |
| 446 | 6673 #ifdef MULE |
| 826 | 6674 re_char *p1_end = s1 + len; |
| 6675 re_char *p2_end = s2 + len; | |
| 446 | 6676 |
| 6677 while (p1 != p1_end && p2 != p2_end) | |
| 6678 { | |
| 867 | 6679 Ichar p1_ch, p2_ch; |
| 6680 | |
| 6681 p1_ch = itext_ichar_fmt (p1, fmt, lispobj); | |
| 6682 p2_ch = itext_ichar_fmt (p2, fmt, lispobj); | |
| 826 | 6683 |
| 6684 if (RE_TRANSLATE_1 (p1_ch) | |
| 6685 != RE_TRANSLATE_1 (p2_ch)) | |
| 446 | 6686 return 1; |
| 867 | 6687 INC_IBYTEPTR_FMT (p1, fmt); |
| 6688 INC_IBYTEPTR_FMT (p2, fmt); | |
| 446 | 6689 } |
| 6690 #else /* not MULE */ | |
| 428 | 6691 while (len) |
| 6692 { | |
| 826 | 6693 if (RE_TRANSLATE_1 (*p1++) != RE_TRANSLATE_1 (*p2++)) return 1; |
| 428 | 6694 len--; |
| 6695 } | |
| 446 | 6696 #endif /* MULE */ |
| 428 | 6697 return 0; |
| 6698 } | |
| 6699 | |
| 6700 /* Entry points for GNU code. */ | |
| 6701 | |
| 6702 /* re_compile_pattern is the GNU regular expression compiler: it | |
| 6703 compiles PATTERN (of length SIZE) and puts the result in BUFP. | |
| 6704 Returns 0 if the pattern was valid, otherwise an error string. | |
| 6705 | |
| 6706 Assumes the `allocated' (and perhaps `buffer') and `translate' fields | |
| 6707 are set in BUFP on entry. | |
| 6708 | |
| 6709 We call regex_compile to do the actual compilation. */ | |
| 6710 | |
| 442 | 6711 const char * |
| 6712 re_compile_pattern (const char *pattern, int length, | |
| 428 | 6713 struct re_pattern_buffer *bufp) |
| 6714 { | |
| 6715 reg_errcode_t ret; | |
| 6716 | |
| 6717 /* GNU code is written to assume at least RE_NREGS registers will be set | |
| 6718 (and at least one extra will be -1). */ | |
| 6719 bufp->regs_allocated = REGS_UNALLOCATED; | |
| 6720 | |
| 6721 /* And GNU code determines whether or not to get register information | |
| 6722 by passing null for the REGS argument to re_match, etc., not by | |
| 6723 setting no_sub. */ | |
| 6724 bufp->no_sub = 0; | |
| 6725 | |
| 6726 /* Match anchors at newline. */ | |
| 6727 bufp->newline_anchor = 1; | |
| 6728 | |
| 826 | 6729 ret = regex_compile ((unsigned char *) pattern, length, re_syntax_options, |
| 6730 bufp); | |
| 428 | 6731 |
| 6732 if (!ret) | |
| 6733 return NULL; | |
| 6734 return gettext (re_error_msgid[(int) ret]); | |
| 6735 } | |
| 6736 | |
| 6737 /* Entry points compatible with 4.2 BSD regex library. We don't define | |
| 6738 them unless specifically requested. */ | |
| 6739 | |
| 6740 #ifdef _REGEX_RE_COMP | |
| 6741 | |
| 6742 /* BSD has one and only one pattern buffer. */ | |
| 6743 static struct re_pattern_buffer re_comp_buf; | |
| 6744 | |
| 6745 char * | |
| 442 | 6746 re_comp (const char *s) |
| 428 | 6747 { |
| 6748 reg_errcode_t ret; | |
| 6749 | |
| 6750 if (!s) | |
| 6751 { | |
| 6752 if (!re_comp_buf.buffer) | |
| 6753 return gettext ("No previous regular expression"); | |
| 6754 return 0; | |
| 6755 } | |
| 6756 | |
| 6757 if (!re_comp_buf.buffer) | |
| 6758 { | |
| 1333 | 6759 re_comp_buf.buffer = (unsigned char *) xmalloc (200); |
| 428 | 6760 if (re_comp_buf.buffer == NULL) |
| 6761 return gettext (re_error_msgid[(int) REG_ESPACE]); | |
| 6762 re_comp_buf.allocated = 200; | |
| 6763 | |
| 1333 | 6764 re_comp_buf.fastmap = (char *) xmalloc (1 << BYTEWIDTH); |
| 428 | 6765 if (re_comp_buf.fastmap == NULL) |
| 6766 return gettext (re_error_msgid[(int) REG_ESPACE]); | |
| 6767 } | |
| 6768 | |
| 6769 /* Since `re_exec' always passes NULL for the `regs' argument, we | |
| 6770 don't need to initialize the pattern buffer fields which affect it. */ | |
| 6771 | |
| 6772 /* Match anchors at newlines. */ | |
| 6773 re_comp_buf.newline_anchor = 1; | |
| 6774 | |
| 826 | 6775 ret = regex_compile ((unsigned char *)s, strlen (s), re_syntax_options, |
| 6776 &re_comp_buf); | |
| 428 | 6777 |
| 6778 if (!ret) | |
| 6779 return NULL; | |
| 6780 | |
| 442 | 6781 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ |
| 428 | 6782 return (char *) gettext (re_error_msgid[(int) ret]); |
| 6783 } | |
| 6784 | |
| 6785 | |
| 6786 int | |
| 442 | 6787 re_exec (const char *s) |
| 428 | 6788 { |
| 442 | 6789 const int len = strlen (s); |
| 428 | 6790 return |
| 6791 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); | |
| 6792 } | |
| 6793 #endif /* _REGEX_RE_COMP */ | |
| 6794 | |
| 6795 /* POSIX.2 functions. Don't define these for Emacs. */ | |
| 6796 | |
| 6797 #ifndef emacs | |
| 6798 | |
| 6799 /* regcomp takes a regular expression as a string and compiles it. | |
| 6800 | |
| 6801 PREG is a regex_t *. We do not expect any fields to be initialized, | |
| 6802 since POSIX says we shouldn't. Thus, we set | |
| 6803 | |
| 6804 `buffer' to the compiled pattern; | |
| 6805 `used' to the length of the compiled pattern; | |
| 6806 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the | |
| 6807 REG_EXTENDED bit in CFLAGS is set; otherwise, to | |
| 6808 RE_SYNTAX_POSIX_BASIC; | |
| 6809 `newline_anchor' to REG_NEWLINE being set in CFLAGS; | |
| 6810 `fastmap' and `fastmap_accurate' to zero; | |
| 6811 `re_nsub' to the number of subexpressions in PATTERN. | |
| 502 | 6812 (non-shy of course. POSIX probably doesn't know about |
| 6813 shy ones, and in any case they should be invisible.) | |
| 428 | 6814 |
| 6815 PATTERN is the address of the pattern string. | |
| 6816 | |
| 6817 CFLAGS is a series of bits which affect compilation. | |
| 6818 | |
| 6819 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we | |
| 6820 use POSIX basic syntax. | |
| 6821 | |
| 6822 If REG_NEWLINE is set, then . and [^...] don't match newline. | |
| 6823 Also, regexec will try a match beginning after every newline. | |
| 6824 | |
| 6825 If REG_ICASE is set, then we consider upper- and lowercase | |
| 6826 versions of letters to be equivalent when matching. | |
| 6827 | |
| 6828 If REG_NOSUB is set, then when PREG is passed to regexec, that | |
| 6829 routine will report only success or failure, and nothing about the | |
| 6830 registers. | |
| 6831 | |
| 6832 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for | |
| 6833 the return codes and their meanings.) */ | |
| 6834 | |
| 6835 int | |
| 442 | 6836 regcomp (regex_t *preg, const char *pattern, int cflags) |
| 428 | 6837 { |
| 6838 reg_errcode_t ret; | |
| 647 | 6839 unsigned int syntax |
| 428 | 6840 = (cflags & REG_EXTENDED) ? |
| 6841 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; | |
| 6842 | |
| 6843 /* regex_compile will allocate the space for the compiled pattern. */ | |
| 6844 preg->buffer = 0; | |
| 6845 preg->allocated = 0; | |
| 6846 preg->used = 0; | |
| 6847 | |
| 6848 /* Don't bother to use a fastmap when searching. This simplifies the | |
| 6849 REG_NEWLINE case: if we used a fastmap, we'd have to put all the | |
| 6850 characters after newlines into the fastmap. This way, we just try | |
| 6851 every character. */ | |
| 6852 preg->fastmap = 0; | |
| 6853 | |
| 6854 if (cflags & REG_ICASE) | |
| 6855 { | |
| 647 | 6856 int i; |
| 428 | 6857 |
| 1333 | 6858 preg->translate = (char *) xmalloc (CHAR_SET_SIZE); |
| 428 | 6859 if (preg->translate == NULL) |
| 6860 return (int) REG_ESPACE; | |
| 6861 | |
| 6862 /* Map uppercase characters to corresponding lowercase ones. */ | |
| 6863 for (i = 0; i < CHAR_SET_SIZE; i++) | |
| 6864 preg->translate[i] = ISUPPER (i) ? tolower (i) : i; | |
| 6865 } | |
| 6866 else | |
| 6867 preg->translate = NULL; | |
| 6868 | |
| 6869 /* If REG_NEWLINE is set, newlines are treated differently. */ | |
| 6870 if (cflags & REG_NEWLINE) | |
| 6871 { /* REG_NEWLINE implies neither . nor [^...] match newline. */ | |
| 6872 syntax &= ~RE_DOT_NEWLINE; | |
| 6873 syntax |= RE_HAT_LISTS_NOT_NEWLINE; | |
| 6874 /* It also changes the matching behavior. */ | |
| 6875 preg->newline_anchor = 1; | |
| 6876 } | |
| 6877 else | |
| 6878 preg->newline_anchor = 0; | |
| 6879 | |
| 6880 preg->no_sub = !!(cflags & REG_NOSUB); | |
| 6881 | |
| 6882 /* POSIX says a null character in the pattern terminates it, so we | |
| 6883 can use strlen here in compiling the pattern. */ | |
| 446 | 6884 ret = regex_compile ((unsigned char *) pattern, strlen (pattern), syntax, preg); |
| 428 | 6885 |
| 6886 /* POSIX doesn't distinguish between an unmatched open-group and an | |
| 6887 unmatched close-group: both are REG_EPAREN. */ | |
| 6888 if (ret == REG_ERPAREN) ret = REG_EPAREN; | |
| 6889 | |
| 6890 return (int) ret; | |
| 6891 } | |
| 6892 | |
| 6893 | |
| 6894 /* regexec searches for a given pattern, specified by PREG, in the | |
| 6895 string STRING. | |
| 6896 | |
| 6897 If NMATCH is zero or REG_NOSUB was set in the cflags argument to | |
| 6898 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at | |
| 6899 least NMATCH elements, and we set them to the offsets of the | |
| 6900 corresponding matched substrings. | |
| 6901 | |
| 6902 EFLAGS specifies `execution flags' which affect matching: if | |
| 6903 REG_NOTBOL is set, then ^ does not match at the beginning of the | |
| 6904 string; if REG_NOTEOL is set, then $ does not match at the end. | |
| 6905 | |
| 6906 We return 0 if we find a match and REG_NOMATCH if not. */ | |
| 6907 | |
| 6908 int | |
| 442 | 6909 regexec (const regex_t *preg, const char *string, size_t nmatch, |
| 428 | 6910 regmatch_t pmatch[], int eflags) |
| 6911 { | |
| 6912 int ret; | |
| 6913 struct re_registers regs; | |
| 6914 regex_t private_preg; | |
| 6915 int len = strlen (string); | |
| 460 | 6916 re_bool want_reg_info = !preg->no_sub && nmatch > 0; |
| 428 | 6917 |
| 6918 private_preg = *preg; | |
| 6919 | |
| 6920 private_preg.not_bol = !!(eflags & REG_NOTBOL); | |
| 6921 private_preg.not_eol = !!(eflags & REG_NOTEOL); | |
| 6922 | |
| 6923 /* The user has told us exactly how many registers to return | |
| 6924 information about, via `nmatch'. We have to pass that on to the | |
| 6925 matching routines. */ | |
| 6926 private_preg.regs_allocated = REGS_FIXED; | |
| 6927 | |
| 6928 if (want_reg_info) | |
| 6929 { | |
| 647 | 6930 regs.num_regs = (int) nmatch; |
| 6931 regs.start = TALLOC ((int) nmatch, regoff_t); | |
| 6932 regs.end = TALLOC ((int) nmatch, regoff_t); | |
| 428 | 6933 if (regs.start == NULL || regs.end == NULL) |
| 6934 return (int) REG_NOMATCH; | |
| 6935 } | |
| 6936 | |
| 6937 /* Perform the searching operation. */ | |
| 6938 ret = re_search (&private_preg, string, len, | |
| 6939 /* start: */ 0, /* range: */ len, | |
| 6940 want_reg_info ? ®s : (struct re_registers *) 0); | |
| 6941 | |
| 6942 /* Copy the register information to the POSIX structure. */ | |
| 6943 if (want_reg_info) | |
| 6944 { | |
| 6945 if (ret >= 0) | |
| 6946 { | |
| 647 | 6947 int r; |
| 6948 | |
| 6949 for (r = 0; r < (int) nmatch; r++) | |
| 428 | 6950 { |
| 6951 pmatch[r].rm_so = regs.start[r]; | |
| 6952 pmatch[r].rm_eo = regs.end[r]; | |
| 6953 } | |
| 6954 } | |
| 6955 | |
| 6956 /* If we needed the temporary register info, free the space now. */ | |
| 1726 | 6957 xfree (regs.start, regoff_t *); |
| 6958 xfree (regs.end, regoff_t *); | |
| 428 | 6959 } |
| 6960 | |
| 6961 /* We want zero return to mean success, unlike `re_search'. */ | |
| 6962 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; | |
| 6963 } | |
| 6964 | |
| 6965 | |
| 6966 /* Returns a message corresponding to an error code, ERRCODE, returned | |
| 6967 from either regcomp or regexec. We don't use PREG here. */ | |
| 6968 | |
| 6969 size_t | |
| 2286 | 6970 regerror (int errcode, const regex_t *UNUSED (preg), char *errbuf, |
| 647 | 6971 size_t errbuf_size) |
| 428 | 6972 { |
| 442 | 6973 const char *msg; |
| 665 | 6974 Bytecount msg_size; |
| 428 | 6975 |
| 6976 if (errcode < 0 | |
| 647 | 6977 || errcode >= (int) (sizeof (re_error_msgid) / |
| 6978 sizeof (re_error_msgid[0]))) | |
| 428 | 6979 /* Only error codes returned by the rest of the code should be passed |
| 6980 to this routine. If we are given anything else, or if other regex | |
| 6981 code generates an invalid error code, then the program has a bug. | |
| 6982 Dump core so we can fix it. */ | |
| 2500 | 6983 ABORT (); |
| 428 | 6984 |
| 6985 msg = gettext (re_error_msgid[errcode]); | |
| 6986 | |
| 6987 msg_size = strlen (msg) + 1; /* Includes the null. */ | |
| 6988 | |
| 6989 if (errbuf_size != 0) | |
| 6990 { | |
| 665 | 6991 if (msg_size > (Bytecount) errbuf_size) |
| 428 | 6992 { |
| 6993 strncpy (errbuf, msg, errbuf_size - 1); | |
| 6994 errbuf[errbuf_size - 1] = 0; | |
| 6995 } | |
| 6996 else | |
| 6997 strcpy (errbuf, msg); | |
| 6998 } | |
| 6999 | |
| 647 | 7000 return (size_t) msg_size; |
| 428 | 7001 } |
| 7002 | |
| 7003 | |
| 7004 /* Free dynamically allocated space used by PREG. */ | |
| 7005 | |
| 7006 void | |
| 7007 regfree (regex_t *preg) | |
| 7008 { | |
| 7009 if (preg->buffer != NULL) | |
| 1726 | 7010 xfree (preg->buffer, unsigned char *); |
| 428 | 7011 preg->buffer = NULL; |
| 7012 | |
| 7013 preg->allocated = 0; | |
| 7014 preg->used = 0; | |
| 7015 | |
| 7016 if (preg->fastmap != NULL) | |
| 1726 | 7017 xfree (preg->fastmap, char *); |
| 428 | 7018 preg->fastmap = NULL; |
| 7019 preg->fastmap_accurate = 0; | |
| 7020 | |
| 7021 if (preg->translate != NULL) | |
| 1726 | 7022 xfree (preg->translate, RE_TRANSLATE_TYPE); |
| 428 | 7023 preg->translate = NULL; |
| 7024 } | |
| 7025 | |
| 7026 #endif /* not emacs */ | |
| 7027 |
