comparison src/regex.c @ 460:223736d75acb r21-2-45

Import from CVS: tag r21-2-45
author cvs
date Mon, 13 Aug 2007 11:43:24 +0200
parents c33ae14dd6d0
children 7039e6323819
comparison
equal deleted inserted replaced
459:9d4fd877b885 460:223736d75acb
45 45
46 #ifndef _GNU_SOURCE 46 #ifndef _GNU_SOURCE
47 #define _GNU_SOURCE 1 47 #define _GNU_SOURCE 1
48 #endif 48 #endif
49 49
50 #ifdef emacs
51 /* Converts the pointer to the char to BEG-based offset from the start. */
52 #define PTR_TO_OFFSET(d) (MATCHING_IN_FIRST_STRING \
53 ? (d) - string1 : (d) - (string2 - size1))
54 #else
55 #define PTR_TO_OFFSET(d) 0
56 #endif
57
50 /* We assume non-Mule if emacs isn't defined. */ 58 /* We assume non-Mule if emacs isn't defined. */
51 #ifndef emacs 59 #ifndef emacs
52 #undef MULE 60 #undef MULE
53 #endif 61 #endif
54 62
177 } 185 }
178 186
179 #endif /* SYNTAX_TABLE */ 187 #endif /* SYNTAX_TABLE */
180 188
181 #define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c] 189 #define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c]
190 #undef SYNTAX_FROM_CACHE
191 #define SYNTAX_FROM_CACHE SYNTAX_UNSAFE
182 192
183 #define RE_TRANSLATE(c) translate[(unsigned char) (c)] 193 #define RE_TRANSLATE(c) translate[(unsigned char) (c)]
184 #define TRANSLATE_P(tr) tr 194 #define TRANSLATE_P(tr) tr
185 195
186 #endif /* emacs */ 196 #endif /* emacs */
366 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 376 #define MIN(a, b) ((a) < (b) ? (a) : (b))
367 377
368 /* Type of source-pattern and string chars. */ 378 /* Type of source-pattern and string chars. */
369 typedef const unsigned char re_char; 379 typedef const unsigned char re_char;
370 380
371 typedef char boolean; 381 typedef char re_bool;
372 #define false 0 382 #define false 0
373 #define true 1 383 #define true 1
374 384
375 385
376 /* These are the command codes that appear in compiled regular 386 /* These are the command codes that appear in compiled regular
1778 static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); 1788 static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
1779 static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, 1789 static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg,
1780 unsigned char *end); 1790 unsigned char *end);
1781 static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, 1791 static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
1782 unsigned char *end); 1792 unsigned char *end);
1783 static boolean at_begline_loc_p (re_char *pattern, re_char *p, 1793 static re_bool at_begline_loc_p (re_char *pattern, re_char *p,
1784 reg_syntax_t syntax); 1794 reg_syntax_t syntax);
1785 static boolean at_endline_loc_p (re_char *p, re_char *pend, int syntax); 1795 static re_bool at_endline_loc_p (re_char *p, re_char *pend, int syntax);
1786 static boolean group_in_compile_stack (compile_stack_type compile_stack, 1796 static re_bool group_in_compile_stack (compile_stack_type compile_stack,
1787 regnum_t regnum); 1797 regnum_t regnum);
1788 static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, 1798 static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend,
1789 RE_TRANSLATE_TYPE translate, 1799 RE_TRANSLATE_TYPE translate,
1790 reg_syntax_t syntax, 1800 reg_syntax_t syntax,
1791 unsigned char *b); 1801 unsigned char *b);
1794 re_char *pend, 1804 re_char *pend,
1795 RE_TRANSLATE_TYPE translate, 1805 RE_TRANSLATE_TYPE translate,
1796 reg_syntax_t syntax, 1806 reg_syntax_t syntax,
1797 Lisp_Object rtab); 1807 Lisp_Object rtab);
1798 #endif /* MULE */ 1808 #endif /* MULE */
1799 static boolean group_match_null_string_p (unsigned char **p, 1809 static re_bool group_match_null_string_p (unsigned char **p,
1800 unsigned char *end, 1810 unsigned char *end,
1801 register_info_type *reg_info); 1811 register_info_type *reg_info);
1802 static boolean alt_match_null_string_p (unsigned char *p, unsigned char *end, 1812 static re_bool alt_match_null_string_p (unsigned char *p, unsigned char *end,
1803 register_info_type *reg_info); 1813 register_info_type *reg_info);
1804 static boolean common_op_match_null_string_p (unsigned char **p, 1814 static re_bool common_op_match_null_string_p (unsigned char **p,
1805 unsigned char *end, 1815 unsigned char *end,
1806 register_info_type *reg_info); 1816 register_info_type *reg_info);
1807 static int bcmp_translate (const unsigned char *s1, const unsigned char *s2, 1817 static int bcmp_translate (const unsigned char *s1, const unsigned char *s2,
1808 REGISTER int len, RE_TRANSLATE_TYPE translate); 1818 REGISTER int len, RE_TRANSLATE_TYPE translate);
1809 static int re_match_2_internal (struct re_pattern_buffer *bufp, 1819 static int re_match_2_internal (struct re_pattern_buffer *bufp,
2046 goto normal_char; 2056 goto normal_char;
2047 } 2057 }
2048 2058
2049 { 2059 {
2050 /* true means zero/many matches are allowed. */ 2060 /* true means zero/many matches are allowed. */
2051 boolean zero_times_ok = c != '+'; 2061 re_bool zero_times_ok = c != '+';
2052 boolean many_times_ok = c != '?'; 2062 re_bool many_times_ok = c != '?';
2053 2063
2054 /* true means match shortest string possible. */ 2064 /* true means match shortest string possible. */
2055 boolean minimal = false; 2065 re_bool minimal = false;
2056 2066
2057 /* If there is a sequence of repetition chars, collapse it 2067 /* If there is a sequence of repetition chars, collapse it
2058 down to just one (the right one). We can't combine 2068 down to just one (the right one). We can't combine
2059 interval operators with these because of, e.g., `a{2}*', 2069 interval operators with these because of, e.g., `a{2}*',
2060 which should only match an even number of `a's. */ 2070 which should only match an even number of `a's. */
2154 } 2164 }
2155 } 2165 }
2156 else 2166 else
2157 { 2167 {
2158 /* Are we optimizing this jump? */ 2168 /* Are we optimizing this jump? */
2159 boolean keep_string_p = false; 2169 re_bool keep_string_p = false;
2160 2170
2161 if (many_times_ok) 2171 if (many_times_ok)
2162 { /* More than one repetition is allowed, so put in 2172 { /* More than one repetition is allowed, so put in
2163 at the end a backward relative jump from 2173 at the end a backward relative jump from
2164 `buf_end' to before the next jump we're going 2174 `buf_end' to before the next jump we're going
2230 2240
2231 2241
2232 case '[': 2242 case '[':
2233 { 2243 {
2234 /* XEmacs change: this whole section */ 2244 /* XEmacs change: this whole section */
2235 boolean had_char_class = false; 2245 re_bool had_char_class = false;
2236 #ifdef MULE 2246 #ifdef MULE
2237 boolean has_extended_chars = false; 2247 re_bool has_extended_chars = false;
2238 REGISTER Lisp_Object rtab = Qnil; 2248 REGISTER Lisp_Object rtab = Qnil;
2239 #endif 2249 #endif
2240 2250
2241 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2251 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2242 2252
2414 undo the ending character, the letters, and leave 2424 undo the ending character, the letters, and leave
2415 the leading `:' and `[' (but set bits for them). */ 2425 the leading `:' and `[' (but set bits for them). */
2416 if (c == ':' && *p == ']') 2426 if (c == ':' && *p == ']')
2417 { 2427 {
2418 int ch; 2428 int ch;
2419 boolean is_alnum = STREQ (str, "alnum"); 2429 re_bool is_alnum = STREQ (str, "alnum");
2420 boolean is_alpha = STREQ (str, "alpha"); 2430 re_bool is_alpha = STREQ (str, "alpha");
2421 boolean is_blank = STREQ (str, "blank"); 2431 re_bool is_blank = STREQ (str, "blank");
2422 boolean is_cntrl = STREQ (str, "cntrl"); 2432 re_bool is_cntrl = STREQ (str, "cntrl");
2423 boolean is_digit = STREQ (str, "digit"); 2433 re_bool is_digit = STREQ (str, "digit");
2424 boolean is_graph = STREQ (str, "graph"); 2434 re_bool is_graph = STREQ (str, "graph");
2425 boolean is_lower = STREQ (str, "lower"); 2435 re_bool is_lower = STREQ (str, "lower");
2426 boolean is_print = STREQ (str, "print"); 2436 re_bool is_print = STREQ (str, "print");
2427 boolean is_punct = STREQ (str, "punct"); 2437 re_bool is_punct = STREQ (str, "punct");
2428 boolean is_space = STREQ (str, "space"); 2438 re_bool is_space = STREQ (str, "space");
2429 boolean is_upper = STREQ (str, "upper"); 2439 re_bool is_upper = STREQ (str, "upper");
2430 boolean is_xdigit = STREQ (str, "xdigit"); 2440 re_bool is_xdigit = STREQ (str, "xdigit");
2431 2441
2432 if (!IS_CHAR_CLASS (str)) 2442 if (!IS_CHAR_CLASS (str))
2433 FREE_STACK_RETURN (REG_ECTYPE); 2443 FREE_STACK_RETURN (REG_ECTYPE);
2434 2444
2435 /* Throw away the ] at the end of the character 2445 /* Throw away the ] at the end of the character
3211 3221
3212 /* P points to just after a ^ in PATTERN. Return true if that ^ comes 3222 /* P points to just after a ^ in PATTERN. Return true if that ^ comes
3213 after an alternative or a begin-subexpression. We assume there is at 3223 after an alternative or a begin-subexpression. We assume there is at
3214 least one character before the ^. */ 3224 least one character before the ^. */
3215 3225
3216 static boolean 3226 static re_bool
3217 at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) 3227 at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax)
3218 { 3228 {
3219 re_char *prev = p - 2; 3229 re_char *prev = p - 2;
3220 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 3230 re_bool prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3221 3231
3222 return 3232 return
3223 /* After a subexpression? */ 3233 /* After a subexpression? */
3224 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) 3234 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3225 /* After an alternative? */ 3235 /* After an alternative? */
3228 3238
3229 3239
3230 /* The dual of at_begline_loc_p. This one is for $. We assume there is 3240 /* The dual of at_begline_loc_p. This one is for $. We assume there is
3231 at least one character after the $, i.e., `P < PEND'. */ 3241 at least one character after the $, i.e., `P < PEND'. */
3232 3242
3233 static boolean 3243 static re_bool
3234 at_endline_loc_p (re_char *p, re_char *pend, int syntax) 3244 at_endline_loc_p (re_char *p, re_char *pend, int syntax)
3235 { 3245 {
3236 re_char *next = p; 3246 re_char *next = p;
3237 boolean next_backslash = *next == '\\'; 3247 re_bool next_backslash = *next == '\\';
3238 re_char *next_next = p + 1 < pend ? p + 1 : 0; 3248 re_char *next_next = p + 1 < pend ? p + 1 : 0;
3239 3249
3240 return 3250 return
3241 /* Before a subexpression? */ 3251 /* Before a subexpression? */
3242 (syntax & RE_NO_BK_PARENS ? *next == ')' 3252 (syntax & RE_NO_BK_PARENS ? *next == ')'
3248 3258
3249 3259
3250 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and 3260 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3251 false if it's not. */ 3261 false if it's not. */
3252 3262
3253 static boolean 3263 static re_bool
3254 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) 3264 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
3255 { 3265 {
3256 int this_element; 3266 int this_element;
3257 3267
3258 for (this_element = compile_stack.avail - 1; 3268 for (this_element = compile_stack.avail - 1;
3419 3429
3420 /* Assume that each path through the pattern can be null until 3430 /* Assume that each path through the pattern can be null until
3421 proven otherwise. We set this false at the bottom of switch 3431 proven otherwise. We set this false at the bottom of switch
3422 statement, to which we get only if a particular path doesn't 3432 statement, to which we get only if a particular path doesn't
3423 match the empty string. */ 3433 match the empty string. */
3424 boolean path_can_be_null = true; 3434 re_bool path_can_be_null = true;
3425 3435
3426 /* We aren't doing a `succeed_n' to begin with. */ 3436 /* We aren't doing a `succeed_n' to begin with. */
3427 boolean succeed_n_p = false; 3437 re_bool succeed_n_p = false;
3428 3438
3429 assert (fastmap != NULL && p != NULL); 3439 assert (fastmap != NULL && p != NULL);
3430 3440
3431 INIT_FAIL_STACK (); 3441 INIT_FAIL_STACK ();
3432 memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */ 3442 memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */
3622 /* Otherwise, have to check alternative paths. */ 3632 /* Otherwise, have to check alternative paths. */
3623 break; 3633 break;
3624 } 3634 }
3625 3635
3626 #ifdef emacs 3636 #ifdef emacs
3637 case wordbound:
3638 case notwordbound:
3639 case wordbeg:
3640 case wordend:
3641 case notsyntaxspec:
3642 case syntaxspec:
3643 /* This match depends on text properties, so these cases end by
3644 aborting the optimization. */
3645 bufp->can_be_null = 1;
3646 goto done;
3647
3648 #ifdef emacs
3649 #if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
3627 case syntaxspec: 3650 case syntaxspec:
3628 k = *p++; 3651 k = *p++;
3652 #endif
3629 matchsyntax: 3653 matchsyntax:
3630 #ifdef MULE 3654 #ifdef MULE
3631 for (j = 0; j < 0x80; j++) 3655 for (j = 0; j < 0x80; j++)
3632 if (SYNTAX_UNSAFE 3656 if (SYNTAX_UNSAFE
3633 (XCHAR_TABLE 3657 (XCHAR_TABLE
3663 fastmap[j] = 1; 3687 fastmap[j] = 1;
3664 #endif /* MULE */ 3688 #endif /* MULE */
3665 break; 3689 break;
3666 3690
3667 3691
3692 #if 0 /* Removed during syntax-table properties patch -- 2000/12/07 mct */
3668 case notsyntaxspec: 3693 case notsyntaxspec:
3669 k = *p++; 3694 k = *p++;
3695 #endif
3670 matchnotsyntax: 3696 matchnotsyntax:
3671 #ifdef MULE 3697 #ifdef MULE
3672 for (j = 0; j < 0x80; j++) 3698 for (j = 0; j < 0x80; j++)
3673 if (SYNTAX_UNSAFE 3699 if (SYNTAX_UNSAFE
3674 (XCHAR_TABLE 3700 (XCHAR_TABLE
3702 (regex_emacs_buffer->mirror_syntax_table), j) != 3728 (regex_emacs_buffer->mirror_syntax_table), j) !=
3703 (enum syntaxcode) k) 3729 (enum syntaxcode) k)
3704 fastmap[j] = 1; 3730 fastmap[j] = 1;
3705 #endif /* MULE */ 3731 #endif /* MULE */
3706 break; 3732 break;
3733 #endif /* emacs */
3707 3734
3708 #ifdef MULE 3735 #ifdef MULE
3709 /* 97/2/17 jhod category patch */ 3736 /* 97/2/17 jhod category patch */
3710 case categoryspec: 3737 case categoryspec:
3711 case notcategoryspec: 3738 case notcategoryspec:
3728 case no_op: 3755 case no_op:
3729 case begline: 3756 case begline:
3730 case endline: 3757 case endline:
3731 case begbuf: 3758 case begbuf:
3732 case endbuf: 3759 case endbuf:
3760 #ifndef emacs
3733 case wordbound: 3761 case wordbound:
3734 case notwordbound: 3762 case notwordbound:
3735 case wordbeg: 3763 case wordbeg:
3736 case wordend: 3764 case wordend:
3765 #endif
3737 case push_dummy_failure: 3766 case push_dummy_failure:
3738 continue; 3767 continue;
3739 3768
3740 3769
3741 case jump_n: 3770 case jump_n:
3972 (startpos >= size1 ? string2 - size1 : string1) + startpos); 4001 (startpos >= size1 ? string2 - size1 : string1) + startpos);
3973 range = charcount_to_bytecount (d, 1); 4002 range = charcount_to_bytecount (d, 1);
3974 } 4003 }
3975 } 4004 }
3976 4005
4006 #ifdef emacs
4007 /* In a forward search for something that starts with \=,
4008 don't keep searching past point. */
4009 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
4010 {
4011 range = BUF_PT (regex_emacs_buffer) - BUF_BEGV (regex_emacs_buffer)
4012 - startpos;
4013 if (range < 0)
4014 return -1;
4015 }
4016 #endif /* emacs */
4017
3977 /* Update the fastmap now if not correct already. */ 4018 /* Update the fastmap now if not correct already. */
3978 if (fastmap && !bufp->fastmap_accurate) 4019 if (fastmap && !bufp->fastmap_accurate)
3979 if (re_compile_fastmap (bufp) == -2) 4020 if (re_compile_fastmap (bufp) == -2)
3980 return -2; 4021 return -2;
3981 4022
3991 else 4032 else
3992 break; 4033 break;
3993 } 4034 }
3994 anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline; 4035 anchored_at_begline = i < bufp->used && bufp->buffer[i] == begline;
3995 } 4036 }
4037 #endif
4038
4039 #ifdef emacs
4040 SETUP_SYNTAX_CACHE_FOR_OBJECT (regex_match_object,
4041 regex_emacs_buffer,
4042 SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR (regex_match_object,
4043 regex_emacs_buffer,
4044 startpos),
4045 1);
3996 #endif 4046 #endif
3997 4047
3998 /* Loop through the string, looking for a place to start matching. */ 4048 /* Loop through the string, looking for a place to start matching. */
3999 for (;;) 4049 for (;;)
4000 { 4050 {
4256 int 4306 int
4257 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, 4307 re_match_2 (struct re_pattern_buffer *bufp, const char *string1,
4258 int size1, const char *string2, int size2, int pos, 4308 int size1, const char *string2, int size2, int pos,
4259 struct re_registers *regs, int stop) 4309 struct re_registers *regs, int stop)
4260 { 4310 {
4261 int result = re_match_2_internal (bufp, (re_char *) string1, size1, 4311 int result;
4262 (re_char *) string2, size2, 4312
4263 pos, regs, stop); 4313 #ifdef emacs
4314 SETUP_SYNTAX_CACHE_FOR_OBJECT (regex_match_object,
4315 regex_emacs_buffer,
4316 SYNTAX_CACHE_OBJECT_BYTE_TO_CHAR (regex_match_object,
4317 regex_emacs_buffer,
4318 pos),
4319 1);
4320 #endif
4321
4322 result = re_match_2_internal (bufp, (re_char *) string1, size1,
4323 (re_char *) string2, size2,
4324 pos, regs, stop);
4325
4264 alloca (0); 4326 alloca (0);
4265 return result; 4327 return result;
4266 } 4328 }
4267 4329
4268 /* This is a separate function so that we can force an alloca cleanup 4330 /* This is a separate function so that we can force an alloca cleanup
4393 unsigned num_regs_pushed = 0; 4455 unsigned num_regs_pushed = 0;
4394 #endif 4456 #endif
4395 4457
4396 /* 1 if this match ends in the same string (string1 or string2) 4458 /* 1 if this match ends in the same string (string1 or string2)
4397 as the best previous match. */ 4459 as the best previous match. */
4398 boolean same_str_p; 4460 re_bool same_str_p;
4399 4461
4400 /* 1 if this match is the best seen so far. */ 4462 /* 1 if this match is the best seen so far. */
4401 boolean best_match_p; 4463 re_bool best_match_p;
4402 4464
4403 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); 4465 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
4404 4466
4405 INIT_FAIL_STACK (); 4467 INIT_FAIL_STACK ();
4406 4468
4757 4819
4758 case charset: 4820 case charset:
4759 case charset_not: 4821 case charset_not:
4760 { 4822 {
4761 REGISTER unsigned char c; 4823 REGISTER unsigned char c;
4762 boolean not_p = (re_opcode_t) *(p - 1) == charset_not; 4824 re_bool not_p = (re_opcode_t) *(p - 1) == charset_not;
4763 4825
4764 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); 4826 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : "");
4765 4827
4766 REGEX_PREFETCH (); 4828 REGEX_PREFETCH ();
4767 c = TRANSLATE (*d); /* The character to match. */ 4829 c = TRANSLATE (*d); /* The character to match. */
4784 #ifdef MULE 4846 #ifdef MULE
4785 case charset_mule: 4847 case charset_mule:
4786 case charset_mule_not: 4848 case charset_mule_not:
4787 { 4849 {
4788 REGISTER Emchar c; 4850 REGISTER Emchar c;
4789 boolean not_p = (re_opcode_t) *(p - 1) == charset_mule_not; 4851 re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not;
4790 4852
4791 DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : ""); 4853 DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : "");
4792 4854
4793 REGEX_PREFETCH (); 4855 REGEX_PREFETCH ();
4794 c = charptr_emchar ((const Bufbyte *) d); 4856 c = charptr_emchar ((const Bufbyte *) d);
4931 last match. */ 4993 last match. */
4932 if ((!MATCHED_SOMETHING (reg_info[*p]) 4994 if ((!MATCHED_SOMETHING (reg_info[*p])
4933 || just_past_start_mem == p - 1) 4995 || just_past_start_mem == p - 1)
4934 && (p + 2) < pend) 4996 && (p + 2) < pend)
4935 { 4997 {
4936 boolean is_a_jump_n = false; 4998 re_bool is_a_jump_n = false;
4937 4999
4938 p1 = p + 2; 5000 p1 = p + 2;
4939 mcnt = 0; 5001 mcnt = 0;
4940 switch ((re_opcode_t) *p1++) 5002 switch ((re_opcode_t) *p1++)
4941 { 5003 {
5476 int result; 5538 int result;
5477 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) 5539 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5478 result = 1; 5540 result = 1;
5479 else 5541 else
5480 { 5542 {
5481 const unsigned char *d_before = 5543 re_char *d_before = POS_BEFORE_GAP_UNSAFE (d);
5482 (const unsigned char *) POS_BEFORE_GAP_UNSAFE (d); 5544 re_char *d_after = POS_AFTER_GAP_UNSAFE (d);
5483 const unsigned char *d_after = 5545
5484 (const unsigned char *) POS_AFTER_GAP_UNSAFE (d); 5546 /* emch1 is the character before d, syn1 is the syntax of emch1,
5547 emch2 is the character at d, and syn2 is the syntax of emch2. */
5485 Emchar emch1, emch2; 5548 Emchar emch1, emch2;
5549 int syn1, syn2;
5550 #ifdef emacs
5551 int pos_before;
5552 #endif
5486 5553
5487 DEC_CHARPTR (d_before); 5554 DEC_CHARPTR (d_before);
5488 emch1 = charptr_emchar (d_before); 5555 emch1 = charptr_emchar (d_before);
5489 emch2 = charptr_emchar (d_after); 5556 emch2 = charptr_emchar (d_after);
5490 result = (WORDCHAR_P_UNSAFE (emch1) != 5557
5491 WORDCHAR_P_UNSAFE (emch2)); 5558 #ifdef emacs
5559 pos_before = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)) - 1;
5560 UPDATE_SYNTAX_CACHE (pos_before);
5561 #endif
5562 syn1 = SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
5563 emch1);
5564 #ifdef emacs
5565 UPDATE_SYNTAX_CACHE_FORWARD (pos_before + 1);
5566 #endif
5567 syn2 = SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
5568 emch2);
5569
5570 result = ((syn1 == Sword) != (syn2 == Sword));
5492 } 5571 }
5493 if (result == should_succeed) 5572 if (result == should_succeed)
5494 break; 5573 break;
5495 goto fail; 5574 goto fail;
5496 } 5575 }
5500 should_succeed = 0; 5579 should_succeed = 0;
5501 goto matchwordbound; 5580 goto matchwordbound;
5502 5581
5503 case wordbeg: 5582 case wordbeg:
5504 DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 5583 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5584 if (AT_STRINGS_END (d))
5585 goto fail;
5505 { 5586 {
5506 /* XEmacs: this originally read: 5587 /* XEmacs: this originally read:
5507 5588
5508 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) 5589 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
5509 break; 5590 break;
5510 5591
5511 */ 5592 */
5512 const unsigned char *dtmp = 5593 re_char *dtmp = POS_AFTER_GAP_UNSAFE (d);
5513 (const unsigned char *) POS_AFTER_GAP_UNSAFE (d);
5514 Emchar emch = charptr_emchar (dtmp); 5594 Emchar emch = charptr_emchar (dtmp);
5515 if (!WORDCHAR_P_UNSAFE (emch)) 5595 #ifdef emacs
5596 int charpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
5597 UPDATE_SYNTAX_CACHE (charpos);
5598 #endif
5599 if (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
5600 emch) != Sword)
5516 goto fail; 5601 goto fail;
5517 if (AT_STRINGS_BEG (d)) 5602 if (AT_STRINGS_BEG (d))
5518 break; 5603 break;
5519 dtmp = (const unsigned char *) POS_BEFORE_GAP_UNSAFE (d); 5604 dtmp = POS_BEFORE_GAP_UNSAFE (d);
5520 DEC_CHARPTR (dtmp); 5605 DEC_CHARPTR (dtmp);
5521 emch = charptr_emchar (dtmp); 5606 emch = charptr_emchar (dtmp);
5522 if (!WORDCHAR_P_UNSAFE (emch)) 5607 #ifdef emacs
5608 UPDATE_SYNTAX_CACHE_BACKWARD (charpos - 1);
5609 #endif
5610 if (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
5611 emch) != Sword)
5523 break; 5612 break;
5524 goto fail; 5613 goto fail;
5525 } 5614 }
5526 5615
5527 case wordend: 5616 case wordend:
5528 DEBUG_PRINT1 ("EXECUTING wordend.\n"); 5617 DEBUG_PRINT1 ("EXECUTING wordend.\n");
5618 if (AT_STRINGS_BEG (d))
5619 goto fail;
5529 { 5620 {
5530 /* XEmacs: this originally read: 5621 /* XEmacs: this originally read:
5531 5622
5532 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) 5623 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
5533 && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) 5624 && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
5534 break; 5625 break;
5535 5626
5536 The or condition is incorrect (reversed). 5627 The or condition is incorrect (reversed).
5537 */ 5628 */
5538 const unsigned char *dtmp; 5629 re_char *dtmp;
5539 Emchar emch; 5630 Emchar emch;
5540 if (AT_STRINGS_BEG (d)) 5631 #ifdef emacs
5541 goto fail; 5632 int charpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d)) - 1;
5542 dtmp = (const unsigned char *) POS_BEFORE_GAP_UNSAFE (d); 5633 UPDATE_SYNTAX_CACHE (charpos);
5634 #endif
5635 dtmp = POS_BEFORE_GAP_UNSAFE (d);
5543 DEC_CHARPTR (dtmp); 5636 DEC_CHARPTR (dtmp);
5544 emch = charptr_emchar (dtmp); 5637 emch = charptr_emchar (dtmp);
5545 if (!WORDCHAR_P_UNSAFE (emch)) 5638 if (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
5639 emch) != Sword)
5546 goto fail; 5640 goto fail;
5547 if (AT_STRINGS_END (d)) 5641 if (AT_STRINGS_END (d))
5548 break; 5642 break;
5549 dtmp = (const unsigned char *) POS_AFTER_GAP_UNSAFE (d); 5643 dtmp = POS_AFTER_GAP_UNSAFE (d);
5550 emch = charptr_emchar (dtmp); 5644 emch = charptr_emchar (dtmp);
5551 if (!WORDCHAR_P_UNSAFE (emch)) 5645 #ifdef emacs
5646 UPDATE_SYNTAX_CACHE_FORWARD (charpos + 1);
5647 #endif
5648 if (SYNTAX_FROM_CACHE (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
5649 emch) != Sword)
5552 break; 5650 break;
5553 goto fail; 5651 goto fail;
5554 } 5652 }
5555 5653
5556 #ifdef emacs 5654 #ifdef emacs
5557 case before_dot: 5655 case before_dot:
5558 DEBUG_PRINT1 ("EXECUTING before_dot.\n"); 5656 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5559 if (!regex_emacs_buffer_p 5657 if (! (NILP (regex_match_object) || BUFFERP (regex_match_object))
5560 || (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) 5658 || (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
5561 >= BUF_PT (regex_emacs_buffer))) 5659 >= BUF_PT (regex_emacs_buffer)))
5562 goto fail; 5660 goto fail;
5563 break; 5661 break;
5564 5662
5565 case at_dot: 5663 case at_dot:
5566 DEBUG_PRINT1 ("EXECUTING at_dot.\n"); 5664 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5567 if (!regex_emacs_buffer_p 5665 if (! (NILP (regex_match_object) || BUFFERP (regex_match_object))
5568 || (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) 5666 || (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
5569 != BUF_PT (regex_emacs_buffer))) 5667 != BUF_PT (regex_emacs_buffer)))
5570 goto fail; 5668 goto fail;
5571 break; 5669 break;
5572 5670
5573 case after_dot: 5671 case after_dot:
5574 DEBUG_PRINT1 ("EXECUTING after_dot.\n"); 5672 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5575 if (!regex_emacs_buffer_p 5673 if (! (NILP (regex_match_object) || BUFFERP (regex_match_object))
5576 || (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) 5674 || (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
5577 <= BUF_PT (regex_emacs_buffer))) 5675 <= BUF_PT (regex_emacs_buffer)))
5578 goto fail; 5676 goto fail;
5579 break; 5677 break;
5580 #if 0 /* not emacs19 */ 5678 #if 0 /* not emacs19 */
5600 { 5698 {
5601 int matches; 5699 int matches;
5602 Emchar emch; 5700 Emchar emch;
5603 5701
5604 REGEX_PREFETCH (); 5702 REGEX_PREFETCH ();
5703 #ifdef emacs
5704 {
5705 int charpos = SYNTAX_CACHE_BYTE_TO_CHAR (PTR_TO_OFFSET (d));
5706 UPDATE_SYNTAX_CACHE (charpos);
5707 }
5708 #endif
5709
5605 emch = charptr_emchar ((const Bufbyte *) d); 5710 emch = charptr_emchar ((const Bufbyte *) d);
5606 matches = (SYNTAX_UNSAFE 5711 matches = (SYNTAX_FROM_CACHE (regex_emacs_buffer->mirror_syntax_table,
5607 (XCHAR_TABLE (regex_emacs_buffer->mirror_syntax_table),
5608 emch) == (enum syntaxcode) mcnt); 5712 emch) == (enum syntaxcode) mcnt);
5609 INC_CHARPTR (d); 5713 INC_CHARPTR (d);
5610 if (matches != should_succeed) 5714 if (matches != should_succeed)
5611 goto fail; 5715 goto fail;
5612 SET_REGS_MATCHED (); 5716 SET_REGS_MATCHED ();
5690 5794
5691 /* If we failed to the end of the pattern, don't examine *p. */ 5795 /* If we failed to the end of the pattern, don't examine *p. */
5692 assert (p <= pend); 5796 assert (p <= pend);
5693 if (p < pend) 5797 if (p < pend)
5694 { 5798 {
5695 boolean is_a_jump_n = false; 5799 re_bool is_a_jump_n = false;
5696 5800
5697 /* If failed to a backwards jump that's part of a repetition 5801 /* If failed to a backwards jump that's part of a repetition
5698 loop, need to pop this failure point and use the next one. */ 5802 loop, need to pop this failure point and use the next one. */
5699 switch ((re_opcode_t) *p) 5803 switch ((re_opcode_t) *p)
5700 { 5804 {
5743 If we find the matching stop_memory, sets P to point to one past its number. 5847 If we find the matching stop_memory, sets P to point to one past its number.
5744 Otherwise, sets P to an undefined byte less than or equal to END. 5848 Otherwise, sets P to an undefined byte less than or equal to END.
5745 5849
5746 We don't handle duplicates properly (yet). */ 5850 We don't handle duplicates properly (yet). */
5747 5851
5748 static boolean 5852 static re_bool
5749 group_match_null_string_p (unsigned char **p, unsigned char *end, 5853 group_match_null_string_p (unsigned char **p, unsigned char *end,
5750 register_info_type *reg_info) 5854 register_info_type *reg_info)
5751 { 5855 {
5752 int mcnt; 5856 int mcnt;
5753 /* Point to after the args to the start_memory. */ 5857 /* Point to after the args to the start_memory. */
5851 5955
5852 /* Similar to group_match_null_string_p, but doesn't deal with alternatives: 5956 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5853 It expects P to be the first byte of a single alternative and END one 5957 It expects P to be the first byte of a single alternative and END one
5854 byte past the last. The alternative can contain groups. */ 5958 byte past the last. The alternative can contain groups. */
5855 5959
5856 static boolean 5960 static re_bool
5857 alt_match_null_string_p (unsigned char *p, unsigned char *end, 5961 alt_match_null_string_p (unsigned char *p, unsigned char *end,
5858 register_info_type *reg_info) 5962 register_info_type *reg_info)
5859 { 5963 {
5860 int mcnt; 5964 int mcnt;
5861 unsigned char *p1 = p; 5965 unsigned char *p1 = p;
5887 /* Deals with the ops common to group_match_null_string_p and 5991 /* Deals with the ops common to group_match_null_string_p and
5888 alt_match_null_string_p. 5992 alt_match_null_string_p.
5889 5993
5890 Sets P to one after the op and its arguments, if any. */ 5994 Sets P to one after the op and its arguments, if any. */
5891 5995
5892 static boolean 5996 static re_bool
5893 common_op_match_null_string_p (unsigned char **p, unsigned char *end, 5997 common_op_match_null_string_p (unsigned char **p, unsigned char *end,
5894 register_info_type *reg_info) 5998 register_info_type *reg_info)
5895 { 5999 {
5896 int mcnt; 6000 int mcnt;
5897 boolean ret; 6001 re_bool ret;
5898 int reg_no; 6002 int reg_no;
5899 unsigned char *p1 = *p; 6003 unsigned char *p1 = *p;
5900 6004
5901 switch ((re_opcode_t) *p1++) 6005 switch ((re_opcode_t) *p1++)
5902 { 6006 {
6218 { 6322 {
6219 int ret; 6323 int ret;
6220 struct re_registers regs; 6324 struct re_registers regs;
6221 regex_t private_preg; 6325 regex_t private_preg;
6222 int len = strlen (string); 6326 int len = strlen (string);
6223 boolean want_reg_info = !preg->no_sub && nmatch > 0; 6327 re_bool want_reg_info = !preg->no_sub && nmatch > 0;
6224 6328
6225 private_preg = *preg; 6329 private_preg = *preg;
6226 6330
6227 private_preg.not_bol = !!(eflags & REG_NOTBOL); 6331 private_preg.not_bol = !!(eflags & REG_NOTBOL);
6228 private_preg.not_eol = !!(eflags & REG_NOTEOL); 6332 private_preg.not_eol = !!(eflags & REG_NOTEOL);