comparison src/regex.c @ 203:850242ba4a81 r20-3b28

Import from CVS: tag r20-3b28
author cvs
date Mon, 13 Aug 2007 10:02:21 +0200
parents 3d6bfa290dbd
children 41ff10fd062f
comparison
equal deleted inserted replaced
202:61eefc8fc970 203:850242ba4a81
37 37
38 #ifdef HAVE_CONFIG_H 38 #ifdef HAVE_CONFIG_H
39 #include <config.h> 39 #include <config.h>
40 #endif 40 #endif
41 41
42 #ifndef REGISTER /* Rigidly enforced as of 20.3 */
43 #define REGISTER
44 #endif
45
42 #ifndef _GNU_SOURCE 46 #ifndef _GNU_SOURCE
43 #define _GNU_SOURCE 1 47 #define _GNU_SOURCE 1
44 #endif 48 #endif
45 49
46 /* We assume non-Mule if emacs isn't defined. */ 50 /* We assume non-Mule if emacs isn't defined. */
180 static char re_syntax_table[CHAR_SET_SIZE]; 184 static char re_syntax_table[CHAR_SET_SIZE];
181 185
182 static void 186 static void
183 init_syntax_once (void) 187 init_syntax_once (void)
184 { 188 {
185 register int c; 189 REGISTER int c;
186 static int done = 0; 190 static int done = 0;
187 191
188 if (done) 192 if (done)
189 return; 193 return;
190 194
741 break; 745 break;
742 746
743 case charset: 747 case charset:
744 case charset_not: 748 case charset_not:
745 { 749 {
746 register int c, last = -100; 750 REGISTER int c, last = -100;
747 register int in_range = 0; 751 REGISTER int in_range = 0;
748 752
749 printf ("/charset [%s", 753 printf ("/charset [%s",
750 (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); 754 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
751 755
752 assert (p + *p < pend); 756 assert (p + *p < pend);
1814 register_info_type *reg_info); 1818 register_info_type *reg_info);
1815 static boolean common_op_match_null_string_p (unsigned char **p, 1819 static boolean common_op_match_null_string_p (unsigned char **p,
1816 unsigned char *end, 1820 unsigned char *end,
1817 register_info_type *reg_info); 1821 register_info_type *reg_info);
1818 static int bcmp_translate (CONST unsigned char *s1, CONST unsigned char *s2, 1822 static int bcmp_translate (CONST unsigned char *s1, CONST unsigned char *s2,
1819 register int len, char *translate); 1823 REGISTER int len, char *translate);
1820 static int re_match_2_internal (struct re_pattern_buffer *bufp, 1824 static int re_match_2_internal (struct re_pattern_buffer *bufp,
1821 CONST char *string1, int size1, 1825 CONST char *string1, int size1,
1822 CONST char *string2, int size2, int pos, 1826 CONST char *string2, int size2, int pos,
1823 struct re_registers *regs, int stop); 1827 struct re_registers *regs, int stop);
1824 1828
1899 (or possibly long) so that chars above 127 can be used as 1903 (or possibly long) so that chars above 127 can be used as
1900 array indices. The macros that fetch a character from the pattern 1904 array indices. The macros that fetch a character from the pattern
1901 make sure to coerce to unsigned char before assigning, so we won't 1905 make sure to coerce to unsigned char before assigning, so we won't
1902 get bitten by negative numbers here. */ 1906 get bitten by negative numbers here. */
1903 /* XEmacs change: used to be unsigned char. */ 1907 /* XEmacs change: used to be unsigned char. */
1904 register EMACS_INT c, c1; 1908 REGISTER EMACS_INT c, c1;
1905 1909
1906 /* A random temporary spot in PATTERN. */ 1910 /* A random temporary spot in PATTERN. */
1907 CONST char *p1; 1911 CONST char *p1;
1908 1912
1909 /* Points to the end of the buffer, where we should append. */ 1913 /* Points to the end of the buffer, where we should append. */
1910 register unsigned char *b; 1914 REGISTER unsigned char *b;
1911 1915
1912 /* Keeps track of unclosed groups. */ 1916 /* Keeps track of unclosed groups. */
1913 compile_stack_type compile_stack; 1917 compile_stack_type compile_stack;
1914 1918
1915 /* Points to the current (ending) position in the pattern. */ 1919 /* Points to the current (ending) position in the pattern. */
3179 for OP followed by two-byte integer parameter ARG. */ 3183 for OP followed by two-byte integer parameter ARG. */
3180 3184
3181 static void 3185 static void
3182 insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) 3186 insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
3183 { 3187 {
3184 register unsigned char *pfrom = end; 3188 REGISTER unsigned char *pfrom = end;
3185 register unsigned char *pto = end + 3; 3189 REGISTER unsigned char *pto = end + 3;
3186 3190
3187 while (pfrom != loc) 3191 while (pfrom != loc)
3188 *--pto = *--pfrom; 3192 *--pto = *--pfrom;
3189 3193
3190 store_op1 (op, loc, arg); 3194 store_op1 (op, loc, arg);
3195 3199
3196 static void 3200 static void
3197 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, 3201 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
3198 unsigned char *end) 3202 unsigned char *end)
3199 { 3203 {
3200 register unsigned char *pfrom = end; 3204 REGISTER unsigned char *pfrom = end;
3201 register unsigned char *pto = end + 5; 3205 REGISTER unsigned char *pto = end + 5;
3202 3206
3203 while (pfrom != loc) 3207 while (pfrom != loc)
3204 *--pto = *--pfrom; 3208 *--pto = *--pfrom;
3205 3209
3206 store_op2 (op, loc, arg1, arg2); 3210 store_op2 (op, loc, arg1, arg2);
3400 fail_stack_type fail_stack; 3404 fail_stack_type fail_stack;
3401 #endif 3405 #endif
3402 DECLARE_DESTINATION 3406 DECLARE_DESTINATION
3403 /* We don't push any register information onto the failure stack. */ 3407 /* We don't push any register information onto the failure stack. */
3404 3408
3405 register char *fastmap = bufp->fastmap; 3409 REGISTER char *fastmap = bufp->fastmap;
3406 unsigned char *pattern = bufp->buffer; 3410 unsigned char *pattern = bufp->buffer;
3407 unsigned long size = bufp->used; 3411 unsigned long size = bufp->used;
3408 unsigned char *p = pattern; 3412 unsigned char *p = pattern;
3409 register unsigned char *pend = pattern + size; 3413 REGISTER unsigned char *pend = pattern + size;
3410 3414
3411 #ifdef REL_ALLOC 3415 #ifdef REL_ALLOC
3412 /* This holds the pointer to the failure stack, when 3416 /* This holds the pointer to the failure stack, when
3413 it is allocated relocatably. */ 3417 it is allocated relocatably. */
3414 fail_stack_elt_t *failure_stack_ptr; 3418 fail_stack_elt_t *failure_stack_ptr;
3932 re_search_2 (struct re_pattern_buffer *bufp, CONST char *string1, 3936 re_search_2 (struct re_pattern_buffer *bufp, CONST char *string1,
3933 int size1, CONST char *string2, int size2, int startpos, 3937 int size1, CONST char *string2, int size2, int startpos,
3934 int range, struct re_registers *regs, int stop) 3938 int range, struct re_registers *regs, int stop)
3935 { 3939 {
3936 int val; 3940 int val;
3937 register char *fastmap = bufp->fastmap; 3941 REGISTER char *fastmap = bufp->fastmap;
3938 register char *translate = bufp->translate; 3942 REGISTER char *translate = bufp->translate;
3939 int total_size = size1 + size2; 3943 int total_size = size1 + size2;
3940 int endpos = startpos + range; 3944 int endpos = startpos + range;
3941 #ifdef REGEX_BEGLINE_CHECK 3945 #ifdef REGEX_BEGLINE_CHECK
3942 int anchored_at_begline = 0; 3946 int anchored_at_begline = 0;
3943 #endif 3947 #endif
4276 /* Where we are in the data, and the end of the current string. */ 4280 /* Where we are in the data, and the end of the current string. */
4277 CONST char *d, *dend; 4281 CONST char *d, *dend;
4278 4282
4279 /* Where we are in the pattern, and the end of the pattern. */ 4283 /* Where we are in the pattern, and the end of the pattern. */
4280 unsigned char *p = bufp->buffer; 4284 unsigned char *p = bufp->buffer;
4281 register unsigned char *pend = p + bufp->used; 4285 REGISTER unsigned char *pend = p + bufp->used;
4282 4286
4283 /* Mark the opcode just after a start_memory, so we can test for an 4287 /* Mark the opcode just after a start_memory, so we can test for an
4284 empty subpattern when we get to the stop_memory. */ 4288 empty subpattern when we get to the stop_memory. */
4285 unsigned char *just_past_start_mem = 0; 4289 unsigned char *just_past_start_mem = 0;
4286 4290
4724 4728
4725 4729
4726 case charset: 4730 case charset:
4727 case charset_not: 4731 case charset_not:
4728 { 4732 {
4729 register unsigned char c; 4733 REGISTER unsigned char c;
4730 boolean not = (re_opcode_t) *(p - 1) == charset_not; 4734 boolean not = (re_opcode_t) *(p - 1) == charset_not;
4731 4735
4732 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); 4736 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4733 4737
4734 PREFETCH (); 4738 PREFETCH ();
4751 4755
4752 #ifdef MULE 4756 #ifdef MULE
4753 case charset_mule: 4757 case charset_mule:
4754 case charset_mule_not: 4758 case charset_mule_not:
4755 { 4759 {
4756 register Emchar c; 4760 REGISTER Emchar c;
4757 boolean not = (re_opcode_t) *(p - 1) == charset_mule_not; 4761 boolean not = (re_opcode_t) *(p - 1) == charset_mule_not;
4758 4762
4759 DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not ? "_not" : ""); 4763 DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not ? "_not" : "");
4760 4764
4761 PREFETCH (); 4765 PREFETCH ();
4964 4968
4965 /* \<digit> has been turned into a `duplicate' command which is 4969 /* \<digit> has been turned into a `duplicate' command which is
4966 followed by the numeric value of <digit> as the register number. */ 4970 followed by the numeric value of <digit> as the register number. */
4967 case duplicate: 4971 case duplicate:
4968 { 4972 {
4969 register CONST char *d2, *dend2; 4973 REGISTER CONST char *d2, *dend2;
4970 int regno = *p++; /* Get which register to match against. */ 4974 int regno = *p++; /* Get which register to match against. */
4971 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 4975 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4972 4976
4973 /* Can't back reference a group which we've never matched. */ 4977 /* Can't back reference a group which we've never matched. */
4974 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) 4978 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
5163 We change it to either `pop_failure_jump' or `jump'. */ 5167 We change it to either `pop_failure_jump' or `jump'. */
5164 case maybe_pop_jump: 5168 case maybe_pop_jump:
5165 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5169 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5166 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); 5170 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
5167 { 5171 {
5168 register unsigned char *p2 = p; 5172 REGISTER unsigned char *p2 = p;
5169 5173
5170 /* Compare the beginning of the repeat with what in the 5174 /* Compare the beginning of the repeat with what in the
5171 pattern follows its end. If we can establish that there 5175 pattern follows its end. If we can establish that there
5172 is nothing that they would both match, i.e., that we 5176 is nothing that they would both match, i.e., that we
5173 would have to backtrack because of (as in, e.g., `a*a') 5177 would have to backtrack because of (as in, e.g., `a*a')
5214 } 5218 }
5215 5219
5216 else if ((re_opcode_t) *p2 == exactn 5220 else if ((re_opcode_t) *p2 == exactn
5217 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) 5221 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
5218 { 5222 {
5219 register unsigned char c 5223 REGISTER unsigned char c
5220 = *p2 == (unsigned char) endline ? '\n' : p2[2]; 5224 = *p2 == (unsigned char) endline ? '\n' : p2[2];
5221 5225
5222 if ((re_opcode_t) p1[3] == exactn && p1[5] != c) 5226 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
5223 { 5227 {
5224 p[-3] = (unsigned char) pop_failure_jump; 5228 p[-3] = (unsigned char) pop_failure_jump;
5245 } 5249 }
5246 } 5250 }
5247 else if ((re_opcode_t) *p2 == charset) 5251 else if ((re_opcode_t) *p2 == charset)
5248 { 5252 {
5249 #ifdef DEBUG 5253 #ifdef DEBUG
5250 register unsigned char c 5254 REGISTER unsigned char c
5251 = *p2 == (unsigned char) endline ? '\n' : p2[2]; 5255 = *p2 == (unsigned char) endline ? '\n' : p2[2];
5252 #endif 5256 #endif
5253 5257
5254 if ((re_opcode_t) p1[3] == exactn 5258 if ((re_opcode_t) p1[3] == exactn
5255 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] 5259 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
5932 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN 5936 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5933 bytes; nonzero otherwise. */ 5937 bytes; nonzero otherwise. */
5934 5938
5935 static int 5939 static int
5936 bcmp_translate (CONST unsigned char *s1, CONST unsigned char *s2, 5940 bcmp_translate (CONST unsigned char *s1, CONST unsigned char *s2,
5937 register int len, char *translate) 5941 REGISTER int len, char *translate)
5938 { 5942 {
5939 register CONST unsigned char *p1 = s1, *p2 = s2; 5943 REGISTER CONST unsigned char *p1 = s1, *p2 = s2;
5940 while (len) 5944 while (len)
5941 { 5945 {
5942 if (translate[*p1++] != translate[*p2++]) return 1; 5946 if (translate[*p1++] != translate[*p2++]) return 1;
5943 len--; 5947 len--;
5944 } 5948 }