Mercurial > hg > xemacs-beta
comparison src/regex.c @ 203:850242ba4a81 r20-3b28
Import from CVS: tag r20-3b28
author | cvs |
---|---|
date | Mon, 13 Aug 2007 10:02:21 +0200 |
parents | 3d6bfa290dbd |
children | 41ff10fd062f |
comparison
equal
deleted
inserted
replaced
202:61eefc8fc970 | 203:850242ba4a81 |
---|---|
37 | 37 |
38 #ifdef HAVE_CONFIG_H | 38 #ifdef HAVE_CONFIG_H |
39 #include <config.h> | 39 #include <config.h> |
40 #endif | 40 #endif |
41 | 41 |
| | 42 #ifndef REGISTER /* Rigidly enforced as of 20.3 */ |
| | 43 #define REGISTER |
| | 44 #endif |
| | 45 |
42 #ifndef _GNU_SOURCE | 46 #ifndef _GNU_SOURCE |
43 #define _GNU_SOURCE 1 | 47 #define _GNU_SOURCE 1 |
44 #endif | 48 #endif |
45 | 49 |
46 /* We assume non-Mule if emacs isn't defined. */ | 50 /* We assume non-Mule if emacs isn't defined. */ |
180 static char re_syntax_table[CHAR_SET_SIZE]; | 184 static char re_syntax_table[CHAR_SET_SIZE]; |
181 | 185 |
182 static void | 186 static void |
183 init_syntax_once (void) | 187 init_syntax_once (void) |
184 { | 188 { |
185 register int c; | 189 REGISTER int c; |
186 static int done = 0; | 190 static int done = 0; |
187 | 191 |
188 if (done) | 192 if (done) |
189 return; | 193 return; |
190 | 194 |
741 break; | 745 break; |
742 | 746 |
743 case charset: | 747 case charset: |
744 case charset_not: | 748 case charset_not: |
745 { | 749 { |
746 register int c, last = -100; | 750 REGISTER int c, last = -100; |
747 register int in_range = 0; | 751 REGISTER int in_range = 0; |
748 | 752 |
749 printf ("/charset [%s", | 753 printf ("/charset [%s", |
750 (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); | 754 (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); |
751 | 755 |
752 assert (p + *p < pend); | 756 assert (p + *p < pend); |
1814 register_info_type *reg_info); | 1818 register_info_type *reg_info); |
1815 static boolean common_op_match_null_string_p (unsigned char **p, | 1819 static boolean common_op_match_null_string_p (unsigned char **p, |
1816 unsigned char *end, | 1820 unsigned char *end, |
1817 register_info_type *reg_info); | 1821 register_info_type *reg_info); |
1818 static int bcmp_translate (CONST unsigned char *s1, CONST unsigned char *s2, | 1822 static int bcmp_translate (CONST unsigned char *s1, CONST unsigned char *s2, |
1819 register int len, char *translate); | 1823 REGISTER int len, char *translate); |
1820 static int re_match_2_internal (struct re_pattern_buffer *bufp, | 1824 static int re_match_2_internal (struct re_pattern_buffer *bufp, |
1821 CONST char *string1, int size1, | 1825 CONST char *string1, int size1, |
1822 CONST char *string2, int size2, int pos, | 1826 CONST char *string2, int size2, int pos, |
1823 struct re_registers *regs, int stop); | 1827 struct re_registers *regs, int stop); |
1824 | 1828 |
1899 (or possibly long) so that chars above 127 can be used as | 1903 (or possibly long) so that chars above 127 can be used as |
1900 array indices. The macros that fetch a character from the pattern | 1904 array indices. The macros that fetch a character from the pattern |
1901 make sure to coerce to unsigned char before assigning, so we won't | 1905 make sure to coerce to unsigned char before assigning, so we won't |
1902 get bitten by negative numbers here. */ | 1906 get bitten by negative numbers here. */ |
1903 /* XEmacs change: used to be unsigned char. */ | 1907 /* XEmacs change: used to be unsigned char. */ |
1904 register EMACS_INT c, c1; | 1908 REGISTER EMACS_INT c, c1; |
1905 | 1909 |
1906 /* A random temporary spot in PATTERN. */ | 1910 /* A random temporary spot in PATTERN. */ |
1907 CONST char *p1; | 1911 CONST char *p1; |
1908 | 1912 |
1909 /* Points to the end of the buffer, where we should append. */ | 1913 /* Points to the end of the buffer, where we should append. */ |
1910 register unsigned char *b; | 1914 REGISTER unsigned char *b; |
1911 | 1915 |
1912 /* Keeps track of unclosed groups. */ | 1916 /* Keeps track of unclosed groups. */ |
1913 compile_stack_type compile_stack; | 1917 compile_stack_type compile_stack; |
1914 | 1918 |
1915 /* Points to the current (ending) position in the pattern. */ | 1919 /* Points to the current (ending) position in the pattern. */ |
3179 for OP followed by two-byte integer parameter ARG. */ | 3183 for OP followed by two-byte integer parameter ARG. */ |
3180 | 3184 |
3181 static void | 3185 static void |
3182 insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) | 3186 insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end) |
3183 { | 3187 { |
3184 register unsigned char *pfrom = end; | 3188 REGISTER unsigned char *pfrom = end; |
3185 register unsigned char *pto = end + 3; | 3189 REGISTER unsigned char *pto = end + 3; |
3186 | 3190 |
3187 while (pfrom != loc) | 3191 while (pfrom != loc) |
3188 *--pto = *--pfrom; | 3192 *--pto = *--pfrom; |
3189 | 3193 |
3190 store_op1 (op, loc, arg); | 3194 store_op1 (op, loc, arg); |
3195 | 3199 |
3196 static void | 3200 static void |
3197 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, | 3201 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, |
3198 unsigned char *end) | 3202 unsigned char *end) |
3199 { | 3203 { |
3200 register unsigned char *pfrom = end; | 3204 REGISTER unsigned char *pfrom = end; |
3201 register unsigned char *pto = end + 5; | 3205 REGISTER unsigned char *pto = end + 5; |
3202 | 3206 |
3203 while (pfrom != loc) | 3207 while (pfrom != loc) |
3204 *--pto = *--pfrom; | 3208 *--pto = *--pfrom; |
3205 | 3209 |
3206 store_op2 (op, loc, arg1, arg2); | 3210 store_op2 (op, loc, arg1, arg2); |
3400 fail_stack_type fail_stack; | 3404 fail_stack_type fail_stack; |
3401 #endif | 3405 #endif |
3402 DECLARE_DESTINATION | 3406 DECLARE_DESTINATION |
3403 /* We don't push any register information onto the failure stack. */ | 3407 /* We don't push any register information onto the failure stack. */ |
3404 | 3408 |
3405 register char *fastmap = bufp->fastmap; | 3409 REGISTER char *fastmap = bufp->fastmap; |
3406 unsigned char *pattern = bufp->buffer; | 3410 unsigned char *pattern = bufp->buffer; |
3407 unsigned long size = bufp->used; | 3411 unsigned long size = bufp->used; |
3408 unsigned char *p = pattern; | 3412 unsigned char *p = pattern; |
3409 register unsigned char *pend = pattern + size; | 3413 REGISTER unsigned char *pend = pattern + size; |
3410 | 3414 |
3411 #ifdef REL_ALLOC | 3415 #ifdef REL_ALLOC |
3412 /* This holds the pointer to the failure stack, when | 3416 /* This holds the pointer to the failure stack, when |
3413 it is allocated relocatably. */ | 3417 it is allocated relocatably. */ |
3414 fail_stack_elt_t *failure_stack_ptr; | 3418 fail_stack_elt_t *failure_stack_ptr; |
3932 re_search_2 (struct re_pattern_buffer *bufp, CONST char *string1, | 3936 re_search_2 (struct re_pattern_buffer *bufp, CONST char *string1, |
3933 int size1, CONST char *string2, int size2, int startpos, | 3937 int size1, CONST char *string2, int size2, int startpos, |
3934 int range, struct re_registers *regs, int stop) | 3938 int range, struct re_registers *regs, int stop) |
3935 { | 3939 { |
3936 int val; | 3940 int val; |
3937 register char *fastmap = bufp->fastmap; | 3941 REGISTER char *fastmap = bufp->fastmap; |
3938 register char *translate = bufp->translate; | 3942 REGISTER char *translate = bufp->translate; |
3939 int total_size = size1 + size2; | 3943 int total_size = size1 + size2; |
3940 int endpos = startpos + range; | 3944 int endpos = startpos + range; |
3941 #ifdef REGEX_BEGLINE_CHECK | 3945 #ifdef REGEX_BEGLINE_CHECK |
3942 int anchored_at_begline = 0; | 3946 int anchored_at_begline = 0; |
3943 #endif | 3947 #endif |
4276 /* Where we are in the data, and the end of the current string. */ | 4280 /* Where we are in the data, and the end of the current string. */ |
4277 CONST char *d, *dend; | 4281 CONST char *d, *dend; |
4278 | 4282 |
4279 /* Where we are in the pattern, and the end of the pattern. */ | 4283 /* Where we are in the pattern, and the end of the pattern. */ |
4280 unsigned char *p = bufp->buffer; | 4284 unsigned char *p = bufp->buffer; |
4281 register unsigned char *pend = p + bufp->used; | 4285 REGISTER unsigned char *pend = p + bufp->used; |
4282 | 4286 |
4283 /* Mark the opcode just after a start_memory, so we can test for an | 4287 /* Mark the opcode just after a start_memory, so we can test for an |
4284 empty subpattern when we get to the stop_memory. */ | 4288 empty subpattern when we get to the stop_memory. */ |
4285 unsigned char *just_past_start_mem = 0; | 4289 unsigned char *just_past_start_mem = 0; |
4286 | 4290 |
4724 | 4728 |
4725 | 4729 |
4726 case charset: | 4730 case charset: |
4727 case charset_not: | 4731 case charset_not: |
4728 { | 4732 { |
4729 register unsigned char c; | 4733 REGISTER unsigned char c; |
4730 boolean not = (re_opcode_t) *(p - 1) == charset_not; | 4734 boolean not = (re_opcode_t) *(p - 1) == charset_not; |
4731 | 4735 |
4732 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); | 4736 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); |
4733 | 4737 |
4734 PREFETCH (); | 4738 PREFETCH (); |
4751 | 4755 |
4752 #ifdef MULE | 4756 #ifdef MULE |
4753 case charset_mule: | 4757 case charset_mule: |
4754 case charset_mule_not: | 4758 case charset_mule_not: |
4755 { | 4759 { |
4756 register Emchar c; | 4760 REGISTER Emchar c; |
4757 boolean not = (re_opcode_t) *(p - 1) == charset_mule_not; | 4761 boolean not = (re_opcode_t) *(p - 1) == charset_mule_not; |
4758 | 4762 |
4759 DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not ? "_not" : ""); | 4763 DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not ? "_not" : ""); |
4760 | 4764 |
4761 PREFETCH (); | 4765 PREFETCH (); |
4964 | 4968 |
4965 /* \<digit> has been turned into a `duplicate' command which is | 4969 /* \<digit> has been turned into a `duplicate' command which is |
4966 followed by the numeric value of <digit> as the register number. */ | 4970 followed by the numeric value of <digit> as the register number. */ |
4967 case duplicate: | 4971 case duplicate: |
4968 { | 4972 { |
4969 register CONST char *d2, *dend2; | 4973 REGISTER CONST char *d2, *dend2; |
4970 int regno = *p++; /* Get which register to match against. */ | 4974 int regno = *p++; /* Get which register to match against. */ |
4971 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); | 4975 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); |
4972 | 4976 |
4973 /* Can't back reference a group which we've never matched. */ | 4977 /* Can't back reference a group which we've never matched. */ |
4974 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) | 4978 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) |
5163 We change it to either `pop_failure_jump' or `jump'. */ | 5167 We change it to either `pop_failure_jump' or `jump'. */ |
5164 case maybe_pop_jump: | 5168 case maybe_pop_jump: |
5165 EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5169 EXTRACT_NUMBER_AND_INCR (mcnt, p); |
5166 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); | 5170 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); |
5167 { | 5171 { |
5168 register unsigned char *p2 = p; | 5172 REGISTER unsigned char *p2 = p; |
5169 | 5173 |
5170 /* Compare the beginning of the repeat with what in the | 5174 /* Compare the beginning of the repeat with what in the |
5171 pattern follows its end. If we can establish that there | 5175 pattern follows its end. If we can establish that there |
5172 is nothing that they would both match, i.e., that we | 5176 is nothing that they would both match, i.e., that we |
5173 would have to backtrack because of (as in, e.g., `a*a') | 5177 would have to backtrack because of (as in, e.g., `a*a') |
5214 } | 5218 } |
5215 | 5219 |
5216 else if ((re_opcode_t) *p2 == exactn | 5220 else if ((re_opcode_t) *p2 == exactn |
5217 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) | 5221 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) |
5218 { | 5222 { |
5219 register unsigned char c | 5223 REGISTER unsigned char c |
5220 = *p2 == (unsigned char) endline ? '\n' : p2[2]; | 5224 = *p2 == (unsigned char) endline ? '\n' : p2[2]; |
5221 | 5225 |
5222 if ((re_opcode_t) p1[3] == exactn && p1[5] != c) | 5226 if ((re_opcode_t) p1[3] == exactn && p1[5] != c) |
5223 { | 5227 { |
5224 p[-3] = (unsigned char) pop_failure_jump; | 5228 p[-3] = (unsigned char) pop_failure_jump; |
5245 } | 5249 } |
5246 } | 5250 } |
5247 else if ((re_opcode_t) *p2 == charset) | 5251 else if ((re_opcode_t) *p2 == charset) |
5248 { | 5252 { |
5249 #ifdef DEBUG | 5253 #ifdef DEBUG |
5250 register unsigned char c | 5254 REGISTER unsigned char c |
5251 = *p2 == (unsigned char) endline ? '\n' : p2[2]; | 5255 = *p2 == (unsigned char) endline ? '\n' : p2[2]; |
5252 #endif | 5256 #endif |
5253 | 5257 |
5254 if ((re_opcode_t) p1[3] == exactn | 5258 if ((re_opcode_t) p1[3] == exactn |
5255 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] | 5259 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] |
5932 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN | 5936 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN |
5933 bytes; nonzero otherwise. */ | 5937 bytes; nonzero otherwise. */ |
5934 | 5938 |
5935 static int | 5939 static int |
5936 bcmp_translate (CONST unsigned char *s1, CONST unsigned char *s2, | 5940 bcmp_translate (CONST unsigned char *s1, CONST unsigned char *s2, |
5937 register int len, char *translate) | 5941 REGISTER int len, char *translate) |
5938 { | 5942 { |
5939 register CONST unsigned char *p1 = s1, *p2 = s2; | 5943 REGISTER CONST unsigned char *p1 = s1, *p2 = s2; |
5940 while (len) | 5944 while (len) |
5941 { | 5945 { |
5942 if (translate[*p1++] != translate[*p2++]) return 1; | 5946 if (translate[*p1++] != translate[*p2++]) return 1; |
5943 len--; | 5947 len--; |
5944 } | 5948 } |