Mercurial > hg > xemacs-beta
comparison src/regex.c @ 183:e121b013d1f0 r20-3b18
Import from CVS: tag r20-3b18
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:54:23 +0200 |
parents | 9ad43877534d |
children | 3d6bfa290dbd |
comparison
equal
deleted
inserted
replaced
182:f07455f06202 | 183:e121b013d1f0 |
---|---|
162 | 162 |
163 /* Define the syntax stuff for \<, \>, etc. */ | 163 /* Define the syntax stuff for \<, \>, etc. */ |
164 | 164 |
165 /* This must be nonzero for the wordchar and notwordchar pattern | 165 /* This must be nonzero for the wordchar and notwordchar pattern |
166 commands in re_match_2. */ | 166 commands in re_match_2. */ |
167 #ifndef Sword | 167 #ifndef Sword |
168 #define Sword 1 | 168 #define Sword 1 |
169 #endif | 169 #endif |
170 | 170 |
171 #ifdef SYNTAX_TABLE | 171 #ifdef SYNTAX_TABLE |
172 | 172 |
290 | 290 |
291 /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we | 291 /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we |
292 use `alloca' instead of `malloc'. This is because using malloc in | 292 use `alloca' instead of `malloc'. This is because using malloc in |
293 re_search* or re_match* could cause memory leaks when C-g is used in | 293 re_search* or re_match* could cause memory leaks when C-g is used in |
294 Emacs; also, malloc is slower and causes storage fragmentation. On | 294 Emacs; also, malloc is slower and causes storage fragmentation. On |
295 the other hand, malloc is more portable, and easier to debug. | 295 the other hand, malloc is more portable, and easier to debug. |
296 | 296 |
297 Because we sometimes use alloca, some routines have to be macros, | 297 Because we sometimes use alloca, some routines have to be macros, |
298 not functions -- `alloca'-allocated space disappears at the end of the | 298 not functions -- `alloca'-allocated space disappears at the end of the |
299 function it is called in. */ | 299 function it is called in. */ |
300 | 300 |
301 #ifdef REGEX_MALLOC | 301 #ifdef REGEX_MALLOC |
317 #include <alloca.h> | 317 #include <alloca.h> |
318 #else /* not __GNUC__ or HAVE_ALLOCA_H */ | 318 #else /* not __GNUC__ or HAVE_ALLOCA_H */ |
319 #ifndef _AIX /* Already did AIX, up at the top. */ | 319 #ifndef _AIX /* Already did AIX, up at the top. */ |
320 char *alloca (); | 320 char *alloca (); |
321 #endif /* not _AIX */ | 321 #endif /* not _AIX */ |
322 #endif /* not HAVE_ALLOCA_H */ | 322 #endif /* not HAVE_ALLOCA_H */ |
323 #endif /* not __GNUC__ */ | 323 #endif /* not __GNUC__ */ |
324 | 324 |
325 #endif /* not alloca */ | 325 #endif /* not alloca */ |
326 | 326 |
327 #define REGEX_ALLOCATE alloca | 327 #define REGEX_ALLOCATE alloca |
457 of string to be matched (if not). */ | 457 of string to be matched (if not). */ |
458 begbuf, | 458 begbuf, |
459 | 459 |
460 /* Analogously, for end of buffer/string. */ | 460 /* Analogously, for end of buffer/string. */ |
461 endbuf, | 461 endbuf, |
462 | 462 |
463 /* Followed by two byte relative address to which to jump. */ | 463 /* Followed by two byte relative address to which to jump. */ |
464 jump, | 464 jump, |
465 | 465 |
466 /* Same as jump, but marks the end of an alternative. */ | 466 /* Same as jump, but marks the end of an alternative. */ |
467 jump_past_alt, | 467 jump_past_alt, |
468 | 468 |
469 /* Followed by two-byte relative address of place to resume at | 469 /* Followed by two-byte relative address of place to resume at |
470 in case of failure. */ | 470 in case of failure. */ |
471 on_failure_jump, | 471 on_failure_jump, |
472 | 472 |
473 /* Like on_failure_jump, but pushes a placeholder instead of the | 473 /* Like on_failure_jump, but pushes a placeholder instead of the |
474 current string position when executed. */ | 474 current string position when executed. */ |
475 on_failure_keep_string_jump, | 475 on_failure_keep_string_jump, |
476 | 476 |
477 /* Throw away latest failure point and then jump to following | 477 /* Throw away latest failure point and then jump to following |
478 two-byte relative address. */ | 478 two-byte relative address. */ |
479 pop_failure_jump, | 479 pop_failure_jump, |
480 | 480 |
481 /* Change to pop_failure_jump if know won't have to backtrack to | 481 /* Change to pop_failure_jump if know won't have to backtrack to |
549 /* 97/2/17 jhod: The following two were merged back in from the Mule | 549 /* 97/2/17 jhod: The following two were merged back in from the Mule |
550 2.3 code to enable some language specific processing */ | 550 2.3 code to enable some language specific processing */ |
551 ,categoryspec, /* Matches entries in the character category tables */ | 551 ,categoryspec, /* Matches entries in the character category tables */ |
552 notcategoryspec /* The opposite of the above */ | 552 notcategoryspec /* The opposite of the above */ |
553 #endif | 553 #endif |
554 | 554 |
555 } re_opcode_t; | 555 } re_opcode_t; |
556 | 556 |
557 /* Common operations on the compiled pattern. */ | 557 /* Common operations on the compiled pattern. */ |
558 | 558 |
559 /* Store NUMBER in two contiguous bytes starting at DESTINATION. */ | 559 /* Store NUMBER in two contiguous bytes starting at DESTINATION. */ |
585 | 585 |
586 #ifdef DEBUG | 586 #ifdef DEBUG |
587 static void | 587 static void |
588 extract_number (int *dest, unsigned char *source) | 588 extract_number (int *dest, unsigned char *source) |
589 { | 589 { |
590 int temp = SIGN_EXTEND_CHAR (*(source + 1)); | 590 int temp = SIGN_EXTEND_CHAR (*(source + 1)); |
591 *dest = *source & 0377; | 591 *dest = *source & 0377; |
592 *dest += temp << 8; | 592 *dest += temp << 8; |
593 } | 593 } |
594 | 594 |
595 #ifndef EXTRACT_MACROS /* To debug the macros. */ | 595 #ifndef EXTRACT_MACROS /* To debug the macros. */ |
609 } while (0) | 609 } while (0) |
610 | 610 |
611 #ifdef DEBUG | 611 #ifdef DEBUG |
612 static void | 612 static void |
613 extract_number_and_incr (int *destination, unsigned char **source) | 613 extract_number_and_incr (int *destination, unsigned char **source) |
614 { | 614 { |
615 extract_number (destination, *source); | 615 extract_number (destination, *source); |
616 *source += 2; | 616 *source += 2; |
617 } | 617 } |
618 | 618 |
619 #ifndef EXTRACT_MACROS | 619 #ifndef EXTRACT_MACROS |
658 | 658 |
659 static void | 659 static void |
660 print_fastmap (char *fastmap) | 660 print_fastmap (char *fastmap) |
661 { | 661 { |
662 unsigned was_a_range = 0; | 662 unsigned was_a_range = 0; |
663 unsigned i = 0; | 663 unsigned i = 0; |
664 | 664 |
665 while (i < (1 << BYTEWIDTH)) | 665 while (i < (1 << BYTEWIDTH)) |
666 { | 666 { |
667 if (fastmap[i++]) | 667 if (fastmap[i++]) |
668 { | 668 { |
669 was_a_range = 0; | 669 was_a_range = 0; |
678 printf ("-"); | 678 printf ("-"); |
679 putchar (i - 1); | 679 putchar (i - 1); |
680 } | 680 } |
681 } | 681 } |
682 } | 682 } |
683 putchar ('\n'); | 683 putchar ('\n'); |
684 } | 684 } |
685 | 685 |
686 | 686 |
687 /* Print a compiled pattern string in human-readable form, starting at | 687 /* Print a compiled pattern string in human-readable form, starting at |
688 the START pointer into it and ending just before the pointer END. */ | 688 the START pointer into it and ending just before the pointer END. */ |
697 if (start == NULL) | 697 if (start == NULL) |
698 { | 698 { |
699 printf ("(null)\n"); | 699 printf ("(null)\n"); |
700 return; | 700 return; |
701 } | 701 } |
702 | 702 |
703 /* Loop over pattern commands. */ | 703 /* Loop over pattern commands. */ |
704 while (p < pend) | 704 while (p < pend) |
705 { | 705 { |
706 printf ("%d:\t", p - start); | 706 printf ("%d:\t", p - start); |
707 | 707 |
746 register int c, last = -100; | 746 register int c, last = -100; |
747 register int in_range = 0; | 747 register int in_range = 0; |
748 | 748 |
749 printf ("/charset [%s", | 749 printf ("/charset [%s", |
750 (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); | 750 (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); |
751 | 751 |
752 assert (p + *p < pend); | 752 assert (p + *p < pend); |
753 | 753 |
754 for (c = 0; c < 256; c++) | 754 for (c = 0; c < 256; c++) |
755 if (((unsigned char) (c / 8) < *p) | 755 if (((unsigned char) (c / 8) < *p) |
756 && (p[1 + (c/8)] & (1 << (c % 8)))) | 756 && (p[1 + (c/8)] & (1 << (c % 8)))) |
765 else if (last + 1 != c && in_range) | 765 else if (last + 1 != c && in_range) |
766 { | 766 { |
767 putchar (last); | 767 putchar (last); |
768 in_range = 0; | 768 in_range = 0; |
769 } | 769 } |
770 | 770 |
771 if (! in_range) | 771 if (! in_range) |
772 putchar (c); | 772 putchar (c); |
773 | 773 |
774 last = c; | 774 last = c; |
775 } | 775 } |
842 break; | 842 break; |
843 | 843 |
844 case push_dummy_failure: | 844 case push_dummy_failure: |
845 printf ("/push_dummy_failure"); | 845 printf ("/push_dummy_failure"); |
846 break; | 846 break; |
847 | 847 |
848 case maybe_pop_jump: | 848 case maybe_pop_jump: |
849 extract_number_and_incr (&mcnt, &p); | 849 extract_number_and_incr (&mcnt, &p); |
850 printf ("/maybe_pop_jump to %d", p + mcnt - start); | 850 printf ("/maybe_pop_jump to %d", p + mcnt - start); |
851 break; | 851 break; |
852 | 852 |
853 case pop_failure_jump: | 853 case pop_failure_jump: |
854 extract_number_and_incr (&mcnt, &p); | 854 extract_number_and_incr (&mcnt, &p); |
855 printf ("/pop_failure_jump to %d", p + mcnt - start); | 855 printf ("/pop_failure_jump to %d", p + mcnt - start); |
856 break; | 856 break; |
857 | 857 |
858 case jump_past_alt: | 858 case jump_past_alt: |
859 extract_number_and_incr (&mcnt, &p); | 859 extract_number_and_incr (&mcnt, &p); |
860 printf ("/jump_past_alt to %d", p + mcnt - start); | 860 printf ("/jump_past_alt to %d", p + mcnt - start); |
861 break; | 861 break; |
862 | 862 |
863 case jump: | 863 case jump: |
864 extract_number_and_incr (&mcnt, &p); | 864 extract_number_and_incr (&mcnt, &p); |
865 printf ("/jump to %d", p + mcnt - start); | 865 printf ("/jump to %d", p + mcnt - start); |
866 break; | 866 break; |
867 | 867 |
868 case succeed_n: | 868 case succeed_n: |
869 extract_number_and_incr (&mcnt, &p); | 869 extract_number_and_incr (&mcnt, &p); |
870 extract_number_and_incr (&mcnt2, &p); | 870 extract_number_and_incr (&mcnt2, &p); |
871 printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2); | 871 printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2); |
872 break; | 872 break; |
873 | 873 |
874 case jump_n: | 874 case jump_n: |
875 extract_number_and_incr (&mcnt, &p); | 875 extract_number_and_incr (&mcnt, &p); |
876 extract_number_and_incr (&mcnt2, &p); | 876 extract_number_and_incr (&mcnt2, &p); |
877 printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2); | 877 printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2); |
878 break; | 878 break; |
879 | 879 |
880 case set_number_at: | 880 case set_number_at: |
881 extract_number_and_incr (&mcnt, &p); | 881 extract_number_and_incr (&mcnt, &p); |
882 extract_number_and_incr (&mcnt2, &p); | 882 extract_number_and_incr (&mcnt2, &p); |
883 printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2); | 883 printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2); |
884 break; | 884 break; |
885 | 885 |
886 case wordbound: | 886 case wordbound: |
887 printf ("/wordbound"); | 887 printf ("/wordbound"); |
888 break; | 888 break; |
889 | 889 |
890 case notwordbound: | 890 case notwordbound: |
892 break; | 892 break; |
893 | 893 |
894 case wordbeg: | 894 case wordbeg: |
895 printf ("/wordbeg"); | 895 printf ("/wordbeg"); |
896 break; | 896 break; |
897 | 897 |
898 case wordend: | 898 case wordend: |
899 printf ("/wordend"); | 899 printf ("/wordend"); |
900 | 900 |
901 #ifdef emacs | 901 #ifdef emacs |
902 case before_dot: | 902 case before_dot: |
903 printf ("/before_dot"); | 903 printf ("/before_dot"); |
904 break; | 904 break; |
905 | 905 |
914 case syntaxspec: | 914 case syntaxspec: |
915 printf ("/syntaxspec"); | 915 printf ("/syntaxspec"); |
916 mcnt = *p++; | 916 mcnt = *p++; |
917 printf ("/%d", mcnt); | 917 printf ("/%d", mcnt); |
918 break; | 918 break; |
919 | 919 |
920 case notsyntaxspec: | 920 case notsyntaxspec: |
921 printf ("/notsyntaxspec"); | 921 printf ("/notsyntaxspec"); |
922 mcnt = *p++; | 922 mcnt = *p++; |
923 printf ("/%d", mcnt); | 923 printf ("/%d", mcnt); |
924 break; | 924 break; |
925 | 925 |
926 #ifdef MULE | 926 #ifdef MULE |
927 /* 97/2/17 jhod Mule category patch */ | 927 /* 97/2/17 jhod Mule category patch */ |
928 case categoryspec: | 928 case categoryspec: |
929 printf ("/categoryspec"); | 929 printf ("/categoryspec"); |
930 mcnt = *p++; | 930 mcnt = *p++; |
931 printf ("/%d", mcnt); | 931 printf ("/%d", mcnt); |
932 break; | 932 break; |
934 case notcategoryspec: | 934 case notcategoryspec: |
935 printf ("/notcategoryspec"); | 935 printf ("/notcategoryspec"); |
936 mcnt = *p++; | 936 mcnt = *p++; |
937 printf ("/%d", mcnt); | 937 printf ("/%d", mcnt); |
938 break; | 938 break; |
939 /* end of category patch */ | 939 /* end of category patch */ |
940 #endif /* MULE */ | 940 #endif /* MULE */ |
941 #endif /* emacs */ | 941 #endif /* emacs */ |
942 | 942 |
943 case wordchar: | 943 case wordchar: |
944 printf ("/wordchar"); | 944 printf ("/wordchar"); |
945 break; | 945 break; |
946 | 946 |
947 case notwordchar: | 947 case notwordchar: |
948 printf ("/notwordchar"); | 948 printf ("/notwordchar"); |
949 break; | 949 break; |
950 | 950 |
951 case begbuf: | 951 case begbuf: |
998 static void | 998 static void |
999 print_double_string (CONST char *where, CONST char *string1, int size1, | 999 print_double_string (CONST char *where, CONST char *string1, int size1, |
1000 CONST char *string2, int size2) | 1000 CONST char *string2, int size2) |
1001 { | 1001 { |
1002 unsigned this_char; | 1002 unsigned this_char; |
1003 | 1003 |
1004 if (where == NULL) | 1004 if (where == NULL) |
1005 printf ("(null)"); | 1005 printf ("(null)"); |
1006 else | 1006 else |
1007 { | 1007 { |
1008 if (FIRST_STRING_P (where)) | 1008 if (FIRST_STRING_P (where)) |
1009 { | 1009 { |
1010 for (this_char = where - string1; this_char < size1; this_char++) | 1010 for (this_char = where - string1; this_char < size1; this_char++) |
1011 putchar (string1[this_char]); | 1011 putchar (string1[this_char]); |
1012 | 1012 |
1013 where = string2; | 1013 where = string2; |
1014 } | 1014 } |
1015 | 1015 |
1016 for (this_char = where - string2; this_char < size2; this_char++) | 1016 for (this_char = where - string2; this_char < size2; this_char++) |
1017 putchar (string2[this_char]); | 1017 putchar (string2[this_char]); |
1018 } | 1018 } |
1050 | 1050 |
1051 reg_syntax_t | 1051 reg_syntax_t |
1052 re_set_syntax (reg_syntax_t syntax) | 1052 re_set_syntax (reg_syntax_t syntax) |
1053 { | 1053 { |
1054 reg_syntax_t ret = re_syntax_options; | 1054 reg_syntax_t ret = re_syntax_options; |
1055 | 1055 |
1056 re_syntax_options = syntax; | 1056 re_syntax_options = syntax; |
1057 return ret; | 1057 return ret; |
1058 } | 1058 } |
1059 | 1059 |
1060 /* This table gives an error message for each of the error codes listed | 1060 /* This table gives an error message for each of the error codes listed |
1097 using the relocating allocator routines, then malloc could cause a | 1097 using the relocating allocator routines, then malloc could cause a |
1098 relocation, which might (if the strings being searched are in the | 1098 relocation, which might (if the strings being searched are in the |
1099 ralloc heap) shift the data out from underneath the regexp | 1099 ralloc heap) shift the data out from underneath the regexp |
1100 routines. | 1100 routines. |
1101 | 1101 |
1102 Here's another reason to avoid allocation: Emacs | 1102 Here's another reason to avoid allocation: Emacs |
1103 processes input from X in a signal handler; processing X input may | 1103 processes input from X in a signal handler; processing X input may |
1104 call malloc; if input arrives while a matching routine is calling | 1104 call malloc; if input arrives while a matching routine is calling |
1105 malloc, then we're scrod. But Emacs can't just block input while | 1105 malloc, then we're scrod. But Emacs can't just block input while |
1106 calling matching routines; then we don't notice interrupts when | 1106 calling matching routines; then we don't notice interrupts when |
1107 they come in. So, Emacs blocks input around all regexp calls | 1107 they come in. So, Emacs blocks input around all regexp calls |
1128 | 1128 |
1129 | 1129 |
1130 /* Failure stack declarations and macros; both re_compile_fastmap and | 1130 /* Failure stack declarations and macros; both re_compile_fastmap and |
1131 re_match_2 use a failure stack. These have to be macros because of | 1131 re_match_2 use a failure stack. These have to be macros because of |
1132 REGEX_ALLOCATE_STACK. */ | 1132 REGEX_ALLOCATE_STACK. */ |
1133 | 1133 |
1134 | 1134 |
1135 /* Number of failure points for which to initially allocate space | 1135 /* Number of failure points for which to initially allocate space |
1136 when matching. If this number is exceeded, we allocate more | 1136 when matching. If this number is exceeded, we allocate more |
1137 space, so it is not a hard limit. */ | 1137 space, so it is not a hard limit. */ |
1138 #ifndef INIT_FAILURE_ALLOC | 1138 #ifndef INIT_FAILURE_ALLOC |
1199 | 1199 |
1200 | 1200 |
1201 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. | 1201 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. |
1202 | 1202 |
1203 Return 1 if succeeds, and 0 if either ran out of memory | 1203 Return 1 if succeeds, and 0 if either ran out of memory |
1204 allocating space for it or it was already too large. | 1204 allocating space for it or it was already too large. |
1205 | 1205 |
1206 REGEX_REALLOCATE_STACK requires `destination' be declared. */ | 1206 REGEX_REALLOCATE_STACK requires `destination' be declared. */ |
1207 | 1207 |
1208 #define DOUBLE_FAIL_STACK(fail_stack) \ | 1208 #define DOUBLE_FAIL_STACK(fail_stack) \ |
1209 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ | 1209 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ |
1210 ? 0 \ | 1210 ? 0 \ |
1217 ? 0 \ | 1217 ? 0 \ |
1218 : ((fail_stack).size <<= 1, \ | 1218 : ((fail_stack).size <<= 1, \ |
1219 1))) | 1219 1))) |
1220 | 1220 |
1221 | 1221 |
1222 /* Push pointer POINTER on FAIL_STACK. | 1222 /* Push pointer POINTER on FAIL_STACK. |
1223 Return 1 if was able to do so and 0 if ran out of memory allocating | 1223 Return 1 if was able to do so and 0 if ran out of memory allocating |
1224 space to do so. */ | 1224 space to do so. */ |
1225 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ | 1225 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ |
1226 ((FAIL_STACK_FULL () \ | 1226 ((FAIL_STACK_FULL () \ |
1227 && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ | 1227 && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ |
1262 #define DEBUG_POP(item_addr) | 1262 #define DEBUG_POP(item_addr) |
1263 #endif | 1263 #endif |
1264 | 1264 |
1265 | 1265 |
1266 /* Push the information about the state we will need | 1266 /* Push the information about the state we will need |
1267 if we ever fail back to it. | 1267 if we ever fail back to it. |
1268 | 1268 |
1269 Requires variables fail_stack, regstart, regend, reg_info, and | 1269 Requires variables fail_stack, regstart, regend, reg_info, and |
1270 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be | 1270 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be |
1271 declared. | 1271 declared. |
1272 | 1272 |
1273 Does `return FAILURE_CODE' if runs out of memory. */ | 1273 Does `return FAILURE_CODE' if runs out of memory. */ |
1274 | 1274 |
1275 #if !defined (REGEX_MALLOC) && !defined (REL_ALLOC) | 1275 #if !defined (REGEX_MALLOC) && !defined (REL_ALLOC) |
1276 #define DECLARE_DESTINATION char *destination; | 1276 #define DECLARE_DESTINATION char *destination; |
1277 #else | 1277 #else |
1385 STR -- the saved data position. | 1385 STR -- the saved data position. |
1386 PAT -- the saved pattern position. | 1386 PAT -- the saved pattern position. |
1387 LOW_REG, HIGH_REG -- the lowest and highest active registers. | 1387 LOW_REG, HIGH_REG -- the lowest and highest active registers. |
1388 REGSTART, REGEND -- arrays of string positions. | 1388 REGSTART, REGEND -- arrays of string positions. |
1389 REG_INFO -- array of information about each subexpression. | 1389 REG_INFO -- array of information about each subexpression. |
1390 | 1390 |
1391 Also assumes the variables `fail_stack' and (if debugging), `bufp', | 1391 Also assumes the variables `fail_stack' and (if debugging), `bufp', |
1392 `pend', `string1', `size1', `string2', and `size2'. */ | 1392 `pend', `string1', `size1', `string2', and `size2'. */ |
1393 | 1393 |
1394 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ | 1394 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ |
1395 { \ | 1395 { \ |
1455 | 1455 |
1456 /* Structure for per-register (a.k.a. per-group) information. | 1456 /* Structure for per-register (a.k.a. per-group) information. |
1457 Other register information, such as the | 1457 Other register information, such as the |
1458 starting and ending positions (which are addresses), and the list of | 1458 starting and ending positions (which are addresses), and the list of |
1459 inner groups (which is a bits list) are maintained in separate | 1459 inner groups (which is a bits list) are maintained in separate |
1460 variables. | 1460 variables. |
1461 | 1461 |
1462 We are making a (strictly speaking) nonportable assumption here: that | 1462 We are making a (strictly speaking) nonportable assumption here: that |
1463 the compiler will pack our bit fields into something that fits into | 1463 the compiler will pack our bit fields into something that fits into |
1464 the type of `word', i.e., is something that fits into one item on the | 1464 the type of `word', i.e., is something that fits into one item on the |
1465 failure stack. */ | 1465 failure stack. */ |
1466 | 1466 |
1510 #define REG_UNSET_VALUE (&reg_unset_dummy) | 1510 #define REG_UNSET_VALUE (&reg_unset_dummy) |
1511 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE) | 1511 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE) |
1512 | 1512 |
1513 /* Subroutine declarations and macros for regex_compile. */ | 1513 /* Subroutine declarations and macros for regex_compile. */ |
1514 | 1514 |
1515 /* Fetch the next character in the uncompiled pattern---translating it | 1515 /* Fetch the next character in the uncompiled pattern---translating it |
1516 if necessary. Also cast from a signed character in the constant | 1516 if necessary. Also cast from a signed character in the constant |
1517 string passed to us by the user to an unsigned char that we can use | 1517 string passed to us by the user to an unsigned char that we can use |
1518 as an array index (in, e.g., `translate'). */ | 1518 as an array index (in, e.g., `translate'). */ |
1519 #define PATFETCH(c) \ | 1519 #define PATFETCH(c) \ |
1520 do {if (p == pend) return REG_EEND; \ | 1520 do {if (p == pend) return REG_EEND; \ |
1709 typedef struct | 1709 typedef struct |
1710 { | 1710 { |
1711 pattern_offset_t begalt_offset; | 1711 pattern_offset_t begalt_offset; |
1712 pattern_offset_t fixup_alt_jump; | 1712 pattern_offset_t fixup_alt_jump; |
1713 pattern_offset_t inner_group_offset; | 1713 pattern_offset_t inner_group_offset; |
1714 pattern_offset_t laststart_offset; | 1714 pattern_offset_t laststart_offset; |
1715 regnum_t regnum; | 1715 regnum_t regnum; |
1716 } compile_stack_elt_t; | 1716 } compile_stack_elt_t; |
1717 | 1717 |
1718 | 1718 |
1719 typedef struct | 1719 typedef struct |
1772 if (p == pend) \ | 1772 if (p == pend) \ |
1773 break; \ | 1773 break; \ |
1774 PATFETCH (c); \ | 1774 PATFETCH (c); \ |
1775 } \ | 1775 } \ |
1776 } \ | 1776 } \ |
1777 } | 1777 } |
1778 | 1778 |
1779 #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ | 1779 #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ |
1780 | 1780 |
1781 #define IS_CHAR_CLASS(string) \ | 1781 #define IS_CHAR_CLASS(string) \ |
1782 (STREQ (string, "alpha") || STREQ (string, "upper") \ | 1782 (STREQ (string, "alpha") || STREQ (string, "upper") \ |
1839 static int regs_allocated_size; | 1839 static int regs_allocated_size; |
1840 | 1840 |
1841 static CONST char ** regstart, ** regend; | 1841 static CONST char ** regstart, ** regend; |
1842 static CONST char ** old_regstart, ** old_regend; | 1842 static CONST char ** old_regstart, ** old_regend; |
1843 static CONST char **best_regstart, **best_regend; | 1843 static CONST char **best_regstart, **best_regend; |
1844 static register_info_type *reg_info; | 1844 static register_info_type *reg_info; |
1845 static CONST char **reg_dummy; | 1845 static CONST char **reg_dummy; |
1846 static register_info_type *reg_info_dummy; | 1846 static register_info_type *reg_info_dummy; |
1847 | 1847 |
1848 /* Make the register vectors big enough for NUM_REGS registers, | 1848 /* Make the register vectors big enough for NUM_REGS registers, |
1849 but don't make them smaller. */ | 1849 but don't make them smaller. */ |
1881 `syntax' is set to SYNTAX; | 1881 `syntax' is set to SYNTAX; |
1882 `used' is set to the length of the compiled pattern; | 1882 `used' is set to the length of the compiled pattern; |
1883 `fastmap_accurate' is zero; | 1883 `fastmap_accurate' is zero; |
1884 `re_nsub' is the number of subexpressions in PATTERN; | 1884 `re_nsub' is the number of subexpressions in PATTERN; |
1885 `not_bol' and `not_eol' are zero; | 1885 `not_bol' and `not_eol' are zero; |
1886 | 1886 |
1887 The `fastmap' and `newline_anchor' fields are neither | 1887 The `fastmap' and `newline_anchor' fields are neither |
1888 examined nor set. */ | 1888 examined nor set. */ |
1889 | 1889 |
1890 /* Return, freeing storage we allocated. */ | 1890 /* Return, freeing storage we allocated. */ |
1891 #define FREE_STACK_RETURN(value) \ | 1891 #define FREE_STACK_RETURN(value) \ |
1900 array indices. The macros that fetch a character from the pattern | 1900 array indices. The macros that fetch a character from the pattern |
1901 make sure to coerce to unsigned char before assigning, so we won't | 1901 make sure to coerce to unsigned char before assigning, so we won't |
1902 get bitten by negative numbers here. */ | 1902 get bitten by negative numbers here. */ |
1903 /* XEmacs change: used to be unsigned char. */ | 1903 /* XEmacs change: used to be unsigned char. */ |
1904 register EMACS_INT c, c1; | 1904 register EMACS_INT c, c1; |
1905 | 1905 |
1906 /* A random temporary spot in PATTERN. */ | 1906 /* A random temporary spot in PATTERN. */ |
1907 CONST char *p1; | 1907 CONST char *p1; |
1908 | 1908 |
1909 /* Points to the end of the buffer, where we should append. */ | 1909 /* Points to the end of the buffer, where we should append. */ |
1910 register unsigned char *b; | 1910 register unsigned char *b; |
1911 | 1911 |
1912 /* Keeps track of unclosed groups. */ | 1912 /* Keeps track of unclosed groups. */ |
1913 compile_stack_type compile_stack; | 1913 compile_stack_type compile_stack; |
1914 | 1914 |
1915 /* Points to the current (ending) position in the pattern. */ | 1915 /* Points to the current (ending) position in the pattern. */ |
1916 CONST char *p = pattern; | 1916 CONST char *p = pattern; |
1917 CONST char *pend = pattern + size; | 1917 CONST char *pend = pattern + size; |
1918 | 1918 |
1919 /* How to translate the characters in the pattern. */ | 1919 /* How to translate the characters in the pattern. */ |
1920 char *translate = bufp->translate; | 1920 char *translate = bufp->translate; |
1921 | 1921 |
1922 /* Address of the count-byte of the most recently inserted `exactn' | 1922 /* Address of the count-byte of the most recently inserted `exactn' |
1923 command. This makes it possible to tell if a new exact-match | 1923 command. This makes it possible to tell if a new exact-match |
1934 unsigned char *begalt; | 1934 unsigned char *begalt; |
1935 | 1935 |
1936 /* Place in the uncompiled pattern (i.e., the {) to | 1936 /* Place in the uncompiled pattern (i.e., the {) to |
1937 which to go back if the interval is invalid. */ | 1937 which to go back if the interval is invalid. */ |
1938 CONST char *beg_interval; | 1938 CONST char *beg_interval; |
1939 | 1939 |
1940 /* Address of the place where a forward jump should go to the end of | 1940 /* Address of the place where a forward jump should go to the end of |
1941 the containing expression. Each alternative of an `or' -- except the | 1941 the containing expression. Each alternative of an `or' -- except the |
1942 last -- ends with a forward jump of this sort. */ | 1942 last -- ends with a forward jump of this sort. */ |
1943 unsigned char *fixup_alt_jump = 0; | 1943 unsigned char *fixup_alt_jump = 0; |
1944 | 1944 |
1950 #ifdef DEBUG | 1950 #ifdef DEBUG |
1951 DEBUG_PRINT1 ("\nCompiling pattern: "); | 1951 DEBUG_PRINT1 ("\nCompiling pattern: "); |
1952 if (debug) | 1952 if (debug) |
1953 { | 1953 { |
1954 unsigned debug_count; | 1954 unsigned debug_count; |
1955 | 1955 |
1956 for (debug_count = 0; debug_count < size; debug_count++) | 1956 for (debug_count = 0; debug_count < size; debug_count++) |
1957 putchar (pattern[debug_count]); | 1957 putchar (pattern[debug_count]); |
1958 putchar ('\n'); | 1958 putchar ('\n'); |
1959 } | 1959 } |
1960 #endif /* DEBUG */ | 1960 #endif /* DEBUG */ |
1974 | 1974 |
1975 /* Set `used' to zero, so that if we return an error, the pattern | 1975 /* Set `used' to zero, so that if we return an error, the pattern |
1976 printer (for debugging) will think there's no pattern. We reset it | 1976 printer (for debugging) will think there's no pattern. We reset it |
1977 at the end. */ | 1977 at the end. */ |
1978 bufp->used = 0; | 1978 bufp->used = 0; |
1979 | 1979 |
1980 /* Always count groups, whether or not bufp->no_sub is set. */ | 1980 /* Always count groups, whether or not bufp->no_sub is set. */ |
1981 bufp->re_nsub = 0; | 1981 bufp->re_nsub = 0; |
1982 | 1982 |
1983 #if !defined (emacs) && !defined (SYNTAX_TABLE) | 1983 #if !defined (emacs) && !defined (SYNTAX_TABLE) |
1984 /* Initialize the syntax table. */ | 1984 /* Initialize the syntax table. */ |
1985 init_syntax_once (); | 1985 init_syntax_once (); |
1986 #endif | 1986 #endif |
2027 | 2027 |
2028 | 2028 |
2029 case '$': | 2029 case '$': |
2030 { | 2030 { |
2031 if ( /* If at end of pattern, it's an operator. */ | 2031 if ( /* If at end of pattern, it's an operator. */ |
2032 p == pend | 2032 p == pend |
2033 /* If context independent, it's an operator. */ | 2033 /* If context independent, it's an operator. */ |
2034 || syntax & RE_CONTEXT_INDEP_ANCHORS | 2034 || syntax & RE_CONTEXT_INDEP_ANCHORS |
2035 /* Otherwise, depends on what's next. */ | 2035 /* Otherwise, depends on what's next. */ |
2036 || at_endline_loc_p (p, pend, syntax)) | 2036 || at_endline_loc_p (p, pend, syntax)) |
2037 BUF_PUSH (endline); | 2037 BUF_PUSH (endline); |
2058 } | 2058 } |
2059 | 2059 |
2060 { | 2060 { |
2061 /* Are we optimizing this jump? */ | 2061 /* Are we optimizing this jump? */ |
2062 boolean keep_string_p = false; | 2062 boolean keep_string_p = false; |
2063 | 2063 |
2064 /* 1 means zero (many) matches is allowed. */ | 2064 /* 1 means zero (many) matches is allowed. */ |
2065 char zero_times_ok = 0, many_times_ok = 0; | 2065 char zero_times_ok = 0, many_times_ok = 0; |
2066 | 2066 |
2067 /* If there is a sequence of repetition chars, collapse it | 2067 /* If there is a sequence of repetition chars, collapse it |
2068 down to just one (the right one). We can't combine | 2068 down to just one (the right one). We can't combine |
2106 /* If we get here, we found another repeat character. */ | 2106 /* If we get here, we found another repeat character. */ |
2107 } | 2107 } |
2108 | 2108 |
2109 /* Star, etc. applied to an empty pattern is equivalent | 2109 /* Star, etc. applied to an empty pattern is equivalent |
2110 to an empty pattern. */ | 2110 to an empty pattern. */ |
2111 if (!laststart) | 2111 if (!laststart) |
2112 break; | 2112 break; |
2113 | 2113 |
2114 /* Now we know whether or not zero matches is allowed | 2114 /* Now we know whether or not zero matches is allowed |
2115 and also whether or not two or more matches is allowed. */ | 2115 and also whether or not two or more matches is allowed. */ |
2116 if (many_times_ok) | 2116 if (many_times_ok) |
2117 { /* More than one repetition is allowed, so put in at the | 2117 { /* More than one repetition is allowed, so put in at the |
2118 end a backward relative jump from `b' to before the next | 2118 end a backward relative jump from `b' to before the next |
2119 jump we're going to put in below (which jumps from | 2119 jump we're going to put in below (which jumps from |
2120 laststart to after this jump). | 2120 laststart to after this jump). |
2121 | 2121 |
2122 But if we are at the `*' in the exact sequence `.*\n', | 2122 But if we are at the `*' in the exact sequence `.*\n', |
2123 insert an unconditional jump backwards to the ., | 2123 insert an unconditional jump backwards to the ., |
2124 instead of the beginning of the loop. This way we only | 2124 instead of the beginning of the loop. This way we only |
2125 push a failure point once, instead of every time | 2125 push a failure point once, instead of every time |
2197 | 2197 |
2198 laststart = b; | 2198 laststart = b; |
2199 | 2199 |
2200 /* We test `*p == '^' twice, instead of using an if | 2200 /* We test `*p == '^' twice, instead of using an if |
2201 statement, so we only need one BUF_PUSH. */ | 2201 statement, so we only need one BUF_PUSH. */ |
2202 BUF_PUSH (*p == '^' ? charset_not : charset); | 2202 BUF_PUSH (*p == '^' ? charset_not : charset); |
2203 if (*p == '^') | 2203 if (*p == '^') |
2204 p++; | 2204 p++; |
2205 | 2205 |
2206 /* Remember the first position in the bracket expression. */ | 2206 /* Remember the first position in the bracket expression. */ |
2207 p1 = p; | 2207 p1 = p; |
2289 | 2289 |
2290 /* Look ahead to see if it's a range when the last thing | 2290 /* Look ahead to see if it's a range when the last thing |
2291 was a character: if this is a hyphen not at the | 2291 was a character: if this is a hyphen not at the |
2292 beginning or the end of a list, then it's the range | 2292 beginning or the end of a list, then it's the range |
2293 operator. */ | 2293 operator. */ |
2294 if (c == '-' | 2294 if (c == '-' |
2295 && !(p - 2 >= pattern && p[-2] == '[') | 2295 && !(p - 2 >= pattern && p[-2] == '[') |
2296 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') | 2296 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') |
2297 && *p != ']') | 2297 && *p != ']') |
2298 { | 2298 { |
2299 reg_errcode_t ret; | 2299 reg_errcode_t ret; |
2300 | 2300 |
2363 str[c1++] = c; | 2363 str[c1++] = c; |
2364 } | 2364 } |
2365 str[c1] = '\0'; | 2365 str[c1] = '\0'; |
2366 | 2366 |
2367 /* If it isn't a word bracketed by `[:' and `:]': | 2367 /* If it isn't a word bracketed by `[:' and `:]': |
2368 undo the ending character, the letters, and leave | 2368 undo the ending character, the letters, and leave |
2369 the leading `:' and `[' (but set bits for them). */ | 2369 the leading `:' and `[' (but set bits for them). */ |
2370 if (c == ':' && *p == ']') | 2370 if (c == ':' && *p == ']') |
2371 { | 2371 { |
2372 int ch; | 2372 int ch; |
2373 boolean is_alnum = STREQ (str, "alnum"); | 2373 boolean is_alnum = STREQ (str, "alnum"); |
2380 boolean is_print = STREQ (str, "print"); | 2380 boolean is_print = STREQ (str, "print"); |
2381 boolean is_punct = STREQ (str, "punct"); | 2381 boolean is_punct = STREQ (str, "punct"); |
2382 boolean is_space = STREQ (str, "space"); | 2382 boolean is_space = STREQ (str, "space"); |
2383 boolean is_upper = STREQ (str, "upper"); | 2383 boolean is_upper = STREQ (str, "upper"); |
2384 boolean is_xdigit = STREQ (str, "xdigit"); | 2384 boolean is_xdigit = STREQ (str, "xdigit"); |
2385 | 2385 |
2386 if (!IS_CHAR_CLASS (str)) | 2386 if (!IS_CHAR_CLASS (str)) |
2387 FREE_STACK_RETURN (REG_ECTYPE); | 2387 FREE_STACK_RETURN (REG_ECTYPE); |
2388 | 2388 |
2389 /* Throw away the ] at the end of the character | 2389 /* Throw away the ] at the end of the character |
2390 class. */ | 2390 class. */ |
2391 PATFETCH (c); | 2391 PATFETCH (c); |
2392 | 2392 |
2393 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | 2393 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); |
2394 | 2394 |
2395 for (ch = 0; ch < 1 << BYTEWIDTH; ch++) | 2395 for (ch = 0; ch < 1 << BYTEWIDTH; ch++) |
2396 { | 2396 { |
2415 had_char_class = true; | 2415 had_char_class = true; |
2416 } | 2416 } |
2417 else | 2417 else |
2418 { | 2418 { |
2419 c1++; | 2419 c1++; |
2420 while (c1--) | 2420 while (c1--) |
2421 PATUNFETCH; | 2421 PATUNFETCH; |
2422 SET_EITHER_BIT ('['); | 2422 SET_EITHER_BIT ('['); |
2423 SET_EITHER_BIT (':'); | 2423 SET_EITHER_BIT (':'); |
2424 had_char_class = false; | 2424 had_char_class = false; |
2425 } | 2425 } |
2443 break; | 2443 break; |
2444 } | 2444 } |
2445 #endif /* MULE */ | 2445 #endif /* MULE */ |
2446 /* Discard any (non)matching list bytes that are all 0 at the | 2446 /* Discard any (non)matching list bytes that are all 0 at the |
2447 end of the map. Decrease the map-length byte too. */ | 2447 end of the map. Decrease the map-length byte too. */ |
2448 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) | 2448 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) |
2449 b[-1]--; | 2449 b[-1]--; |
2450 b += b[-1]; | 2450 b += b[-1]; |
2451 } | 2451 } |
2452 break; | 2452 break; |
2453 | 2453 |
2454 | 2454 |
2504 handle_open: | 2504 handle_open: |
2505 bufp->re_nsub++; | 2505 bufp->re_nsub++; |
2506 regnum++; | 2506 regnum++; |
2507 | 2507 |
2508 if (COMPILE_STACK_FULL) | 2508 if (COMPILE_STACK_FULL) |
2509 { | 2509 { |
2510 RETALLOC (compile_stack.stack, compile_stack.size << 1, | 2510 RETALLOC (compile_stack.stack, compile_stack.size << 1, |
2511 compile_stack_elt_t); | 2511 compile_stack_elt_t); |
2512 if (compile_stack.stack == NULL) return REG_ESPACE; | 2512 if (compile_stack.stack == NULL) return REG_ESPACE; |
2513 | 2513 |
2514 compile_stack.size <<= 1; | 2514 compile_stack.size <<= 1; |
2517 /* These are the values to restore when we hit end of this | 2517 /* These are the values to restore when we hit end of this |
2518 group. They are all relative offsets, so that if the | 2518 group. They are all relative offsets, so that if the |
2519 whole pattern moves because of realloc, they will still | 2519 whole pattern moves because of realloc, they will still |
2520 be valid. */ | 2520 be valid. */ |
2521 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; | 2521 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; |
2522 COMPILE_STACK_TOP.fixup_alt_jump | 2522 COMPILE_STACK_TOP.fixup_alt_jump |
2523 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; | 2523 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; |
2524 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; | 2524 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; |
2525 COMPILE_STACK_TOP.regnum = regnum; | 2525 COMPILE_STACK_TOP.regnum = regnum; |
2526 | 2526 |
2527 /* We will eventually replace the 0 with the number of | 2527 /* We will eventually replace the 0 with the number of |
2531 if (regnum <= MAX_REGNUM) | 2531 if (regnum <= MAX_REGNUM) |
2532 { | 2532 { |
2533 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; | 2533 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; |
2534 BUF_PUSH_3 (start_memory, regnum, 0); | 2534 BUF_PUSH_3 (start_memory, regnum, 0); |
2535 } | 2535 } |
2536 | 2536 |
2537 compile_stack.avail++; | 2537 compile_stack.avail++; |
2538 | 2538 |
2539 fixup_alt_jump = 0; | 2539 fixup_alt_jump = 0; |
2540 laststart = 0; | 2540 laststart = 0; |
2541 begalt = b; | 2541 begalt = b; |
2560 { /* Push a dummy failure point at the end of the | 2560 { /* Push a dummy failure point at the end of the |
2561 alternative for a possible future | 2561 alternative for a possible future |
2562 `pop_failure_jump' to pop. See comments at | 2562 `pop_failure_jump' to pop. See comments at |
2563 `push_dummy_failure' in `re_match_2'. */ | 2563 `push_dummy_failure' in `re_match_2'. */ |
2564 BUF_PUSH (push_dummy_failure); | 2564 BUF_PUSH (push_dummy_failure); |
2565 | 2565 |
2566 /* We allocated space for this jump when we assigned | 2566 /* We allocated space for this jump when we assigned |
2567 to `fixup_alt_jump', in the `handle_alt' case below. */ | 2567 to `fixup_alt_jump', in the `handle_alt' case below. */ |
2568 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); | 2568 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); |
2569 } | 2569 } |
2570 | 2570 |
2582 /* We don't just want to restore into `regnum', because | 2582 /* We don't just want to restore into `regnum', because |
2583 later groups should continue to be numbered higher, | 2583 later groups should continue to be numbered higher, |
2584 as in `(ab)c(de)' -- the second group is #2. */ | 2584 as in `(ab)c(de)' -- the second group is #2. */ |
2585 regnum_t this_group_regnum; | 2585 regnum_t this_group_regnum; |
2586 | 2586 |
2587 compile_stack.avail--; | 2587 compile_stack.avail--; |
2588 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; | 2588 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; |
2589 fixup_alt_jump | 2589 fixup_alt_jump |
2590 = COMPILE_STACK_TOP.fixup_alt_jump | 2590 = COMPILE_STACK_TOP.fixup_alt_jump |
2591 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 | 2591 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 |
2592 : 0; | 2592 : 0; |
2593 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; | 2593 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; |
2594 this_group_regnum = COMPILE_STACK_TOP.regnum; | 2594 this_group_regnum = COMPILE_STACK_TOP.regnum; |
2595 /* If we've reached MAX_REGNUM groups, then this open | 2595 /* If we've reached MAX_REGNUM groups, then this open |
2596 won't actually generate any code, so we'll have to | 2596 won't actually generate any code, so we'll have to |
2601 groups were inside this one. */ | 2601 groups were inside this one. */ |
2602 if (this_group_regnum <= MAX_REGNUM) | 2602 if (this_group_regnum <= MAX_REGNUM) |
2603 { | 2603 { |
2604 unsigned char *inner_group_loc | 2604 unsigned char *inner_group_loc |
2605 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; | 2605 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; |
2606 | 2606 |
2607 *inner_group_loc = regnum - this_group_regnum; | 2607 *inner_group_loc = regnum - this_group_regnum; |
2608 BUF_PUSH_3 (stop_memory, this_group_regnum, | 2608 BUF_PUSH_3 (stop_memory, this_group_regnum, |
2609 regnum - this_group_regnum); | 2609 regnum - this_group_regnum); |
2610 } | 2610 } |
2611 } | 2611 } |
2630 which gets executed if it gets matched. Adjust that | 2630 which gets executed if it gets matched. Adjust that |
2631 jump so it will jump to this alternative's analogous | 2631 jump so it will jump to this alternative's analogous |
2632 jump (put in below, which in turn will jump to the next | 2632 jump (put in below, which in turn will jump to the next |
2633 (if any) alternative's such jump, etc.). The last such | 2633 (if any) alternative's such jump, etc.). The last such |
2634 jump jumps to the correct final destination. A picture: | 2634 jump jumps to the correct final destination. A picture: |
2635 _____ _____ | 2635 _____ _____ |
2636 | | | | | 2636 | | | | |
2637 | v | v | 2637 | v | v |
2638 a | b | c | 2638 a | b | c |
2639 | 2639 |
2640 If we are at `b', then fixup_alt_jump right now points to a | 2640 If we are at `b', then fixup_alt_jump right now points to a |
2641 three-byte space after `a'. We'll put in the jump, set | 2641 three-byte space after `a'. We'll put in the jump, set |
2642 fixup_alt_jump to right after `b', and leave behind three | 2642 fixup_alt_jump to right after `b', and leave behind three |
2643 bytes which we'll fill in when we get to after `c'. */ | 2643 bytes which we'll fill in when we get to after `c'. */ |
2655 laststart = 0; | 2655 laststart = 0; |
2656 begalt = b; | 2656 begalt = b; |
2657 break; | 2657 break; |
2658 | 2658 |
2659 | 2659 |
2660 case '{': | 2660 case '{': |
2661 /* If \{ is a literal. */ | 2661 /* If \{ is a literal. */ |
2662 if (!(syntax & RE_INTERVALS) | 2662 if (!(syntax & RE_INTERVALS) |
2663 /* If we're at `\{' and it's not the open-interval | 2663 /* If we're at `\{' and it's not the open-interval |
2664 operator. */ | 2664 operator. */ |
2665 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) | 2665 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) |
2666 || (p - 2 == pattern && p == pend)) | 2666 || (p - 2 == pattern && p == pend)) |
2667 goto normal_backslash; | 2667 goto normal_backslash; |
2668 | 2668 |
2697 if (lower_bound < 0 || upper_bound > RE_DUP_MAX | 2697 if (lower_bound < 0 || upper_bound > RE_DUP_MAX |
2698 || lower_bound > upper_bound) | 2698 || lower_bound > upper_bound) |
2699 { | 2699 { |
2700 if (syntax & RE_NO_BK_BRACES) | 2700 if (syntax & RE_NO_BK_BRACES) |
2701 goto unfetch_interval; | 2701 goto unfetch_interval; |
2702 else | 2702 else |
2703 FREE_STACK_RETURN (REG_BADBR); | 2703 FREE_STACK_RETURN (REG_BADBR); |
2704 } | 2704 } |
2705 | 2705 |
2706 if (!(syntax & RE_NO_BK_BRACES)) | 2706 if (!(syntax & RE_NO_BK_BRACES)) |
2707 { | 2707 { |
2708 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); | 2708 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); |
2709 | 2709 |
2710 PATFETCH (c); | 2710 PATFETCH (c); |
2711 } | 2711 } |
2712 | 2712 |
2713 if (c != '}') | 2713 if (c != '}') |
2714 { | 2714 { |
2715 if (syntax & RE_NO_BK_BRACES) | 2715 if (syntax & RE_NO_BK_BRACES) |
2716 goto unfetch_interval; | 2716 goto unfetch_interval; |
2717 else | 2717 else |
2718 FREE_STACK_RETURN (REG_BADBR); | 2718 FREE_STACK_RETURN (REG_BADBR); |
2719 } | 2719 } |
2720 | 2720 |
2721 /* We just parsed a valid interval. */ | 2721 /* We just parsed a valid interval. */ |
2722 | 2722 |
2748 succeed_n <after jump addr> <succeed_n count> | 2748 succeed_n <after jump addr> <succeed_n count> |
2749 <body of loop> | 2749 <body of loop> |
2750 jump_n <succeed_n addr> <jump count> | 2750 jump_n <succeed_n addr> <jump count> |
2751 (The upper bound and `jump_n' are omitted if | 2751 (The upper bound and `jump_n' are omitted if |
2752 `upper_bound' is 1, though.) */ | 2752 `upper_bound' is 1, though.) */ |
2753 else | 2753 else |
2754 { /* If the upper bound is > 1, we need to insert | 2754 { /* If the upper bound is > 1, we need to insert |
2755 more at the end of the loop. */ | 2755 more at the end of the loop. */ |
2756 unsigned nbytes = 10 + (upper_bound > 1) * 10; | 2756 unsigned nbytes = 10 + (upper_bound > 1) * 10; |
2757 | 2757 |
2758 GET_BUFFER_SPACE (nbytes); | 2758 GET_BUFFER_SPACE (nbytes); |
2765 INSERT_JUMP2 (succeed_n, laststart, | 2765 INSERT_JUMP2 (succeed_n, laststart, |
2766 b + 5 + (upper_bound > 1) * 5, | 2766 b + 5 + (upper_bound > 1) * 5, |
2767 lower_bound); | 2767 lower_bound); |
2768 b += 5; | 2768 b += 5; |
2769 | 2769 |
2770 /* Code to initialize the lower bound. Insert | 2770 /* Code to initialize the lower bound. Insert |
2771 before the `succeed_n'. The `5' is the last two | 2771 before the `succeed_n'. The `5' is the last two |
2772 bytes of this `set_number_at', plus 3 bytes of | 2772 bytes of this `set_number_at', plus 3 bytes of |
2773 the following `succeed_n'. */ | 2773 the following `succeed_n'. */ |
2774 insert_op2 (set_number_at, laststart, 5, lower_bound, b); | 2774 insert_op2 (set_number_at, laststart, 5, lower_bound, b); |
2775 b += 5; | 2775 b += 5; |
2776 | 2776 |
2777 if (upper_bound > 1) | 2777 if (upper_bound > 1) |
2778 { /* More than one repetition is allowed, so | 2778 { /* More than one repetition is allowed, so |
2779 append a backward jump to the `succeed_n' | 2779 append a backward jump to the `succeed_n' |
2780 that starts this interval. | 2780 that starts this interval. |
2781 | 2781 |
2782 When we've reached this during matching, | 2782 When we've reached this during matching, |
2783 we'll have matched the interval once, so | 2783 we'll have matched the interval once, so |
2784 jump back only `upper_bound - 1' times. */ | 2784 jump back only `upper_bound - 1' times. */ |
2785 STORE_JUMP2 (jump_n, b, laststart + 5, | 2785 STORE_JUMP2 (jump_n, b, laststart + 5, |
2786 upper_bound - 1); | 2786 upper_bound - 1); |
2794 for the relative address. But we are | 2794 for the relative address. But we are |
2795 inserting into the middle of the pattern -- | 2795 inserting into the middle of the pattern -- |
2796 so everything is getting moved up by 5. | 2796 so everything is getting moved up by 5. |
2797 Conclusion: (b - 2) - (laststart + 3) + 5, | 2797 Conclusion: (b - 2) - (laststart + 3) + 5, |
2798 i.e., b - laststart. | 2798 i.e., b - laststart. |
2799 | 2799 |
2800 We insert this at the beginning of the loop | 2800 We insert this at the beginning of the loop |
2801 so that if we fail during matching, we'll | 2801 so that if we fail during matching, we'll |
2802 reinitialize the bounds. */ | 2802 reinitialize the bounds. */ |
2803 insert_op2 (set_number_at, laststart, b - laststart, | 2803 insert_op2 (set_number_at, laststart, b - laststart, |
2804 upper_bound - 1, b); | 2804 upper_bound - 1, b); |
2815 assert (beg_interval); | 2815 assert (beg_interval); |
2816 p = beg_interval; | 2816 p = beg_interval; |
2817 beg_interval = NULL; | 2817 beg_interval = NULL; |
2818 | 2818 |
2819 /* normal_char and normal_backslash need `c'. */ | 2819 /* normal_char and normal_backslash need `c'. */ |
2820 PATFETCH (c); | 2820 PATFETCH (c); |
2821 | 2821 |
2822 if (!(syntax & RE_NO_BK_BRACES)) | 2822 if (!(syntax & RE_NO_BK_BRACES)) |
2823 { | 2823 { |
2824 if (p > pattern && p[-1] == '\\') | 2824 if (p > pattern && p[-1] == '\\') |
2825 goto normal_backslash; | 2825 goto normal_backslash; |
2831 operators. rms says this is ok. --karl */ | 2831 operators. rms says this is ok. --karl */ |
2832 case '=': | 2832 case '=': |
2833 BUF_PUSH (at_dot); | 2833 BUF_PUSH (at_dot); |
2834 break; | 2834 break; |
2835 | 2835 |
2836 case 's': | 2836 case 's': |
2837 laststart = b; | 2837 laststart = b; |
2838 PATFETCH (c); | 2838 PATFETCH (c); |
2839 /* XEmacs addition */ | 2839 /* XEmacs addition */ |
2840 if (c >= 0x80 || syntax_spec_code[c] == 0377) | 2840 if (c >= 0x80 || syntax_spec_code[c] == 0377) |
2841 FREE_STACK_RETURN (REG_ESYNTAX); | 2841 FREE_STACK_RETURN (REG_ESYNTAX); |
2851 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); | 2851 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); |
2852 break; | 2852 break; |
2853 | 2853 |
2854 #ifdef MULE | 2854 #ifdef MULE |
2855 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ | 2855 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ |
2856 case 'c': | 2856 case 'c': |
2857 laststart = b; | 2857 laststart = b; |
2858 PATFETCH_RAW (c); | 2858 PATFETCH_RAW (c); |
2859 if (c < 32 || c > 127) | 2859 if (c < 32 || c > 127) |
2860 FREE_STACK_RETURN (REG_ECATEGORY); | 2860 FREE_STACK_RETURN (REG_ECATEGORY); |
2861 BUF_PUSH_2 (categoryspec, c); | 2861 BUF_PUSH_2 (categoryspec, c); |
2955 /* `q' points to the beginning of the next char. */ | 2955 /* `q' points to the beginning of the next char. */ |
2956 CONST char *q = p - 1; | 2956 CONST char *q = p - 1; |
2957 INC_CHARPTR (q); | 2957 INC_CHARPTR (q); |
2958 | 2958 |
2959 /* If no exactn currently being built. */ | 2959 /* If no exactn currently being built. */ |
2960 if (!pending_exact | 2960 if (!pending_exact |
2961 | 2961 |
2962 /* If last exactn not at current position. */ | 2962 /* If last exactn not at current position. */ |
2963 || pending_exact + *pending_exact + 1 != b | 2963 || pending_exact + *pending_exact + 1 != b |
2964 | 2964 |
2965 /* We have only one byte following the exactn for the count. */ | 2965 /* We have only one byte following the exactn for the count. */ |
2966 || ((unsigned int) (*pending_exact + (q - p)) >= | 2966 || ((unsigned int) (*pending_exact + (q - p)) >= |
2967 ((unsigned int) (1 << BYTEWIDTH) - 1)) | 2967 ((unsigned int) (1 << BYTEWIDTH) - 1)) |
2968 | 2968 |
2969 /* If followed by a repetition operator. */ | 2969 /* If followed by a repetition operator. */ |
2975 && ((syntax & RE_NO_BK_BRACES) | 2975 && ((syntax & RE_NO_BK_BRACES) |
2976 ? *q == '{' | 2976 ? *q == '{' |
2977 : (q[0] == '\\' && q[1] == '{')))) | 2977 : (q[0] == '\\' && q[1] == '{')))) |
2978 { | 2978 { |
2979 /* Start building a new exactn. */ | 2979 /* Start building a new exactn. */ |
2980 | 2980 |
2981 laststart = b; | 2981 laststart = b; |
2982 | 2982 |
2983 BUF_PUSH_2 (exactn, 0); | 2983 BUF_PUSH_2 (exactn, 0); |
2984 pending_exact = b - 1; | 2984 pending_exact = b - 1; |
2985 } | 2985 } |
2986 | 2986 |
2987 BUF_PUSH (c); | 2987 BUF_PUSH (c); |
2988 (*pending_exact)++; | 2988 (*pending_exact)++; |
2989 | 2989 |
2990 while (p < q) | 2990 while (p < q) |
2991 { | 2991 { |
2996 break; | 2996 break; |
2997 } | 2997 } |
2998 } /* switch (c) */ | 2998 } /* switch (c) */ |
2999 } /* while p != pend */ | 2999 } /* while p != pend */ |
3000 | 3000 |
3001 | 3001 |
3002 /* Through the pattern now. */ | 3002 /* Through the pattern now. */ |
3003 | 3003 |
3004 if (fixup_alt_jump) | 3004 if (fixup_alt_jump) |
3005 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); | 3005 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); |
3006 | 3006 |
3007 if (!COMPILE_STACK_EMPTY) | 3007 if (!COMPILE_STACK_EMPTY) |
3008 FREE_STACK_RETURN (REG_EPAREN); | 3008 FREE_STACK_RETURN (REG_EPAREN); |
3009 | 3009 |
3010 /* If we don't want backtracking, force success | 3010 /* If we don't want backtracking, force success |
3011 the first time we reach the end of the compiled pattern. */ | 3011 the first time we reach the end of the compiled pattern. */ |
3012 if (syntax & RE_NO_POSIX_BACKTRACKING) | 3012 if (syntax & RE_NO_POSIX_BACKTRACKING) |
3040 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); | 3040 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); |
3041 | 3041 |
3042 #ifdef emacs | 3042 #ifdef emacs |
3043 if (! fail_stack.stack) | 3043 if (! fail_stack.stack) |
3044 fail_stack.stack | 3044 fail_stack.stack |
3045 = (fail_stack_elt_t *) xmalloc (fail_stack.size | 3045 = (fail_stack_elt_t *) xmalloc (fail_stack.size |
3046 * sizeof (fail_stack_elt_t)); | 3046 * sizeof (fail_stack_elt_t)); |
3047 else | 3047 else |
3048 fail_stack.stack | 3048 fail_stack.stack |
3049 = (fail_stack_elt_t *) xrealloc (fail_stack.stack, | 3049 = (fail_stack_elt_t *) xrealloc (fail_stack.stack, |
3050 (fail_stack.size | 3050 (fail_stack.size |
3051 * sizeof (fail_stack_elt_t))); | 3051 * sizeof (fail_stack_elt_t))); |
3052 #else /* not emacs */ | 3052 #else /* not emacs */ |
3053 if (! fail_stack.stack) | 3053 if (! fail_stack.stack) |
3054 fail_stack.stack | 3054 fail_stack.stack |
3055 = (fail_stack_elt_t *) malloc (fail_stack.size | 3055 = (fail_stack_elt_t *) malloc (fail_stack.size |
3056 * sizeof (fail_stack_elt_t)); | 3056 * sizeof (fail_stack_elt_t)); |
3057 else | 3057 else |
3058 fail_stack.stack | 3058 fail_stack.stack |
3059 = (fail_stack_elt_t *) realloc (fail_stack.stack, | 3059 = (fail_stack_elt_t *) realloc (fail_stack.stack, |
3060 (fail_stack.size | 3060 (fail_stack.size |
3101 register unsigned char *pfrom = end; | 3101 register unsigned char *pfrom = end; |
3102 register unsigned char *pto = end + 3; | 3102 register unsigned char *pto = end + 3; |
3103 | 3103 |
3104 while (pfrom != loc) | 3104 while (pfrom != loc) |
3105 *--pto = *--pfrom; | 3105 *--pto = *--pfrom; |
3106 | 3106 |
3107 store_op1 (op, loc, arg); | 3107 store_op1 (op, loc, arg); |
3108 } | 3108 } |
3109 | 3109 |
3110 | 3110 |
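3111 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2: the bytes in [LOC, END) are moved up by five, then OP, ARG1 and ARG2 are stored at LOC. */ | 3111 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2: the bytes in [LOC, END) are moved up by five, then OP, ARG1 and ARG2 are stored at LOC. */ |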
3112 | 3112 |
3113 static void | 3113 static void |
3114 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, | 3114 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, |
3115 unsigned char *end) | 3115 unsigned char *end) |
3116 { | 3116 { |
3117 register unsigned char *pfrom = end; | 3117 register unsigned char *pfrom = end; |
3118 register unsigned char *pto = end + 5; | 3118 register unsigned char *pto = end + 5; |
3119 | 3119 |
3120 while (pfrom != loc) | 3120 while (pfrom != loc) |
3121 *--pto = *--pfrom; | 3121 *--pto = *--pfrom; |
3122 | 3122 |
3123 store_op2 (op, loc, arg1, arg2); | 3123 store_op2 (op, loc, arg1, arg2); |
3124 } | 3124 } |
3125 | 3125 |
3126 | 3126 |
3127 /* P points to just after a ^ in PATTERN. Return true if that ^ comes | 3127 /* P points to just after a ^ in PATTERN. Return true if that ^ comes |
3131 static boolean | 3131 static boolean |
3132 at_begline_loc_p (CONST char *pattern, CONST char *p, reg_syntax_t syntax) | 3132 at_begline_loc_p (CONST char *pattern, CONST char *p, reg_syntax_t syntax) |
3133 { | 3133 { |
3134 CONST char *prev = p - 2; | 3134 CONST char *prev = p - 2; |
3135 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; | 3135 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; |
3136 | 3136 |
3137 return | 3137 return |
3138 /* After a subexpression? */ | 3138 /* After a subexpression? */ |
3139 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) | 3139 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) |
3140 /* After an alternative? */ | 3140 /* After an alternative? */ |
3141 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); | 3141 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); |
3149 at_endline_loc_p (CONST char *p, CONST char *pend, int syntax) | 3149 at_endline_loc_p (CONST char *p, CONST char *pend, int syntax) |
3150 { | 3150 { |
3151 CONST char *next = p; | 3151 CONST char *next = p; |
3152 boolean next_backslash = *next == '\\'; | 3152 boolean next_backslash = *next == '\\'; |
3153 CONST char *next_next = p + 1 < pend ? p + 1 : 0; | 3153 CONST char *next_next = p + 1 < pend ? p + 1 : 0; |
3154 | 3154 |
3155 return | 3155 return |
3156 /* Before a subexpression? */ | 3156 /* Before a subexpression? */ |
3157 (syntax & RE_NO_BK_PARENS ? *next == ')' | 3157 (syntax & RE_NO_BK_PARENS ? *next == ')' |
3158 : next_backslash && next_next && *next_next == ')') | 3158 : next_backslash && next_next && *next_next == ')') |
3159 /* Before an alternative? */ | 3159 /* Before an alternative? */ |
3160 || (syntax & RE_NO_BK_VBAR ? *next == '|' | 3160 || (syntax & RE_NO_BK_VBAR ? *next == '|' |
3161 : next_backslash && next_next && *next_next == '|'); | 3161 : next_backslash && next_next && *next_next == '|'); |
3162 } | 3162 } |
3163 | 3163 |
3164 | 3164 |
3165 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and | 3165 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and |
3166 false if it's not. */ | 3166 false if it's not. */ |
3167 | 3167 |
3168 static boolean | 3168 static boolean |
3169 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) | 3169 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) |
3170 { | 3170 { |
3171 int this_element; | 3171 int this_element; |
3172 | 3172 |
3173 for (this_element = compile_stack.avail - 1; | 3173 for (this_element = compile_stack.avail - 1; |
3174 this_element >= 0; | 3174 this_element >= 0; |
3175 this_element--) | 3175 this_element--) |
3176 if (compile_stack.stack[this_element].regnum == regnum) | 3176 if (compile_stack.stack[this_element].regnum == regnum) |
3177 return true; | 3177 return true; |
3178 | 3178 |
3179 return false; | 3179 return false; |
3183 /* Read the ending character of a range (in a bracket expression) from the | 3183 /* Read the ending character of a range (in a bracket expression) from the |
3184 uncompiled pattern *P_PTR (which ends at PEND). We assume the | 3184 uncompiled pattern *P_PTR (which ends at PEND). We assume the |
3185 starting character is in `P[-2]'. (`P[-1]' is the character `-'.) | 3185 starting character is in `P[-2]'. (`P[-1]' is the character `-'.) |
3186 Then we set the translation of all bits between the starting and | 3186 Then we set the translation of all bits between the starting and |
3187 ending characters (inclusive) in the compiled pattern B. | 3187 ending characters (inclusive) in the compiled pattern B. |
3188 | 3188 |
3189 Return an error code. | 3189 Return an error code. |
3190 | 3190 |
3191 We use these short variable names so we can use the same macros as | 3191 We use these short variable names so we can use the same macros as |
3192 `regex_compile' itself. */ | 3192 `regex_compile' itself. */ |
3193 | 3193 |
3194 static reg_errcode_t | 3194 static reg_errcode_t |
3195 compile_range (CONST char **p_ptr, CONST char *pend, char *translate, | 3195 compile_range (CONST char **p_ptr, CONST char *pend, char *translate, |
3197 { | 3197 { |
3198 unsigned this_char; | 3198 unsigned this_char; |
3199 | 3199 |
3200 CONST char *p = *p_ptr; | 3200 CONST char *p = *p_ptr; |
3201 int range_start, range_end; | 3201 int range_start, range_end; |
3202 | 3202 |
3203 if (p == pend) | 3203 if (p == pend) |
3204 return REG_ERANGE; | 3204 return REG_ERANGE; |
3205 | 3205 |
3206 /* Even though the pattern is a signed `char *', we need to fetch | 3206 /* Even though the pattern is a signed `char *', we need to fetch |
3207 with unsigned char *'s; if the high bit of the pattern character | 3207 with unsigned char *'s; if the high bit of the pattern character |
3208 is set, the range endpoints will be negative if we fetch using a | 3208 is set, the range endpoints will be negative if we fetch using a |
3209 signed char *. | 3209 signed char *. |
3210 | 3210 |
3211 We also want to fetch the endpoints without translating them; the | 3211 We also want to fetch the endpoints without translating them; the |
3212 appropriate translation is done in the bit-setting loop below. */ | 3212 appropriate translation is done in the bit-setting loop below. */ |
3213 /* The SVR4 compiler on the 3B2 had trouble with unsigned CONST char *. */ | 3213 /* The SVR4 compiler on the 3B2 had trouble with unsigned CONST char *. */ |
3214 range_start = ((CONST unsigned char *) p)[-2]; | 3214 range_start = ((CONST unsigned char *) p)[-2]; |
3215 range_end = ((CONST unsigned char *) p)[0]; | 3215 range_end = ((CONST unsigned char *) p)[0]; |
3216 | 3216 |
3228 loop, since all characters <= 0xff. */ | 3228 loop, since all characters <= 0xff. */ |
3229 for (this_char = range_start; this_char <= range_end; this_char++) | 3229 for (this_char = range_start; this_char <= range_end; this_char++) |
3230 { | 3230 { |
3231 SET_LIST_BIT (TRANSLATE (this_char)); | 3231 SET_LIST_BIT (TRANSLATE (this_char)); |
3232 } | 3232 } |
3233 | 3233 |
3234 return REG_NOERROR; | 3234 return REG_NOERROR; |
3235 } | 3235 } |
3236 | 3236 |
3237 #ifdef MULE | 3237 #ifdef MULE |
3238 | 3238 |
3240 compile_extended_range (CONST char **p_ptr, CONST char *pend, char *translate, | 3240 compile_extended_range (CONST char **p_ptr, CONST char *pend, char *translate, |
3241 reg_syntax_t syntax, Lisp_Object rtab) | 3241 reg_syntax_t syntax, Lisp_Object rtab) |
3242 { | 3242 { |
3243 Emchar this_char, range_start, range_end; | 3243 Emchar this_char, range_start, range_end; |
3244 CONST Bufbyte *p; | 3244 CONST Bufbyte *p; |
3245 | 3245 |
3246 if (*p_ptr == pend) | 3246 if (*p_ptr == pend) |
3247 return REG_ERANGE; | 3247 return REG_ERANGE; |
3248 | 3248 |
3249 p = (CONST Bufbyte *) *p_ptr; | 3249 p = (CONST Bufbyte *) *p_ptr; |
3250 range_end = charptr_emchar (p); | 3250 range_end = charptr_emchar (p); |
3251 p--; /* back to '-' */ | 3251 p--; /* back to '-' */ |
3252 DEC_CHARPTR (p); /* back to start of range */ | 3252 DEC_CHARPTR (p); /* back to start of range */ |
3253 /* We also want to fetch the endpoints without translating them; the | 3253 /* We also want to fetch the endpoints without translating them; the |
3254 appropriate translation is done in the bit-setting loop below. */ | 3254 appropriate translation is done in the bit-setting loop below. */ |
3255 range_start = charptr_emchar (p); | 3255 range_start = charptr_emchar (p); |
3256 INC_CHARPTR (*p_ptr); | 3256 INC_CHARPTR (*p_ptr); |
3257 | 3257 |
3258 /* If the start is after the end, the range is empty. */ | 3258 /* If the start is after the end, the range is empty. */ |
3259 if (range_start > range_end) | 3259 if (range_start > range_end) |
3260 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; | 3260 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; |
3261 | 3261 |
3262 /* Can't have ranges spanning different charsets, except maybe for | 3262 /* Can't have ranges spanning different charsets, except maybe for |
3263 ranges entirely within the first 256 chars. */ | 3263 ranges entirely within the first 256 chars. */ |
3264 | 3264 |
3265 if ((range_start >= 0x100 || range_end >= 0x100) | 3265 if ((range_start >= 0x100 || range_end >= 0x100) |
3266 && CHAR_LEADING_BYTE (range_start) != | 3266 && CHAR_LEADING_BYTE (range_start) != |
3267 CHAR_LEADING_BYTE (range_end)) | 3267 CHAR_LEADING_BYTE (range_end)) |
3268 return REG_ERANGESPAN; | 3268 return REG_ERANGESPAN; |
3269 | 3269 |
3288 SET_RANGETAB_BIT (TRANSLATE (this_char)); | 3288 SET_RANGETAB_BIT (TRANSLATE (this_char)); |
3289 } | 3289 } |
3290 | 3290 |
3291 if (this_char <= range_end) | 3291 if (this_char <= range_end) |
3292 put_range_table (rtab, this_char, range_end, Qt); | 3292 put_range_table (rtab, this_char, range_end, Qt); |
3293 | 3293 |
3294 return REG_NOERROR; | 3294 return REG_NOERROR; |
3295 } | 3295 } |
3296 | 3296 |
3297 #endif /* MULE */ | 3297 #endif /* MULE */ |
3298 | 3298 |
3301 characters can start a string that matches the pattern. This fastmap | 3301 characters can start a string that matches the pattern. This fastmap |
3302 is used by re_search to skip quickly over impossible starting points. | 3302 is used by re_search to skip quickly over impossible starting points. |
3303 | 3303 |
3304 The caller must supply the address of a (1 << BYTEWIDTH)-byte data | 3304 The caller must supply the address of a (1 << BYTEWIDTH)-byte data |
3305 area as BUFP->fastmap. | 3305 area as BUFP->fastmap. |
3306 | 3306 |
3307 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in | 3307 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in |
3308 the pattern buffer. | 3308 the pattern buffer. |
3309 | 3309 |
3310 Returns 0 if we succeed, -2 if an internal error. */ | 3310 Returns 0 if we succeed, -2 if an internal error. */ |
3311 | 3311 |
3316 #ifdef MATCH_MAY_ALLOCATE | 3316 #ifdef MATCH_MAY_ALLOCATE |
3317 fail_stack_type fail_stack; | 3317 fail_stack_type fail_stack; |
3318 #endif | 3318 #endif |
3319 DECLARE_DESTINATION | 3319 DECLARE_DESTINATION |
3320 /* We don't push any register information onto the failure stack. */ | 3320 /* We don't push any register information onto the failure stack. */ |
3321 | 3321 |
3322 register char *fastmap = bufp->fastmap; | 3322 register char *fastmap = bufp->fastmap; |
3323 unsigned char *pattern = bufp->buffer; | 3323 unsigned char *pattern = bufp->buffer; |
3324 unsigned long size = bufp->used; | 3324 unsigned long size = bufp->used; |
3325 unsigned char *p = pattern; | 3325 unsigned char *p = pattern; |
3326 register unsigned char *pend = pattern + size; | 3326 register unsigned char *pend = pattern + size; |
3339 | 3339 |
3340 /* We aren't doing a `succeed_n' to begin with. */ | 3340 /* We aren't doing a `succeed_n' to begin with. */ |
3341 boolean succeed_n_p = false; | 3341 boolean succeed_n_p = false; |
3342 | 3342 |
3343 assert (fastmap != NULL && p != NULL); | 3343 assert (fastmap != NULL && p != NULL); |
3344 | 3344 |
3345 INIT_FAIL_STACK (); | 3345 INIT_FAIL_STACK (); |
3346 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ | 3346 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ |
3347 bufp->fastmap_accurate = 1; /* It will be when we're done. */ | 3347 bufp->fastmap_accurate = 1; /* It will be when we're done. */ |
3348 bufp->can_be_null = 0; | 3348 bufp->can_be_null = 0; |
3349 | 3349 |
3350 while (1) | 3350 while (1) |
3351 { | 3351 { |
3352 if (p == pend || *p == succeed) | 3352 if (p == pend || *p == succeed) |
3353 { | 3353 { |
3354 /* We have reached the (effective) end of pattern. */ | 3354 /* We have reached the (effective) end of pattern. */ |
3367 break; | 3367 break; |
3368 } | 3368 } |
3369 | 3369 |
3370 /* We should never be about to go beyond the end of the pattern. */ | 3370 /* We should never be about to go beyond the end of the pattern. */ |
3371 assert (p < pend); | 3371 assert (p < pend); |
3372 | 3372 |
3373 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) | 3373 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) |
3374 { | 3374 { |
3375 | 3375 |
3376 /* I guess the idea here is to simply not bother with a fastmap | 3376 /* I guess the idea here is to simply not bother with a fastmap |
3377 if a backreference is used, since it's too hard to figure out | 3377 if a backreference is used, since it's too hard to figure out |
3548 (regex_emacs_buffer->mirror_syntax_table), j) == | 3548 (regex_emacs_buffer->mirror_syntax_table), j) == |
3549 (enum syntaxcode) k) | 3549 (enum syntaxcode) k) |
3550 fastmap[j] = 1; | 3550 fastmap[j] = 1; |
3551 for (j = 0x80; j < 0xA0; j++) | 3551 for (j = 0x80; j < 0xA0; j++) |
3552 { | 3552 { |
3553 if (j == PRE_LEADING_BYTE_PRIVATE_1 | 3553 if (LEADING_BYTE_PREFIX_P(j)) |
3554 || j == PRE_LEADING_BYTE_PRIVATE_2) | |
3555 /* too complicated to calculate this right */ | 3554 /* too complicated to calculate this right */ |
3556 fastmap[j] = 1; | 3555 fastmap[j] = 1; |
3557 else | 3556 else |
3558 { | 3557 { |
3559 int multi_p; | 3558 int multi_p; |
3590 (regex_emacs_buffer->mirror_syntax_table), j) != | 3589 (regex_emacs_buffer->mirror_syntax_table), j) != |
3591 (enum syntaxcode) k) | 3590 (enum syntaxcode) k) |
3592 fastmap[j] = 1; | 3591 fastmap[j] = 1; |
3593 for (j = 0x80; j < 0xA0; j++) | 3592 for (j = 0x80; j < 0xA0; j++) |
3594 { | 3593 { |
3595 if (j == PRE_LEADING_BYTE_PRIVATE_1 | 3594 if (LEADING_BYTE_PREFIX_P(j)) |
3596 || j == PRE_LEADING_BYTE_PRIVATE_2) | |
3597 /* too complicated to calculate this right */ | 3595 /* too complicated to calculate this right */ |
3598 fastmap[j] = 1; | 3596 fastmap[j] = 1; |
3599 else | 3597 else |
3600 { | 3598 { |
3601 int multi_p; | 3599 int multi_p; |
3659 case maybe_pop_jump: | 3657 case maybe_pop_jump: |
3660 case jump: | 3658 case jump: |
3661 case jump_past_alt: | 3659 case jump_past_alt: |
3662 case dummy_failure_jump: | 3660 case dummy_failure_jump: |
3663 EXTRACT_NUMBER_AND_INCR (j, p); | 3661 EXTRACT_NUMBER_AND_INCR (j, p); |
3664 p += j; | 3662 p += j; |
3665 if (j > 0) | 3663 if (j > 0) |
3666 continue; | 3664 continue; |
3667 | 3665 |
3668 /* Jump backward implies we just went through the body of a | 3666 /* Jump backward implies we just went through the body of a |
3669 loop and matched nothing. Opcode jumped to should be | 3667 loop and matched nothing. Opcode jumped to should be |
3670 `on_failure_jump' or `succeed_n'. Just treat it like an | 3668 `on_failure_jump' or `succeed_n'. Just treat it like an |
3671 ordinary jump. For a * loop, it has pushed its failure | 3669 ordinary jump. For a * loop, it has pushed its failure |
3672 point already; if so, discard that as redundant. */ | 3670 point already; if so, discard that as redundant. */ |
3674 && (re_opcode_t) *p != succeed_n) | 3672 && (re_opcode_t) *p != succeed_n) |
3675 continue; | 3673 continue; |
3676 | 3674 |
3677 p++; | 3675 p++; |
3678 EXTRACT_NUMBER_AND_INCR (j, p); | 3676 EXTRACT_NUMBER_AND_INCR (j, p); |
3679 p += j; | 3677 p += j; |
3680 | 3678 |
3681 /* If what's on the stack is where we are now, pop it. */ | 3679 /* If what's on the stack is where we are now, pop it. */ |
3682 if (!FAIL_STACK_EMPTY () | 3680 if (!FAIL_STACK_EMPTY () |
3683 && fail_stack.stack[fail_stack.avail - 1].pointer == p) | 3681 && fail_stack.stack[fail_stack.avail - 1].pointer == p) |
3684 fail_stack.avail--; | 3682 fail_stack.avail--; |
3685 | 3683 |
3686 continue; | 3684 continue; |
3687 | 3685 |
3718 continue; | 3716 continue; |
3719 | 3717 |
3720 | 3718 |
3721 case succeed_n: | 3719 case succeed_n: |
3722 /* Get to the number of times to succeed. */ | 3720 /* Get to the number of times to succeed. */ |
3723 p += 2; | 3721 p += 2; |
3724 | 3722 |
3725 /* Increment p past the n for when k != 0. */ | 3723 /* Increment p past the n for when k != 0. */ |
3726 EXTRACT_NUMBER_AND_INCR (k, p); | 3724 EXTRACT_NUMBER_AND_INCR (k, p); |
3727 if (k == 0) | 3725 if (k == 0) |
3728 { | 3726 { |
3806 | 3804 |
3807 int | 3805 int |
3808 re_search (struct re_pattern_buffer *bufp, CONST char *string, int size, | 3806 re_search (struct re_pattern_buffer *bufp, CONST char *string, int size, |
3809 int startpos, int range, struct re_registers *regs) | 3807 int startpos, int range, struct re_registers *regs) |
3810 { | 3808 { |
3811 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, | 3809 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, |
3812 regs, size); | 3810 regs, size); |
3813 } | 3811 } |
3814 | 3812 |
3815 #ifndef emacs | 3813 #ifndef emacs |
3816 /* Snarfed from src/lisp.h, needed for compiling [ce]tags. */ | 3814 /* Snarfed from src/lisp.h, needed for compiling [ce]tags. */ |
3824 STARTPOS, then at STARTPOS + 1, and so on. | 3822 STARTPOS, then at STARTPOS + 1, and so on. |
3825 | 3823 |
3826 With MULE, STARTPOS is a byte position, not a char position. And the | 3824 With MULE, STARTPOS is a byte position, not a char position. And the |
3827 search will increment STARTPOS by the width of the current leading | 3825 search will increment STARTPOS by the width of the current leading |
3828 character. | 3826 character. |
3829 | 3827 |
3830 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. | 3828 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. |
3831 | 3829 |
3832 RANGE is how far to scan while trying to match. RANGE = 0 means try | 3830 RANGE is how far to scan while trying to match. RANGE = 0 means try |
3833 only at STARTPOS; in general, the last start tried is STARTPOS + | 3831 only at STARTPOS; in general, the last start tried is STARTPOS + |
3834 RANGE. | 3832 RANGE. |
3835 | 3833 |
3836 With MULE, RANGE is a byte position, not a char position. The last | 3834 With MULE, RANGE is a byte position, not a char position. The last |
3837 start tried is the character starting <= STARTPOS + RANGE. | 3835 start tried is the character starting <= STARTPOS + RANGE. |
3838 | 3836 |
3839 In REGS, return the indices of the virtual concatenation of STRING1 | 3837 In REGS, return the indices of the virtual concatenation of STRING1 |
3840 and STRING2 that matched the entire BUFP->buffer and its contained | 3838 and STRING2 that matched the entire BUFP->buffer and its contained |
3841 subexpressions. | 3839 subexpressions. |
3842 | 3840 |
3843 Do not consider matching one past the index STOP in the virtual | 3841 Do not consider matching one past the index STOP in the virtual |
3844 concatenation of STRING1 and STRING2. | 3842 concatenation of STRING1 and STRING2. |
3845 | 3843 |
3846 We return either the position in the strings at which the match was | 3844 We return either the position in the strings at which the match was |
3847 found, -1 if no match, or -2 if error (such as failure | 3845 found, -1 if no match, or -2 if error (such as failure |
3864 Charcount d_size; | 3862 Charcount d_size; |
3865 | 3863 |
3866 /* Check for out-of-range STARTPOS. */ | 3864 /* Check for out-of-range STARTPOS. */ |
3867 if (startpos < 0 || startpos > total_size) | 3865 if (startpos < 0 || startpos > total_size) |
3868 return -1; | 3866 return -1; |
3869 | 3867 |
3870 /* Fix up RANGE if it might eventually take us outside | 3868 /* Fix up RANGE if it might eventually take us outside |
3871 the virtual concatenation of STRING1 and STRING2. */ | 3869 the virtual concatenation of STRING1 and STRING2. */ |
3872 if (endpos < 0) | 3870 if (endpos < 0) |
3873 range = 0 - startpos; | 3871 range = 0 - startpos; |
3874 else if (endpos > total_size) | 3872 else if (endpos > total_size) |
3890 | 3888 |
3891 /* Update the fastmap now if not correct already. */ | 3889 /* Update the fastmap now if not correct already. */ |
3892 if (fastmap && !bufp->fastmap_accurate) | 3890 if (fastmap && !bufp->fastmap_accurate) |
3893 if (re_compile_fastmap (bufp) == -2) | 3891 if (re_compile_fastmap (bufp) == -2) |
3894 return -2; | 3892 return -2; |
3895 | 3893 |
3896 #ifdef REGEX_BEGLINE_CHECK | 3894 #ifdef REGEX_BEGLINE_CHECK |
3897 { | 3895 { |
3898 int i = 0; | 3896 int i = 0; |
3899 | 3897 |
3900 while (i < bufp->used) | 3898 while (i < bufp->used) |
3909 } | 3907 } |
3910 #endif | 3908 #endif |
3911 | 3909 |
3912 /* Loop through the string, looking for a place to start matching. */ | 3910 /* Loop through the string, looking for a place to start matching. */ |
3913 for (;;) | 3911 for (;;) |
3914 { | 3912 { |
3915 #ifdef REGEX_BEGLINE_CHECK | 3913 #ifdef REGEX_BEGLINE_CHECK |
3916 /* If the regex is anchored at the beginning of a line (i.e. with a ^), | 3914 /* If the regex is anchored at the beginning of a line (i.e. with a ^), |
3917 then we can speed things up by skipping to the next beginning-of- | 3915 then we can speed things up by skipping to the next beginning-of- |
3918 line. */ | 3916 line. */ |
3919 if (anchored_at_begline && startpos > 0 && startpos != size1 && | 3917 if (anchored_at_begline && startpos > 0 && startpos != size1 && |
3996 startpos += irange - range; | 3994 startpos += irange - range; |
3997 } | 3995 } |
3998 else /* Searching backwards. */ | 3996 else /* Searching backwards. */ |
3999 { | 3997 { |
4000 unsigned char c = (size1 == 0 || startpos >= size1 | 3998 unsigned char c = (size1 == 0 || startpos >= size1 |
4001 ? string2[startpos - size1] | 3999 ? string2[startpos - size1] |
4002 : string1[startpos]); | 4000 : string1[startpos]); |
4003 #ifdef MULE | 4001 #ifdef MULE |
4004 if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)]) | 4002 if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)]) |
4005 #else | 4003 #else |
4006 if (!fastmap[(unsigned char) TRANSLATE (c)]) | 4004 if (!fastmap[(unsigned char) TRANSLATE (c)]) |
4026 #endif | 4024 #endif |
4027 #endif | 4025 #endif |
4028 | 4026 |
4029 if (val >= 0) | 4027 if (val >= 0) |
4030 return startpos; | 4028 return startpos; |
4031 | 4029 |
4032 if (val == -2) | 4030 if (val == -2) |
4033 return -2; | 4031 return -2; |
4034 | 4032 |
4035 advance: | 4033 advance: |
4036 if (!range) | 4034 if (!range) |
4037 break; | 4035 break; |
4038 else if (range > 0) | 4036 else if (range > 0) |
4039 { | 4037 { |
4040 d = ((CONST unsigned char *) | 4038 d = ((CONST unsigned char *) |
4041 (startpos >= size1 ? string2 - size1 : string1) + startpos); | 4039 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
4042 d_size = charcount_to_bytecount (d, 1); | 4040 d_size = charcount_to_bytecount (d, 1); |
4043 range -= d_size; | 4041 range -= d_size; |
4086 | 4084 |
4087 | 4085 |
4088 /* Test if at very beginning or at very end of the virtual concatenation | 4086 /* Test if at very beginning or at very end of the virtual concatenation |
4089 of `string1' and `string2'. If only one string, it's `string2'. */ | 4087 of `string1' and `string2'. If only one string, it's `string2'. */ |
4090 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) | 4088 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) |
4091 #define AT_STRINGS_END(d) ((d) == end2) | 4089 #define AT_STRINGS_END(d) ((d) == end2) |
4092 | 4090 |
4093 /* XEmacs change: | 4091 /* XEmacs change: |
4094 If the given position straddles the string gap, return the equivalent | 4092 If the given position straddles the string gap, return the equivalent |
4095 position that is before or after the gap, respectively; otherwise, | 4093 position that is before or after the gap, respectively; otherwise, |
4096 return the same position. */ | 4094 return the same position. */ |
4151 | 4149 |
4152 /* re_match_2 matches the compiled pattern in BUFP against the | 4150 /* re_match_2 matches the compiled pattern in BUFP against the |
4153 (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and | 4151 (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and |
4154 SIZE2, respectively). We start matching at POS, and stop matching | 4152 SIZE2, respectively). We start matching at POS, and stop matching |
4155 at STOP. | 4153 at STOP. |
4156 | 4154 |
4157 If REGS is non-null and the `no_sub' field of BUFP is zero, we | 4155 If REGS is non-null and the `no_sub' field of BUFP is zero, we |
4158 store offsets for the substring each group matched in REGS. See the | 4156 store offsets for the substring each group matched in REGS. See the |
4159 documentation for exactly how many groups we fill. | 4157 documentation for exactly how many groups we fill. |
4160 | 4158 |
4161 We return -1 if no match, -2 if an internal error (such as the | 4159 We return -1 if no match, -2 if an internal error (such as the |
4192 each to consider matching. */ | 4190 each to consider matching. */ |
4193 CONST char *end_match_1, *end_match_2; | 4191 CONST char *end_match_1, *end_match_2; |
4194 | 4192 |
4195 /* Where we are in the data, and the end of the current string. */ | 4193 /* Where we are in the data, and the end of the current string. */ |
4196 CONST char *d, *dend; | 4194 CONST char *d, *dend; |
4197 | 4195 |
4198 /* Where we are in the pattern, and the end of the pattern. */ | 4196 /* Where we are in the pattern, and the end of the pattern. */ |
4199 unsigned char *p = bufp->buffer; | 4197 unsigned char *p = bufp->buffer; |
4200 register unsigned char *pend = p + bufp->used; | 4198 register unsigned char *pend = p + bufp->used; |
4201 | 4199 |
4202 /* Mark the opcode just after a start_memory, so we can test for an | 4200 /* Mark the opcode just after a start_memory, so we can test for an |
4231 | 4229 |
4232 /* We fill all the registers internally, independent of what we | 4230 /* We fill all the registers internally, independent of what we |
4233 return, for use in backreferences. The number here includes | 4231 return, for use in backreferences. The number here includes |
4234 an element for register zero. */ | 4232 an element for register zero. */ |
4235 unsigned num_regs = bufp->re_nsub + 1; | 4233 unsigned num_regs = bufp->re_nsub + 1; |
4236 | 4234 |
4237 /* The currently active registers. */ | 4235 /* The currently active registers. */ |
4238 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; | 4236 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; |
4239 unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; | 4237 unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; |
4240 | 4238 |
4241 /* Information on the contents of registers. These are pointers into | 4239 /* Information on the contents of registers. These are pointers into |
4263 field of reg_info[reg_num] helps us tell whether or not we have | 4261 field of reg_info[reg_num] helps us tell whether or not we have |
4264 matched any of the pattern so far this time through the reg_num-th | 4262 matched any of the pattern so far this time through the reg_num-th |
4265 subexpression. These two fields get reset each time through any | 4263 subexpression. These two fields get reset each time through any |
4266 loop their register is in. */ | 4264 loop their register is in. */ |
4267 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ | 4265 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ |
4268 register_info_type *reg_info; | 4266 register_info_type *reg_info; |
4269 #endif | 4267 #endif |
4270 | 4268 |
4271 /* The following record the register info as found in the above | 4269 /* The following record the register info as found in the above |
4272 variables when we find a match better than any we've seen before. | 4270 variables when we find a match better than any we've seen before. |
4273 This happens as we backtrack through the failure points, which in | 4271 This happens as we backtrack through the failure points, which in |
4274 turn happens only if we have not yet matched the entire string. */ | 4272 turn happens only if we have not yet matched the entire string. */ |
4275 unsigned best_regs_set = false; | 4273 unsigned best_regs_set = false; |
4276 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | 4274 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ |
4277 CONST char **best_regstart, **best_regend; | 4275 CONST char **best_regstart, **best_regend; |
4278 #endif | 4276 #endif |
4279 | 4277 |
4280 /* Logically, this is `best_regend[0]'. But we don't want to have to | 4278 /* Logically, this is `best_regend[0]'. But we don't want to have to |
4281 allocate space for that if we're not allocating space for anything | 4279 allocate space for that if we're not allocating space for anything |
4282 else (see below). Also, we never need info about register 0 for | 4280 else (see below). Also, we never need info about register 0 for |
4283 any of the other register vectors, and it seems rather a kludge to | 4281 any of the other register vectors, and it seems rather a kludge to |
4284 treat `best_regend' differently than the rest. So we keep track of | 4282 treat `best_regend' differently than the rest. So we keep track of |
4296 register_info_type *reg_info_dummy; | 4294 register_info_type *reg_info_dummy; |
4297 #endif | 4295 #endif |
4298 | 4296 |
4299 #ifdef DEBUG | 4297 #ifdef DEBUG |
4300 /* Counts the total number of registers pushed. */ | 4298 /* Counts the total number of registers pushed. */ |
4301 unsigned num_regs_pushed = 0; | 4299 unsigned num_regs_pushed = 0; |
4302 #endif | 4300 #endif |
4303 | 4301 |
4304 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); | 4302 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); |
4305 | 4303 |
4306 INIT_FAIL_STACK (); | 4304 INIT_FAIL_STACK (); |
4307 | 4305 |
4308 #ifdef MATCH_MAY_ALLOCATE | 4306 #ifdef MATCH_MAY_ALLOCATE |
4309 /* Do not bother to initialize all the register variables if there are | 4307 /* Do not bother to initialize all the register variables if there are |
4310 no groups in the pattern, as it takes a fair amount of time. If | 4308 no groups in the pattern, as it takes a fair amount of time. If |
4311 there are groups, we include space for register 0 (the whole | 4309 there are groups, we include space for register 0 (the whole |
4312 pattern), even though we never use it, since it simplifies the | 4310 pattern), even though we never use it, since it simplifies the |
4321 best_regend = REGEX_TALLOC (num_regs, CONST char *); | 4319 best_regend = REGEX_TALLOC (num_regs, CONST char *); |
4322 reg_info = REGEX_TALLOC (num_regs, register_info_type); | 4320 reg_info = REGEX_TALLOC (num_regs, register_info_type); |
4323 reg_dummy = REGEX_TALLOC (num_regs, CONST char *); | 4321 reg_dummy = REGEX_TALLOC (num_regs, CONST char *); |
4324 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); | 4322 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); |
4325 | 4323 |
4326 if (!(regstart && regend && old_regstart && old_regend && reg_info | 4324 if (!(regstart && regend && old_regstart && old_regend && reg_info |
4327 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) | 4325 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) |
4328 { | 4326 { |
4329 FREE_VARIABLES (); | 4327 FREE_VARIABLES (); |
4330 return -2; | 4328 return -2; |
4331 } | 4329 } |
4332 } | 4330 } |
4344 if (pos < 0 || pos > size1 + size2) | 4342 if (pos < 0 || pos > size1 + size2) |
4345 { | 4343 { |
4346 FREE_VARIABLES (); | 4344 FREE_VARIABLES (); |
4347 return -1; | 4345 return -1; |
4348 } | 4346 } |
4349 | 4347 |
4350 /* Initialize subexpression text positions to -1 to mark ones that no | 4348 /* Initialize subexpression text positions to -1 to mark ones that no |
4351 start_memory/stop_memory has been seen for. Also initialize the | 4349 start_memory/stop_memory has been seen for. Also initialize the |
4352 register information struct. */ | 4350 register information struct. */ |
4353 for (mcnt = 1; mcnt < num_regs; mcnt++) | 4351 for (mcnt = 1; mcnt < num_regs; mcnt++) |
4354 { | 4352 { |
4355 regstart[mcnt] = regend[mcnt] | 4353 regstart[mcnt] = regend[mcnt] |
4356 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; | 4354 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; |
4357 | 4355 |
4358 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; | 4356 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; |
4359 IS_ACTIVE (reg_info[mcnt]) = 0; | 4357 IS_ACTIVE (reg_info[mcnt]) = 0; |
4360 MATCHED_SOMETHING (reg_info[mcnt]) = 0; | 4358 MATCHED_SOMETHING (reg_info[mcnt]) = 0; |
4361 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; | 4359 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; |
4362 } | 4360 } |
4363 | 4361 |
4364 /* We move `string1' into `string2' if the latter's empty -- but not if | 4362 /* We move `string1' into `string2' if the latter's empty -- but not if |
4365 `string1' is null. */ | 4363 `string1' is null. */ |
4366 if (size2 == 0 && string1 != NULL) | 4364 if (size2 == 0 && string1 != NULL) |
4367 { | 4365 { |
4368 string2 = string1; | 4366 string2 = string1; |
4383 { | 4381 { |
4384 end_match_1 = end1; | 4382 end_match_1 = end1; |
4385 end_match_2 = string2 + stop - size1; | 4383 end_match_2 = string2 + stop - size1; |
4386 } | 4384 } |
4387 | 4385 |
4388 /* `p' scans through the pattern as `d' scans through the data. | 4386 /* `p' scans through the pattern as `d' scans through the data. |
4389 `dend' is the end of the input string that `d' points within. `d' | 4387 `dend' is the end of the input string that `d' points within. `d' |
4390 is advanced into the following input string whenever necessary, but | 4388 is advanced into the following input string whenever necessary, but |
4391 this happens before fetching; therefore, at the beginning of the | 4389 this happens before fetching; therefore, at the beginning of the |
4392 loop, `d' can be pointing at the end of a string, but it cannot | 4390 loop, `d' can be pointing at the end of a string, but it cannot |
4393 equal `string2'. */ | 4391 equal `string2'. */ |
4405 DEBUG_PRINT1 ("The compiled pattern is: "); | 4403 DEBUG_PRINT1 ("The compiled pattern is: "); |
4406 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); | 4404 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); |
4407 DEBUG_PRINT1 ("The string to match is: `"); | 4405 DEBUG_PRINT1 ("The string to match is: `"); |
4408 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); | 4406 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); |
4409 DEBUG_PRINT1 ("'\n"); | 4407 DEBUG_PRINT1 ("'\n"); |
4410 | 4408 |
4411 /* This loops over pattern commands. It exits by returning from the | 4409 /* This loops over pattern commands. It exits by returning from the |
4412 function if the match is complete, or it drops through if the match | 4410 function if the match is complete, or it drops through if the match |
4413 fails at this starting point in the input data. */ | 4411 fails at this starting point in the input data. */ |
4414 for (;;) | 4412 for (;;) |
4415 { | 4413 { |
4416 DEBUG_PRINT2 ("\n0x%p: ", p); | 4414 DEBUG_PRINT2 ("\n0x%p: ", p); |
4417 | 4415 |
4418 if (p == pend) | 4416 if (p == pend) |
4419 { /* End of pattern means we might have succeeded. */ | 4417 { /* End of pattern means we might have succeeded. */ |
4420 DEBUG_PRINT1 ("end of pattern ... "); | 4418 DEBUG_PRINT1 ("end of pattern ... "); |
4421 | 4419 |
4422 /* If we haven't matched the entire string, and we want the | 4420 /* If we haven't matched the entire string, and we want the |
4423 longest match, try backtracking. */ | 4421 longest match, try backtracking. */ |
4424 if (d != end_match_2) | 4422 if (d != end_match_2) |
4425 { | 4423 { |
4426 /* 1 if this match ends in the same string (string1 or string2) | 4424 /* 1 if this match ends in the same string (string1 or string2) |
4427 as the best previous match. */ | 4425 as the best previous match. */ |
4428 boolean same_str_p = (FIRST_STRING_P (match_end) | 4426 boolean same_str_p = (FIRST_STRING_P (match_end) |
4429 == MATCHING_IN_FIRST_STRING); | 4427 == MATCHING_IN_FIRST_STRING); |
4430 /* 1 if this match is the best seen so far. */ | 4428 /* 1 if this match is the best seen so far. */ |
4431 boolean best_match_p; | 4429 boolean best_match_p; |
4432 | 4430 |
4433 /* AIX compiler got confused when this was combined | 4431 /* AIX compiler got confused when this was combined |
4436 best_match_p = d > match_end; | 4434 best_match_p = d > match_end; |
4437 else | 4435 else |
4438 best_match_p = !MATCHING_IN_FIRST_STRING; | 4436 best_match_p = !MATCHING_IN_FIRST_STRING; |
4439 | 4437 |
4440 DEBUG_PRINT1 ("backtracking.\n"); | 4438 DEBUG_PRINT1 ("backtracking.\n"); |
4441 | 4439 |
4442 if (!FAIL_STACK_EMPTY ()) | 4440 if (!FAIL_STACK_EMPTY ()) |
4443 { /* More failure points to try. */ | 4441 { /* More failure points to try. */ |
4444 | 4442 |
4445 /* If exceeds best match so far, save it. */ | 4443 /* If exceeds best match so far, save it. */ |
4446 if (!best_regs_set || best_match_p) | 4444 if (!best_regs_set || best_match_p) |
4447 { | 4445 { |
4448 best_regs_set = true; | 4446 best_regs_set = true; |
4449 match_end = d; | 4447 match_end = d; |
4450 | 4448 |
4451 DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); | 4449 DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); |
4452 | 4450 |
4453 for (mcnt = 1; mcnt < num_regs; mcnt++) | 4451 for (mcnt = 1; mcnt < num_regs; mcnt++) |
4454 { | 4452 { |
4455 best_regstart[mcnt] = regstart[mcnt]; | 4453 best_regstart[mcnt] = regstart[mcnt]; |
4456 best_regend[mcnt] = regend[mcnt]; | 4454 best_regend[mcnt] = regend[mcnt]; |
4457 } | 4455 } |
4458 } | 4456 } |
4459 goto fail; | 4457 goto fail; |
4460 } | 4458 } |
4461 | 4459 |
4462 /* If no failure points, don't restore garbage. And if | 4460 /* If no failure points, don't restore garbage. And if |
4463 last match is real best match, don't restore second | 4461 last match is real best match, don't restore second |
4464 best one. */ | 4462 best one. */ |
4469 end_match_1' while the restored d is in string2. | 4467 end_match_1' while the restored d is in string2. |
4470 For example, the pattern `x.*y.*z' against the | 4468 For example, the pattern `x.*y.*z' against the |
4471 strings `x-' and `y-z-', if the two strings are | 4469 strings `x-' and `y-z-', if the two strings are |
4472 not consecutive in memory. */ | 4470 not consecutive in memory. */ |
4473 DEBUG_PRINT1 ("Restoring best registers.\n"); | 4471 DEBUG_PRINT1 ("Restoring best registers.\n"); |
4474 | 4472 |
4475 d = match_end; | 4473 d = match_end; |
4476 dend = ((d >= string1 && d <= end1) | 4474 dend = ((d >= string1 && d <= end1) |
4477 ? end_match_1 : end_match_2); | 4475 ? end_match_1 : end_match_2); |
4478 | 4476 |
4479 for (mcnt = 1; mcnt < num_regs; mcnt++) | 4477 for (mcnt = 1; mcnt < num_regs; mcnt++) |
4536 regs->start[0] = pos; | 4534 regs->start[0] = pos; |
4537 regs->end[0] = (MATCHING_IN_FIRST_STRING | 4535 regs->end[0] = (MATCHING_IN_FIRST_STRING |
4538 ? ((regoff_t) (d - string1)) | 4536 ? ((regoff_t) (d - string1)) |
4539 : ((regoff_t) (d - string2 + size1))); | 4537 : ((regoff_t) (d - string2 + size1))); |
4540 } | 4538 } |
4541 | 4539 |
4542 /* Go through the first `min (num_regs, regs->num_regs)' | 4540 /* Go through the first `min (num_regs, regs->num_regs)' |
4543 registers, since that is all we initialized. */ | 4541 registers, since that is all we initialized. */ |
4544 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) | 4542 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) |
4545 { | 4543 { |
4546 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) | 4544 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) |
4551 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); | 4549 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); |
4552 regs->end[mcnt] | 4550 regs->end[mcnt] |
4553 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); | 4551 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); |
4554 } | 4552 } |
4555 } | 4553 } |
4556 | 4554 |
4557 /* If the regs structure we return has more elements than | 4555 /* If the regs structure we return has more elements than |
4558 were in the pattern, set the extra elements to -1. If | 4556 were in the pattern, set the extra elements to -1. If |
4559 we (re)allocated the registers, this is the case, | 4557 we (re)allocated the registers, this is the case, |
4560 because we always allocate enough to have at least one | 4558 because we always allocate enough to have at least one |
4561 -1 at the end. */ | 4559 -1 at the end. */ |
4566 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", | 4564 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", |
4567 nfailure_points_pushed, nfailure_points_popped, | 4565 nfailure_points_pushed, nfailure_points_popped, |
4568 nfailure_points_pushed - nfailure_points_popped); | 4566 nfailure_points_pushed - nfailure_points_popped); |
4569 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); | 4567 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); |
4570 | 4568 |
4571 mcnt = d - pos - (MATCHING_IN_FIRST_STRING | 4569 mcnt = d - pos - (MATCHING_IN_FIRST_STRING |
4572 ? string1 | 4570 ? string1 |
4573 : string2 - size1); | 4571 : string2 - size1); |
4574 | 4572 |
4575 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); | 4573 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); |
4576 | 4574 |
4577 FREE_VARIABLES (); | 4575 FREE_VARIABLES (); |
4657 not = !not; | 4655 not = !not; |
4658 | 4656 |
4659 p += 1 + *p; | 4657 p += 1 + *p; |
4660 | 4658 |
4661 if (!not) goto fail; | 4659 if (!not) goto fail; |
4662 | 4660 |
4663 SET_REGS_MATCHED (); | 4661 SET_REGS_MATCHED (); |
4664 INC_CHARPTR (d); /* XEmacs change */ | 4662 INC_CHARPTR (d); /* XEmacs change */ |
4665 break; | 4663 break; |
4666 } | 4664 } |
4667 | 4665 |
4680 | 4678 |
4681 if (EQ (Qt, unified_range_table_lookup (p, c, Qnil))) | 4679 if (EQ (Qt, unified_range_table_lookup (p, c, Qnil))) |
4682 not = !not; | 4680 not = !not; |
4683 | 4681 |
4684 p += unified_range_table_bytes_used (p); | 4682 p += unified_range_table_bytes_used (p); |
4685 | 4683 |
4686 if (!not) goto fail; | 4684 if (!not) goto fail; |
4687 | 4685 |
4688 SET_REGS_MATCHED (); | 4686 SET_REGS_MATCHED (); |
4689 INC_CHARPTR (d); | 4687 INC_CHARPTR (d); |
4690 break; | 4688 break; |
4691 } | 4689 } |
4692 #endif | 4690 #endif |
4700 case start_memory: | 4698 case start_memory: |
4701 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); | 4699 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); |
4702 | 4700 |
4703 /* Find out if this group can match the empty string. */ | 4701 /* Find out if this group can match the empty string. */ |
4704 p1 = p; /* To send to group_match_null_string_p. */ | 4702 p1 = p; /* To send to group_match_null_string_p. */ |
4705 | 4703 |
4706 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) | 4704 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) |
4707 REG_MATCH_NULL_STRING_P (reg_info[*p]) | 4705 REG_MATCH_NULL_STRING_P (reg_info[*p]) |
4708 = group_match_null_string_p (&p1, pend, reg_info); | 4706 = group_match_null_string_p (&p1, pend, reg_info); |
4709 | 4707 |
4710 /* Save the position in the string where we were the last time | 4708 /* Save the position in the string where we were the last time |
4711 we were at this open-group operator in case the group is | 4709 we were at this open-group operator in case the group is |
4712 operated upon by a repetition operator, e.g., with `(a*)*b' | 4710 operated upon by a repetition operator, e.g., with `(a*)*b' |
4713 against `ab'; then we want to ignore where we are now in | 4711 against `ab'; then we want to ignore where we are now in |
4714 the string in case this attempt to match fails. */ | 4712 the string in case this attempt to match fails. */ |
4715 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) | 4713 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) |
4716 ? REG_UNSET (regstart[*p]) ? d : regstart[*p] | 4714 ? REG_UNSET (regstart[*p]) ? d : regstart[*p] |
4717 : regstart[*p]; | 4715 : regstart[*p]; |
4718 DEBUG_PRINT2 (" old_regstart: %d\n", | 4716 DEBUG_PRINT2 (" old_regstart: %d\n", |
4719 POINTER_TO_OFFSET (old_regstart[*p])); | 4717 POINTER_TO_OFFSET (old_regstart[*p])); |
4720 | 4718 |
4721 regstart[*p] = d; | 4719 regstart[*p] = d; |
4722 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); | 4720 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); |
4723 | 4721 |
4724 IS_ACTIVE (reg_info[*p]) = 1; | 4722 IS_ACTIVE (reg_info[*p]) = 1; |
4725 MATCHED_SOMETHING (reg_info[*p]) = 0; | 4723 MATCHED_SOMETHING (reg_info[*p]) = 0; |
4726 | 4724 |
4727 /* Clear this whenever we change the register activity status. */ | 4725 /* Clear this whenever we change the register activity status. */ |
4728 set_regs_matched_done = 0; | 4726 set_regs_matched_done = 0; |
4729 | 4727 |
4730 /* This is the new highest active register. */ | 4728 /* This is the new highest active register. */ |
4731 highest_active_reg = *p; | 4729 highest_active_reg = *p; |
4732 | 4730 |
4733 /* If nothing was active before, this is the new lowest active | 4731 /* If nothing was active before, this is the new lowest active |
4734 register. */ | 4732 register. */ |
4735 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) | 4733 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) |
4736 lowest_active_reg = *p; | 4734 lowest_active_reg = *p; |
4737 | 4735 |
4745 /* The stop_memory opcode represents the end of a group. Its | 4743 /* The stop_memory opcode represents the end of a group. Its |
4746 arguments are the same as start_memory's: the register | 4744 arguments are the same as start_memory's: the register |
4747 number, and the number of inner groups. */ | 4745 number, and the number of inner groups. */ |
4748 case stop_memory: | 4746 case stop_memory: |
4749 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); | 4747 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); |
4750 | 4748 |
4751 /* We need to save the string position the last time we were at | 4749 /* We need to save the string position the last time we were at |
4752 this close-group operator in case the group is operated | 4750 this close-group operator in case the group is operated |
4753 upon by a repetition operator, e.g., with `((a*)*(b*)*)*' | 4751 upon by a repetition operator, e.g., with `((a*)*(b*)*)*' |
4754 against `aba'; then we want to ignore where we are now in | 4752 against `aba'; then we want to ignore where we are now in |
4755 the string in case this attempt to match fails. */ | 4753 the string in case this attempt to match fails. */ |
4756 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) | 4754 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) |
4757 ? REG_UNSET (regend[*p]) ? d : regend[*p] | 4755 ? REG_UNSET (regend[*p]) ? d : regend[*p] |
4758 : regend[*p]; | 4756 : regend[*p]; |
4759 DEBUG_PRINT2 (" old_regend: %d\n", | 4757 DEBUG_PRINT2 (" old_regend: %d\n", |
4760 POINTER_TO_OFFSET (old_regend[*p])); | 4758 POINTER_TO_OFFSET (old_regend[*p])); |
4761 | 4759 |
4762 regend[*p] = d; | 4760 regend[*p] = d; |
4763 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); | 4761 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); |
4764 | 4762 |
4781 (a(b)c(d(e)f)g). When group 3 ends, after the f), the | 4779 (a(b)c(d(e)f)g). When group 3 ends, after the f), the |
4782 new highest active register is 1. */ | 4780 new highest active register is 1. */ |
4783 unsigned char r = *p - 1; | 4781 unsigned char r = *p - 1; |
4784 while (r > 0 && !IS_ACTIVE (reg_info[r])) | 4782 while (r > 0 && !IS_ACTIVE (reg_info[r])) |
4785 r--; | 4783 r--; |
4786 | 4784 |
4787 /* If we end up at register zero, that means that we saved | 4785 /* If we end up at register zero, that means that we saved |
4788 the registers as the result of an `on_failure_jump', not | 4786 the registers as the result of an `on_failure_jump', not |
4789 a `start_memory', and we jumped to past the innermost | 4787 a `start_memory', and we jumped to past the innermost |
4790 `stop_memory'. For example, in ((.)*) we save | 4788 `stop_memory'. For example, in ((.)*) we save |
4791 registers 1 and 2 as a result of the *, but when we pop | 4789 registers 1 and 2 as a result of the *, but when we pop |
4797 highest_active_reg = NO_HIGHEST_ACTIVE_REG; | 4795 highest_active_reg = NO_HIGHEST_ACTIVE_REG; |
4798 } | 4796 } |
4799 else | 4797 else |
4800 highest_active_reg = r; | 4798 highest_active_reg = r; |
4801 } | 4799 } |
4802 | 4800 |
4803 /* If we just failed to match something this time around with a | 4801 /* If we just failed to match something this time around with a |
4804 group that's operated on by a repetition operator, try to | 4802 group that's operated on by a repetition operator, try to |
4805 force exit from the ``loop'', and restore the register | 4803 force exit from the ``loop'', and restore the register |
4806 information for this group that we had before trying this | 4804 information for this group that we had before trying this |
4807 last match. */ | 4805 last match. */ |
4808 if ((!MATCHED_SOMETHING (reg_info[*p]) | 4806 if ((!MATCHED_SOMETHING (reg_info[*p]) |
4809 || just_past_start_mem == p - 1) | 4807 || just_past_start_mem == p - 1) |
4810 && (p + 2) < pend) | 4808 && (p + 2) < pend) |
4811 { | 4809 { |
4812 boolean is_a_jump_n = false; | 4810 boolean is_a_jump_n = false; |
4813 | 4811 |
4814 p1 = p + 2; | 4812 p1 = p + 2; |
4815 mcnt = 0; | 4813 mcnt = 0; |
4816 switch ((re_opcode_t) *p1++) | 4814 switch ((re_opcode_t) *p1++) |
4817 { | 4815 { |
4818 case jump_n: | 4816 case jump_n: |
4823 case dummy_failure_jump: | 4821 case dummy_failure_jump: |
4824 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 4822 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
4825 if (is_a_jump_n) | 4823 if (is_a_jump_n) |
4826 p1 += 2; | 4824 p1 += 2; |
4827 break; | 4825 break; |
4828 | 4826 |
4829 default: | 4827 default: |
4830 /* do nothing */ ; | 4828 /* do nothing */ ; |
4831 } | 4829 } |
4832 p1 += mcnt; | 4830 p1 += mcnt; |
4833 | 4831 |
4834 /* If the next operation is a jump backwards in the pattern | 4832 /* If the next operation is a jump backwards in the pattern |
4835 to an on_failure_jump right before the start_memory | 4833 to an on_failure_jump right before the start_memory |
4836 corresponding to this stop_memory, exit from the loop | 4834 corresponding to this stop_memory, exit from the loop |
4837 by forcing a failure after pushing on the stack the | 4835 by forcing a failure after pushing on the stack the |
4838 on_failure_jump's jump in the pattern, and d. */ | 4836 on_failure_jump's jump in the pattern, and d. */ |
4842 /* If this group ever matched anything, then restore | 4840 /* If this group ever matched anything, then restore |
4843 what its registers were before trying this last | 4841 what its registers were before trying this last |
4844 failed match, e.g., with `(a*)*b' against `ab' for | 4842 failed match, e.g., with `(a*)*b' against `ab' for |
4845 regstart[1], and, e.g., with `((a*)*(b*)*)*' | 4843 regstart[1], and, e.g., with `((a*)*(b*)*)*' |
4846 against `aba' for regend[3]. | 4844 against `aba' for regend[3]. |
4847 | 4845 |
4848 Also restore the registers for inner groups for, | 4846 Also restore the registers for inner groups for, |
4849 e.g., `((a*)(b*))*' against `aba' (register 3 would | 4847 e.g., `((a*)(b*))*' against `aba' (register 3 would |
4850 otherwise get trashed). */ | 4848 otherwise get trashed). */ |
4851 | 4849 |
4852 if (EVER_MATCHED_SOMETHING (reg_info[*p])) | 4850 if (EVER_MATCHED_SOMETHING (reg_info[*p])) |
4853 { | 4851 { |
4854 unsigned r; | 4852 unsigned r; |
4855 | 4853 |
4856 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; | 4854 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; |
4857 | 4855 |
4858 /* Restore this and inner groups' (if any) registers. */ | 4856 /* Restore this and inner groups' (if any) registers. */ |
4859 for (r = *p; r < *p + *(p + 1); r++) | 4857 for (r = *p; r < *p + *(p + 1); r++) |
4860 { | 4858 { |
4861 regstart[r] = old_regstart[r]; | 4859 regstart[r] = old_regstart[r]; |
4862 | 4860 |
4863 /* xx why this test? */ | 4861 /* xx why this test? */ |
4864 if (old_regend[r] >= regstart[r]) | 4862 if (old_regend[r] >= regstart[r]) |
4865 regend[r] = old_regend[r]; | 4863 regend[r] = old_regend[r]; |
4866 } | 4864 } |
4867 } | 4865 } |
4868 p1++; | 4866 p1++; |
4869 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 4867 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
4870 PUSH_FAILURE_POINT (p1 + mcnt, d, -2); | 4868 PUSH_FAILURE_POINT (p1 + mcnt, d, -2); |
4871 | 4869 |
4872 goto fail; | 4870 goto fail; |
4873 } | 4871 } |
4874 } | 4872 } |
4875 | 4873 |
4876 /* Move past the register number and the inner group count. */ | 4874 /* Move past the register number and the inner group count. */ |
4877 p += 2; | 4875 p += 2; |
4878 break; | 4876 break; |
4879 | 4877 |
4880 | 4878 |
4887 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); | 4885 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); |
4888 | 4886 |
4889 /* Can't back reference a group which we've never matched. */ | 4887 /* Can't back reference a group which we've never matched. */ |
4890 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) | 4888 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) |
4891 goto fail; | 4889 goto fail; |
4892 | 4890 |
4893 /* Where in input to try to start matching. */ | 4891 /* Where in input to try to start matching. */ |
4894 d2 = regstart[regno]; | 4892 d2 = regstart[regno]; |
4895 | 4893 |
4896 /* Where to stop matching; if both the place to start and | 4894 /* Where to stop matching; if both the place to start and |
4897 the place to stop matching are in the same string, then | 4895 the place to stop matching are in the same string, then |
4898 set to the place to stop, otherwise, for now have to use | 4896 set to the place to stop, otherwise, for now have to use |
4899 the end of the first string. */ | 4897 the end of the first string. */ |
4900 | 4898 |
4901 dend2 = ((FIRST_STRING_P (regstart[regno]) | 4899 dend2 = ((FIRST_STRING_P (regstart[regno]) |
4902 == FIRST_STRING_P (regend[regno])) | 4900 == FIRST_STRING_P (regend[regno])) |
4903 ? regend[regno] : end_match_1); | 4901 ? regend[regno] : end_match_1); |
4904 for (;;) | 4902 for (;;) |
4905 { | 4903 { |
4906 /* If necessary, advance to next segment in register | 4904 /* If necessary, advance to next segment in register |
4920 /* If necessary, advance to next segment in data. */ | 4918 /* If necessary, advance to next segment in data. */ |
4921 PREFETCH (); | 4919 PREFETCH (); |
4922 | 4920 |
4923 /* How many characters left in this segment to match. */ | 4921 /* How many characters left in this segment to match. */ |
4924 mcnt = dend - d; | 4922 mcnt = dend - d; |
4925 | 4923 |
4926 /* Want how many consecutive characters we can match in | 4924 /* Want how many consecutive characters we can match in |
4927 one shot, so, if necessary, adjust the count. */ | 4925 one shot, so, if necessary, adjust the count. */ |
4928 if (mcnt > dend2 - d2) | 4926 if (mcnt > dend2 - d2) |
4929 mcnt = dend2 - d2; | 4927 mcnt = dend2 - d2; |
4930 | 4928 |
4931 /* Compare that many; failure if mismatch, else move | 4929 /* Compare that many; failure if mismatch, else move |
4932 past them. */ | 4930 past them. */ |
4933 if (translate | 4931 if (translate |
4934 ? bcmp_translate ((unsigned char *) d, | 4932 ? bcmp_translate ((unsigned char *) d, |
4935 (unsigned char *) d2, mcnt, translate) | 4933 (unsigned char *) d2, mcnt, translate) |
4936 : memcmp (d, d2, mcnt)) | 4934 : memcmp (d, d2, mcnt)) |
4937 goto fail; | 4935 goto fail; |
4938 d += mcnt, d2 += mcnt; | 4936 d += mcnt, d2 += mcnt; |
4939 | 4937 |
4940 /* Do this because we've matched some characters. */ | 4938 /* Do this because we've matched some characters. */ |
4947 /* begline matches the empty string at the beginning of the string | 4945 /* begline matches the empty string at the beginning of the string |
4948 (unless `not_bol' is set in `bufp'), and, if | 4946 (unless `not_bol' is set in `bufp'), and, if |
4949 `newline_anchor' is set, after newlines. */ | 4947 `newline_anchor' is set, after newlines. */ |
4950 case begline: | 4948 case begline: |
4951 DEBUG_PRINT1 ("EXECUTING begline.\n"); | 4949 DEBUG_PRINT1 ("EXECUTING begline.\n"); |
4952 | 4950 |
4953 if (AT_STRINGS_BEG (d)) | 4951 if (AT_STRINGS_BEG (d)) |
4954 { | 4952 { |
4955 if (!bufp->not_bol) break; | 4953 if (!bufp->not_bol) break; |
4956 } | 4954 } |
4957 else if (d[-1] == '\n' && bufp->newline_anchor) | 4955 else if (d[-1] == '\n' && bufp->newline_anchor) |
4968 | 4966 |
4969 if (AT_STRINGS_END (d)) | 4967 if (AT_STRINGS_END (d)) |
4970 { | 4968 { |
4971 if (!bufp->not_eol) break; | 4969 if (!bufp->not_eol) break; |
4972 } | 4970 } |
4973 | 4971 |
4974 /* We have to ``prefetch'' the next character. */ | 4972 /* We have to ``prefetch'' the next character. */ |
4975 else if ((d == end1 ? *string2 : *d) == '\n' | 4973 else if ((d == end1 ? *string2 : *d) == '\n' |
4976 && bufp->newline_anchor) | 4974 && bufp->newline_anchor) |
4977 { | 4975 { |
4978 break; | 4976 break; |
5002 string, instead of restoring it. To see why, consider | 5000 string, instead of restoring it. To see why, consider |
5003 matching `foo\nbar' against `.*\n'. The .* matches the foo; | 5001 matching `foo\nbar' against `.*\n'. The .* matches the foo; |
5004 then the . fails against the \n. But the next thing we want | 5002 then the . fails against the \n. But the next thing we want |
5005 to do is match the \n against the \n; if we restored the | 5003 to do is match the \n against the \n; if we restored the |
5006 string value, we would be back at the foo. | 5004 string value, we would be back at the foo. |
5007 | 5005 |
5008 Because this is used only in specific cases, we don't need to | 5006 Because this is used only in specific cases, we don't need to |
5009 check all the things that `on_failure_jump' does, to make | 5007 check all the things that `on_failure_jump' does, to make |
5010 sure the right things get saved on the stack. Hence we don't | 5008 sure the right things get saved on the stack. Hence we don't |
5011 share its code. The only reason to push anything on the | 5009 share its code. The only reason to push anything on the |
5012 stack at all is that otherwise we would have to change | 5010 stack at all is that otherwise we would have to change |
5013 `anychar's code to do something besides goto fail in this | 5011 `anychar's code to do something besides goto fail in this |
5014 case; that seems worse than this. */ | 5012 case; that seems worse than this. */ |
5015 case on_failure_keep_string_jump: | 5013 case on_failure_keep_string_jump: |
5016 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); | 5014 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); |
5017 | 5015 |
5018 EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5016 EXTRACT_NUMBER_AND_INCR (mcnt, p); |
5019 DEBUG_PRINT3 (" %d (to 0x%p):\n", mcnt, p + mcnt); | 5017 DEBUG_PRINT3 (" %d (to 0x%p):\n", mcnt, p + mcnt); |
5020 | 5018 |
5021 PUSH_FAILURE_POINT (p + mcnt, (void *) 0, -2); | 5019 PUSH_FAILURE_POINT (p + mcnt, (void *) 0, -2); |
5022 break; | 5020 break; |
5023 | 5021 |
5024 | 5022 |
5025 /* Uses of on_failure_jump: | 5023 /* Uses of on_failure_jump: |
5026 | 5024 |
5027 Each alternative starts with an on_failure_jump that points | 5025 Each alternative starts with an on_failure_jump that points |
5028 to the beginning of the next alternative. Each alternative | 5026 to the beginning of the next alternative. Each alternative |
5029 except the last ends with a jump that in effect jumps past | 5027 except the last ends with a jump that in effect jumps past |
5030 the rest of the alternatives. (They really jump to the | 5028 the rest of the alternatives. (They really jump to the |
5031 ending jump of the following alternative, because tensioning | 5029 ending jump of the following alternative, because tensioning |
5087 pattern follows its end. If we can establish that there | 5085 pattern follows its end. If we can establish that there |
5088 is nothing that they would both match, i.e., that we | 5086 is nothing that they would both match, i.e., that we |
5089 would have to backtrack because of (as in, e.g., `a*a') | 5087 would have to backtrack because of (as in, e.g., `a*a') |
5090 then we can change to pop_failure_jump, because we'll | 5088 then we can change to pop_failure_jump, because we'll |
5091 never have to backtrack. | 5089 never have to backtrack. |
5092 | 5090 |
5093 This is not true in the case of alternatives: in | 5091 This is not true in the case of alternatives: in |
5094 `(a|ab)*' we do need to backtrack to the `ab' alternative | 5092 `(a|ab)*' we do need to backtrack to the `ab' alternative |
5095 (e.g., if the string was `ab'). But instead of trying to | 5093 (e.g., if the string was `ab'). But instead of trying to |
5096 detect that here, the alternative has put on a dummy | 5094 detect that here, the alternative has put on a dummy |
5097 failure point which is what we will end up popping. */ | 5095 failure point which is what we will end up popping. */ |
5113 break; | 5111 break; |
5114 } | 5112 } |
5115 | 5113 |
5116 p1 = p + mcnt; | 5114 p1 = p + mcnt; |
5117 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding | 5115 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding |
5118 to the `maybe_finalize_jump' of this case. Examine what | 5116 to the `maybe_finalize_jump' of this case. Examine what |
5119 follows. */ | 5117 follows. */ |
5120 | 5118 |
5121 /* If we're at the end of the pattern, we can change. */ | 5119 /* If we're at the end of the pattern, we can change. */ |
5122 if (p2 == pend) | 5120 if (p2 == pend) |
5123 { | 5121 { |
5139 { | 5137 { |
5140 p[-3] = (unsigned char) pop_failure_jump; | 5138 p[-3] = (unsigned char) pop_failure_jump; |
5141 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", | 5139 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", |
5142 c, p1[5]); | 5140 c, p1[5]); |
5143 } | 5141 } |
5144 | 5142 |
5145 else if ((re_opcode_t) p1[3] == charset | 5143 else if ((re_opcode_t) p1[3] == charset |
5146 || (re_opcode_t) p1[3] == charset_not) | 5144 || (re_opcode_t) p1[3] == charset_not) |
5147 { | 5145 { |
5148 int not = (re_opcode_t) p1[3] == charset_not; | 5146 int not = (re_opcode_t) p1[3] == charset_not; |
5149 | 5147 |
5150 if (c < (unsigned char) (p1[4] * BYTEWIDTH) | 5148 if (c < (unsigned char) (p1[4] * BYTEWIDTH) |
5151 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) | 5149 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) |
5152 not = !not; | 5150 not = !not; |
5153 | 5151 |
5154 /* `not' is equal to 1 if c would match, which means | 5152 /* `not' is equal to 1 if c would match, which means |
5174 { | 5172 { |
5175 p[-3] = (unsigned char) pop_failure_jump; | 5173 p[-3] = (unsigned char) pop_failure_jump; |
5176 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", | 5174 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", |
5177 c, p1[5]); | 5175 c, p1[5]); |
5178 } | 5176 } |
5179 | 5177 |
5180 else if ((re_opcode_t) p1[3] == charset_not) | 5178 else if ((re_opcode_t) p1[3] == charset_not) |
5181 { | 5179 { |
5182 int idx; | 5180 int idx; |
5183 /* We win if the charset_not inside the loop | 5181 /* We win if the charset_not inside the loop |
5184 lists every character listed in the charset after. */ | 5182 lists every character listed in the charset after. */ |
5245 dummy_low_reg, dummy_high_reg, | 5243 dummy_low_reg, dummy_high_reg, |
5246 reg_dummy, reg_dummy, reg_info_dummy); | 5244 reg_dummy, reg_dummy, reg_info_dummy); |
5247 } | 5245 } |
5248 /* Note fall through. */ | 5246 /* Note fall through. */ |
5249 | 5247 |
5250 | 5248 |
5251 /* Unconditionally jump (without popping any failure points). */ | 5249 /* Unconditionally jump (without popping any failure points). */ |
5252 case jump: | 5250 case jump: |
5253 unconditional_jump: | 5251 unconditional_jump: |
5254 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ | 5252 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ |
5255 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); | 5253 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); |
5256 p += mcnt; /* Do the jump. */ | 5254 p += mcnt; /* Do the jump. */ |
5257 DEBUG_PRINT2 ("(to 0x%p).\n", p); | 5255 DEBUG_PRINT2 ("(to 0x%p).\n", p); |
5258 break; | 5256 break; |
5259 | 5257 |
5260 | 5258 |
5261 /* We need this opcode so we can detect where alternatives end | 5259 /* We need this opcode so we can detect where alternatives end |
5262 in `group_match_null_string_p' et al. */ | 5260 in `group_match_null_string_p' et al. */ |
5263 case jump_past_alt: | 5261 case jump_past_alt: |
5264 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); | 5262 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); |
5265 goto unconditional_jump; | 5263 goto unconditional_jump; |
5290 PUSH_FAILURE_POINT ((void *) 0, (void *) 0, -2); | 5288 PUSH_FAILURE_POINT ((void *) 0, (void *) 0, -2); |
5291 break; | 5289 break; |
5292 | 5290 |
5293 /* Have to succeed matching what follows at least n times. | 5291 /* Have to succeed matching what follows at least n times. |
5294 After that, handle like `on_failure_jump'. */ | 5292 After that, handle like `on_failure_jump'. */ |
5295 case succeed_n: | 5293 case succeed_n: |
5296 EXTRACT_NUMBER (mcnt, p + 2); | 5294 EXTRACT_NUMBER (mcnt, p + 2); |
5297 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); | 5295 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); |
5298 | 5296 |
5299 assert (mcnt >= 0); | 5297 assert (mcnt >= 0); |
5300 /* Originally, this is how many times we HAVE to succeed. */ | 5298 /* Originally, this is how many times we HAVE to succeed. */ |
5311 p[2] = (unsigned char) no_op; | 5309 p[2] = (unsigned char) no_op; |
5312 p[3] = (unsigned char) no_op; | 5310 p[3] = (unsigned char) no_op; |
5313 goto on_failure; | 5311 goto on_failure; |
5314 } | 5312 } |
5315 break; | 5313 break; |
5316 | 5314 |
5317 case jump_n: | 5315 case jump_n: |
5318 EXTRACT_NUMBER (mcnt, p + 2); | 5316 EXTRACT_NUMBER (mcnt, p + 2); |
5319 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); | 5317 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); |
5320 | 5318 |
5321 /* Originally, this is how many times we CAN jump. */ | 5319 /* Originally, this is how many times we CAN jump. */ |
5322 if (mcnt) | 5320 if (mcnt) |
5323 { | 5321 { |
5324 mcnt--; | 5322 mcnt--; |
5325 STORE_NUMBER (p + 2, mcnt); | 5323 STORE_NUMBER (p + 2, mcnt); |
5326 goto unconditional_jump; | 5324 goto unconditional_jump; |
5327 } | 5325 } |
5328 /* If we don't have to jump any more, skip over the rest of the command. */ | 5326 /* If we don't have to jump any more, skip over the rest of the command. */ |
5329 else | 5327 else |
5330 p += 4; | 5328 p += 4; |
5331 break; | 5329 break; |
5332 | 5330 |
5333 case set_number_at: | 5331 case set_number_at: |
5334 { | 5332 { |
5335 DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); | 5333 DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); |
5336 | 5334 |
5337 EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5335 EXTRACT_NUMBER_AND_INCR (mcnt, p); |
5433 DEBUG_PRINT1 ("EXECUTING before_dot.\n"); | 5431 DEBUG_PRINT1 ("EXECUTING before_dot.\n"); |
5434 if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) >= | 5432 if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) >= |
5435 BUF_PT (regex_emacs_buffer)) | 5433 BUF_PT (regex_emacs_buffer)) |
5436 goto fail; | 5434 goto fail; |
5437 break; | 5435 break; |
5438 | 5436 |
5439 case at_dot: | 5437 case at_dot: |
5440 DEBUG_PRINT1 ("EXECUTING at_dot.\n"); | 5438 DEBUG_PRINT1 ("EXECUTING at_dot.\n"); |
5441 if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) | 5439 if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) |
5442 != BUF_PT (regex_emacs_buffer)) | 5440 != BUF_PT (regex_emacs_buffer)) |
5443 goto fail; | 5441 goto fail; |
5444 break; | 5442 break; |
5445 | 5443 |
5446 case after_dot: | 5444 case after_dot: |
5447 DEBUG_PRINT1 ("EXECUTING after_dot.\n"); | 5445 DEBUG_PRINT1 ("EXECUTING after_dot.\n"); |
5448 if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) | 5446 if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) |
5449 <= BUF_PT (regex_emacs_buffer)) | 5447 <= BUF_PT (regex_emacs_buffer)) |
5450 goto fail; | 5448 goto fail; |
5513 mcnt, should_succeed)) | 5511 mcnt, should_succeed)) |
5514 goto fail; | 5512 goto fail; |
5515 SET_REGS_MATCHED (); | 5513 SET_REGS_MATCHED (); |
5516 } | 5514 } |
5517 break; | 5515 break; |
5518 | 5516 |
5519 case notcategoryspec: | 5517 case notcategoryspec: |
5520 should_succeed = 0; | 5518 should_succeed = 0; |
5521 goto matchornotcategory; | 5519 goto matchornotcategory; |
5522 /* end of category patch */ | 5520 /* end of category patch */ |
5523 #endif /* MULE */ | 5521 #endif /* MULE */ |
5528 if (!WORDCHAR_P_UNSAFE ((int) (*d))) | 5526 if (!WORDCHAR_P_UNSAFE ((int) (*d))) |
5529 goto fail; | 5527 goto fail; |
5530 SET_REGS_MATCHED (); | 5528 SET_REGS_MATCHED (); |
5531 d++; | 5529 d++; |
5532 break; | 5530 break; |
5533 | 5531 |
5534 case notwordchar: | 5532 case notwordchar: |
5535 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); | 5533 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); |
5536 PREFETCH (); | 5534 PREFETCH (); |
5537 if (!WORDCHAR_P_UNSAFE ((int) (*d))) | 5535 if (!WORDCHAR_P_UNSAFE ((int) (*d))) |
5538 goto fail; | 5536 goto fail; |
5539 SET_REGS_MATCHED (); | 5537 SET_REGS_MATCHED (); |
5540 d++; | 5538 d++; |
5541 break; | 5539 break; |
5542 #endif /* not emacs */ | 5540 #endif /* not emacs */ |
5543 | 5541 |
5544 default: | 5542 default: |
5545 abort (); | 5543 abort (); |
5546 } | 5544 } |
5547 continue; /* Successfully executed one pattern command; keep going. */ | 5545 continue; /* Successfully executed one pattern command; keep going. */ |
5548 | 5546 |
5563 /* If we failed to the end of the pattern, don't examine *p. */ | 5561 /* If we failed to the end of the pattern, don't examine *p. */ |
5564 assert (p <= pend); | 5562 assert (p <= pend); |
5565 if (p < pend) | 5563 if (p < pend) |
5566 { | 5564 { |
5567 boolean is_a_jump_n = false; | 5565 boolean is_a_jump_n = false; |
5568 | 5566 |
5569 /* If failed to a backwards jump that's part of a repetition | 5567 /* If failed to a backwards jump that's part of a repetition |
5570 loop, need to pop this failure point and use the next one. */ | 5568 loop, need to pop this failure point and use the next one. */ |
5571 switch ((re_opcode_t) *p) | 5569 switch ((re_opcode_t) *p) |
5572 { | 5570 { |
5573 case jump_n: | 5571 case jump_n: |
5575 case maybe_pop_jump: | 5573 case maybe_pop_jump: |
5576 case pop_failure_jump: | 5574 case pop_failure_jump: |
5577 case jump: | 5575 case jump: |
5578 p1 = p + 1; | 5576 p1 = p + 1; |
5579 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 5577 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
5580 p1 += mcnt; | 5578 p1 += mcnt; |
5581 | 5579 |
5582 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) | 5580 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) |
5583 || (!is_a_jump_n | 5581 || (!is_a_jump_n |
5584 && (re_opcode_t) *p1 == on_failure_jump)) | 5582 && (re_opcode_t) *p1 == on_failure_jump)) |
5585 goto fail; | 5583 goto fail; |
5606 | 5604 |
5607 /* Subroutine definitions for re_match_2. */ | 5605 /* Subroutine definitions for re_match_2. */ |
5608 | 5606 |
5609 | 5607 |
5610 /* We are passed P pointing to a register number after a start_memory. | 5608 /* We are passed P pointing to a register number after a start_memory. |
5611 | 5609 |
5612 Return true if the pattern up to the corresponding stop_memory can | 5610 Return true if the pattern up to the corresponding stop_memory can |
5613 match the empty string, and false otherwise. | 5611 match the empty string, and false otherwise. |
5614 | 5612 |
5615 If we find the matching stop_memory, sets P to point to one past its number. | 5613 If we find the matching stop_memory, sets P to point to one past its number. |
5616 Otherwise, sets P to an undefined byte less than or equal to END. | 5614 Otherwise, sets P to an undefined byte less than or equal to END. |
5617 | 5615 |
5618 We don't handle duplicates properly (yet). */ | 5616 We don't handle duplicates properly (yet). */ |
5619 | 5617 |
5622 register_info_type *reg_info) | 5620 register_info_type *reg_info) |
5623 { | 5621 { |
5624 int mcnt; | 5622 int mcnt; |
5625 /* Point to after the args to the start_memory. */ | 5623 /* Point to after the args to the start_memory. */ |
5626 unsigned char *p1 = *p + 2; | 5624 unsigned char *p1 = *p + 2; |
5627 | 5625 |
5628 while (p1 < end) | 5626 while (p1 < end) |
5629 { | 5627 { |
5630 /* Skip over opcodes that can match nothing, and return true or | 5628 /* Skip over opcodes that can match nothing, and return true or |
5631 false, as appropriate, when we get to one that can't, or to the | 5629 false, as appropriate, when we get to one that can't, or to the |
5632 matching stop_memory. */ | 5630 matching stop_memory. */ |
5633 | 5631 |
5634 switch ((re_opcode_t) *p1) | 5632 switch ((re_opcode_t) *p1) |
5635 { | 5633 { |
5636 /* Could be either a loop or a series of alternatives. */ | 5634 /* Could be either a loop or a series of alternatives. */ |
5637 case on_failure_jump: | 5635 case on_failure_jump: |
5638 p1++; | 5636 p1++; |
5639 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 5637 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
5640 | 5638 |
5641 /* If the next operation is not a jump backwards in the | 5639 /* If the next operation is not a jump backwards in the |
5642 pattern. */ | 5640 pattern. */ |
5643 | 5641 |
5644 if (mcnt >= 0) | 5642 if (mcnt >= 0) |
5645 { | 5643 { |
5649 whereas the rest start with on_failure_jump and end | 5647 whereas the rest start with on_failure_jump and end |
5650 with a jump, e.g., here is the pattern for `a|b|c': | 5648 with a jump, e.g., here is the pattern for `a|b|c': |
5651 | 5649 |
5652 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 | 5650 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 |
5653 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 | 5651 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 |
5654 /exactn/1/c | 5652 /exactn/1/c |
5655 | 5653 |
5656 So, we have to first go through the first (n-1) | 5654 So, we have to first go through the first (n-1) |
5657 alternatives and then deal with the last one separately. */ | 5655 alternatives and then deal with the last one separately. */ |
5658 | 5656 |
5659 | 5657 |
5665 { | 5663 { |
5666 /* `mcnt' holds how many bytes long the alternative | 5664 /* `mcnt' holds how many bytes long the alternative |
5667 is, including the ending `jump_past_alt' and | 5665 is, including the ending `jump_past_alt' and |
5668 its number. */ | 5666 its number. */ |
5669 | 5667 |
5670 if (!alt_match_null_string_p (p1, p1 + mcnt - 3, | 5668 if (!alt_match_null_string_p (p1, p1 + mcnt - 3, |
5671 reg_info)) | 5669 reg_info)) |
5672 return false; | 5670 return false; |
5673 | 5671 |
5674 /* Move to right after this alternative, including the | 5672 /* Move to right after this alternative, including the |
5675 jump_past_alt. */ | 5673 jump_past_alt. */ |
5676 p1 += mcnt; | 5674 p1 += mcnt; |
5677 | 5675 |
5678 /* Break if it's the beginning of an n-th alternative | 5676 /* Break if it's the beginning of an n-th alternative |
5679 that doesn't begin with an on_failure_jump. */ | 5677 that doesn't begin with an on_failure_jump. */ |
5680 if ((re_opcode_t) *p1 != on_failure_jump) | 5678 if ((re_opcode_t) *p1 != on_failure_jump) |
5681 break; | 5679 break; |
5682 | 5680 |
5683 /* Still have to check that it's not an n-th | 5681 /* Still have to check that it's not an n-th |
5684 alternative that starts with an on_failure_jump. */ | 5682 alternative that starts with an on_failure_jump. */ |
5685 p1++; | 5683 p1++; |
5686 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 5684 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
5687 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) | 5685 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) |
5702 | 5700 |
5703 p1 += mcnt; /* Get past the n-th alternative. */ | 5701 p1 += mcnt; /* Get past the n-th alternative. */ |
5704 } /* if mcnt >= 0 */ | 5702 } /* if mcnt >= 0 */ |
5705 break; | 5703 break; |
5706 | 5704 |
5707 | 5705 |
5708 case stop_memory: | 5706 case stop_memory: |
5709 assert (p1[1] == **p); | 5707 assert (p1[1] == **p); |
5710 *p = p1 + 2; | 5708 *p = p1 + 2; |
5711 return true; | 5709 return true; |
5712 | 5710 |
5713 | 5711 |
5714 default: | 5712 default: |
5715 if (!common_op_match_null_string_p (&p1, end, reg_info)) | 5713 if (!common_op_match_null_string_p (&p1, end, reg_info)) |
5716 return false; | 5714 return false; |
5717 } | 5715 } |
5718 } /* while p1 < end */ | 5716 } /* while p1 < end */ |
5719 | 5717 |
5722 | 5720 |
5723 | 5721 |
5724 /* Similar to group_match_null_string_p, but doesn't deal with alternatives: | 5722 /* Similar to group_match_null_string_p, but doesn't deal with alternatives: |
5725 It expects P to be the first byte of a single alternative and END one | 5723 It expects P to be the first byte of a single alternative and END one |
5726 byte past the last. The alternative can contain groups. */ | 5724 byte past the last. The alternative can contain groups. */ |
5727 | 5725 |
5728 static boolean | 5726 static boolean |
5729 alt_match_null_string_p (unsigned char *p, unsigned char *end, | 5727 alt_match_null_string_p (unsigned char *p, unsigned char *end, |
5730 register_info_type *reg_info) | 5728 register_info_type *reg_info) |
5731 { | 5729 { |
5732 int mcnt; | 5730 int mcnt; |
5733 unsigned char *p1 = p; | 5731 unsigned char *p1 = p; |
5734 | 5732 |
5735 while (p1 < end) | 5733 while (p1 < end) |
5736 { | 5734 { |
5737 /* Skip over opcodes that can match nothing, and break when we get | 5735 /* Skip over opcodes that can match nothing, and break when we get |
5738 to one that can't. */ | 5736 to one that can't. */ |
5739 | 5737 |
5740 switch ((re_opcode_t) *p1) | 5738 switch ((re_opcode_t) *p1) |
5741 { | 5739 { |
5742 /* It's a loop. */ | 5740 /* It's a loop. */ |
5743 case on_failure_jump: | 5741 case on_failure_jump: |
5744 p1++; | 5742 p1++; |
5745 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 5743 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
5746 p1 += mcnt; | 5744 p1 += mcnt; |
5747 break; | 5745 break; |
5748 | 5746 |
5749 default: | 5747 default: |
5750 if (!common_op_match_null_string_p (&p1, end, reg_info)) | 5748 if (!common_op_match_null_string_p (&p1, end, reg_info)) |
5751 return false; | 5749 return false; |
5752 } | 5750 } |
5753 } /* while p1 < end */ | 5751 } /* while p1 < end */ |
5754 | 5752 |
5755 return true; | 5753 return true; |
5756 } /* alt_match_null_string_p */ | 5754 } /* alt_match_null_string_p */ |
5757 | 5755 |
5758 | 5756 |
5759 /* Deals with the ops common to group_match_null_string_p and | 5757 /* Deals with the ops common to group_match_null_string_p and |
5760 alt_match_null_string_p. | 5758 alt_match_null_string_p. |
5761 | 5759 |
5762 Sets P to one after the op and its arguments, if any. */ | 5760 Sets P to one after the op and its arguments, if any. */ |
5763 | 5761 |
5764 static boolean | 5762 static boolean |
5765 common_op_match_null_string_p (unsigned char **p, unsigned char *end, | 5763 common_op_match_null_string_p (unsigned char **p, unsigned char *end, |
5766 register_info_type *reg_info) | 5764 register_info_type *reg_info) |
5790 | 5788 |
5791 case start_memory: | 5789 case start_memory: |
5792 reg_no = *p1; | 5790 reg_no = *p1; |
5793 assert (reg_no > 0 && reg_no <= MAX_REGNUM); | 5791 assert (reg_no > 0 && reg_no <= MAX_REGNUM); |
5794 ret = group_match_null_string_p (&p1, end, reg_info); | 5792 ret = group_match_null_string_p (&p1, end, reg_info); |
5795 | 5793 |
5796 /* Have to set this here in case we're checking a group which | 5794 /* Have to set this here in case we're checking a group which |
5797 contains a group and a back reference to it. */ | 5795 contains a group and a back reference to it. */ |
5798 | 5796 |
5799 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) | 5797 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) |
5800 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; | 5798 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; |
5801 | 5799 |
5802 if (!ret) | 5800 if (!ret) |
5803 return false; | 5801 return false; |
5804 break; | 5802 break; |
5805 | 5803 |
5806 /* If this is an optimized succeed_n for zero times, make the jump. */ | 5804 /* If this is an optimized succeed_n for zero times, make the jump. */ |
5807 case jump: | 5805 case jump: |
5808 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 5806 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
5809 if (mcnt >= 0) | 5807 if (mcnt >= 0) |
5810 p1 += mcnt; | 5808 p1 += mcnt; |
5812 return false; | 5810 return false; |
5813 break; | 5811 break; |
5814 | 5812 |
5815 case succeed_n: | 5813 case succeed_n: |
5816 /* Get to the number of times to succeed. */ | 5814 /* Get to the number of times to succeed. */ |
5817 p1 += 2; | 5815 p1 += 2; |
5818 EXTRACT_NUMBER_AND_INCR (mcnt, p1); | 5816 EXTRACT_NUMBER_AND_INCR (mcnt, p1); |
5819 | 5817 |
5820 if (mcnt == 0) | 5818 if (mcnt == 0) |
5821 { | 5819 { |
5822 p1 -= 4; | 5820 p1 -= 4; |
5825 } | 5823 } |
5826 else | 5824 else |
5827 return false; | 5825 return false; |
5828 break; | 5826 break; |
5829 | 5827 |
5830 case duplicate: | 5828 case duplicate: |
5831 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) | 5829 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) |
5832 return false; | 5830 return false; |
5833 break; | 5831 break; |
5834 | 5832 |
5835 case set_number_at: | 5833 case set_number_at: |
5845 } /* common_op_match_null_string_p */ | 5843 } /* common_op_match_null_string_p */ |
5846 | 5844 |
5847 | 5845 |
5848 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN | 5846 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN |
5849 bytes; nonzero otherwise. */ | 5847 bytes; nonzero otherwise. */ |
5850 | 5848 |
5851 static int | 5849 static int |
5852 bcmp_translate (CONST unsigned char *s1, CONST unsigned char *s2, | 5850 bcmp_translate (CONST unsigned char *s1, CONST unsigned char *s2, |
5853 register int len, char *translate) | 5851 register int len, char *translate) |
5854 { | 5852 { |
5855 register CONST unsigned char *p1 = s1, *p2 = s2; | 5853 register CONST unsigned char *p1 = s1, *p2 = s2; |
5864 /* Entry points for GNU code. */ | 5862 /* Entry points for GNU code. */ |
5865 | 5863 |
5866 /* re_compile_pattern is the GNU regular expression compiler: it | 5864 /* re_compile_pattern is the GNU regular expression compiler: it |
5867 compiles PATTERN (of length SIZE) and puts the result in BUFP. | 5865 compiles PATTERN (of length SIZE) and puts the result in BUFP. |
5868 Returns 0 if the pattern was valid, otherwise an error string. | 5866 Returns 0 if the pattern was valid, otherwise an error string. |
5869 | 5867 |
5870 Assumes the `allocated' (and perhaps `buffer') and `translate' fields | 5868 Assumes the `allocated' (and perhaps `buffer') and `translate' fields |
5871 are set in BUFP on entry. | 5869 are set in BUFP on entry. |
5872 | 5870 |
5873 We call regex_compile to do the actual compilation. */ | 5871 We call regex_compile to do the actual compilation. */ |
5874 | 5872 |
5875 CONST char * | 5873 CONST char * |
5876 re_compile_pattern (CONST char *pattern, int length, | 5874 re_compile_pattern (CONST char *pattern, int length, |
5877 struct re_pattern_buffer *bufp) | 5875 struct re_pattern_buffer *bufp) |
5878 { | 5876 { |
5879 reg_errcode_t ret; | 5877 reg_errcode_t ret; |
5880 | 5878 |
5881 /* GNU code is written to assume at least RE_NREGS registers will be set | 5879 /* GNU code is written to assume at least RE_NREGS registers will be set |
5882 (and at least one extra will be -1). */ | 5880 (and at least one extra will be -1). */ |
5883 bufp->regs_allocated = REGS_UNALLOCATED; | 5881 bufp->regs_allocated = REGS_UNALLOCATED; |
5884 | 5882 |
5885 /* And GNU code determines whether or not to get register information | 5883 /* And GNU code determines whether or not to get register information |
5886 by passing null for the REGS argument to re_match, etc., not by | 5884 by passing null for the REGS argument to re_match, etc., not by |
5887 setting no_sub. */ | 5885 setting no_sub. */ |
5888 bufp->no_sub = 0; | 5886 bufp->no_sub = 0; |
5889 | 5887 |
5890 /* Match anchors at newline. */ | 5888 /* Match anchors at newline. */ |
5891 bufp->newline_anchor = 1; | 5889 bufp->newline_anchor = 1; |
5892 | 5890 |
5893 ret = regex_compile (pattern, length, re_syntax_options, bufp); | 5891 ret = regex_compile (pattern, length, re_syntax_options, bufp); |
5894 | 5892 |
5895 if (!ret) | 5893 if (!ret) |
5896 return NULL; | 5894 return NULL; |
5897 return gettext (re_error_msgid[(int) ret]); | 5895 return gettext (re_error_msgid[(int) ret]); |
5898 } | 5896 } |
5899 | 5897 |
5900 /* Entry points compatible with 4.2 BSD regex library. We don't define | 5898 /* Entry points compatible with 4.2 BSD regex library. We don't define |
5901 them unless specifically requested. */ | 5899 them unless specifically requested. */ |
5902 | 5900 |
5903 #ifdef _REGEX_RE_COMP | 5901 #ifdef _REGEX_RE_COMP |
5907 | 5905 |
5908 char * | 5906 char * |
5909 re_comp (CONST char *s) | 5907 re_comp (CONST char *s) |
5910 { | 5908 { |
5911 reg_errcode_t ret; | 5909 reg_errcode_t ret; |
5912 | 5910 |
5913 if (!s) | 5911 if (!s) |
5914 { | 5912 { |
5915 if (!re_comp_buf.buffer) | 5913 if (!re_comp_buf.buffer) |
5916 return gettext ("No previous regular expression"); | 5914 return gettext ("No previous regular expression"); |
5917 return 0; | 5915 return 0; |
5934 | 5932 |
5935 /* Match anchors at newlines. */ | 5933 /* Match anchors at newlines. */ |
5936 re_comp_buf.newline_anchor = 1; | 5934 re_comp_buf.newline_anchor = 1; |
5937 | 5935 |
5938 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); | 5936 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); |
5939 | 5937 |
5940 if (!ret) | 5938 if (!ret) |
5941 return NULL; | 5939 return NULL; |
5942 | 5940 |
5943 /* Yes, we're discarding `CONST' here if !HAVE_LIBINTL. */ | 5941 /* Yes, we're discarding `CONST' here if !HAVE_LIBINTL. */ |
5944 return (char *) gettext (re_error_msgid[(int) ret]); | 5942 return (char *) gettext (re_error_msgid[(int) ret]); |
6002 | 6000 |
6003 /* regex_compile will allocate the space for the compiled pattern. */ | 6001 /* regex_compile will allocate the space for the compiled pattern. */ |
6004 preg->buffer = 0; | 6002 preg->buffer = 0; |
6005 preg->allocated = 0; | 6003 preg->allocated = 0; |
6006 preg->used = 0; | 6004 preg->used = 0; |
6007 | 6005 |
6008 /* Don't bother to use a fastmap when searching. This simplifies the | 6006 /* Don't bother to use a fastmap when searching. This simplifies the |
6009 REG_NEWLINE case: if we used a fastmap, we'd have to put all the | 6007 REG_NEWLINE case: if we used a fastmap, we'd have to put all the |
6010 characters after newlines into the fastmap. This way, we just try | 6008 characters after newlines into the fastmap. This way, we just try |
6011 every character. */ | 6009 every character. */ |
6012 preg->fastmap = 0; | 6010 preg->fastmap = 0; |
6013 | 6011 |
6014 if (cflags & REG_ICASE) | 6012 if (cflags & REG_ICASE) |
6015 { | 6013 { |
6016 unsigned i; | 6014 unsigned i; |
6017 | 6015 |
6018 preg->translate = (char *) malloc (CHAR_SET_SIZE); | 6016 preg->translate = (char *) malloc (CHAR_SET_SIZE); |
6019 if (preg->translate == NULL) | 6017 if (preg->translate == NULL) |
6020 return (int) REG_ESPACE; | 6018 return (int) REG_ESPACE; |
6021 | 6019 |
6022 /* Map uppercase characters to corresponding lowercase ones. */ | 6020 /* Map uppercase characters to corresponding lowercase ones. */ |
6037 else | 6035 else |
6038 preg->newline_anchor = 0; | 6036 preg->newline_anchor = 0; |
6039 | 6037 |
6040 preg->no_sub = !!(cflags & REG_NOSUB); | 6038 preg->no_sub = !!(cflags & REG_NOSUB); |
6041 | 6039 |
6042 /* POSIX says a null character in the pattern terminates it, so we | 6040 /* POSIX says a null character in the pattern terminates it, so we |
6043 can use strlen here in compiling the pattern. */ | 6041 can use strlen here in compiling the pattern. */ |
6044 ret = regex_compile (pattern, strlen (pattern), syntax, preg); | 6042 ret = regex_compile (pattern, strlen (pattern), syntax, preg); |
6045 | 6043 |
6046 /* POSIX doesn't distinguish between an unmatched open-group and an | 6044 /* POSIX doesn't distinguish between an unmatched open-group and an |
6047 unmatched close-group: both are REG_EPAREN. */ | 6045 unmatched close-group: both are REG_EPAREN. */ |
6048 if (ret == REG_ERPAREN) ret = REG_EPAREN; | 6046 if (ret == REG_ERPAREN) ret = REG_EPAREN; |
6049 | 6047 |
6050 return (int) ret; | 6048 return (int) ret; |
6051 } | 6049 } |
6052 | 6050 |
6053 | 6051 |
6054 /* regexec searches for a given pattern, specified by PREG, in the | 6052 /* regexec searches for a given pattern, specified by PREG, in the |
6055 string STRING. | 6053 string STRING. |
6056 | 6054 |
6057 If NMATCH is zero or REG_NOSUB was set in the cflags argument to | 6055 If NMATCH is zero or REG_NOSUB was set in the cflags argument to |
6058 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at | 6056 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at |
6059 least NMATCH elements, and we set them to the offsets of the | 6057 least NMATCH elements, and we set them to the offsets of the |
6060 corresponding matched substrings. | 6058 corresponding matched substrings. |
6061 | 6059 |
6062 EFLAGS specifies `execution flags' which affect matching: if | 6060 EFLAGS specifies `execution flags' which affect matching: if |
6063 REG_NOTBOL is set, then ^ does not match at the beginning of the | 6061 REG_NOTBOL is set, then ^ does not match at the beginning of the |
6064 string; if REG_NOTEOL is set, then $ does not match at the end. | 6062 string; if REG_NOTEOL is set, then $ does not match at the end. |
6065 | 6063 |
6066 We return 0 if we find a match and REG_NOMATCH if not. */ | 6064 We return 0 if we find a match and REG_NOMATCH if not. */ |
6067 | 6065 |
6068 int | 6066 int |
6069 regexec (CONST regex_t *preg, CONST char *string, size_t nmatch, | 6067 regexec (CONST regex_t *preg, CONST char *string, size_t nmatch, |
6070 regmatch_t pmatch[], int eflags) | 6068 regmatch_t pmatch[], int eflags) |
6074 regex_t private_preg; | 6072 regex_t private_preg; |
6075 int len = strlen (string); | 6073 int len = strlen (string); |
6076 boolean want_reg_info = !preg->no_sub && nmatch > 0; | 6074 boolean want_reg_info = !preg->no_sub && nmatch > 0; |
6077 | 6075 |
6078 private_preg = *preg; | 6076 private_preg = *preg; |
6079 | 6077 |
6080 private_preg.not_bol = !!(eflags & REG_NOTBOL); | 6078 private_preg.not_bol = !!(eflags & REG_NOTBOL); |
6081 private_preg.not_eol = !!(eflags & REG_NOTEOL); | 6079 private_preg.not_eol = !!(eflags & REG_NOTEOL); |
6082 | 6080 |
6083 /* The user has told us exactly how many registers to return | 6081 /* The user has told us exactly how many registers to return |
6084 information about, via `nmatch'. We have to pass that on to the | 6082 information about, via `nmatch'. We have to pass that on to the |
6085 matching routines. */ | 6083 matching routines. */ |
6086 private_preg.regs_allocated = REGS_FIXED; | 6084 private_preg.regs_allocated = REGS_FIXED; |
6087 | 6085 |
6088 if (want_reg_info) | 6086 if (want_reg_info) |
6089 { | 6087 { |
6090 regs.num_regs = nmatch; | 6088 regs.num_regs = nmatch; |
6091 regs.start = TALLOC (nmatch, regoff_t); | 6089 regs.start = TALLOC (nmatch, regoff_t); |
6092 regs.end = TALLOC (nmatch, regoff_t); | 6090 regs.end = TALLOC (nmatch, regoff_t); |
6096 | 6094 |
6097 /* Perform the searching operation. */ | 6095 /* Perform the searching operation. */ |
6098 ret = re_search (&private_preg, string, len, | 6096 ret = re_search (&private_preg, string, len, |
6099 /* start: */ 0, /* range: */ len, | 6097 /* start: */ 0, /* range: */ len, |
6100 want_reg_info ? &regs : (struct re_registers *) 0); | 6098 want_reg_info ? &regs : (struct re_registers *) 0); |
6101 | 6099 |
6102 /* Copy the register information to the POSIX structure. */ | 6100 /* Copy the register information to the POSIX structure. */ |
6103 if (want_reg_info) | 6101 if (want_reg_info) |
6104 { | 6102 { |
6105 if (ret >= 0) | 6103 if (ret >= 0) |
6106 { | 6104 { |
6132 CONST char *msg; | 6130 CONST char *msg; |
6133 size_t msg_size; | 6131 size_t msg_size; |
6134 | 6132 |
6135 if (errcode < 0 | 6133 if (errcode < 0 |
6136 || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0]))) | 6134 || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0]))) |
6137 /* Only error codes returned by the rest of the code should be passed | 6135 /* Only error codes returned by the rest of the code should be passed |
6138 to this routine. If we are given anything else, or if other regex | 6136 to this routine. If we are given anything else, or if other regex |
6139 code generates an invalid error code, then the program has a bug. | 6137 code generates an invalid error code, then the program has a bug. |
6140 Dump core so we can fix it. */ | 6138 Dump core so we can fix it. */ |
6141 abort (); | 6139 abort (); |
6142 | 6140 |
6143 msg = gettext (re_error_msgid[errcode]); | 6141 msg = gettext (re_error_msgid[errcode]); |
6144 | 6142 |
6145 msg_size = strlen (msg) + 1; /* Includes the null. */ | 6143 msg_size = strlen (msg) + 1; /* Includes the null. */ |
6146 | 6144 |
6147 if (errbuf_size != 0) | 6145 if (errbuf_size != 0) |
6148 { | 6146 { |
6149 if (msg_size > errbuf_size) | 6147 if (msg_size > errbuf_size) |
6150 { | 6148 { |
6151 strncpy (errbuf, msg, errbuf_size - 1); | 6149 strncpy (errbuf, msg, errbuf_size - 1); |
6165 regfree (regex_t *preg) | 6163 regfree (regex_t *preg) |
6166 { | 6164 { |
6167 if (preg->buffer != NULL) | 6165 if (preg->buffer != NULL) |
6168 free (preg->buffer); | 6166 free (preg->buffer); |
6169 preg->buffer = NULL; | 6167 preg->buffer = NULL; |
6170 | 6168 |
6171 preg->allocated = 0; | 6169 preg->allocated = 0; |
6172 preg->used = 0; | 6170 preg->used = 0; |
6173 | 6171 |
6174 if (preg->fastmap != NULL) | 6172 if (preg->fastmap != NULL) |
6175 free (preg->fastmap); | 6173 free (preg->fastmap); |