comparison src/regex.c @ 183:e121b013d1f0 r20-3b18

Import from CVS: tag r20-3b18
author cvs
date Mon, 13 Aug 2007 09:54:23 +0200
parents 9ad43877534d
children 3d6bfa290dbd
comparison
equal deleted inserted replaced
182:f07455f06202 183:e121b013d1f0
162 162
163 /* Define the syntax stuff for \<, \>, etc. */ 163 /* Define the syntax stuff for \<, \>, etc. */
164 164
165 /* This must be nonzero for the wordchar and notwordchar pattern 165 /* This must be nonzero for the wordchar and notwordchar pattern
166 commands in re_match_2. */ 166 commands in re_match_2. */
167 #ifndef Sword 167 #ifndef Sword
168 #define Sword 1 168 #define Sword 1
169 #endif 169 #endif
170 170
171 #ifdef SYNTAX_TABLE 171 #ifdef SYNTAX_TABLE
172 172
290 290
291 /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we 291 /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
292 use `alloca' instead of `malloc'. This is because using malloc in 292 use `alloca' instead of `malloc'. This is because using malloc in
293 re_search* or re_match* could cause memory leaks when C-g is used in 293 re_search* or re_match* could cause memory leaks when C-g is used in
294 Emacs; also, malloc is slower and causes storage fragmentation. On 294 Emacs; also, malloc is slower and causes storage fragmentation. On
295 the other hand, malloc is more portable, and easier to debug. 295 the other hand, malloc is more portable, and easier to debug.
296 296
297 Because we sometimes use alloca, some routines have to be macros, 297 Because we sometimes use alloca, some routines have to be macros,
298 not functions -- `alloca'-allocated space disappears at the end of the 298 not functions -- `alloca'-allocated space disappears at the end of the
299 function it is called in. */ 299 function it is called in. */
300 300
301 #ifdef REGEX_MALLOC 301 #ifdef REGEX_MALLOC
317 #include <alloca.h> 317 #include <alloca.h>
318 #else /* not __GNUC__ or HAVE_ALLOCA_H */ 318 #else /* not __GNUC__ or HAVE_ALLOCA_H */
319 #ifndef _AIX /* Already did AIX, up at the top. */ 319 #ifndef _AIX /* Already did AIX, up at the top. */
320 char *alloca (); 320 char *alloca ();
321 #endif /* not _AIX */ 321 #endif /* not _AIX */
322 #endif /* not HAVE_ALLOCA_H */ 322 #endif /* not HAVE_ALLOCA_H */
323 #endif /* not __GNUC__ */ 323 #endif /* not __GNUC__ */
324 324
325 #endif /* not alloca */ 325 #endif /* not alloca */
326 326
327 #define REGEX_ALLOCATE alloca 327 #define REGEX_ALLOCATE alloca
457 of string to be matched (if not). */ 457 of string to be matched (if not). */
458 begbuf, 458 begbuf,
459 459
460 /* Analogously, for end of buffer/string. */ 460 /* Analogously, for end of buffer/string. */
461 endbuf, 461 endbuf,
462 462
463 /* Followed by two byte relative address to which to jump. */ 463 /* Followed by two byte relative address to which to jump. */
464 jump, 464 jump,
465 465
466 /* Same as jump, but marks the end of an alternative. */ 466 /* Same as jump, but marks the end of an alternative. */
467 jump_past_alt, 467 jump_past_alt,
468 468
469 /* Followed by two-byte relative address of place to resume at 469 /* Followed by two-byte relative address of place to resume at
470 in case of failure. */ 470 in case of failure. */
471 on_failure_jump, 471 on_failure_jump,
472 472
473 /* Like on_failure_jump, but pushes a placeholder instead of the 473 /* Like on_failure_jump, but pushes a placeholder instead of the
474 current string position when executed. */ 474 current string position when executed. */
475 on_failure_keep_string_jump, 475 on_failure_keep_string_jump,
476 476
477 /* Throw away latest failure point and then jump to following 477 /* Throw away latest failure point and then jump to following
478 two-byte relative address. */ 478 two-byte relative address. */
479 pop_failure_jump, 479 pop_failure_jump,
480 480
481 /* Change to pop_failure_jump if know won't have to backtrack to 481 /* Change to pop_failure_jump if know won't have to backtrack to
549 /* 97/2/17 jhod: The following two were merged back in from the Mule 549 /* 97/2/17 jhod: The following two were merged back in from the Mule
550 2.3 code to enable some language specific processing */ 550 2.3 code to enable some language specific processing */
551 ,categoryspec, /* Matches entries in the character category tables */ 551 ,categoryspec, /* Matches entries in the character category tables */
552 notcategoryspec /* The opposite of the above */ 552 notcategoryspec /* The opposite of the above */
553 #endif 553 #endif
554 554
555 } re_opcode_t; 555 } re_opcode_t;
556 556
557 /* Common operations on the compiled pattern. */ 557 /* Common operations on the compiled pattern. */
558 558
559 /* Store NUMBER in two contiguous bytes starting at DESTINATION. */ 559 /* Store NUMBER in two contiguous bytes starting at DESTINATION. */
585 585
586 #ifdef DEBUG 586 #ifdef DEBUG
587 static void 587 static void
588 extract_number (int *dest, unsigned char *source) 588 extract_number (int *dest, unsigned char *source)
589 { 589 {
590 int temp = SIGN_EXTEND_CHAR (*(source + 1)); 590 int temp = SIGN_EXTEND_CHAR (*(source + 1));
591 *dest = *source & 0377; 591 *dest = *source & 0377;
592 *dest += temp << 8; 592 *dest += temp << 8;
593 } 593 }
594 594
595 #ifndef EXTRACT_MACROS /* To debug the macros. */ 595 #ifndef EXTRACT_MACROS /* To debug the macros. */
609 } while (0) 609 } while (0)
610 610
611 #ifdef DEBUG 611 #ifdef DEBUG
612 static void 612 static void
613 extract_number_and_incr (int *destination, unsigned char **source) 613 extract_number_and_incr (int *destination, unsigned char **source)
614 { 614 {
615 extract_number (destination, *source); 615 extract_number (destination, *source);
616 *source += 2; 616 *source += 2;
617 } 617 }
618 618
619 #ifndef EXTRACT_MACROS 619 #ifndef EXTRACT_MACROS
658 658
659 static void 659 static void
660 print_fastmap (char *fastmap) 660 print_fastmap (char *fastmap)
661 { 661 {
662 unsigned was_a_range = 0; 662 unsigned was_a_range = 0;
663 unsigned i = 0; 663 unsigned i = 0;
664 664
665 while (i < (1 << BYTEWIDTH)) 665 while (i < (1 << BYTEWIDTH))
666 { 666 {
667 if (fastmap[i++]) 667 if (fastmap[i++])
668 { 668 {
669 was_a_range = 0; 669 was_a_range = 0;
678 printf ("-"); 678 printf ("-");
679 putchar (i - 1); 679 putchar (i - 1);
680 } 680 }
681 } 681 }
682 } 682 }
683 putchar ('\n'); 683 putchar ('\n');
684 } 684 }
685 685
686 686
687 /* Print a compiled pattern string in human-readable form, starting at 687 /* Print a compiled pattern string in human-readable form, starting at
688 the START pointer into it and ending just before the pointer END. */ 688 the START pointer into it and ending just before the pointer END. */
697 if (start == NULL) 697 if (start == NULL)
698 { 698 {
699 printf ("(null)\n"); 699 printf ("(null)\n");
700 return; 700 return;
701 } 701 }
702 702
703 /* Loop over pattern commands. */ 703 /* Loop over pattern commands. */
704 while (p < pend) 704 while (p < pend)
705 { 705 {
706 printf ("%d:\t", p - start); 706 printf ("%d:\t", p - start);
707 707
746 register int c, last = -100; 746 register int c, last = -100;
747 register int in_range = 0; 747 register int in_range = 0;
748 748
749 printf ("/charset [%s", 749 printf ("/charset [%s",
750 (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); 750 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
751 751
752 assert (p + *p < pend); 752 assert (p + *p < pend);
753 753
754 for (c = 0; c < 256; c++) 754 for (c = 0; c < 256; c++)
755 if (((unsigned char) (c / 8) < *p) 755 if (((unsigned char) (c / 8) < *p)
756 && (p[1 + (c/8)] & (1 << (c % 8)))) 756 && (p[1 + (c/8)] & (1 << (c % 8))))
765 else if (last + 1 != c && in_range) 765 else if (last + 1 != c && in_range)
766 { 766 {
767 putchar (last); 767 putchar (last);
768 in_range = 0; 768 in_range = 0;
769 } 769 }
770 770
771 if (! in_range) 771 if (! in_range)
772 putchar (c); 772 putchar (c);
773 773
774 last = c; 774 last = c;
775 } 775 }
842 break; 842 break;
843 843
844 case push_dummy_failure: 844 case push_dummy_failure:
845 printf ("/push_dummy_failure"); 845 printf ("/push_dummy_failure");
846 break; 846 break;
847 847
848 case maybe_pop_jump: 848 case maybe_pop_jump:
849 extract_number_and_incr (&mcnt, &p); 849 extract_number_and_incr (&mcnt, &p);
850 printf ("/maybe_pop_jump to %d", p + mcnt - start); 850 printf ("/maybe_pop_jump to %d", p + mcnt - start);
851 break; 851 break;
852 852
853 case pop_failure_jump: 853 case pop_failure_jump:
854 extract_number_and_incr (&mcnt, &p); 854 extract_number_and_incr (&mcnt, &p);
855 printf ("/pop_failure_jump to %d", p + mcnt - start); 855 printf ("/pop_failure_jump to %d", p + mcnt - start);
856 break; 856 break;
857 857
858 case jump_past_alt: 858 case jump_past_alt:
859 extract_number_and_incr (&mcnt, &p); 859 extract_number_and_incr (&mcnt, &p);
860 printf ("/jump_past_alt to %d", p + mcnt - start); 860 printf ("/jump_past_alt to %d", p + mcnt - start);
861 break; 861 break;
862 862
863 case jump: 863 case jump:
864 extract_number_and_incr (&mcnt, &p); 864 extract_number_and_incr (&mcnt, &p);
865 printf ("/jump to %d", p + mcnt - start); 865 printf ("/jump to %d", p + mcnt - start);
866 break; 866 break;
867 867
868 case succeed_n: 868 case succeed_n:
869 extract_number_and_incr (&mcnt, &p); 869 extract_number_and_incr (&mcnt, &p);
870 extract_number_and_incr (&mcnt2, &p); 870 extract_number_and_incr (&mcnt2, &p);
871 printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2); 871 printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
872 break; 872 break;
873 873
874 case jump_n: 874 case jump_n:
875 extract_number_and_incr (&mcnt, &p); 875 extract_number_and_incr (&mcnt, &p);
876 extract_number_and_incr (&mcnt2, &p); 876 extract_number_and_incr (&mcnt2, &p);
877 printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2); 877 printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
878 break; 878 break;
879 879
880 case set_number_at: 880 case set_number_at:
881 extract_number_and_incr (&mcnt, &p); 881 extract_number_and_incr (&mcnt, &p);
882 extract_number_and_incr (&mcnt2, &p); 882 extract_number_and_incr (&mcnt2, &p);
883 printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2); 883 printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
884 break; 884 break;
885 885
886 case wordbound: 886 case wordbound:
887 printf ("/wordbound"); 887 printf ("/wordbound");
888 break; 888 break;
889 889
890 case notwordbound: 890 case notwordbound:
892 break; 892 break;
893 893
894 case wordbeg: 894 case wordbeg:
895 printf ("/wordbeg"); 895 printf ("/wordbeg");
896 break; 896 break;
897 897
898 case wordend: 898 case wordend:
899 printf ("/wordend"); 899 printf ("/wordend");
900 900
901 #ifdef emacs 901 #ifdef emacs
902 case before_dot: 902 case before_dot:
903 printf ("/before_dot"); 903 printf ("/before_dot");
904 break; 904 break;
905 905
914 case syntaxspec: 914 case syntaxspec:
915 printf ("/syntaxspec"); 915 printf ("/syntaxspec");
916 mcnt = *p++; 916 mcnt = *p++;
917 printf ("/%d", mcnt); 917 printf ("/%d", mcnt);
918 break; 918 break;
919 919
920 case notsyntaxspec: 920 case notsyntaxspec:
921 printf ("/notsyntaxspec"); 921 printf ("/notsyntaxspec");
922 mcnt = *p++; 922 mcnt = *p++;
923 printf ("/%d", mcnt); 923 printf ("/%d", mcnt);
924 break; 924 break;
925 925
926 #ifdef MULE 926 #ifdef MULE
927 /* 97/2/17 jhod Mule category patch */ 927 /* 97/2/17 jhod Mule category patch */
928 case categoryspec: 928 case categoryspec:
929 printf ("/categoryspec"); 929 printf ("/categoryspec");
930 mcnt = *p++; 930 mcnt = *p++;
931 printf ("/%d", mcnt); 931 printf ("/%d", mcnt);
932 break; 932 break;
934 case notcategoryspec: 934 case notcategoryspec:
935 printf ("/notcategoryspec"); 935 printf ("/notcategoryspec");
936 mcnt = *p++; 936 mcnt = *p++;
937 printf ("/%d", mcnt); 937 printf ("/%d", mcnt);
938 break; 938 break;
939 /* end of category patch */ 939 /* end of category patch */
940 #endif /* MULE */ 940 #endif /* MULE */
941 #endif /* emacs */ 941 #endif /* emacs */
942 942
943 case wordchar: 943 case wordchar:
944 printf ("/wordchar"); 944 printf ("/wordchar");
945 break; 945 break;
946 946
947 case notwordchar: 947 case notwordchar:
948 printf ("/notwordchar"); 948 printf ("/notwordchar");
949 break; 949 break;
950 950
951 case begbuf: 951 case begbuf:
998 static void 998 static void
999 print_double_string (CONST char *where, CONST char *string1, int size1, 999 print_double_string (CONST char *where, CONST char *string1, int size1,
1000 CONST char *string2, int size2) 1000 CONST char *string2, int size2)
1001 { 1001 {
1002 unsigned this_char; 1002 unsigned this_char;
1003 1003
1004 if (where == NULL) 1004 if (where == NULL)
1005 printf ("(null)"); 1005 printf ("(null)");
1006 else 1006 else
1007 { 1007 {
1008 if (FIRST_STRING_P (where)) 1008 if (FIRST_STRING_P (where))
1009 { 1009 {
1010 for (this_char = where - string1; this_char < size1; this_char++) 1010 for (this_char = where - string1; this_char < size1; this_char++)
1011 putchar (string1[this_char]); 1011 putchar (string1[this_char]);
1012 1012
1013 where = string2; 1013 where = string2;
1014 } 1014 }
1015 1015
1016 for (this_char = where - string2; this_char < size2; this_char++) 1016 for (this_char = where - string2; this_char < size2; this_char++)
1017 putchar (string2[this_char]); 1017 putchar (string2[this_char]);
1018 } 1018 }
1050 1050
1051 reg_syntax_t 1051 reg_syntax_t
1052 re_set_syntax (reg_syntax_t syntax) 1052 re_set_syntax (reg_syntax_t syntax)
1053 { 1053 {
1054 reg_syntax_t ret = re_syntax_options; 1054 reg_syntax_t ret = re_syntax_options;
1055 1055
1056 re_syntax_options = syntax; 1056 re_syntax_options = syntax;
1057 return ret; 1057 return ret;
1058 } 1058 }
1059 1059
1060 /* This table gives an error message for each of the error codes listed 1060 /* This table gives an error message for each of the error codes listed
1097 using the relocating allocator routines, then malloc could cause a 1097 using the relocating allocator routines, then malloc could cause a
1098 relocation, which might (if the strings being searched are in the 1098 relocation, which might (if the strings being searched are in the
1099 ralloc heap) shift the data out from underneath the regexp 1099 ralloc heap) shift the data out from underneath the regexp
1100 routines. 1100 routines.
1101 1101
1102 Here's another reason to avoid allocation: Emacs 1102 Here's another reason to avoid allocation: Emacs
1103 processes input from X in a signal handler; processing X input may 1103 processes input from X in a signal handler; processing X input may
1104 call malloc; if input arrives while a matching routine is calling 1104 call malloc; if input arrives while a matching routine is calling
1105 malloc, then we're scrod. But Emacs can't just block input while 1105 malloc, then we're scrod. But Emacs can't just block input while
1106 calling matching routines; then we don't notice interrupts when 1106 calling matching routines; then we don't notice interrupts when
1107 they come in. So, Emacs blocks input around all regexp calls 1107 they come in. So, Emacs blocks input around all regexp calls
1128 1128
1129 1129
1130 /* Failure stack declarations and macros; both re_compile_fastmap and 1130 /* Failure stack declarations and macros; both re_compile_fastmap and
1131 re_match_2 use a failure stack. These have to be macros because of 1131 re_match_2 use a failure stack. These have to be macros because of
1132 REGEX_ALLOCATE_STACK. */ 1132 REGEX_ALLOCATE_STACK. */
1133 1133
1134 1134
1135 /* Number of failure points for which to initially allocate space 1135 /* Number of failure points for which to initially allocate space
1136 when matching. If this number is exceeded, we allocate more 1136 when matching. If this number is exceeded, we allocate more
1137 space, so it is not a hard limit. */ 1137 space, so it is not a hard limit. */
1138 #ifndef INIT_FAILURE_ALLOC 1138 #ifndef INIT_FAILURE_ALLOC
1199 1199
1200 1200
1201 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. 1201 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1202 1202
1203 Return 1 if succeeds, and 0 if either ran out of memory 1203 Return 1 if succeeds, and 0 if either ran out of memory
1204 allocating space for it or it was already too large. 1204 allocating space for it or it was already too large.
1205 1205
1206 REGEX_REALLOCATE_STACK requires `destination' be declared. */ 1206 REGEX_REALLOCATE_STACK requires `destination' be declared. */
1207 1207
1208 #define DOUBLE_FAIL_STACK(fail_stack) \ 1208 #define DOUBLE_FAIL_STACK(fail_stack) \
1209 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ 1209 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
1210 ? 0 \ 1210 ? 0 \
1217 ? 0 \ 1217 ? 0 \
1218 : ((fail_stack).size <<= 1, \ 1218 : ((fail_stack).size <<= 1, \
1219 1))) 1219 1)))
1220 1220
1221 1221
1222 /* Push pointer POINTER on FAIL_STACK. 1222 /* Push pointer POINTER on FAIL_STACK.
1223 Return 1 if was able to do so and 0 if ran out of memory allocating 1223 Return 1 if was able to do so and 0 if ran out of memory allocating
1224 space to do so. */ 1224 space to do so. */
1225 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ 1225 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
1226 ((FAIL_STACK_FULL () \ 1226 ((FAIL_STACK_FULL () \
1227 && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ 1227 && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
1262 #define DEBUG_POP(item_addr) 1262 #define DEBUG_POP(item_addr)
1263 #endif 1263 #endif
1264 1264
1265 1265
1266 /* Push the information about the state we will need 1266 /* Push the information about the state we will need
1267 if we ever fail back to it. 1267 if we ever fail back to it.
1268 1268
1269 Requires variables fail_stack, regstart, regend, reg_info, and 1269 Requires variables fail_stack, regstart, regend, reg_info, and
1270 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be 1270 num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
1271 declared. 1271 declared.
1272 1272
1273 Does `return FAILURE_CODE' if runs out of memory. */ 1273 Does `return FAILURE_CODE' if runs out of memory. */
1274 1274
1275 #if !defined (REGEX_MALLOC) && !defined (REL_ALLOC) 1275 #if !defined (REGEX_MALLOC) && !defined (REL_ALLOC)
1276 #define DECLARE_DESTINATION char *destination; 1276 #define DECLARE_DESTINATION char *destination;
1277 #else 1277 #else
1385 STR -- the saved data position. 1385 STR -- the saved data position.
1386 PAT -- the saved pattern position. 1386 PAT -- the saved pattern position.
1387 LOW_REG, HIGH_REG -- the lowest and highest active registers. 1387 LOW_REG, HIGH_REG -- the lowest and highest active registers.
1388 REGSTART, REGEND -- arrays of string positions. 1388 REGSTART, REGEND -- arrays of string positions.
1389 REG_INFO -- array of information about each subexpression. 1389 REG_INFO -- array of information about each subexpression.
1390 1390
1391 Also assumes the variables `fail_stack' and (if debugging), `bufp', 1391 Also assumes the variables `fail_stack' and (if debugging), `bufp',
1392 `pend', `string1', `size1', `string2', and `size2'. */ 1392 `pend', `string1', `size1', `string2', and `size2'. */
1393 1393
1394 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ 1394 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1395 { \ 1395 { \
1455 1455
1456 /* Structure for per-register (a.k.a. per-group) information. 1456 /* Structure for per-register (a.k.a. per-group) information.
1457 Other register information, such as the 1457 Other register information, such as the
1458 starting and ending positions (which are addresses), and the list of 1458 starting and ending positions (which are addresses), and the list of
1459 inner groups (which is a bits list) are maintained in separate 1459 inner groups (which is a bits list) are maintained in separate
1460 variables. 1460 variables.
1461 1461
1462 We are making a (strictly speaking) nonportable assumption here: that 1462 We are making a (strictly speaking) nonportable assumption here: that
1463 the compiler will pack our bit fields into something that fits into 1463 the compiler will pack our bit fields into something that fits into
1464 the type of `word', i.e., is something that fits into one item on the 1464 the type of `word', i.e., is something that fits into one item on the
1465 failure stack. */ 1465 failure stack. */
1466 1466
1510 #define REG_UNSET_VALUE (&reg_unset_dummy) 1510 #define REG_UNSET_VALUE (&reg_unset_dummy)
1511 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE) 1511 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1512 1512
1513 /* Subroutine declarations and macros for regex_compile. */ 1513 /* Subroutine declarations and macros for regex_compile. */
1514 1514
1515 /* Fetch the next character in the uncompiled pattern---translating it 1515 /* Fetch the next character in the uncompiled pattern---translating it
1516 if necessary. Also cast from a signed character in the constant 1516 if necessary. Also cast from a signed character in the constant
1517 string passed to us by the user to an unsigned char that we can use 1517 string passed to us by the user to an unsigned char that we can use
1518 as an array index (in, e.g., `translate'). */ 1518 as an array index (in, e.g., `translate'). */
1519 #define PATFETCH(c) \ 1519 #define PATFETCH(c) \
1520 do {if (p == pend) return REG_EEND; \ 1520 do {if (p == pend) return REG_EEND; \
1709 typedef struct 1709 typedef struct
1710 { 1710 {
1711 pattern_offset_t begalt_offset; 1711 pattern_offset_t begalt_offset;
1712 pattern_offset_t fixup_alt_jump; 1712 pattern_offset_t fixup_alt_jump;
1713 pattern_offset_t inner_group_offset; 1713 pattern_offset_t inner_group_offset;
1714 pattern_offset_t laststart_offset; 1714 pattern_offset_t laststart_offset;
1715 regnum_t regnum; 1715 regnum_t regnum;
1716 } compile_stack_elt_t; 1716 } compile_stack_elt_t;
1717 1717
1718 1718
1719 typedef struct 1719 typedef struct
1772 if (p == pend) \ 1772 if (p == pend) \
1773 break; \ 1773 break; \
1774 PATFETCH (c); \ 1774 PATFETCH (c); \
1775 } \ 1775 } \
1776 } \ 1776 } \
1777 } 1777 }
1778 1778
1779 #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ 1779 #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
1780 1780
1781 #define IS_CHAR_CLASS(string) \ 1781 #define IS_CHAR_CLASS(string) \
1782 (STREQ (string, "alpha") || STREQ (string, "upper") \ 1782 (STREQ (string, "alpha") || STREQ (string, "upper") \
1839 static int regs_allocated_size; 1839 static int regs_allocated_size;
1840 1840
1841 static CONST char ** regstart, ** regend; 1841 static CONST char ** regstart, ** regend;
1842 static CONST char ** old_regstart, ** old_regend; 1842 static CONST char ** old_regstart, ** old_regend;
1843 static CONST char **best_regstart, **best_regend; 1843 static CONST char **best_regstart, **best_regend;
1844 static register_info_type *reg_info; 1844 static register_info_type *reg_info;
1845 static CONST char **reg_dummy; 1845 static CONST char **reg_dummy;
1846 static register_info_type *reg_info_dummy; 1846 static register_info_type *reg_info_dummy;
1847 1847
1848 /* Make the register vectors big enough for NUM_REGS registers, 1848 /* Make the register vectors big enough for NUM_REGS registers,
1849 but don't make them smaller. */ 1849 but don't make them smaller. */
1881 `syntax' is set to SYNTAX; 1881 `syntax' is set to SYNTAX;
1882 `used' is set to the length of the compiled pattern; 1882 `used' is set to the length of the compiled pattern;
1883 `fastmap_accurate' is zero; 1883 `fastmap_accurate' is zero;
1884 `re_nsub' is the number of subexpressions in PATTERN; 1884 `re_nsub' is the number of subexpressions in PATTERN;
1885 `not_bol' and `not_eol' are zero; 1885 `not_bol' and `not_eol' are zero;
1886 1886
1887 The `fastmap' and `newline_anchor' fields are neither 1887 The `fastmap' and `newline_anchor' fields are neither
1888 examined nor set. */ 1888 examined nor set. */
1889 1889
1890 /* Return, freeing storage we allocated. */ 1890 /* Return, freeing storage we allocated. */
1891 #define FREE_STACK_RETURN(value) \ 1891 #define FREE_STACK_RETURN(value) \
1900 array indices. The macros that fetch a character from the pattern 1900 array indices. The macros that fetch a character from the pattern
1901 make sure to coerce to unsigned char before assigning, so we won't 1901 make sure to coerce to unsigned char before assigning, so we won't
1902 get bitten by negative numbers here. */ 1902 get bitten by negative numbers here. */
1903 /* XEmacs change: used to be unsigned char. */ 1903 /* XEmacs change: used to be unsigned char. */
1904 register EMACS_INT c, c1; 1904 register EMACS_INT c, c1;
1905 1905
1906 /* A random temporary spot in PATTERN. */ 1906 /* A random temporary spot in PATTERN. */
1907 CONST char *p1; 1907 CONST char *p1;
1908 1908
1909 /* Points to the end of the buffer, where we should append. */ 1909 /* Points to the end of the buffer, where we should append. */
1910 register unsigned char *b; 1910 register unsigned char *b;
1911 1911
1912 /* Keeps track of unclosed groups. */ 1912 /* Keeps track of unclosed groups. */
1913 compile_stack_type compile_stack; 1913 compile_stack_type compile_stack;
1914 1914
1915 /* Points to the current (ending) position in the pattern. */ 1915 /* Points to the current (ending) position in the pattern. */
1916 CONST char *p = pattern; 1916 CONST char *p = pattern;
1917 CONST char *pend = pattern + size; 1917 CONST char *pend = pattern + size;
1918 1918
1919 /* How to translate the characters in the pattern. */ 1919 /* How to translate the characters in the pattern. */
1920 char *translate = bufp->translate; 1920 char *translate = bufp->translate;
1921 1921
1922 /* Address of the count-byte of the most recently inserted `exactn' 1922 /* Address of the count-byte of the most recently inserted `exactn'
1923 command. This makes it possible to tell if a new exact-match 1923 command. This makes it possible to tell if a new exact-match
1934 unsigned char *begalt; 1934 unsigned char *begalt;
1935 1935
1936 /* Place in the uncompiled pattern (i.e., the {) to 1936 /* Place in the uncompiled pattern (i.e., the {) to
1937 which to go back if the interval is invalid. */ 1937 which to go back if the interval is invalid. */
1938 CONST char *beg_interval; 1938 CONST char *beg_interval;
1939 1939
1940 /* Address of the place where a forward jump should go to the end of 1940 /* Address of the place where a forward jump should go to the end of
1941 the containing expression. Each alternative of an `or' -- except the 1941 the containing expression. Each alternative of an `or' -- except the
1942 last -- ends with a forward jump of this sort. */ 1942 last -- ends with a forward jump of this sort. */
1943 unsigned char *fixup_alt_jump = 0; 1943 unsigned char *fixup_alt_jump = 0;
1944 1944
1950 #ifdef DEBUG 1950 #ifdef DEBUG
1951 DEBUG_PRINT1 ("\nCompiling pattern: "); 1951 DEBUG_PRINT1 ("\nCompiling pattern: ");
1952 if (debug) 1952 if (debug)
1953 { 1953 {
1954 unsigned debug_count; 1954 unsigned debug_count;
1955 1955
1956 for (debug_count = 0; debug_count < size; debug_count++) 1956 for (debug_count = 0; debug_count < size; debug_count++)
1957 putchar (pattern[debug_count]); 1957 putchar (pattern[debug_count]);
1958 putchar ('\n'); 1958 putchar ('\n');
1959 } 1959 }
1960 #endif /* DEBUG */ 1960 #endif /* DEBUG */
1974 1974
1975 /* Set `used' to zero, so that if we return an error, the pattern 1975 /* Set `used' to zero, so that if we return an error, the pattern
1976 printer (for debugging) will think there's no pattern. We reset it 1976 printer (for debugging) will think there's no pattern. We reset it
1977 at the end. */ 1977 at the end. */
1978 bufp->used = 0; 1978 bufp->used = 0;
1979 1979
1980 /* Always count groups, whether or not bufp->no_sub is set. */ 1980 /* Always count groups, whether or not bufp->no_sub is set. */
1981 bufp->re_nsub = 0; 1981 bufp->re_nsub = 0;
1982 1982
1983 #if !defined (emacs) && !defined (SYNTAX_TABLE) 1983 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1984 /* Initialize the syntax table. */ 1984 /* Initialize the syntax table. */
1985 init_syntax_once (); 1985 init_syntax_once ();
1986 #endif 1986 #endif
2027 2027
2028 2028
2029 case '$': 2029 case '$':
2030 { 2030 {
2031 if ( /* If at end of pattern, it's an operator. */ 2031 if ( /* If at end of pattern, it's an operator. */
2032 p == pend 2032 p == pend
2033 /* If context independent, it's an operator. */ 2033 /* If context independent, it's an operator. */
2034 || syntax & RE_CONTEXT_INDEP_ANCHORS 2034 || syntax & RE_CONTEXT_INDEP_ANCHORS
2035 /* Otherwise, depends on what's next. */ 2035 /* Otherwise, depends on what's next. */
2036 || at_endline_loc_p (p, pend, syntax)) 2036 || at_endline_loc_p (p, pend, syntax))
2037 BUF_PUSH (endline); 2037 BUF_PUSH (endline);
2058 } 2058 }
2059 2059
2060 { 2060 {
2061 /* Are we optimizing this jump? */ 2061 /* Are we optimizing this jump? */
2062 boolean keep_string_p = false; 2062 boolean keep_string_p = false;
2063 2063
2064 /* 1 means zero (many) matches is allowed. */ 2064 /* 1 means zero (many) matches is allowed. */
2065 char zero_times_ok = 0, many_times_ok = 0; 2065 char zero_times_ok = 0, many_times_ok = 0;
2066 2066
2067 /* If there is a sequence of repetition chars, collapse it 2067 /* If there is a sequence of repetition chars, collapse it
2068 down to just one (the right one). We can't combine 2068 down to just one (the right one). We can't combine
2106 /* If we get here, we found another repeat character. */ 2106 /* If we get here, we found another repeat character. */
2107 } 2107 }
2108 2108
2109 /* Star, etc. applied to an empty pattern is equivalent 2109 /* Star, etc. applied to an empty pattern is equivalent
2110 to an empty pattern. */ 2110 to an empty pattern. */
2111 if (!laststart) 2111 if (!laststart)
2112 break; 2112 break;
2113 2113
2114 /* Now we know whether or not zero matches is allowed 2114 /* Now we know whether or not zero matches is allowed
2115 and also whether or not two or more matches is allowed. */ 2115 and also whether or not two or more matches is allowed. */
2116 if (many_times_ok) 2116 if (many_times_ok)
2117 { /* More than one repetition is allowed, so put in at the 2117 { /* More than one repetition is allowed, so put in at the
2118 end a backward relative jump from `b' to before the next 2118 end a backward relative jump from `b' to before the next
2119 jump we're going to put in below (which jumps from 2119 jump we're going to put in below (which jumps from
2120 laststart to after this jump). 2120 laststart to after this jump).
2121 2121
2122 But if we are at the `*' in the exact sequence `.*\n', 2122 But if we are at the `*' in the exact sequence `.*\n',
2123 insert an unconditional jump backwards to the ., 2123 insert an unconditional jump backwards to the .,
2124 instead of the beginning of the loop. This way we only 2124 instead of the beginning of the loop. This way we only
2125 push a failure point once, instead of every time 2125 push a failure point once, instead of every time
2197 2197
2198 laststart = b; 2198 laststart = b;
2199 2199
2200 /* We test `*p == '^' twice, instead of using an if 2200 /* We test `*p == '^' twice, instead of using an if
2201 statement, so we only need one BUF_PUSH. */ 2201 statement, so we only need one BUF_PUSH. */
2202 BUF_PUSH (*p == '^' ? charset_not : charset); 2202 BUF_PUSH (*p == '^' ? charset_not : charset);
2203 if (*p == '^') 2203 if (*p == '^')
2204 p++; 2204 p++;
2205 2205
2206 /* Remember the first position in the bracket expression. */ 2206 /* Remember the first position in the bracket expression. */
2207 p1 = p; 2207 p1 = p;
2289 2289
2290 /* Look ahead to see if it's a range when the last thing 2290 /* Look ahead to see if it's a range when the last thing
2291 was a character: if this is a hyphen not at the 2291 was a character: if this is a hyphen not at the
2292 beginning or the end of a list, then it's the range 2292 beginning or the end of a list, then it's the range
2293 operator. */ 2293 operator. */
2294 if (c == '-' 2294 if (c == '-'
2295 && !(p - 2 >= pattern && p[-2] == '[') 2295 && !(p - 2 >= pattern && p[-2] == '[')
2296 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') 2296 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2297 && *p != ']') 2297 && *p != ']')
2298 { 2298 {
2299 reg_errcode_t ret; 2299 reg_errcode_t ret;
2300 2300
2363 str[c1++] = c; 2363 str[c1++] = c;
2364 } 2364 }
2365 str[c1] = '\0'; 2365 str[c1] = '\0';
2366 2366
2367 /* If it isn't a word bracketed by `[:' and `:]': 2367 /* If it isn't a word bracketed by `[:' and `:]':
2368 undo the ending character, the letters, and leave 2368 undo the ending character, the letters, and leave
2369 the leading `:' and `[' (but set bits for them). */ 2369 the leading `:' and `[' (but set bits for them). */
2370 if (c == ':' && *p == ']') 2370 if (c == ':' && *p == ']')
2371 { 2371 {
2372 int ch; 2372 int ch;
2373 boolean is_alnum = STREQ (str, "alnum"); 2373 boolean is_alnum = STREQ (str, "alnum");
2380 boolean is_print = STREQ (str, "print"); 2380 boolean is_print = STREQ (str, "print");
2381 boolean is_punct = STREQ (str, "punct"); 2381 boolean is_punct = STREQ (str, "punct");
2382 boolean is_space = STREQ (str, "space"); 2382 boolean is_space = STREQ (str, "space");
2383 boolean is_upper = STREQ (str, "upper"); 2383 boolean is_upper = STREQ (str, "upper");
2384 boolean is_xdigit = STREQ (str, "xdigit"); 2384 boolean is_xdigit = STREQ (str, "xdigit");
2385 2385
2386 if (!IS_CHAR_CLASS (str)) 2386 if (!IS_CHAR_CLASS (str))
2387 FREE_STACK_RETURN (REG_ECTYPE); 2387 FREE_STACK_RETURN (REG_ECTYPE);
2388 2388
2389 /* Throw away the ] at the end of the character 2389 /* Throw away the ] at the end of the character
2390 class. */ 2390 class. */
2391 PATFETCH (c); 2391 PATFETCH (c);
2392 2392
2393 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2393 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2394 2394
2395 for (ch = 0; ch < 1 << BYTEWIDTH; ch++) 2395 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2396 { 2396 {
2415 had_char_class = true; 2415 had_char_class = true;
2416 } 2416 }
2417 else 2417 else
2418 { 2418 {
2419 c1++; 2419 c1++;
2420 while (c1--) 2420 while (c1--)
2421 PATUNFETCH; 2421 PATUNFETCH;
2422 SET_EITHER_BIT ('['); 2422 SET_EITHER_BIT ('[');
2423 SET_EITHER_BIT (':'); 2423 SET_EITHER_BIT (':');
2424 had_char_class = false; 2424 had_char_class = false;
2425 } 2425 }
2443 break; 2443 break;
2444 } 2444 }
2445 #endif /* MULE */ 2445 #endif /* MULE */
2446 /* Discard any (non)matching list bytes that are all 0 at the 2446 /* Discard any (non)matching list bytes that are all 0 at the
2447 end of the map. Decrease the map-length byte too. */ 2447 end of the map. Decrease the map-length byte too. */
2448 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 2448 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2449 b[-1]--; 2449 b[-1]--;
2450 b += b[-1]; 2450 b += b[-1];
2451 } 2451 }
2452 break; 2452 break;
2453 2453
2454 2454
2504 handle_open: 2504 handle_open:
2505 bufp->re_nsub++; 2505 bufp->re_nsub++;
2506 regnum++; 2506 regnum++;
2507 2507
2508 if (COMPILE_STACK_FULL) 2508 if (COMPILE_STACK_FULL)
2509 { 2509 {
2510 RETALLOC (compile_stack.stack, compile_stack.size << 1, 2510 RETALLOC (compile_stack.stack, compile_stack.size << 1,
2511 compile_stack_elt_t); 2511 compile_stack_elt_t);
2512 if (compile_stack.stack == NULL) return REG_ESPACE; 2512 if (compile_stack.stack == NULL) return REG_ESPACE;
2513 2513
2514 compile_stack.size <<= 1; 2514 compile_stack.size <<= 1;
2517 /* These are the values to restore when we hit end of this 2517 /* These are the values to restore when we hit end of this
2518 group. They are all relative offsets, so that if the 2518 group. They are all relative offsets, so that if the
2519 whole pattern moves because of realloc, they will still 2519 whole pattern moves because of realloc, they will still
2520 be valid. */ 2520 be valid. */
2521 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; 2521 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2522 COMPILE_STACK_TOP.fixup_alt_jump 2522 COMPILE_STACK_TOP.fixup_alt_jump
2523 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; 2523 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2524 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; 2524 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2525 COMPILE_STACK_TOP.regnum = regnum; 2525 COMPILE_STACK_TOP.regnum = regnum;
2526 2526
2527 /* We will eventually replace the 0 with the number of 2527 /* We will eventually replace the 0 with the number of
2531 if (regnum <= MAX_REGNUM) 2531 if (regnum <= MAX_REGNUM)
2532 { 2532 {
2533 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; 2533 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2534 BUF_PUSH_3 (start_memory, regnum, 0); 2534 BUF_PUSH_3 (start_memory, regnum, 0);
2535 } 2535 }
2536 2536
2537 compile_stack.avail++; 2537 compile_stack.avail++;
2538 2538
2539 fixup_alt_jump = 0; 2539 fixup_alt_jump = 0;
2540 laststart = 0; 2540 laststart = 0;
2541 begalt = b; 2541 begalt = b;
2560 { /* Push a dummy failure point at the end of the 2560 { /* Push a dummy failure point at the end of the
2561 alternative for a possible future 2561 alternative for a possible future
2562 `pop_failure_jump' to pop. See comments at 2562 `pop_failure_jump' to pop. See comments at
2563 `push_dummy_failure' in `re_match_2'. */ 2563 `push_dummy_failure' in `re_match_2'. */
2564 BUF_PUSH (push_dummy_failure); 2564 BUF_PUSH (push_dummy_failure);
2565 2565
2566 /* We allocated space for this jump when we assigned 2566 /* We allocated space for this jump when we assigned
2567 to `fixup_alt_jump', in the `handle_alt' case below. */ 2567 to `fixup_alt_jump', in the `handle_alt' case below. */
2568 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); 2568 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2569 } 2569 }
2570 2570
2582 /* We don't just want to restore into `regnum', because 2582 /* We don't just want to restore into `regnum', because
2583 later groups should continue to be numbered higher, 2583 later groups should continue to be numbered higher,
2584 as in `(ab)c(de)' -- the second group is #2. */ 2584 as in `(ab)c(de)' -- the second group is #2. */
2585 regnum_t this_group_regnum; 2585 regnum_t this_group_regnum;
2586 2586
2587 compile_stack.avail--; 2587 compile_stack.avail--;
2588 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; 2588 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2589 fixup_alt_jump 2589 fixup_alt_jump
2590 = COMPILE_STACK_TOP.fixup_alt_jump 2590 = COMPILE_STACK_TOP.fixup_alt_jump
2591 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 2591 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2592 : 0; 2592 : 0;
2593 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; 2593 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2594 this_group_regnum = COMPILE_STACK_TOP.regnum; 2594 this_group_regnum = COMPILE_STACK_TOP.regnum;
2595 /* If we've reached MAX_REGNUM groups, then this open 2595 /* If we've reached MAX_REGNUM groups, then this open
2596 won't actually generate any code, so we'll have to 2596 won't actually generate any code, so we'll have to
2601 groups were inside this one. */ 2601 groups were inside this one. */
2602 if (this_group_regnum <= MAX_REGNUM) 2602 if (this_group_regnum <= MAX_REGNUM)
2603 { 2603 {
2604 unsigned char *inner_group_loc 2604 unsigned char *inner_group_loc
2605 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; 2605 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2606 2606
2607 *inner_group_loc = regnum - this_group_regnum; 2607 *inner_group_loc = regnum - this_group_regnum;
2608 BUF_PUSH_3 (stop_memory, this_group_regnum, 2608 BUF_PUSH_3 (stop_memory, this_group_regnum,
2609 regnum - this_group_regnum); 2609 regnum - this_group_regnum);
2610 } 2610 }
2611 } 2611 }
2630 which gets executed if it gets matched. Adjust that 2630 which gets executed if it gets matched. Adjust that
2631 jump so it will jump to this alternative's analogous 2631 jump so it will jump to this alternative's analogous
2632 jump (put in below, which in turn will jump to the next 2632 jump (put in below, which in turn will jump to the next
2633 (if any) alternative's such jump, etc.). The last such 2633 (if any) alternative's such jump, etc.). The last such
2634 jump jumps to the correct final destination. A picture: 2634 jump jumps to the correct final destination. A picture:
2635 _____ _____ 2635 _____ _____
2636 | | | | 2636 | | | |
2637 | v | v 2637 | v | v
2638 a | b | c 2638 a | b | c
2639 2639
2640 If we are at `b', then fixup_alt_jump right now points to a 2640 If we are at `b', then fixup_alt_jump right now points to a
2641 three-byte space after `a'. We'll put in the jump, set 2641 three-byte space after `a'. We'll put in the jump, set
2642 fixup_alt_jump to right after `b', and leave behind three 2642 fixup_alt_jump to right after `b', and leave behind three
2643 bytes which we'll fill in when we get to after `c'. */ 2643 bytes which we'll fill in when we get to after `c'. */
2655 laststart = 0; 2655 laststart = 0;
2656 begalt = b; 2656 begalt = b;
2657 break; 2657 break;
2658 2658
2659 2659
2660 case '{': 2660 case '{':
2661 /* If \{ is a literal. */ 2661 /* If \{ is a literal. */
2662 if (!(syntax & RE_INTERVALS) 2662 if (!(syntax & RE_INTERVALS)
2663 /* If we're at `\{' and it's not the open-interval 2663 /* If we're at `\{' and it's not the open-interval
2664 operator. */ 2664 operator. */
2665 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) 2665 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2666 || (p - 2 == pattern && p == pend)) 2666 || (p - 2 == pattern && p == pend))
2667 goto normal_backslash; 2667 goto normal_backslash;
2668 2668
2697 if (lower_bound < 0 || upper_bound > RE_DUP_MAX 2697 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2698 || lower_bound > upper_bound) 2698 || lower_bound > upper_bound)
2699 { 2699 {
2700 if (syntax & RE_NO_BK_BRACES) 2700 if (syntax & RE_NO_BK_BRACES)
2701 goto unfetch_interval; 2701 goto unfetch_interval;
2702 else 2702 else
2703 FREE_STACK_RETURN (REG_BADBR); 2703 FREE_STACK_RETURN (REG_BADBR);
2704 } 2704 }
2705 2705
2706 if (!(syntax & RE_NO_BK_BRACES)) 2706 if (!(syntax & RE_NO_BK_BRACES))
2707 { 2707 {
2708 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); 2708 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2709 2709
2710 PATFETCH (c); 2710 PATFETCH (c);
2711 } 2711 }
2712 2712
2713 if (c != '}') 2713 if (c != '}')
2714 { 2714 {
2715 if (syntax & RE_NO_BK_BRACES) 2715 if (syntax & RE_NO_BK_BRACES)
2716 goto unfetch_interval; 2716 goto unfetch_interval;
2717 else 2717 else
2718 FREE_STACK_RETURN (REG_BADBR); 2718 FREE_STACK_RETURN (REG_BADBR);
2719 } 2719 }
2720 2720
2721 /* We just parsed a valid interval. */ 2721 /* We just parsed a valid interval. */
2722 2722
2748 succeed_n <after jump addr> <succeed_n count> 2748 succeed_n <after jump addr> <succeed_n count>
2749 <body of loop> 2749 <body of loop>
2750 jump_n <succeed_n addr> <jump count> 2750 jump_n <succeed_n addr> <jump count>
2751 (The upper bound and `jump_n' are omitted if 2751 (The upper bound and `jump_n' are omitted if
2752 `upper_bound' is 1, though.) */ 2752 `upper_bound' is 1, though.) */
2753 else 2753 else
2754 { /* If the upper bound is > 1, we need to insert 2754 { /* If the upper bound is > 1, we need to insert
2755 more at the end of the loop. */ 2755 more at the end of the loop. */
2756 unsigned nbytes = 10 + (upper_bound > 1) * 10; 2756 unsigned nbytes = 10 + (upper_bound > 1) * 10;
2757 2757
2758 GET_BUFFER_SPACE (nbytes); 2758 GET_BUFFER_SPACE (nbytes);
2765 INSERT_JUMP2 (succeed_n, laststart, 2765 INSERT_JUMP2 (succeed_n, laststart,
2766 b + 5 + (upper_bound > 1) * 5, 2766 b + 5 + (upper_bound > 1) * 5,
2767 lower_bound); 2767 lower_bound);
2768 b += 5; 2768 b += 5;
2769 2769
2770 /* Code to initialize the lower bound. Insert 2770 /* Code to initialize the lower bound. Insert
2771 before the `succeed_n'. The `5' is the last two 2771 before the `succeed_n'. The `5' is the last two
2772 bytes of this `set_number_at', plus 3 bytes of 2772 bytes of this `set_number_at', plus 3 bytes of
2773 the following `succeed_n'. */ 2773 the following `succeed_n'. */
2774 insert_op2 (set_number_at, laststart, 5, lower_bound, b); 2774 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2775 b += 5; 2775 b += 5;
2776 2776
2777 if (upper_bound > 1) 2777 if (upper_bound > 1)
2778 { /* More than one repetition is allowed, so 2778 { /* More than one repetition is allowed, so
2779 append a backward jump to the `succeed_n' 2779 append a backward jump to the `succeed_n'
2780 that starts this interval. 2780 that starts this interval.
2781 2781
2782 When we've reached this during matching, 2782 When we've reached this during matching,
2783 we'll have matched the interval once, so 2783 we'll have matched the interval once, so
2784 jump back only `upper_bound - 1' times. */ 2784 jump back only `upper_bound - 1' times. */
2785 STORE_JUMP2 (jump_n, b, laststart + 5, 2785 STORE_JUMP2 (jump_n, b, laststart + 5,
2786 upper_bound - 1); 2786 upper_bound - 1);
2794 for the relative address. But we are 2794 for the relative address. But we are
2795 inserting into the middle of the pattern -- 2795 inserting into the middle of the pattern --
2796 so everything is getting moved up by 5. 2796 so everything is getting moved up by 5.
2797 Conclusion: (b - 2) - (laststart + 3) + 5, 2797 Conclusion: (b - 2) - (laststart + 3) + 5,
2798 i.e., b - laststart. 2798 i.e., b - laststart.
2799 2799
2800 We insert this at the beginning of the loop 2800 We insert this at the beginning of the loop
2801 so that if we fail during matching, we'll 2801 so that if we fail during matching, we'll
2802 reinitialize the bounds. */ 2802 reinitialize the bounds. */
2803 insert_op2 (set_number_at, laststart, b - laststart, 2803 insert_op2 (set_number_at, laststart, b - laststart,
2804 upper_bound - 1, b); 2804 upper_bound - 1, b);
2815 assert (beg_interval); 2815 assert (beg_interval);
2816 p = beg_interval; 2816 p = beg_interval;
2817 beg_interval = NULL; 2817 beg_interval = NULL;
2818 2818
2819 /* normal_char and normal_backslash need `c'. */ 2819 /* normal_char and normal_backslash need `c'. */
2820 PATFETCH (c); 2820 PATFETCH (c);
2821 2821
2822 if (!(syntax & RE_NO_BK_BRACES)) 2822 if (!(syntax & RE_NO_BK_BRACES))
2823 { 2823 {
2824 if (p > pattern && p[-1] == '\\') 2824 if (p > pattern && p[-1] == '\\')
2825 goto normal_backslash; 2825 goto normal_backslash;
2831 operators. rms says this is ok. --karl */ 2831 operators. rms says this is ok. --karl */
2832 case '=': 2832 case '=':
2833 BUF_PUSH (at_dot); 2833 BUF_PUSH (at_dot);
2834 break; 2834 break;
2835 2835
2836 case 's': 2836 case 's':
2837 laststart = b; 2837 laststart = b;
2838 PATFETCH (c); 2838 PATFETCH (c);
2839 /* XEmacs addition */ 2839 /* XEmacs addition */
2840 if (c >= 0x80 || syntax_spec_code[c] == 0377) 2840 if (c >= 0x80 || syntax_spec_code[c] == 0377)
2841 FREE_STACK_RETURN (REG_ESYNTAX); 2841 FREE_STACK_RETURN (REG_ESYNTAX);
2851 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); 2851 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2852 break; 2852 break;
2853 2853
2854 #ifdef MULE 2854 #ifdef MULE
2855 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ 2855 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */
2856 case 'c': 2856 case 'c':
2857 laststart = b; 2857 laststart = b;
2858 PATFETCH_RAW (c); 2858 PATFETCH_RAW (c);
2859 if (c < 32 || c > 127) 2859 if (c < 32 || c > 127)
2860 FREE_STACK_RETURN (REG_ECATEGORY); 2860 FREE_STACK_RETURN (REG_ECATEGORY);
2861 BUF_PUSH_2 (categoryspec, c); 2861 BUF_PUSH_2 (categoryspec, c);
2955 /* `q' points to the beginning of the next char. */ 2955 /* `q' points to the beginning of the next char. */
2956 CONST char *q = p - 1; 2956 CONST char *q = p - 1;
2957 INC_CHARPTR (q); 2957 INC_CHARPTR (q);
2958 2958
2959 /* If no exactn currently being built. */ 2959 /* If no exactn currently being built. */
2960 if (!pending_exact 2960 if (!pending_exact
2961 2961
2962 /* If last exactn not at current position. */ 2962 /* If last exactn not at current position. */
2963 || pending_exact + *pending_exact + 1 != b 2963 || pending_exact + *pending_exact + 1 != b
2964 2964
2965 /* We have only one byte following the exactn for the count. */ 2965 /* We have only one byte following the exactn for the count. */
2966 || ((unsigned int) (*pending_exact + (q - p)) >= 2966 || ((unsigned int) (*pending_exact + (q - p)) >=
2967 ((unsigned int) (1 << BYTEWIDTH) - 1)) 2967 ((unsigned int) (1 << BYTEWIDTH) - 1))
2968 2968
2969 /* If followed by a repetition operator. */ 2969 /* If followed by a repetition operator. */
2975 && ((syntax & RE_NO_BK_BRACES) 2975 && ((syntax & RE_NO_BK_BRACES)
2976 ? *q == '{' 2976 ? *q == '{'
2977 : (q[0] == '\\' && q[1] == '{')))) 2977 : (q[0] == '\\' && q[1] == '{'))))
2978 { 2978 {
2979 /* Start building a new exactn. */ 2979 /* Start building a new exactn. */
2980 2980
2981 laststart = b; 2981 laststart = b;
2982 2982
2983 BUF_PUSH_2 (exactn, 0); 2983 BUF_PUSH_2 (exactn, 0);
2984 pending_exact = b - 1; 2984 pending_exact = b - 1;
2985 } 2985 }
2986 2986
2987 BUF_PUSH (c); 2987 BUF_PUSH (c);
2988 (*pending_exact)++; 2988 (*pending_exact)++;
2989 2989
2990 while (p < q) 2990 while (p < q)
2991 { 2991 {
2996 break; 2996 break;
2997 } 2997 }
2998 } /* switch (c) */ 2998 } /* switch (c) */
2999 } /* while p != pend */ 2999 } /* while p != pend */
3000 3000
3001 3001
3002 /* Through the pattern now. */ 3002 /* Through the pattern now. */
3003 3003
3004 if (fixup_alt_jump) 3004 if (fixup_alt_jump)
3005 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 3005 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3006 3006
3007 if (!COMPILE_STACK_EMPTY) 3007 if (!COMPILE_STACK_EMPTY)
3008 FREE_STACK_RETURN (REG_EPAREN); 3008 FREE_STACK_RETURN (REG_EPAREN);
3009 3009
3010 /* If we don't want backtracking, force success 3010 /* If we don't want backtracking, force success
3011 the first time we reach the end of the compiled pattern. */ 3011 the first time we reach the end of the compiled pattern. */
3012 if (syntax & RE_NO_POSIX_BACKTRACKING) 3012 if (syntax & RE_NO_POSIX_BACKTRACKING)
3040 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); 3040 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
3041 3041
3042 #ifdef emacs 3042 #ifdef emacs
3043 if (! fail_stack.stack) 3043 if (! fail_stack.stack)
3044 fail_stack.stack 3044 fail_stack.stack
3045 = (fail_stack_elt_t *) xmalloc (fail_stack.size 3045 = (fail_stack_elt_t *) xmalloc (fail_stack.size
3046 * sizeof (fail_stack_elt_t)); 3046 * sizeof (fail_stack_elt_t));
3047 else 3047 else
3048 fail_stack.stack 3048 fail_stack.stack
3049 = (fail_stack_elt_t *) xrealloc (fail_stack.stack, 3049 = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
3050 (fail_stack.size 3050 (fail_stack.size
3051 * sizeof (fail_stack_elt_t))); 3051 * sizeof (fail_stack_elt_t)));
3052 #else /* not emacs */ 3052 #else /* not emacs */
3053 if (! fail_stack.stack) 3053 if (! fail_stack.stack)
3054 fail_stack.stack 3054 fail_stack.stack
3055 = (fail_stack_elt_t *) malloc (fail_stack.size 3055 = (fail_stack_elt_t *) malloc (fail_stack.size
3056 * sizeof (fail_stack_elt_t)); 3056 * sizeof (fail_stack_elt_t));
3057 else 3057 else
3058 fail_stack.stack 3058 fail_stack.stack
3059 = (fail_stack_elt_t *) realloc (fail_stack.stack, 3059 = (fail_stack_elt_t *) realloc (fail_stack.stack,
3060 (fail_stack.size 3060 (fail_stack.size
3101 register unsigned char *pfrom = end; 3101 register unsigned char *pfrom = end;
3102 register unsigned char *pto = end + 3; 3102 register unsigned char *pto = end + 3;
3103 3103
3104 while (pfrom != loc) 3104 while (pfrom != loc)
3105 *--pto = *--pfrom; 3105 *--pto = *--pfrom;
3106 3106
3107 store_op1 (op, loc, arg); 3107 store_op1 (op, loc, arg);
3108 } 3108 }
3109 3109
3110 3110
3111 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ 3111 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
3112 3112
3113 static void 3113 static void
3114 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, 3114 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
3115 unsigned char *end) 3115 unsigned char *end)
3116 { 3116 {
3117 register unsigned char *pfrom = end; 3117 register unsigned char *pfrom = end;
3118 register unsigned char *pto = end + 5; 3118 register unsigned char *pto = end + 5;
3119 3119
3120 while (pfrom != loc) 3120 while (pfrom != loc)
3121 *--pto = *--pfrom; 3121 *--pto = *--pfrom;
3122 3122
3123 store_op2 (op, loc, arg1, arg2); 3123 store_op2 (op, loc, arg1, arg2);
3124 } 3124 }
3125 3125
3126 3126
3127 /* P points to just after a ^ in PATTERN. Return true if that ^ comes 3127 /* P points to just after a ^ in PATTERN. Return true if that ^ comes
3131 static boolean 3131 static boolean
3132 at_begline_loc_p (CONST char *pattern, CONST char *p, reg_syntax_t syntax) 3132 at_begline_loc_p (CONST char *pattern, CONST char *p, reg_syntax_t syntax)
3133 { 3133 {
3134 CONST char *prev = p - 2; 3134 CONST char *prev = p - 2;
3135 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 3135 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3136 3136
3137 return 3137 return
3138 /* After a subexpression? */ 3138 /* After a subexpression? */
3139 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) 3139 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3140 /* After an alternative? */ 3140 /* After an alternative? */
3141 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); 3141 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3149 at_endline_loc_p (CONST char *p, CONST char *pend, int syntax) 3149 at_endline_loc_p (CONST char *p, CONST char *pend, int syntax)
3150 { 3150 {
3151 CONST char *next = p; 3151 CONST char *next = p;
3152 boolean next_backslash = *next == '\\'; 3152 boolean next_backslash = *next == '\\';
3153 CONST char *next_next = p + 1 < pend ? p + 1 : 0; 3153 CONST char *next_next = p + 1 < pend ? p + 1 : 0;
3154 3154
3155 return 3155 return
3156 /* Before a subexpression? */ 3156 /* Before a subexpression? */
3157 (syntax & RE_NO_BK_PARENS ? *next == ')' 3157 (syntax & RE_NO_BK_PARENS ? *next == ')'
3158 : next_backslash && next_next && *next_next == ')') 3158 : next_backslash && next_next && *next_next == ')')
3159 /* Before an alternative? */ 3159 /* Before an alternative? */
3160 || (syntax & RE_NO_BK_VBAR ? *next == '|' 3160 || (syntax & RE_NO_BK_VBAR ? *next == '|'
3161 : next_backslash && next_next && *next_next == '|'); 3161 : next_backslash && next_next && *next_next == '|');
3162 } 3162 }
3163 3163
3164 3164
3165 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and 3165 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3166 false if it's not. */ 3166 false if it's not. */
3167 3167
3168 static boolean 3168 static boolean
3169 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum) 3169 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
3170 { 3170 {
3171 int this_element; 3171 int this_element;
3172 3172
3173 for (this_element = compile_stack.avail - 1; 3173 for (this_element = compile_stack.avail - 1;
3174 this_element >= 0; 3174 this_element >= 0;
3175 this_element--) 3175 this_element--)
3176 if (compile_stack.stack[this_element].regnum == regnum) 3176 if (compile_stack.stack[this_element].regnum == regnum)
3177 return true; 3177 return true;
3178 3178
3179 return false; 3179 return false;
3183 /* Read the ending character of a range (in a bracket expression) from the 3183 /* Read the ending character of a range (in a bracket expression) from the
3184 uncompiled pattern *P_PTR (which ends at PEND). We assume the 3184 uncompiled pattern *P_PTR (which ends at PEND). We assume the
3185 starting character is in `P[-2]'. (`P[-1]' is the character `-'.) 3185 starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
3186 Then we set the translation of all bits between the starting and 3186 Then we set the translation of all bits between the starting and
3187 ending characters (inclusive) in the compiled pattern B. 3187 ending characters (inclusive) in the compiled pattern B.
3188 3188
3189 Return an error code. 3189 Return an error code.
3190 3190
3191 We use these short variable names so we can use the same macros as 3191 We use these short variable names so we can use the same macros as
3192 `regex_compile' itself. */ 3192 `regex_compile' itself. */
3193 3193
3194 static reg_errcode_t 3194 static reg_errcode_t
3195 compile_range (CONST char **p_ptr, CONST char *pend, char *translate, 3195 compile_range (CONST char **p_ptr, CONST char *pend, char *translate,
3197 { 3197 {
3198 unsigned this_char; 3198 unsigned this_char;
3199 3199
3200 CONST char *p = *p_ptr; 3200 CONST char *p = *p_ptr;
3201 int range_start, range_end; 3201 int range_start, range_end;
3202 3202
3203 if (p == pend) 3203 if (p == pend)
3204 return REG_ERANGE; 3204 return REG_ERANGE;
3205 3205
3206 /* Even though the pattern is a signed `char *', we need to fetch 3206 /* Even though the pattern is a signed `char *', we need to fetch
3207 with unsigned char *'s; if the high bit of the pattern character 3207 with unsigned char *'s; if the high bit of the pattern character
3208 is set, the range endpoints will be negative if we fetch using a 3208 is set, the range endpoints will be negative if we fetch using a
3209 signed char *. 3209 signed char *.
3210 3210
3211 We also want to fetch the endpoints without translating them; the 3211 We also want to fetch the endpoints without translating them; the
3212 appropriate translation is done in the bit-setting loop below. */ 3212 appropriate translation is done in the bit-setting loop below. */
3213 /* The SVR4 compiler on the 3B2 had trouble with unsigned CONST char *. */ 3213 /* The SVR4 compiler on the 3B2 had trouble with unsigned CONST char *. */
3214 range_start = ((CONST unsigned char *) p)[-2]; 3214 range_start = ((CONST unsigned char *) p)[-2];
3215 range_end = ((CONST unsigned char *) p)[0]; 3215 range_end = ((CONST unsigned char *) p)[0];
3216 3216
3228 loop, since all characters <= 0xff. */ 3228 loop, since all characters <= 0xff. */
3229 for (this_char = range_start; this_char <= range_end; this_char++) 3229 for (this_char = range_start; this_char <= range_end; this_char++)
3230 { 3230 {
3231 SET_LIST_BIT (TRANSLATE (this_char)); 3231 SET_LIST_BIT (TRANSLATE (this_char));
3232 } 3232 }
3233 3233
3234 return REG_NOERROR; 3234 return REG_NOERROR;
3235 } 3235 }
3236 3236
3237 #ifdef MULE 3237 #ifdef MULE
3238 3238
3240 compile_extended_range (CONST char **p_ptr, CONST char *pend, char *translate, 3240 compile_extended_range (CONST char **p_ptr, CONST char *pend, char *translate,
3241 reg_syntax_t syntax, Lisp_Object rtab) 3241 reg_syntax_t syntax, Lisp_Object rtab)
3242 { 3242 {
3243 Emchar this_char, range_start, range_end; 3243 Emchar this_char, range_start, range_end;
3244 CONST Bufbyte *p; 3244 CONST Bufbyte *p;
3245 3245
3246 if (*p_ptr == pend) 3246 if (*p_ptr == pend)
3247 return REG_ERANGE; 3247 return REG_ERANGE;
3248 3248
3249 p = (CONST Bufbyte *) *p_ptr; 3249 p = (CONST Bufbyte *) *p_ptr;
3250 range_end = charptr_emchar (p); 3250 range_end = charptr_emchar (p);
3251 p--; /* back to '-' */ 3251 p--; /* back to '-' */
3252 DEC_CHARPTR (p); /* back to start of range */ 3252 DEC_CHARPTR (p); /* back to start of range */
3253 /* We also want to fetch the endpoints without translating them; the 3253 /* We also want to fetch the endpoints without translating them; the
3254 appropriate translation is done in the bit-setting loop below. */ 3254 appropriate translation is done in the bit-setting loop below. */
3255 range_start = charptr_emchar (p); 3255 range_start = charptr_emchar (p);
3256 INC_CHARPTR (*p_ptr); 3256 INC_CHARPTR (*p_ptr);
3257 3257
3258 /* If the start is after the end, the range is empty. */ 3258 /* If the start is after the end, the range is empty. */
3259 if (range_start > range_end) 3259 if (range_start > range_end)
3260 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; 3260 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3261 3261
3262 /* Can't have ranges spanning different charsets, except maybe for 3262 /* Can't have ranges spanning different charsets, except maybe for
3263 ranges entirely within the first 256 chars. */ 3263 ranges entirely within the first 256 chars. */
3264 3264
3265 if ((range_start >= 0x100 || range_end >= 0x100) 3265 if ((range_start >= 0x100 || range_end >= 0x100)
3266 && CHAR_LEADING_BYTE (range_start) != 3266 && CHAR_LEADING_BYTE (range_start) !=
3267 CHAR_LEADING_BYTE (range_end)) 3267 CHAR_LEADING_BYTE (range_end))
3268 return REG_ERANGESPAN; 3268 return REG_ERANGESPAN;
3269 3269
3288 SET_RANGETAB_BIT (TRANSLATE (this_char)); 3288 SET_RANGETAB_BIT (TRANSLATE (this_char));
3289 } 3289 }
3290 3290
3291 if (this_char <= range_end) 3291 if (this_char <= range_end)
3292 put_range_table (rtab, this_char, range_end, Qt); 3292 put_range_table (rtab, this_char, range_end, Qt);
3293 3293
3294 return REG_NOERROR; 3294 return REG_NOERROR;
3295 } 3295 }
3296 3296
3297 #endif /* MULE */ 3297 #endif /* MULE */
3298 3298
3301 characters can start a string that matches the pattern. This fastmap 3301 characters can start a string that matches the pattern. This fastmap
3302 is used by re_search to skip quickly over impossible starting points. 3302 is used by re_search to skip quickly over impossible starting points.
3303 3303
3304 The caller must supply the address of a (1 << BYTEWIDTH)-byte data 3304 The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3305 area as BUFP->fastmap. 3305 area as BUFP->fastmap.
3306 3306
3307 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in 3307 We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3308 the pattern buffer. 3308 the pattern buffer.
3309 3309
3310 Returns 0 if we succeed, -2 if an internal error. */ 3310 Returns 0 if we succeed, -2 if an internal error. */
3311 3311
3316 #ifdef MATCH_MAY_ALLOCATE 3316 #ifdef MATCH_MAY_ALLOCATE
3317 fail_stack_type fail_stack; 3317 fail_stack_type fail_stack;
3318 #endif 3318 #endif
3319 DECLARE_DESTINATION 3319 DECLARE_DESTINATION
3320 /* We don't push any register information onto the failure stack. */ 3320 /* We don't push any register information onto the failure stack. */
3321 3321
3322 register char *fastmap = bufp->fastmap; 3322 register char *fastmap = bufp->fastmap;
3323 unsigned char *pattern = bufp->buffer; 3323 unsigned char *pattern = bufp->buffer;
3324 unsigned long size = bufp->used; 3324 unsigned long size = bufp->used;
3325 unsigned char *p = pattern; 3325 unsigned char *p = pattern;
3326 register unsigned char *pend = pattern + size; 3326 register unsigned char *pend = pattern + size;
3339 3339
3340 /* We aren't doing a `succeed_n' to begin with. */ 3340 /* We aren't doing a `succeed_n' to begin with. */
3341 boolean succeed_n_p = false; 3341 boolean succeed_n_p = false;
3342 3342
3343 assert (fastmap != NULL && p != NULL); 3343 assert (fastmap != NULL && p != NULL);
3344 3344
3345 INIT_FAIL_STACK (); 3345 INIT_FAIL_STACK ();
3346 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ 3346 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
3347 bufp->fastmap_accurate = 1; /* It will be when we're done. */ 3347 bufp->fastmap_accurate = 1; /* It will be when we're done. */
3348 bufp->can_be_null = 0; 3348 bufp->can_be_null = 0;
3349 3349
3350 while (1) 3350 while (1)
3351 { 3351 {
3352 if (p == pend || *p == succeed) 3352 if (p == pend || *p == succeed)
3353 { 3353 {
3354 /* We have reached the (effective) end of pattern. */ 3354 /* We have reached the (effective) end of pattern. */
3367 break; 3367 break;
3368 } 3368 }
3369 3369
3370 /* We should never be about to go beyond the end of the pattern. */ 3370 /* We should never be about to go beyond the end of the pattern. */
3371 assert (p < pend); 3371 assert (p < pend);
3372 3372
3373 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 3373 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3374 { 3374 {
3375 3375
3376 /* I guess the idea here is to simply not bother with a fastmap 3376 /* I guess the idea here is to simply not bother with a fastmap
3377 if a backreference is used, since it's too hard to figure out 3377 if a backreference is used, since it's too hard to figure out
3548 (regex_emacs_buffer->mirror_syntax_table), j) == 3548 (regex_emacs_buffer->mirror_syntax_table), j) ==
3549 (enum syntaxcode) k) 3549 (enum syntaxcode) k)
3550 fastmap[j] = 1; 3550 fastmap[j] = 1;
3551 for (j = 0x80; j < 0xA0; j++) 3551 for (j = 0x80; j < 0xA0; j++)
3552 { 3552 {
3553 if (j == PRE_LEADING_BYTE_PRIVATE_1 3553 if (LEADING_BYTE_PREFIX_P(j))
3554 || j == PRE_LEADING_BYTE_PRIVATE_2)
3555 /* too complicated to calculate this right */ 3554 /* too complicated to calculate this right */
3556 fastmap[j] = 1; 3555 fastmap[j] = 1;
3557 else 3556 else
3558 { 3557 {
3559 int multi_p; 3558 int multi_p;
3590 (regex_emacs_buffer->mirror_syntax_table), j) != 3589 (regex_emacs_buffer->mirror_syntax_table), j) !=
3591 (enum syntaxcode) k) 3590 (enum syntaxcode) k)
3592 fastmap[j] = 1; 3591 fastmap[j] = 1;
3593 for (j = 0x80; j < 0xA0; j++) 3592 for (j = 0x80; j < 0xA0; j++)
3594 { 3593 {
3595 if (j == PRE_LEADING_BYTE_PRIVATE_1 3594 if (LEADING_BYTE_PREFIX_P(j))
3596 || j == PRE_LEADING_BYTE_PRIVATE_2)
3597 /* too complicated to calculate this right */ 3595 /* too complicated to calculate this right */
3598 fastmap[j] = 1; 3596 fastmap[j] = 1;
3599 else 3597 else
3600 { 3598 {
3601 int multi_p; 3599 int multi_p;
3659 case maybe_pop_jump: 3657 case maybe_pop_jump:
3660 case jump: 3658 case jump:
3661 case jump_past_alt: 3659 case jump_past_alt:
3662 case dummy_failure_jump: 3660 case dummy_failure_jump:
3663 EXTRACT_NUMBER_AND_INCR (j, p); 3661 EXTRACT_NUMBER_AND_INCR (j, p);
3664 p += j; 3662 p += j;
3665 if (j > 0) 3663 if (j > 0)
3666 continue; 3664 continue;
3667 3665
3668 /* Jump backward implies we just went through the body of a 3666 /* Jump backward implies we just went through the body of a
3669 loop and matched nothing. Opcode jumped to should be 3667 loop and matched nothing. Opcode jumped to should be
3670 `on_failure_jump' or `succeed_n'. Just treat it like an 3668 `on_failure_jump' or `succeed_n'. Just treat it like an
3671 ordinary jump. For a * loop, it has pushed its failure 3669 ordinary jump. For a * loop, it has pushed its failure
3672 point already; if so, discard that as redundant. */ 3670 point already; if so, discard that as redundant. */
3674 && (re_opcode_t) *p != succeed_n) 3672 && (re_opcode_t) *p != succeed_n)
3675 continue; 3673 continue;
3676 3674
3677 p++; 3675 p++;
3678 EXTRACT_NUMBER_AND_INCR (j, p); 3676 EXTRACT_NUMBER_AND_INCR (j, p);
3679 p += j; 3677 p += j;
3680 3678
3681 /* If what's on the stack is where we are now, pop it. */ 3679 /* If what's on the stack is where we are now, pop it. */
3682 if (!FAIL_STACK_EMPTY () 3680 if (!FAIL_STACK_EMPTY ()
3683 && fail_stack.stack[fail_stack.avail - 1].pointer == p) 3681 && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3684 fail_stack.avail--; 3682 fail_stack.avail--;
3685 3683
3686 continue; 3684 continue;
3687 3685
3718 continue; 3716 continue;
3719 3717
3720 3718
3721 case succeed_n: 3719 case succeed_n:
3722 /* Get to the number of times to succeed. */ 3720 /* Get to the number of times to succeed. */
3723 p += 2; 3721 p += 2;
3724 3722
3725 /* Increment p past the n for when k != 0. */ 3723 /* Increment p past the n for when k != 0. */
3726 EXTRACT_NUMBER_AND_INCR (k, p); 3724 EXTRACT_NUMBER_AND_INCR (k, p);
3727 if (k == 0) 3725 if (k == 0)
3728 { 3726 {
3806 3804
3807 int 3805 int
3808 re_search (struct re_pattern_buffer *bufp, CONST char *string, int size, 3806 re_search (struct re_pattern_buffer *bufp, CONST char *string, int size,
3809 int startpos, int range, struct re_registers *regs) 3807 int startpos, int range, struct re_registers *regs)
3810 { 3808 {
3811 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 3809 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3812 regs, size); 3810 regs, size);
3813 } 3811 }
3814 3812
3815 #ifndef emacs 3813 #ifndef emacs
3816 /* Snarfed from src/lisp.h, needed for compiling [ce]tags. */ 3814 /* Snarfed from src/lisp.h, needed for compiling [ce]tags. */
3824 STARTPOS, then at STARTPOS + 1, and so on. 3822 STARTPOS, then at STARTPOS + 1, and so on.
3825 3823
3826 With MULE, STARTPOS is a byte position, not a char position. And the 3824 With MULE, STARTPOS is a byte position, not a char position. And the
3827 search will increment STARTPOS by the width of the current leading 3825 search will increment STARTPOS by the width of the current leading
3828 character. 3826 character.
3829 3827
3830 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. 3828 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3831 3829
3832 RANGE is how far to scan while trying to match. RANGE = 0 means try 3830 RANGE is how far to scan while trying to match. RANGE = 0 means try
3833 only at STARTPOS; in general, the last start tried is STARTPOS + 3831 only at STARTPOS; in general, the last start tried is STARTPOS +
3834 RANGE. 3832 RANGE.
3835 3833
3836 With MULE, RANGE is a byte position, not a char position. The last 3834 With MULE, RANGE is a byte position, not a char position. The last
3837 start tried is the character starting <= STARTPOS + RANGE. 3835 start tried is the character starting <= STARTPOS + RANGE.
3838 3836
3839 In REGS, return the indices of the virtual concatenation of STRING1 3837 In REGS, return the indices of the virtual concatenation of STRING1
3840 and STRING2 that matched the entire BUFP->buffer and its contained 3838 and STRING2 that matched the entire BUFP->buffer and its contained
3841 subexpressions. 3839 subexpressions.
3842 3840
3843 Do not consider matching one past the index STOP in the virtual 3841 Do not consider matching one past the index STOP in the virtual
3844 concatenation of STRING1 and STRING2. 3842 concatenation of STRING1 and STRING2.
3845 3843
3846 We return either the position in the strings at which the match was 3844 We return either the position in the strings at which the match was
3847 found, -1 if no match, or -2 if error (such as failure 3845 found, -1 if no match, or -2 if error (such as failure
3864 Charcount d_size; 3862 Charcount d_size;
3865 3863
3866 /* Check for out-of-range STARTPOS. */ 3864 /* Check for out-of-range STARTPOS. */
3867 if (startpos < 0 || startpos > total_size) 3865 if (startpos < 0 || startpos > total_size)
3868 return -1; 3866 return -1;
3869 3867
3870 /* Fix up RANGE if it might eventually take us outside 3868 /* Fix up RANGE if it might eventually take us outside
3871 the virtual concatenation of STRING1 and STRING2. */ 3869 the virtual concatenation of STRING1 and STRING2. */
3872 if (endpos < 0) 3870 if (endpos < 0)
3873 range = 0 - startpos; 3871 range = 0 - startpos;
3874 else if (endpos > total_size) 3872 else if (endpos > total_size)
3890 3888
3891 /* Update the fastmap now if not correct already. */ 3889 /* Update the fastmap now if not correct already. */
3892 if (fastmap && !bufp->fastmap_accurate) 3890 if (fastmap && !bufp->fastmap_accurate)
3893 if (re_compile_fastmap (bufp) == -2) 3891 if (re_compile_fastmap (bufp) == -2)
3894 return -2; 3892 return -2;
3895 3893
3896 #ifdef REGEX_BEGLINE_CHECK 3894 #ifdef REGEX_BEGLINE_CHECK
3897 { 3895 {
3898 int i = 0; 3896 int i = 0;
3899 3897
3900 while (i < bufp->used) 3898 while (i < bufp->used)
3909 } 3907 }
3910 #endif 3908 #endif
3911 3909
3912 /* Loop through the string, looking for a place to start matching. */ 3910 /* Loop through the string, looking for a place to start matching. */
3913 for (;;) 3911 for (;;)
3914 { 3912 {
3915 #ifdef REGEX_BEGLINE_CHECK 3913 #ifdef REGEX_BEGLINE_CHECK
3916 /* If the regex is anchored at the beginning of a line (i.e. with a ^), 3914 /* If the regex is anchored at the beginning of a line (i.e. with a ^),
3917 then we can speed things up by skipping to the next beginning-of- 3915 then we can speed things up by skipping to the next beginning-of-
3918 line. */ 3916 line. */
3919 if (anchored_at_begline && startpos > 0 && startpos != size1 && 3917 if (anchored_at_begline && startpos > 0 && startpos != size1 &&
3996 startpos += irange - range; 3994 startpos += irange - range;
3997 } 3995 }
3998 else /* Searching backwards. */ 3996 else /* Searching backwards. */
3999 { 3997 {
4000 unsigned char c = (size1 == 0 || startpos >= size1 3998 unsigned char c = (size1 == 0 || startpos >= size1
4001 ? string2[startpos - size1] 3999 ? string2[startpos - size1]
4002 : string1[startpos]); 4000 : string1[startpos]);
4003 #ifdef MULE 4001 #ifdef MULE
4004 if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)]) 4002 if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)])
4005 #else 4003 #else
4006 if (!fastmap[(unsigned char) TRANSLATE (c)]) 4004 if (!fastmap[(unsigned char) TRANSLATE (c)])
4026 #endif 4024 #endif
4027 #endif 4025 #endif
4028 4026
4029 if (val >= 0) 4027 if (val >= 0)
4030 return startpos; 4028 return startpos;
4031 4029
4032 if (val == -2) 4030 if (val == -2)
4033 return -2; 4031 return -2;
4034 4032
4035 advance: 4033 advance:
4036 if (!range) 4034 if (!range)
4037 break; 4035 break;
4038 else if (range > 0) 4036 else if (range > 0)
4039 { 4037 {
4040 d = ((CONST unsigned char *) 4038 d = ((CONST unsigned char *)
4041 (startpos >= size1 ? string2 - size1 : string1) + startpos); 4039 (startpos >= size1 ? string2 - size1 : string1) + startpos);
4042 d_size = charcount_to_bytecount (d, 1); 4040 d_size = charcount_to_bytecount (d, 1);
4043 range -= d_size; 4041 range -= d_size;
4086 4084
4087 4085
4088 /* Test if at very beginning or at very end of the virtual concatenation 4086 /* Test if at very beginning or at very end of the virtual concatenation
4089 of `string1' and `string2'. If only one string, it's `string2'. */ 4087 of `string1' and `string2'. If only one string, it's `string2'. */
4090 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) 4088 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4091 #define AT_STRINGS_END(d) ((d) == end2) 4089 #define AT_STRINGS_END(d) ((d) == end2)
4092 4090
4093 /* XEmacs change: 4091 /* XEmacs change:
4094 If the given position straddles the string gap, return the equivalent 4092 If the given position straddles the string gap, return the equivalent
4095 position that is before or after the gap, respectively; otherwise, 4093 position that is before or after the gap, respectively; otherwise,
4096 return the same position. */ 4094 return the same position. */
4151 4149
4152 /* re_match_2 matches the compiled pattern in BUFP against the 4150 /* re_match_2 matches the compiled pattern in BUFP against the
4153 (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and 4151 (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and
4154 SIZE2, respectively). We start matching at POS, and stop matching 4152 SIZE2, respectively). We start matching at POS, and stop matching
4155 at STOP. 4153 at STOP.
4156 4154
4157 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we 4155 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4158 store offsets for the substring each group matched in REGS. See the 4156 store offsets for the substring each group matched in REGS. See the
4159 documentation for exactly how many groups we fill. 4157 documentation for exactly how many groups we fill.
4160 4158
4161 We return -1 if no match, -2 if an internal error (such as the 4159 We return -1 if no match, -2 if an internal error (such as the
4192 each to consider matching. */ 4190 each to consider matching. */
4193 CONST char *end_match_1, *end_match_2; 4191 CONST char *end_match_1, *end_match_2;
4194 4192
4195 /* Where we are in the data, and the end of the current string. */ 4193 /* Where we are in the data, and the end of the current string. */
4196 CONST char *d, *dend; 4194 CONST char *d, *dend;
4197 4195
4198 /* Where we are in the pattern, and the end of the pattern. */ 4196 /* Where we are in the pattern, and the end of the pattern. */
4199 unsigned char *p = bufp->buffer; 4197 unsigned char *p = bufp->buffer;
4200 register unsigned char *pend = p + bufp->used; 4198 register unsigned char *pend = p + bufp->used;
4201 4199
4202 /* Mark the opcode just after a start_memory, so we can test for an 4200 /* Mark the opcode just after a start_memory, so we can test for an
4231 4229
4232 /* We fill all the registers internally, independent of what we 4230 /* We fill all the registers internally, independent of what we
4233 return, for use in backreferences. The number here includes 4231 return, for use in backreferences. The number here includes
4234 an element for register zero. */ 4232 an element for register zero. */
4235 unsigned num_regs = bufp->re_nsub + 1; 4233 unsigned num_regs = bufp->re_nsub + 1;
4236 4234
4237 /* The currently active registers. */ 4235 /* The currently active registers. */
4238 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; 4236 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4239 unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; 4237 unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4240 4238
4241 /* Information on the contents of registers. These are pointers into 4239 /* Information on the contents of registers. These are pointers into
4263 field of reg_info[reg_num] helps us tell whether or not we have 4261 field of reg_info[reg_num] helps us tell whether or not we have
4264 matched any of the pattern so far this time through the reg_num-th 4262 matched any of the pattern so far this time through the reg_num-th
4265 subexpression. These two fields get reset each time through any 4263 subexpression. These two fields get reset each time through any
4266 loop their register is in. */ 4264 loop their register is in. */
4267 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 4265 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
4268 register_info_type *reg_info; 4266 register_info_type *reg_info;
4269 #endif 4267 #endif
4270 4268
4271 /* The following record the register info as found in the above 4269 /* The following record the register info as found in the above
4272 variables when we find a match better than any we've seen before. 4270 variables when we find a match better than any we've seen before.
4273 This happens as we backtrack through the failure points, which in 4271 This happens as we backtrack through the failure points, which in
4274 turn happens only if we have not yet matched the entire string. */ 4272 turn happens only if we have not yet matched the entire string. */
4275 unsigned best_regs_set = false; 4273 unsigned best_regs_set = false;
4276 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 4274 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4277 CONST char **best_regstart, **best_regend; 4275 CONST char **best_regstart, **best_regend;
4278 #endif 4276 #endif
4279 4277
4280 /* Logically, this is `best_regend[0]'. But we don't want to have to 4278 /* Logically, this is `best_regend[0]'. But we don't want to have to
4281 allocate space for that if we're not allocating space for anything 4279 allocate space for that if we're not allocating space for anything
4282 else (see below). Also, we never need info about register 0 for 4280 else (see below). Also, we never need info about register 0 for
4283 any of the other register vectors, and it seems rather a kludge to 4281 any of the other register vectors, and it seems rather a kludge to
4284 treat `best_regend' differently than the rest. So we keep track of 4282 treat `best_regend' differently than the rest. So we keep track of
4296 register_info_type *reg_info_dummy; 4294 register_info_type *reg_info_dummy;
4297 #endif 4295 #endif
4298 4296
4299 #ifdef DEBUG 4297 #ifdef DEBUG
4300 /* Counts the total number of registers pushed. */ 4298 /* Counts the total number of registers pushed. */
4301 unsigned num_regs_pushed = 0; 4299 unsigned num_regs_pushed = 0;
4302 #endif 4300 #endif
4303 4301
4304 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); 4302 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
4305 4303
4306 INIT_FAIL_STACK (); 4304 INIT_FAIL_STACK ();
4307 4305
4308 #ifdef MATCH_MAY_ALLOCATE 4306 #ifdef MATCH_MAY_ALLOCATE
4309 /* Do not bother to initialize all the register variables if there are 4307 /* Do not bother to initialize all the register variables if there are
4310 no groups in the pattern, as it takes a fair amount of time. If 4308 no groups in the pattern, as it takes a fair amount of time. If
4311 there are groups, we include space for register 0 (the whole 4309 there are groups, we include space for register 0 (the whole
4312 pattern), even though we never use it, since it simplifies the 4310 pattern), even though we never use it, since it simplifies the
4321 best_regend = REGEX_TALLOC (num_regs, CONST char *); 4319 best_regend = REGEX_TALLOC (num_regs, CONST char *);
4322 reg_info = REGEX_TALLOC (num_regs, register_info_type); 4320 reg_info = REGEX_TALLOC (num_regs, register_info_type);
4323 reg_dummy = REGEX_TALLOC (num_regs, CONST char *); 4321 reg_dummy = REGEX_TALLOC (num_regs, CONST char *);
4324 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); 4322 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
4325 4323
4326 if (!(regstart && regend && old_regstart && old_regend && reg_info 4324 if (!(regstart && regend && old_regstart && old_regend && reg_info
4327 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 4325 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
4328 { 4326 {
4329 FREE_VARIABLES (); 4327 FREE_VARIABLES ();
4330 return -2; 4328 return -2;
4331 } 4329 }
4332 } 4330 }
4344 if (pos < 0 || pos > size1 + size2) 4342 if (pos < 0 || pos > size1 + size2)
4345 { 4343 {
4346 FREE_VARIABLES (); 4344 FREE_VARIABLES ();
4347 return -1; 4345 return -1;
4348 } 4346 }
4349 4347
4350 /* Initialize subexpression text positions to -1 to mark ones that no 4348 /* Initialize subexpression text positions to -1 to mark ones that no
4351 start_memory/stop_memory has been seen for. Also initialize the 4349 start_memory/stop_memory has been seen for. Also initialize the
4352 register information struct. */ 4350 register information struct. */
4353 for (mcnt = 1; mcnt < num_regs; mcnt++) 4351 for (mcnt = 1; mcnt < num_regs; mcnt++)
4354 { 4352 {
4355 regstart[mcnt] = regend[mcnt] 4353 regstart[mcnt] = regend[mcnt]
4356 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; 4354 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4357 4355
4358 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; 4356 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
4359 IS_ACTIVE (reg_info[mcnt]) = 0; 4357 IS_ACTIVE (reg_info[mcnt]) = 0;
4360 MATCHED_SOMETHING (reg_info[mcnt]) = 0; 4358 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4361 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; 4359 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4362 } 4360 }
4363 4361
4364 /* We move `string1' into `string2' if the latter's empty -- but not if 4362 /* We move `string1' into `string2' if the latter's empty -- but not if
4365 `string1' is null. */ 4363 `string1' is null. */
4366 if (size2 == 0 && string1 != NULL) 4364 if (size2 == 0 && string1 != NULL)
4367 { 4365 {
4368 string2 = string1; 4366 string2 = string1;
4383 { 4381 {
4384 end_match_1 = end1; 4382 end_match_1 = end1;
4385 end_match_2 = string2 + stop - size1; 4383 end_match_2 = string2 + stop - size1;
4386 } 4384 }
4387 4385
4388 /* `p' scans through the pattern as `d' scans through the data. 4386 /* `p' scans through the pattern as `d' scans through the data.
4389 `dend' is the end of the input string that `d' points within. `d' 4387 `dend' is the end of the input string that `d' points within. `d'
4390 is advanced into the following input string whenever necessary, but 4388 is advanced into the following input string whenever necessary, but
4391 this happens before fetching; therefore, at the beginning of the 4389 this happens before fetching; therefore, at the beginning of the
4392 loop, `d' can be pointing at the end of a string, but it cannot 4390 loop, `d' can be pointing at the end of a string, but it cannot
4393 equal `string2'. */ 4391 equal `string2'. */
4405 DEBUG_PRINT1 ("The compiled pattern is: "); 4403 DEBUG_PRINT1 ("The compiled pattern is: ");
4406 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); 4404 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4407 DEBUG_PRINT1 ("The string to match is: `"); 4405 DEBUG_PRINT1 ("The string to match is: `");
4408 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); 4406 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4409 DEBUG_PRINT1 ("'\n"); 4407 DEBUG_PRINT1 ("'\n");
4410 4408
4411 /* This loops over pattern commands. It exits by returning from the 4409 /* This loops over pattern commands. It exits by returning from the
4412 function if the match is complete, or it drops through if the match 4410 function if the match is complete, or it drops through if the match
4413 fails at this starting point in the input data. */ 4411 fails at this starting point in the input data. */
4414 for (;;) 4412 for (;;)
4415 { 4413 {
4416 DEBUG_PRINT2 ("\n0x%p: ", p); 4414 DEBUG_PRINT2 ("\n0x%p: ", p);
4417 4415
4418 if (p == pend) 4416 if (p == pend)
4419 { /* End of pattern means we might have succeeded. */ 4417 { /* End of pattern means we might have succeeded. */
4420 DEBUG_PRINT1 ("end of pattern ... "); 4418 DEBUG_PRINT1 ("end of pattern ... ");
4421 4419
4422 /* If we haven't matched the entire string, and we want the 4420 /* If we haven't matched the entire string, and we want the
4423 longest match, try backtracking. */ 4421 longest match, try backtracking. */
4424 if (d != end_match_2) 4422 if (d != end_match_2)
4425 { 4423 {
4426 /* 1 if this match ends in the same string (string1 or string2) 4424 /* 1 if this match ends in the same string (string1 or string2)
4427 as the best previous match. */ 4425 as the best previous match. */
4428 boolean same_str_p = (FIRST_STRING_P (match_end) 4426 boolean same_str_p = (FIRST_STRING_P (match_end)
4429 == MATCHING_IN_FIRST_STRING); 4427 == MATCHING_IN_FIRST_STRING);
4430 /* 1 if this match is the best seen so far. */ 4428 /* 1 if this match is the best seen so far. */
4431 boolean best_match_p; 4429 boolean best_match_p;
4432 4430
4433 /* AIX compiler got confused when this was combined 4431 /* AIX compiler got confused when this was combined
4436 best_match_p = d > match_end; 4434 best_match_p = d > match_end;
4437 else 4435 else
4438 best_match_p = !MATCHING_IN_FIRST_STRING; 4436 best_match_p = !MATCHING_IN_FIRST_STRING;
4439 4437
4440 DEBUG_PRINT1 ("backtracking.\n"); 4438 DEBUG_PRINT1 ("backtracking.\n");
4441 4439
4442 if (!FAIL_STACK_EMPTY ()) 4440 if (!FAIL_STACK_EMPTY ())
4443 { /* More failure points to try. */ 4441 { /* More failure points to try. */
4444 4442
4445 /* If exceeds best match so far, save it. */ 4443 /* If exceeds best match so far, save it. */
4446 if (!best_regs_set || best_match_p) 4444 if (!best_regs_set || best_match_p)
4447 { 4445 {
4448 best_regs_set = true; 4446 best_regs_set = true;
4449 match_end = d; 4447 match_end = d;
4450 4448
4451 DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); 4449 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4452 4450
4453 for (mcnt = 1; mcnt < num_regs; mcnt++) 4451 for (mcnt = 1; mcnt < num_regs; mcnt++)
4454 { 4452 {
4455 best_regstart[mcnt] = regstart[mcnt]; 4453 best_regstart[mcnt] = regstart[mcnt];
4456 best_regend[mcnt] = regend[mcnt]; 4454 best_regend[mcnt] = regend[mcnt];
4457 } 4455 }
4458 } 4456 }
4459 goto fail; 4457 goto fail;
4460 } 4458 }
4461 4459
4462 /* If no failure points, don't restore garbage. And if 4460 /* If no failure points, don't restore garbage. And if
4463 last match is real best match, don't restore second 4461 last match is real best match, don't restore second
4464 best one. */ 4462 best one. */
4469 end_match_1' while the restored d is in string2. 4467 end_match_1' while the restored d is in string2.
4470 For example, the pattern `x.*y.*z' against the 4468 For example, the pattern `x.*y.*z' against the
4471 strings `x-' and `y-z-', if the two strings are 4469 strings `x-' and `y-z-', if the two strings are
4472 not consecutive in memory. */ 4470 not consecutive in memory. */
4473 DEBUG_PRINT1 ("Restoring best registers.\n"); 4471 DEBUG_PRINT1 ("Restoring best registers.\n");
4474 4472
4475 d = match_end; 4473 d = match_end;
4476 dend = ((d >= string1 && d <= end1) 4474 dend = ((d >= string1 && d <= end1)
4477 ? end_match_1 : end_match_2); 4475 ? end_match_1 : end_match_2);
4478 4476
4479 for (mcnt = 1; mcnt < num_regs; mcnt++) 4477 for (mcnt = 1; mcnt < num_regs; mcnt++)
4536 regs->start[0] = pos; 4534 regs->start[0] = pos;
4537 regs->end[0] = (MATCHING_IN_FIRST_STRING 4535 regs->end[0] = (MATCHING_IN_FIRST_STRING
4538 ? ((regoff_t) (d - string1)) 4536 ? ((regoff_t) (d - string1))
4539 : ((regoff_t) (d - string2 + size1))); 4537 : ((regoff_t) (d - string2 + size1)));
4540 } 4538 }
4541 4539
4542 /* Go through the first `min (num_regs, regs->num_regs)' 4540 /* Go through the first `min (num_regs, regs->num_regs)'
4543 registers, since that is all we initialized. */ 4541 registers, since that is all we initialized. */
4544 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) 4542 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
4545 { 4543 {
4546 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) 4544 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4551 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); 4549 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4552 regs->end[mcnt] 4550 regs->end[mcnt]
4553 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); 4551 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4554 } 4552 }
4555 } 4553 }
4556 4554
4557 /* If the regs structure we return has more elements than 4555 /* If the regs structure we return has more elements than
4558 were in the pattern, set the extra elements to -1. If 4556 were in the pattern, set the extra elements to -1. If
4559 we (re)allocated the registers, this is the case, 4557 we (re)allocated the registers, this is the case,
4560 because we always allocate enough to have at least one 4558 because we always allocate enough to have at least one
4561 -1 at the end. */ 4559 -1 at the end. */
4566 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", 4564 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4567 nfailure_points_pushed, nfailure_points_popped, 4565 nfailure_points_pushed, nfailure_points_popped,
4568 nfailure_points_pushed - nfailure_points_popped); 4566 nfailure_points_pushed - nfailure_points_popped);
4569 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); 4567 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4570 4568
4571 mcnt = d - pos - (MATCHING_IN_FIRST_STRING 4569 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4572 ? string1 4570 ? string1
4573 : string2 - size1); 4571 : string2 - size1);
4574 4572
4575 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); 4573 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4576 4574
4577 FREE_VARIABLES (); 4575 FREE_VARIABLES ();
4657 not = !not; 4655 not = !not;
4658 4656
4659 p += 1 + *p; 4657 p += 1 + *p;
4660 4658
4661 if (!not) goto fail; 4659 if (!not) goto fail;
4662 4660
4663 SET_REGS_MATCHED (); 4661 SET_REGS_MATCHED ();
4664 INC_CHARPTR (d); /* XEmacs change */ 4662 INC_CHARPTR (d); /* XEmacs change */
4665 break; 4663 break;
4666 } 4664 }
4667 4665
4680 4678
4681 if (EQ (Qt, unified_range_table_lookup (p, c, Qnil))) 4679 if (EQ (Qt, unified_range_table_lookup (p, c, Qnil)))
4682 not = !not; 4680 not = !not;
4683 4681
4684 p += unified_range_table_bytes_used (p); 4682 p += unified_range_table_bytes_used (p);
4685 4683
4686 if (!not) goto fail; 4684 if (!not) goto fail;
4687 4685
4688 SET_REGS_MATCHED (); 4686 SET_REGS_MATCHED ();
4689 INC_CHARPTR (d); 4687 INC_CHARPTR (d);
4690 break; 4688 break;
4691 } 4689 }
4692 #endif 4690 #endif
4700 case start_memory: 4698 case start_memory:
4701 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); 4699 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4702 4700
4703 /* Find out if this group can match the empty string. */ 4701 /* Find out if this group can match the empty string. */
4704 p1 = p; /* To send to group_match_null_string_p. */ 4702 p1 = p; /* To send to group_match_null_string_p. */
4705 4703
4706 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) 4704 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4707 REG_MATCH_NULL_STRING_P (reg_info[*p]) 4705 REG_MATCH_NULL_STRING_P (reg_info[*p])
4708 = group_match_null_string_p (&p1, pend, reg_info); 4706 = group_match_null_string_p (&p1, pend, reg_info);
4709 4707
4710 /* Save the position in the string where we were the last time 4708 /* Save the position in the string where we were the last time
4711 we were at this open-group operator in case the group is 4709 we were at this open-group operator in case the group is
4712 operated upon by a repetition operator, e.g., with `(a*)*b' 4710 operated upon by a repetition operator, e.g., with `(a*)*b'
4713 against `ab'; then we want to ignore where we are now in 4711 against `ab'; then we want to ignore where we are now in
4714 the string in case this attempt to match fails. */ 4712 the string in case this attempt to match fails. */
4715 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 4713 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4716 ? REG_UNSET (regstart[*p]) ? d : regstart[*p] 4714 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4717 : regstart[*p]; 4715 : regstart[*p];
4718 DEBUG_PRINT2 (" old_regstart: %d\n", 4716 DEBUG_PRINT2 (" old_regstart: %d\n",
4719 POINTER_TO_OFFSET (old_regstart[*p])); 4717 POINTER_TO_OFFSET (old_regstart[*p]));
4720 4718
4721 regstart[*p] = d; 4719 regstart[*p] = d;
4722 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); 4720 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4723 4721
4724 IS_ACTIVE (reg_info[*p]) = 1; 4722 IS_ACTIVE (reg_info[*p]) = 1;
4725 MATCHED_SOMETHING (reg_info[*p]) = 0; 4723 MATCHED_SOMETHING (reg_info[*p]) = 0;
4726 4724
4727 /* Clear this whenever we change the register activity status. */ 4725 /* Clear this whenever we change the register activity status. */
4728 set_regs_matched_done = 0; 4726 set_regs_matched_done = 0;
4729 4727
4730 /* This is the new highest active register. */ 4728 /* This is the new highest active register. */
4731 highest_active_reg = *p; 4729 highest_active_reg = *p;
4732 4730
4733 /* If nothing was active before, this is the new lowest active 4731 /* If nothing was active before, this is the new lowest active
4734 register. */ 4732 register. */
4735 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 4733 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4736 lowest_active_reg = *p; 4734 lowest_active_reg = *p;
4737 4735
4745 /* The stop_memory opcode represents the end of a group. Its 4743 /* The stop_memory opcode represents the end of a group. Its
4746 arguments are the same as start_memory's: the register 4744 arguments are the same as start_memory's: the register
4747 number, and the number of inner groups. */ 4745 number, and the number of inner groups. */
4748 case stop_memory: 4746 case stop_memory:
4749 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); 4747 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4750 4748
4751 /* We need to save the string position the last time we were at 4749 /* We need to save the string position the last time we were at
4752 this close-group operator in case the group is operated 4750 this close-group operator in case the group is operated
4753 upon by a repetition operator, e.g., with `((a*)*(b*)*)*' 4751 upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4754 against `aba'; then we want to ignore where we are now in 4752 against `aba'; then we want to ignore where we are now in
4755 the string in case this attempt to match fails. */ 4753 the string in case this attempt to match fails. */
4756 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 4754 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4757 ? REG_UNSET (regend[*p]) ? d : regend[*p] 4755 ? REG_UNSET (regend[*p]) ? d : regend[*p]
4758 : regend[*p]; 4756 : regend[*p];
4759 DEBUG_PRINT2 (" old_regend: %d\n", 4757 DEBUG_PRINT2 (" old_regend: %d\n",
4760 POINTER_TO_OFFSET (old_regend[*p])); 4758 POINTER_TO_OFFSET (old_regend[*p]));
4761 4759
4762 regend[*p] = d; 4760 regend[*p] = d;
4763 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); 4761 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4764 4762
4781 (a(b)c(d(e)f)g). When group 3 ends, after the f), the 4779 (a(b)c(d(e)f)g). When group 3 ends, after the f), the
4782 new highest active register is 1. */ 4780 new highest active register is 1. */
4783 unsigned char r = *p - 1; 4781 unsigned char r = *p - 1;
4784 while (r > 0 && !IS_ACTIVE (reg_info[r])) 4782 while (r > 0 && !IS_ACTIVE (reg_info[r]))
4785 r--; 4783 r--;
4786 4784
4787 /* If we end up at register zero, that means that we saved 4785 /* If we end up at register zero, that means that we saved
4788 the registers as the result of an `on_failure_jump', not 4786 the registers as the result of an `on_failure_jump', not
4789 a `start_memory', and we jumped to past the innermost 4787 a `start_memory', and we jumped to past the innermost
4790 `stop_memory'. For example, in ((.)*) we save 4788 `stop_memory'. For example, in ((.)*) we save
4791 registers 1 and 2 as a result of the *, but when we pop 4789 registers 1 and 2 as a result of the *, but when we pop
4797 highest_active_reg = NO_HIGHEST_ACTIVE_REG; 4795 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4798 } 4796 }
4799 else 4797 else
4800 highest_active_reg = r; 4798 highest_active_reg = r;
4801 } 4799 }
4802 4800
4803 /* If we just failed to match something this time around with a 4801 /* If we just failed to match something this time around with a
4804 group that's operated on by a repetition operator, try to 4802 group that's operated on by a repetition operator, try to
4805 force exit from the ``loop'', and restore the register 4803 force exit from the ``loop'', and restore the register
4806 information for this group that we had before trying this 4804 information for this group that we had before trying this
4807 last match. */ 4805 last match. */
4808 if ((!MATCHED_SOMETHING (reg_info[*p]) 4806 if ((!MATCHED_SOMETHING (reg_info[*p])
4809 || just_past_start_mem == p - 1) 4807 || just_past_start_mem == p - 1)
4810 && (p + 2) < pend) 4808 && (p + 2) < pend)
4811 { 4809 {
4812 boolean is_a_jump_n = false; 4810 boolean is_a_jump_n = false;
4813 4811
4814 p1 = p + 2; 4812 p1 = p + 2;
4815 mcnt = 0; 4813 mcnt = 0;
4816 switch ((re_opcode_t) *p1++) 4814 switch ((re_opcode_t) *p1++)
4817 { 4815 {
4818 case jump_n: 4816 case jump_n:
4823 case dummy_failure_jump: 4821 case dummy_failure_jump:
4824 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 4822 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4825 if (is_a_jump_n) 4823 if (is_a_jump_n)
4826 p1 += 2; 4824 p1 += 2;
4827 break; 4825 break;
4828 4826
4829 default: 4827 default:
4830 /* do nothing */ ; 4828 /* do nothing */ ;
4831 } 4829 }
4832 p1 += mcnt; 4830 p1 += mcnt;
4833 4831
4834 /* If the next operation is a jump backwards in the pattern 4832 /* If the next operation is a jump backwards in the pattern
4835 to an on_failure_jump right before the start_memory 4833 to an on_failure_jump right before the start_memory
4836 corresponding to this stop_memory, exit from the loop 4834 corresponding to this stop_memory, exit from the loop
4837 by forcing a failure after pushing on the stack the 4835 by forcing a failure after pushing on the stack the
4838 on_failure_jump's jump in the pattern, and d. */ 4836 on_failure_jump's jump in the pattern, and d. */
4842 /* If this group ever matched anything, then restore 4840 /* If this group ever matched anything, then restore
4843 what its registers were before trying this last 4841 what its registers were before trying this last
4844 failed match, e.g., with `(a*)*b' against `ab' for 4842 failed match, e.g., with `(a*)*b' against `ab' for
4845 regstart[1], and, e.g., with `((a*)*(b*)*)*' 4843 regstart[1], and, e.g., with `((a*)*(b*)*)*'
4846 against `aba' for regend[3]. 4844 against `aba' for regend[3].
4847 4845
4848 Also restore the registers for inner groups for, 4846 Also restore the registers for inner groups for,
4849 e.g., `((a*)(b*))*' against `aba' (register 3 would 4847 e.g., `((a*)(b*))*' against `aba' (register 3 would
4850 otherwise get trashed). */ 4848 otherwise get trashed). */
4851 4849
4852 if (EVER_MATCHED_SOMETHING (reg_info[*p])) 4850 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4853 { 4851 {
4854 unsigned r; 4852 unsigned r;
4855 4853
4856 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; 4854 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4857 4855
4858 /* Restore this and inner groups' (if any) registers. */ 4856 /* Restore this and inner groups' (if any) registers. */
4859 for (r = *p; r < *p + *(p + 1); r++) 4857 for (r = *p; r < *p + *(p + 1); r++)
4860 { 4858 {
4861 regstart[r] = old_regstart[r]; 4859 regstart[r] = old_regstart[r];
4862 4860
4863 /* xx why this test? */ 4861 /* xx why this test? */
4864 if (old_regend[r] >= regstart[r]) 4862 if (old_regend[r] >= regstart[r])
4865 regend[r] = old_regend[r]; 4863 regend[r] = old_regend[r];
4866 } 4864 }
4867 } 4865 }
4868 p1++; 4866 p1++;
4869 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 4867 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4870 PUSH_FAILURE_POINT (p1 + mcnt, d, -2); 4868 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4871 4869
4872 goto fail; 4870 goto fail;
4873 } 4871 }
4874 } 4872 }
4875 4873
4876 /* Move past the register number and the inner group count. */ 4874 /* Move past the register number and the inner group count. */
4877 p += 2; 4875 p += 2;
4878 break; 4876 break;
4879 4877
4880 4878
4887 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 4885 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4888 4886
4889 /* Can't back reference a group which we've never matched. */ 4887 /* Can't back reference a group which we've never matched. */
4890 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) 4888 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4891 goto fail; 4889 goto fail;
4892 4890
4893 /* Where in input to try to start matching. */ 4891 /* Where in input to try to start matching. */
4894 d2 = regstart[regno]; 4892 d2 = regstart[regno];
4895 4893
4896 /* Where to stop matching; if both the place to start and 4894 /* Where to stop matching; if both the place to start and
4897 the place to stop matching are in the same string, then 4895 the place to stop matching are in the same string, then
4898 set to the place to stop, otherwise, for now have to use 4896 set to the place to stop, otherwise, for now have to use
4899 the end of the first string. */ 4897 the end of the first string. */
4900 4898
4901 dend2 = ((FIRST_STRING_P (regstart[regno]) 4899 dend2 = ((FIRST_STRING_P (regstart[regno])
4902 == FIRST_STRING_P (regend[regno])) 4900 == FIRST_STRING_P (regend[regno]))
4903 ? regend[regno] : end_match_1); 4901 ? regend[regno] : end_match_1);
4904 for (;;) 4902 for (;;)
4905 { 4903 {
4906 /* If necessary, advance to next segment in register 4904 /* If necessary, advance to next segment in register
4920 /* If necessary, advance to next segment in data. */ 4918 /* If necessary, advance to next segment in data. */
4921 PREFETCH (); 4919 PREFETCH ();
4922 4920
4923 /* How many characters left in this segment to match. */ 4921 /* How many characters left in this segment to match. */
4924 mcnt = dend - d; 4922 mcnt = dend - d;
4925 4923
4926 /* Want how many consecutive characters we can match in 4924 /* Want how many consecutive characters we can match in
4927 one shot, so, if necessary, adjust the count. */ 4925 one shot, so, if necessary, adjust the count. */
4928 if (mcnt > dend2 - d2) 4926 if (mcnt > dend2 - d2)
4929 mcnt = dend2 - d2; 4927 mcnt = dend2 - d2;
4930 4928
4931 /* Compare that many; failure if mismatch, else move 4929 /* Compare that many; failure if mismatch, else move
4932 past them. */ 4930 past them. */
4933 if (translate 4931 if (translate
4934 ? bcmp_translate ((unsigned char *) d, 4932 ? bcmp_translate ((unsigned char *) d,
4935 (unsigned char *) d2, mcnt, translate) 4933 (unsigned char *) d2, mcnt, translate)
4936 : memcmp (d, d2, mcnt)) 4934 : memcmp (d, d2, mcnt))
4937 goto fail; 4935 goto fail;
4938 d += mcnt, d2 += mcnt; 4936 d += mcnt, d2 += mcnt;
4939 4937
4940 /* Do this because we've matched some characters. */ 4938 /* Do this because we've matched some characters. */
4947 /* begline matches the empty string at the beginning of the string 4945 /* begline matches the empty string at the beginning of the string
4948 (unless `not_bol' is set in `bufp'), and, if 4946 (unless `not_bol' is set in `bufp'), and, if
4949 `newline_anchor' is set, after newlines. */ 4947 `newline_anchor' is set, after newlines. */
4950 case begline: 4948 case begline:
4951 DEBUG_PRINT1 ("EXECUTING begline.\n"); 4949 DEBUG_PRINT1 ("EXECUTING begline.\n");
4952 4950
4953 if (AT_STRINGS_BEG (d)) 4951 if (AT_STRINGS_BEG (d))
4954 { 4952 {
4955 if (!bufp->not_bol) break; 4953 if (!bufp->not_bol) break;
4956 } 4954 }
4957 else if (d[-1] == '\n' && bufp->newline_anchor) 4955 else if (d[-1] == '\n' && bufp->newline_anchor)
4968 4966
4969 if (AT_STRINGS_END (d)) 4967 if (AT_STRINGS_END (d))
4970 { 4968 {
4971 if (!bufp->not_eol) break; 4969 if (!bufp->not_eol) break;
4972 } 4970 }
4973 4971
4974 /* We have to ``prefetch'' the next character. */ 4972 /* We have to ``prefetch'' the next character. */
4975 else if ((d == end1 ? *string2 : *d) == '\n' 4973 else if ((d == end1 ? *string2 : *d) == '\n'
4976 && bufp->newline_anchor) 4974 && bufp->newline_anchor)
4977 { 4975 {
4978 break; 4976 break;
5002 string, instead of restoring it. To see why, consider 5000 string, instead of restoring it. To see why, consider
5003 matching `foo\nbar' against `.*\n'. The .* matches the foo; 5001 matching `foo\nbar' against `.*\n'. The .* matches the foo;
5004 then the . fails against the \n. But the next thing we want 5002 then the . fails against the \n. But the next thing we want
5005 to do is match the \n against the \n; if we restored the 5003 to do is match the \n against the \n; if we restored the
5006 string value, we would be back at the foo. 5004 string value, we would be back at the foo.
5007 5005
5008 Because this is used only in specific cases, we don't need to 5006 Because this is used only in specific cases, we don't need to
5009 check all the things that `on_failure_jump' does, to make 5007 check all the things that `on_failure_jump' does, to make
5010 sure the right things get saved on the stack. Hence we don't 5008 sure the right things get saved on the stack. Hence we don't
5011 share its code. The only reason to push anything on the 5009 share its code. The only reason to push anything on the
5012 stack at all is that otherwise we would have to change 5010 stack at all is that otherwise we would have to change
5013 `anychar's code to do something besides goto fail in this 5011 `anychar's code to do something besides goto fail in this
5014 case; that seems worse than this. */ 5012 case; that seems worse than this. */
5015 case on_failure_keep_string_jump: 5013 case on_failure_keep_string_jump:
5016 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); 5014 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
5017 5015
5018 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5016 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5019 DEBUG_PRINT3 (" %d (to 0x%p):\n", mcnt, p + mcnt); 5017 DEBUG_PRINT3 (" %d (to 0x%p):\n", mcnt, p + mcnt);
5020 5018
5021 PUSH_FAILURE_POINT (p + mcnt, (void *) 0, -2); 5019 PUSH_FAILURE_POINT (p + mcnt, (void *) 0, -2);
5022 break; 5020 break;
5023 5021
5024 5022
5025 /* Uses of on_failure_jump: 5023 /* Uses of on_failure_jump:
5026 5024
5027 Each alternative starts with an on_failure_jump that points 5025 Each alternative starts with an on_failure_jump that points
5028 to the beginning of the next alternative. Each alternative 5026 to the beginning of the next alternative. Each alternative
5029 except the last ends with a jump that in effect jumps past 5027 except the last ends with a jump that in effect jumps past
5030 the rest of the alternatives. (They really jump to the 5028 the rest of the alternatives. (They really jump to the
5031 ending jump of the following alternative, because tensioning 5029 ending jump of the following alternative, because tensioning
5087 pattern follows its end. If we can establish that there 5085 pattern follows its end. If we can establish that there
5088 is nothing that they would both match, i.e., that we 5086 is nothing that they would both match, i.e., that we
5089 would have to backtrack because of (as in, e.g., `a*a') 5087 would have to backtrack because of (as in, e.g., `a*a')
5090 then we can change to pop_failure_jump, because we'll 5088 then we can change to pop_failure_jump, because we'll
5091 never have to backtrack. 5089 never have to backtrack.
5092 5090
5093 This is not true in the case of alternatives: in 5091 This is not true in the case of alternatives: in
5094 `(a|ab)*' we do need to backtrack to the `ab' alternative 5092 `(a|ab)*' we do need to backtrack to the `ab' alternative
5095 (e.g., if the string was `ab'). But instead of trying to 5093 (e.g., if the string was `ab'). But instead of trying to
5096 detect that here, the alternative has put on a dummy 5094 detect that here, the alternative has put on a dummy
5097 failure point which is what we will end up popping. */ 5095 failure point which is what we will end up popping. */
5113 break; 5111 break;
5114 } 5112 }
5115 5113
5116 p1 = p + mcnt; 5114 p1 = p + mcnt;
5117 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding 5115 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
5118 to the `maybe_finalize_jump' of this case. Examine what 5116 to the `maybe_finalize_jump' of this case. Examine what
5119 follows. */ 5117 follows. */
5120 5118
5121 /* If we're at the end of the pattern, we can change. */ 5119 /* If we're at the end of the pattern, we can change. */
5122 if (p2 == pend) 5120 if (p2 == pend)
5123 { 5121 {
5139 { 5137 {
5140 p[-3] = (unsigned char) pop_failure_jump; 5138 p[-3] = (unsigned char) pop_failure_jump;
5141 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 5139 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
5142 c, p1[5]); 5140 c, p1[5]);
5143 } 5141 }
5144 5142
5145 else if ((re_opcode_t) p1[3] == charset 5143 else if ((re_opcode_t) p1[3] == charset
5146 || (re_opcode_t) p1[3] == charset_not) 5144 || (re_opcode_t) p1[3] == charset_not)
5147 { 5145 {
5148 int not = (re_opcode_t) p1[3] == charset_not; 5146 int not = (re_opcode_t) p1[3] == charset_not;
5149 5147
5150 if (c < (unsigned char) (p1[4] * BYTEWIDTH) 5148 if (c < (unsigned char) (p1[4] * BYTEWIDTH)
5151 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) 5149 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
5152 not = !not; 5150 not = !not;
5153 5151
5154 /* `not' is equal to 1 if c would match, which means 5152 /* `not' is equal to 1 if c would match, which means
5174 { 5172 {
5175 p[-3] = (unsigned char) pop_failure_jump; 5173 p[-3] = (unsigned char) pop_failure_jump;
5176 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 5174 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
5177 c, p1[5]); 5175 c, p1[5]);
5178 } 5176 }
5179 5177
5180 else if ((re_opcode_t) p1[3] == charset_not) 5178 else if ((re_opcode_t) p1[3] == charset_not)
5181 { 5179 {
5182 int idx; 5180 int idx;
5183 /* We win if the charset_not inside the loop 5181 /* We win if the charset_not inside the loop
5184 lists every character listed in the charset after. */ 5182 lists every character listed in the charset after. */
5245 dummy_low_reg, dummy_high_reg, 5243 dummy_low_reg, dummy_high_reg,
5246 reg_dummy, reg_dummy, reg_info_dummy); 5244 reg_dummy, reg_dummy, reg_info_dummy);
5247 } 5245 }
5248 /* Note fall through. */ 5246 /* Note fall through. */
5249 5247
5250 5248
5251 /* Unconditionally jump (without popping any failure points). */ 5249 /* Unconditionally jump (without popping any failure points). */
5252 case jump: 5250 case jump:
5253 unconditional_jump: 5251 unconditional_jump:
5254 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ 5252 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
5255 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); 5253 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
5256 p += mcnt; /* Do the jump. */ 5254 p += mcnt; /* Do the jump. */
5257 DEBUG_PRINT2 ("(to 0x%p).\n", p); 5255 DEBUG_PRINT2 ("(to 0x%p).\n", p);
5258 break; 5256 break;
5259 5257
5260 5258
5261 /* We need this opcode so we can detect where alternatives end 5259 /* We need this opcode so we can detect where alternatives end
5262 in `group_match_null_string_p' et al. */ 5260 in `group_match_null_string_p' et al. */
5263 case jump_past_alt: 5261 case jump_past_alt:
5264 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); 5262 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
5265 goto unconditional_jump; 5263 goto unconditional_jump;
5290 PUSH_FAILURE_POINT ((void *) 0, (void *) 0, -2); 5288 PUSH_FAILURE_POINT ((void *) 0, (void *) 0, -2);
5291 break; 5289 break;
5292 5290
5293 /* Have to succeed matching what follows at least n times. 5291 /* Have to succeed matching what follows at least n times.
5294 After that, handle like `on_failure_jump'. */ 5292 After that, handle like `on_failure_jump'. */
5295 case succeed_n: 5293 case succeed_n:
5296 EXTRACT_NUMBER (mcnt, p + 2); 5294 EXTRACT_NUMBER (mcnt, p + 2);
5297 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); 5295 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
5298 5296
5299 assert (mcnt >= 0); 5297 assert (mcnt >= 0);
5300 /* Originally, this is how many times we HAVE to succeed. */ 5298 /* Originally, this is how many times we HAVE to succeed. */
5311 p[2] = (unsigned char) no_op; 5309 p[2] = (unsigned char) no_op;
5312 p[3] = (unsigned char) no_op; 5310 p[3] = (unsigned char) no_op;
5313 goto on_failure; 5311 goto on_failure;
5314 } 5312 }
5315 break; 5313 break;
5316 5314
5317 case jump_n: 5315 case jump_n:
5318 EXTRACT_NUMBER (mcnt, p + 2); 5316 EXTRACT_NUMBER (mcnt, p + 2);
5319 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); 5317 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5320 5318
5321 /* Originally, this is how many times we CAN jump. */ 5319 /* Originally, this is how many times we CAN jump. */
5322 if (mcnt) 5320 if (mcnt)
5323 { 5321 {
5324 mcnt--; 5322 mcnt--;
5325 STORE_NUMBER (p + 2, mcnt); 5323 STORE_NUMBER (p + 2, mcnt);
5326 goto unconditional_jump; 5324 goto unconditional_jump;
5327 } 5325 }
5328 /* If we don't have to jump any more, skip over the rest of the command. */ 5326 /* If we don't have to jump any more, skip over the rest of the command. */
5329 else 5327 else
5330 p += 4; 5328 p += 4;
5331 break; 5329 break;
5332 5330
5333 case set_number_at: 5331 case set_number_at:
5334 { 5332 {
5335 DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); 5333 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5336 5334
5337 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5335 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5433 DEBUG_PRINT1 ("EXECUTING before_dot.\n"); 5431 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5434 if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) >= 5432 if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) >=
5435 BUF_PT (regex_emacs_buffer)) 5433 BUF_PT (regex_emacs_buffer))
5436 goto fail; 5434 goto fail;
5437 break; 5435 break;
5438 5436
5439 case at_dot: 5437 case at_dot:
5440 DEBUG_PRINT1 ("EXECUTING at_dot.\n"); 5438 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5441 if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) 5439 if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
5442 != BUF_PT (regex_emacs_buffer)) 5440 != BUF_PT (regex_emacs_buffer))
5443 goto fail; 5441 goto fail;
5444 break; 5442 break;
5445 5443
5446 case after_dot: 5444 case after_dot:
5447 DEBUG_PRINT1 ("EXECUTING after_dot.\n"); 5445 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5448 if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d) 5446 if (BUF_PTR_BYTE_POS (regex_emacs_buffer, (unsigned char *) d)
5449 <= BUF_PT (regex_emacs_buffer)) 5447 <= BUF_PT (regex_emacs_buffer))
5450 goto fail; 5448 goto fail;
5513 mcnt, should_succeed)) 5511 mcnt, should_succeed))
5514 goto fail; 5512 goto fail;
5515 SET_REGS_MATCHED (); 5513 SET_REGS_MATCHED ();
5516 } 5514 }
5517 break; 5515 break;
5518 5516
5519 case notcategoryspec: 5517 case notcategoryspec:
5520 should_succeed = 0; 5518 should_succeed = 0;
5521 goto matchornotcategory; 5519 goto matchornotcategory;
5522 /* end of category patch */ 5520 /* end of category patch */
5523 #endif /* MULE */ 5521 #endif /* MULE */
5528 if (!WORDCHAR_P_UNSAFE ((int) (*d))) 5526 if (!WORDCHAR_P_UNSAFE ((int) (*d)))
5529 goto fail; 5527 goto fail;
5530 SET_REGS_MATCHED (); 5528 SET_REGS_MATCHED ();
5531 d++; 5529 d++;
5532 break; 5530 break;
5533 5531
5534 case notwordchar: 5532 case notwordchar:
5535 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); 5533 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5536 PREFETCH (); 5534 PREFETCH ();
5537 if (!WORDCHAR_P_UNSAFE ((int) (*d))) 5535 if (!WORDCHAR_P_UNSAFE ((int) (*d)))
5538 goto fail; 5536 goto fail;
5539 SET_REGS_MATCHED (); 5537 SET_REGS_MATCHED ();
5540 d++; 5538 d++;
5541 break; 5539 break;
5542 #endif /* not emacs */ 5540 #endif /* not emacs */
5543 5541
5544 default: 5542 default:
5545 abort (); 5543 abort ();
5546 } 5544 }
5547 continue; /* Successfully executed one pattern command; keep going. */ 5545 continue; /* Successfully executed one pattern command; keep going. */
5548 5546
5563 /* If we failed to the end of the pattern, don't examine *p. */ 5561 /* If we failed to the end of the pattern, don't examine *p. */
5564 assert (p <= pend); 5562 assert (p <= pend);
5565 if (p < pend) 5563 if (p < pend)
5566 { 5564 {
5567 boolean is_a_jump_n = false; 5565 boolean is_a_jump_n = false;
5568 5566
5569 /* If failed to a backwards jump that's part of a repetition 5567 /* If failed to a backwards jump that's part of a repetition
5570 loop, need to pop this failure point and use the next one. */ 5568 loop, need to pop this failure point and use the next one. */
5571 switch ((re_opcode_t) *p) 5569 switch ((re_opcode_t) *p)
5572 { 5570 {
5573 case jump_n: 5571 case jump_n:
5575 case maybe_pop_jump: 5573 case maybe_pop_jump:
5576 case pop_failure_jump: 5574 case pop_failure_jump:
5577 case jump: 5575 case jump:
5578 p1 = p + 1; 5576 p1 = p + 1;
5579 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 5577 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5580 p1 += mcnt; 5578 p1 += mcnt;
5581 5579
5582 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) 5580 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5583 || (!is_a_jump_n 5581 || (!is_a_jump_n
5584 && (re_opcode_t) *p1 == on_failure_jump)) 5582 && (re_opcode_t) *p1 == on_failure_jump))
5585 goto fail; 5583 goto fail;
5606 5604
5607 /* Subroutine definitions for re_match_2. */ 5605 /* Subroutine definitions for re_match_2. */
5608 5606
5609 5607
5610 /* We are passed P pointing to a register number after a start_memory. 5608 /* We are passed P pointing to a register number after a start_memory.
5611 5609
5612 Return true if the pattern up to the corresponding stop_memory can 5610 Return true if the pattern up to the corresponding stop_memory can
5613 match the empty string, and false otherwise. 5611 match the empty string, and false otherwise.
5614 5612
5615 If we find the matching stop_memory, sets P to point to one past its number. 5613 If we find the matching stop_memory, sets P to point to one past its number.
5616 Otherwise, sets P to an undefined byte less than or equal to END. 5614 Otherwise, sets P to an undefined byte less than or equal to END.
5617 5615
5618 We don't handle duplicates properly (yet). */ 5616 We don't handle duplicates properly (yet). */
5619 5617
5622 register_info_type *reg_info) 5620 register_info_type *reg_info)
5623 { 5621 {
5624 int mcnt; 5622 int mcnt;
5625 /* Point to after the args to the start_memory. */ 5623 /* Point to after the args to the start_memory. */
5626 unsigned char *p1 = *p + 2; 5624 unsigned char *p1 = *p + 2;
5627 5625
5628 while (p1 < end) 5626 while (p1 < end)
5629 { 5627 {
5630 /* Skip over opcodes that can match nothing, and return true or 5628 /* Skip over opcodes that can match nothing, and return true or
5631 false, as appropriate, when we get to one that can't, or to the 5629 false, as appropriate, when we get to one that can't, or to the
5632 matching stop_memory. */ 5630 matching stop_memory. */
5633 5631
5634 switch ((re_opcode_t) *p1) 5632 switch ((re_opcode_t) *p1)
5635 { 5633 {
5636 /* Could be either a loop or a series of alternatives. */ 5634 /* Could be either a loop or a series of alternatives. */
5637 case on_failure_jump: 5635 case on_failure_jump:
5638 p1++; 5636 p1++;
5639 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 5637 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5640 5638
5641 /* If the next operation is not a jump backwards in the 5639 /* If the next operation is not a jump backwards in the
5642 pattern. */ 5640 pattern. */
5643 5641
5644 if (mcnt >= 0) 5642 if (mcnt >= 0)
5645 { 5643 {
5649 whereas the rest start with on_failure_jump and end 5647 whereas the rest start with on_failure_jump and end
5650 with a jump, e.g., here is the pattern for `a|b|c': 5648 with a jump, e.g., here is the pattern for `a|b|c':
5651 5649
5652 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 5650 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5653 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 5651 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5654 /exactn/1/c 5652 /exactn/1/c
5655 5653
5656 So, we have to first go through the first (n-1) 5654 So, we have to first go through the first (n-1)
5657 alternatives and then deal with the last one separately. */ 5655 alternatives and then deal with the last one separately. */
5658 5656
5659 5657
5665 { 5663 {
5666 /* `mcnt' holds how many bytes long the alternative 5664 /* `mcnt' holds how many bytes long the alternative
5667 is, including the ending `jump_past_alt' and 5665 is, including the ending `jump_past_alt' and
5668 its number. */ 5666 its number. */
5669 5667
5670 if (!alt_match_null_string_p (p1, p1 + mcnt - 3, 5668 if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5671 reg_info)) 5669 reg_info))
5672 return false; 5670 return false;
5673 5671
5674 /* Move to right after this alternative, including the 5672 /* Move to right after this alternative, including the
5675 jump_past_alt. */ 5673 jump_past_alt. */
5676 p1 += mcnt; 5674 p1 += mcnt;
5677 5675
5678 /* Break if it's the beginning of an n-th alternative 5676 /* Break if it's the beginning of an n-th alternative
5679 that doesn't begin with an on_failure_jump. */ 5677 that doesn't begin with an on_failure_jump. */
5680 if ((re_opcode_t) *p1 != on_failure_jump) 5678 if ((re_opcode_t) *p1 != on_failure_jump)
5681 break; 5679 break;
5682 5680
5683 /* Still have to check that it's not an n-th 5681 /* Still have to check that it's not an n-th
5684 alternative that starts with an on_failure_jump. */ 5682 alternative that starts with an on_failure_jump. */
5685 p1++; 5683 p1++;
5686 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 5684 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5687 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) 5685 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5702 5700
5703 p1 += mcnt; /* Get past the n-th alternative. */ 5701 p1 += mcnt; /* Get past the n-th alternative. */
5704 } /* if mcnt >= 0 */ 5702 } /* if mcnt >= 0 */
5705 break; 5703 break;
5706 5704
5707 5705
5708 case stop_memory: 5706 case stop_memory:
5709 assert (p1[1] == **p); 5707 assert (p1[1] == **p);
5710 *p = p1 + 2; 5708 *p = p1 + 2;
5711 return true; 5709 return true;
5712 5710
5713 5711
5714 default: 5712 default:
5715 if (!common_op_match_null_string_p (&p1, end, reg_info)) 5713 if (!common_op_match_null_string_p (&p1, end, reg_info))
5716 return false; 5714 return false;
5717 } 5715 }
5718 } /* while p1 < end */ 5716 } /* while p1 < end */
5719 5717
5722 5720
5723 5721
5724 /* Similar to group_match_null_string_p, but doesn't deal with alternatives: 5722 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5725 It expects P to be the first byte of a single alternative and END one 5723 It expects P to be the first byte of a single alternative and END one
5726 byte past the last. The alternative can contain groups. */ 5724 byte past the last. The alternative can contain groups. */
5727 5725
5728 static boolean 5726 static boolean
5729 alt_match_null_string_p (unsigned char *p, unsigned char *end, 5727 alt_match_null_string_p (unsigned char *p, unsigned char *end,
5730 register_info_type *reg_info) 5728 register_info_type *reg_info)
5731 { 5729 {
5732 int mcnt; 5730 int mcnt;
5733 unsigned char *p1 = p; 5731 unsigned char *p1 = p;
5734 5732
5735 while (p1 < end) 5733 while (p1 < end)
5736 { 5734 {
5737 /* Skip over opcodes that can match nothing, and break when we get 5735 /* Skip over opcodes that can match nothing, and break when we get
5738 to one that can't. */ 5736 to one that can't. */
5739 5737
5740 switch ((re_opcode_t) *p1) 5738 switch ((re_opcode_t) *p1)
5741 { 5739 {
5742 /* It's a loop. */ 5740 /* It's a loop. */
5743 case on_failure_jump: 5741 case on_failure_jump:
5744 p1++; 5742 p1++;
5745 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 5743 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5746 p1 += mcnt; 5744 p1 += mcnt;
5747 break; 5745 break;
5748 5746
5749 default: 5747 default:
5750 if (!common_op_match_null_string_p (&p1, end, reg_info)) 5748 if (!common_op_match_null_string_p (&p1, end, reg_info))
5751 return false; 5749 return false;
5752 } 5750 }
5753 } /* while p1 < end */ 5751 } /* while p1 < end */
5754 5752
5755 return true; 5753 return true;
5756 } /* alt_match_null_string_p */ 5754 } /* alt_match_null_string_p */
5757 5755
5758 5756
5759 /* Deals with the ops common to group_match_null_string_p and 5757 /* Deals with the ops common to group_match_null_string_p and
5760 alt_match_null_string_p. 5758 alt_match_null_string_p.
5761 5759
5762 Sets P to one after the op and its arguments, if any. */ 5760 Sets P to one after the op and its arguments, if any. */
5763 5761
5764 static boolean 5762 static boolean
5765 common_op_match_null_string_p (unsigned char **p, unsigned char *end, 5763 common_op_match_null_string_p (unsigned char **p, unsigned char *end,
5766 register_info_type *reg_info) 5764 register_info_type *reg_info)
5790 5788
5791 case start_memory: 5789 case start_memory:
5792 reg_no = *p1; 5790 reg_no = *p1;
5793 assert (reg_no > 0 && reg_no <= MAX_REGNUM); 5791 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5794 ret = group_match_null_string_p (&p1, end, reg_info); 5792 ret = group_match_null_string_p (&p1, end, reg_info);
5795 5793
5796 /* Have to set this here in case we're checking a group which 5794 /* Have to set this here in case we're checking a group which
5797 contains a group and a back reference to it. */ 5795 contains a group and a back reference to it. */
5798 5796
5799 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) 5797 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5800 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; 5798 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5801 5799
5802 if (!ret) 5800 if (!ret)
5803 return false; 5801 return false;
5804 break; 5802 break;
5805 5803
5806 /* If this is an optimized succeed_n for zero times, make the jump. */ 5804 /* If this is an optimized succeed_n for zero times, make the jump. */
5807 case jump: 5805 case jump:
5808 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 5806 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5809 if (mcnt >= 0) 5807 if (mcnt >= 0)
5810 p1 += mcnt; 5808 p1 += mcnt;
5812 return false; 5810 return false;
5813 break; 5811 break;
5814 5812
5815 case succeed_n: 5813 case succeed_n:
5816 /* Get to the number of times to succeed. */ 5814 /* Get to the number of times to succeed. */
5817 p1 += 2; 5815 p1 += 2;
5818 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 5816 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5819 5817
5820 if (mcnt == 0) 5818 if (mcnt == 0)
5821 { 5819 {
5822 p1 -= 4; 5820 p1 -= 4;
5825 } 5823 }
5826 else 5824 else
5827 return false; 5825 return false;
5828 break; 5826 break;
5829 5827
5830 case duplicate: 5828 case duplicate:
5831 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) 5829 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5832 return false; 5830 return false;
5833 break; 5831 break;
5834 5832
5835 case set_number_at: 5833 case set_number_at:
5845 } /* common_op_match_null_string_p */ 5843 } /* common_op_match_null_string_p */
5846 5844
5847 5845
5848 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN 5846 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5849 bytes; nonzero otherwise. */ 5847 bytes; nonzero otherwise. */
5850 5848
5851 static int 5849 static int
5852 bcmp_translate (CONST unsigned char *s1, CONST unsigned char *s2, 5850 bcmp_translate (CONST unsigned char *s1, CONST unsigned char *s2,
5853 register int len, char *translate) 5851 register int len, char *translate)
5854 { 5852 {
5855 register CONST unsigned char *p1 = s1, *p2 = s2; 5853 register CONST unsigned char *p1 = s1, *p2 = s2;
5864 /* Entry points for GNU code. */ 5862 /* Entry points for GNU code. */
5865 5863
5866 /* re_compile_pattern is the GNU regular expression compiler: it 5864 /* re_compile_pattern is the GNU regular expression compiler: it
5867 compiles PATTERN (of length SIZE) and puts the result in BUFP. 5865 compiles PATTERN (of length SIZE) and puts the result in BUFP.
5868 Returns 0 if the pattern was valid, otherwise an error string. 5866 Returns 0 if the pattern was valid, otherwise an error string.
5869 5867
5870 Assumes the `allocated' (and perhaps `buffer') and `translate' fields 5868 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5871 are set in BUFP on entry. 5869 are set in BUFP on entry.
5872 5870
5873 We call regex_compile to do the actual compilation. */ 5871 We call regex_compile to do the actual compilation. */
5874 5872
5875 CONST char * 5873 CONST char *
5876 re_compile_pattern (CONST char *pattern, int length, 5874 re_compile_pattern (CONST char *pattern, int length,
5877 struct re_pattern_buffer *bufp) 5875 struct re_pattern_buffer *bufp)
5878 { 5876 {
5879 reg_errcode_t ret; 5877 reg_errcode_t ret;
5880 5878
5881 /* GNU code is written to assume at least RE_NREGS registers will be set 5879 /* GNU code is written to assume at least RE_NREGS registers will be set
5882 (and at least one extra will be -1). */ 5880 (and at least one extra will be -1). */
5883 bufp->regs_allocated = REGS_UNALLOCATED; 5881 bufp->regs_allocated = REGS_UNALLOCATED;
5884 5882
5885 /* And GNU code determines whether or not to get register information 5883 /* And GNU code determines whether or not to get register information
5886 by passing null for the REGS argument to re_match, etc., not by 5884 by passing null for the REGS argument to re_match, etc., not by
5887 setting no_sub. */ 5885 setting no_sub. */
5888 bufp->no_sub = 0; 5886 bufp->no_sub = 0;
5889 5887
5890 /* Match anchors at newline. */ 5888 /* Match anchors at newline. */
5891 bufp->newline_anchor = 1; 5889 bufp->newline_anchor = 1;
5892 5890
5893 ret = regex_compile (pattern, length, re_syntax_options, bufp); 5891 ret = regex_compile (pattern, length, re_syntax_options, bufp);
5894 5892
5895 if (!ret) 5893 if (!ret)
5896 return NULL; 5894 return NULL;
5897 return gettext (re_error_msgid[(int) ret]); 5895 return gettext (re_error_msgid[(int) ret]);
5898 } 5896 }
5899 5897
5900 /* Entry points compatible with 4.2 BSD regex library. We don't define 5898 /* Entry points compatible with 4.2 BSD regex library. We don't define
5901 them unless specifically requested. */ 5899 them unless specifically requested. */
5902 5900
5903 #ifdef _REGEX_RE_COMP 5901 #ifdef _REGEX_RE_COMP
5907 5905
5908 char * 5906 char *
5909 re_comp (CONST char *s) 5907 re_comp (CONST char *s)
5910 { 5908 {
5911 reg_errcode_t ret; 5909 reg_errcode_t ret;
5912 5910
5913 if (!s) 5911 if (!s)
5914 { 5912 {
5915 if (!re_comp_buf.buffer) 5913 if (!re_comp_buf.buffer)
5916 return gettext ("No previous regular expression"); 5914 return gettext ("No previous regular expression");
5917 return 0; 5915 return 0;
5934 5932
5935 /* Match anchors at newlines. */ 5933 /* Match anchors at newlines. */
5936 re_comp_buf.newline_anchor = 1; 5934 re_comp_buf.newline_anchor = 1;
5937 5935
5938 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 5936 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5939 5937
5940 if (!ret) 5938 if (!ret)
5941 return NULL; 5939 return NULL;
5942 5940
5943 /* Yes, we're discarding `CONST' here if !HAVE_LIBINTL. */ 5941 /* Yes, we're discarding `CONST' here if !HAVE_LIBINTL. */
5944 return (char *) gettext (re_error_msgid[(int) ret]); 5942 return (char *) gettext (re_error_msgid[(int) ret]);
6002 6000
6003 /* regex_compile will allocate the space for the compiled pattern. */ 6001 /* regex_compile will allocate the space for the compiled pattern. */
6004 preg->buffer = 0; 6002 preg->buffer = 0;
6005 preg->allocated = 0; 6003 preg->allocated = 0;
6006 preg->used = 0; 6004 preg->used = 0;
6007 6005
6008 /* Don't bother to use a fastmap when searching. This simplifies the 6006 /* Don't bother to use a fastmap when searching. This simplifies the
6009 REG_NEWLINE case: if we used a fastmap, we'd have to put all the 6007 REG_NEWLINE case: if we used a fastmap, we'd have to put all the
6010 characters after newlines into the fastmap. This way, we just try 6008 characters after newlines into the fastmap. This way, we just try
6011 every character. */ 6009 every character. */
6012 preg->fastmap = 0; 6010 preg->fastmap = 0;
6013 6011
6014 if (cflags & REG_ICASE) 6012 if (cflags & REG_ICASE)
6015 { 6013 {
6016 unsigned i; 6014 unsigned i;
6017 6015
6018 preg->translate = (char *) malloc (CHAR_SET_SIZE); 6016 preg->translate = (char *) malloc (CHAR_SET_SIZE);
6019 if (preg->translate == NULL) 6017 if (preg->translate == NULL)
6020 return (int) REG_ESPACE; 6018 return (int) REG_ESPACE;
6021 6019
6022 /* Map uppercase characters to corresponding lowercase ones. */ 6020 /* Map uppercase characters to corresponding lowercase ones. */
6037 else 6035 else
6038 preg->newline_anchor = 0; 6036 preg->newline_anchor = 0;
6039 6037
6040 preg->no_sub = !!(cflags & REG_NOSUB); 6038 preg->no_sub = !!(cflags & REG_NOSUB);
6041 6039
6042 /* POSIX says a null character in the pattern terminates it, so we 6040 /* POSIX says a null character in the pattern terminates it, so we
6043 can use strlen here in compiling the pattern. */ 6041 can use strlen here in compiling the pattern. */
6044 ret = regex_compile (pattern, strlen (pattern), syntax, preg); 6042 ret = regex_compile (pattern, strlen (pattern), syntax, preg);
6045 6043
6046 /* POSIX doesn't distinguish between an unmatched open-group and an 6044 /* POSIX doesn't distinguish between an unmatched open-group and an
6047 unmatched close-group: both are REG_EPAREN. */ 6045 unmatched close-group: both are REG_EPAREN. */
6048 if (ret == REG_ERPAREN) ret = REG_EPAREN; 6046 if (ret == REG_ERPAREN) ret = REG_EPAREN;
6049 6047
6050 return (int) ret; 6048 return (int) ret;
6051 } 6049 }
6052 6050
6053 6051
6054 /* regexec searches for a given pattern, specified by PREG, in the 6052 /* regexec searches for a given pattern, specified by PREG, in the
6055 string STRING. 6053 string STRING.
6056 6054
6057 If NMATCH is zero or REG_NOSUB was set in the cflags argument to 6055 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6058 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at 6056 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
6059 least NMATCH elements, and we set them to the offsets of the 6057 least NMATCH elements, and we set them to the offsets of the
6060 corresponding matched substrings. 6058 corresponding matched substrings.
6061 6059
6062 EFLAGS specifies `execution flags' which affect matching: if 6060 EFLAGS specifies `execution flags' which affect matching: if
6063 REG_NOTBOL is set, then ^ does not match at the beginning of the 6061 REG_NOTBOL is set, then ^ does not match at the beginning of the
6064 string; if REG_NOTEOL is set, then $ does not match at the end. 6062 string; if REG_NOTEOL is set, then $ does not match at the end.
6065 6063
6066 We return 0 if we find a match and REG_NOMATCH if not. */ 6064 We return 0 if we find a match and REG_NOMATCH if not. */
6067 6065
6068 int 6066 int
6069 regexec (CONST regex_t *preg, CONST char *string, size_t nmatch, 6067 regexec (CONST regex_t *preg, CONST char *string, size_t nmatch,
6070 regmatch_t pmatch[], int eflags) 6068 regmatch_t pmatch[], int eflags)
6074 regex_t private_preg; 6072 regex_t private_preg;
6075 int len = strlen (string); 6073 int len = strlen (string);
6076 boolean want_reg_info = !preg->no_sub && nmatch > 0; 6074 boolean want_reg_info = !preg->no_sub && nmatch > 0;
6077 6075
6078 private_preg = *preg; 6076 private_preg = *preg;
6079 6077
6080 private_preg.not_bol = !!(eflags & REG_NOTBOL); 6078 private_preg.not_bol = !!(eflags & REG_NOTBOL);
6081 private_preg.not_eol = !!(eflags & REG_NOTEOL); 6079 private_preg.not_eol = !!(eflags & REG_NOTEOL);
6082 6080
6083 /* The user has told us exactly how many registers to return 6081 /* The user has told us exactly how many registers to return
6084 information about, via `nmatch'. We have to pass that on to the 6082 information about, via `nmatch'. We have to pass that on to the
6085 matching routines. */ 6083 matching routines. */
6086 private_preg.regs_allocated = REGS_FIXED; 6084 private_preg.regs_allocated = REGS_FIXED;
6087 6085
6088 if (want_reg_info) 6086 if (want_reg_info)
6089 { 6087 {
6090 regs.num_regs = nmatch; 6088 regs.num_regs = nmatch;
6091 regs.start = TALLOC (nmatch, regoff_t); 6089 regs.start = TALLOC (nmatch, regoff_t);
6092 regs.end = TALLOC (nmatch, regoff_t); 6090 regs.end = TALLOC (nmatch, regoff_t);
6096 6094
6097 /* Perform the searching operation. */ 6095 /* Perform the searching operation. */
6098 ret = re_search (&private_preg, string, len, 6096 ret = re_search (&private_preg, string, len,
6099 /* start: */ 0, /* range: */ len, 6097 /* start: */ 0, /* range: */ len,
6100 want_reg_info ? &regs : (struct re_registers *) 0); 6098 want_reg_info ? &regs : (struct re_registers *) 0);
6101 6099
6102 /* Copy the register information to the POSIX structure. */ 6100 /* Copy the register information to the POSIX structure. */
6103 if (want_reg_info) 6101 if (want_reg_info)
6104 { 6102 {
6105 if (ret >= 0) 6103 if (ret >= 0)
6106 { 6104 {
6132 CONST char *msg; 6130 CONST char *msg;
6133 size_t msg_size; 6131 size_t msg_size;
6134 6132
6135 if (errcode < 0 6133 if (errcode < 0
6136 || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0]))) 6134 || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
6137 /* Only error codes returned by the rest of the code should be passed 6135 /* Only error codes returned by the rest of the code should be passed
6138 to this routine. If we are given anything else, or if other regex 6136 to this routine. If we are given anything else, or if other regex
6139 code generates an invalid error code, then the program has a bug. 6137 code generates an invalid error code, then the program has a bug.
6140 Dump core so we can fix it. */ 6138 Dump core so we can fix it. */
6141 abort (); 6139 abort ();
6142 6140
6143 msg = gettext (re_error_msgid[errcode]); 6141 msg = gettext (re_error_msgid[errcode]);
6144 6142
6145 msg_size = strlen (msg) + 1; /* Includes the null. */ 6143 msg_size = strlen (msg) + 1; /* Includes the null. */
6146 6144
6147 if (errbuf_size != 0) 6145 if (errbuf_size != 0)
6148 { 6146 {
6149 if (msg_size > errbuf_size) 6147 if (msg_size > errbuf_size)
6150 { 6148 {
6151 strncpy (errbuf, msg, errbuf_size - 1); 6149 strncpy (errbuf, msg, errbuf_size - 1);
6165 regfree (regex_t *preg) 6163 regfree (regex_t *preg)
6166 { 6164 {
6167 if (preg->buffer != NULL) 6165 if (preg->buffer != NULL)
6168 free (preg->buffer); 6166 free (preg->buffer);
6169 preg->buffer = NULL; 6167 preg->buffer = NULL;
6170 6168
6171 preg->allocated = 0; 6169 preg->allocated = 0;
6172 preg->used = 0; 6170 preg->used = 0;
6173 6171
6174 if (preg->fastmap != NULL) 6172 if (preg->fastmap != NULL)
6175 free (preg->fastmap); 6173 free (preg->fastmap);