Mercurial > hg > xemacs-beta
comparison src/regex.c @ 104:cf808b4c4290 r20-1b4
Import from CVS: tag r20-1b4
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:16:51 +0200 |
parents | a145efe76779 |
children | fe104dbd9147 |
comparison
equal
deleted
inserted
replaced
103:30eda07fe280 | 104:cf808b4c4290 |
---|---|
532 a byte which contains a syntax code, e.g., Sword. */ | 532 a byte which contains a syntax code, e.g., Sword. */ |
533 syntaxspec, | 533 syntaxspec, |
534 | 534 |
535 /* Matches any character whose syntax is not that specified. */ | 535 /* Matches any character whose syntax is not that specified. */ |
536 notsyntaxspec | 536 notsyntaxspec |
537 | |
537 #endif /* emacs */ | 538 #endif /* emacs */ |
538 | 539 |
539 #ifdef MULE | 540 #ifdef MULE |
540 /* need extra stuff to be able to properly work with XEmacs/Mule | 541 /* need extra stuff to be able to properly work with XEmacs/Mule |
541 characters (which may take up more than one byte) */ | 542 characters (which may take up more than one byte) */ |
545 format"; see rangetab.c. Unlike the `charset' | 546 format"; see rangetab.c. Unlike the `charset' |
546 opcode, this can handle arbitrary characters. */ | 547 opcode, this can handle arbitrary characters. */ |
547 | 548 |
548 charset_mule_not /* Same parameters as charset_mule, but match any | 549 charset_mule_not /* Same parameters as charset_mule, but match any |
549 character that is not one of those specified. */ | 550 character that is not one of those specified. */ |
551 | |
552 /* 97/2/17 jhod: The following two were merged back in from the Mule | |
553 2.3 code to enable some language specific processing */ | |
554 ,categoryspec, /* Matches entries in the character category tables */ | |
555 notcategoryspec /* The opposite of the above */ | |
550 #endif | 556 #endif |
551 | 557 |
552 } re_opcode_t; | 558 } re_opcode_t; |
553 | 559 |
554 /* Common operations on the compiled pattern. */ | 560 /* Common operations on the compiled pattern. */ |
917 case notsyntaxspec: | 923 case notsyntaxspec: |
918 printf ("/notsyntaxspec"); | 924 printf ("/notsyntaxspec"); |
919 mcnt = *p++; | 925 mcnt = *p++; |
920 printf ("/%d", mcnt); | 926 printf ("/%d", mcnt); |
921 break; | 927 break; |
928 | |
929 #ifdef MULE | |
930 /* 97/2/17 jhod Mule category patch */ | |
931 case categoryspec: | |
932 printf ("/categoryspec"); | |
933 mcnt = *p++; | |
934 printf ("/%d", mcnt); | |
935 break; | |
936 | |
937 case notcategoryspec: | |
938 printf ("/notcategoryspec"); | |
939 mcnt = *p++; | |
940 printf ("/%d", mcnt); | |
941 break; | |
942 /* end of category patch */ | |
943 #endif /* MULE */ | |
922 #endif /* emacs */ | 944 #endif /* emacs */ |
923 | 945 |
924 case wordchar: | 946 case wordchar: |
925 printf ("/wordchar"); | 947 printf ("/wordchar"); |
926 break; | 948 break; |
970 printf ("no_sub: %d\t", bufp->no_sub); | 992 printf ("no_sub: %d\t", bufp->no_sub); |
971 printf ("not_bol: %d\t", bufp->not_bol); | 993 printf ("not_bol: %d\t", bufp->not_bol); |
972 printf ("not_eol: %d\t", bufp->not_eol); | 994 printf ("not_eol: %d\t", bufp->not_eol); |
973 printf ("syntax: %d\n", bufp->syntax); | 995 printf ("syntax: %d\n", bufp->syntax); |
974 /* Perhaps we should print the translate table? */ | 996 /* Perhaps we should print the translate table? */ |
997 /* and maybe the category table? */ | |
975 } | 998 } |
976 | 999 |
977 | 1000 |
978 static void | 1001 static void |
979 print_double_string (CONST char *where, CONST char *string1, int size1, | 1002 print_double_string (CONST char *where, CONST char *string1, int size1, |
1063 #ifdef emacs | 1086 #ifdef emacs |
1064 "Invalid syntax designator", /* REG_ESYNTAX */ | 1087 "Invalid syntax designator", /* REG_ESYNTAX */ |
1065 #endif | 1088 #endif |
1066 #ifdef MULE | 1089 #ifdef MULE |
1067 "Ranges may not span charsets", /* REG_ERANGESPAN */ | 1090 "Ranges may not span charsets", /* REG_ERANGESPAN */ |
1091 "Invalid category designator", /* REG_ECATEGORY */ | |
1068 #endif | 1092 #endif |
1069 }; | 1093 }; |
1070 | 1094 |
1071 /* Avoiding alloca during matching, to placate r_alloc. */ | 1095 /* Avoiding alloca during matching, to placate r_alloc. */ |
1072 | 1096 |
2833 /* XEmacs addition */ | 2857 /* XEmacs addition */ |
2834 if (c >= 0x80 || syntax_spec_code[c] == 0377) | 2858 if (c >= 0x80 || syntax_spec_code[c] == 0377) |
2835 FREE_STACK_RETURN (REG_ESYNTAX); | 2859 FREE_STACK_RETURN (REG_ESYNTAX); |
2836 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); | 2860 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); |
2837 break; | 2861 break; |
2862 | |
2863 #ifdef MULE | |
2864 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ | |
2865 case 'c': | |
2866 laststart = b; | |
2867 PATFETCH_RAW (c); | |
2868 if (c < 32 || c > 127) | |
2869 FREE_STACK_RETURN (REG_ECATEGORY); | |
2870 BUF_PUSH_2 (categoryspec, c); | |
2871 break; | |
2872 | |
2873 case 'C': | |
2874 laststart = b; | |
2875 PATFETCH_RAW (c); | |
2876 if (c < 32 || c > 127) | |
2877 FREE_STACK_RETURN (REG_ECATEGORY); | |
2878 BUF_PUSH_2 (notcategoryspec, c); | |
2879 break; | |
2880 /* end of category patch */ | |
2881 #endif /* MULE */ | |
2838 #endif /* emacs */ | 2882 #endif /* emacs */ |
2839 | 2883 |
2840 | 2884 |
2841 case 'w': | 2885 case 'w': |
2842 laststart = b; | 2886 laststart = b; |
3586 (enum syntaxcode) k) | 3630 (enum syntaxcode) k) |
3587 fastmap[j] = 1; | 3631 fastmap[j] = 1; |
3588 #endif /* ! MULE */ | 3632 #endif /* ! MULE */ |
3589 break; | 3633 break; |
3590 | 3634 |
3635 #ifdef MULE | |
3636 /* 97/2/17 jhod category patch */ | |
3637 case categoryspec: | |
3638 case notcategoryspec: | |
3639 bufp->can_be_null = 1; | |
3640 return; | |
3641 /* end of category patch */ | |
3642 #endif /* MULE */ | |
3591 | 3643 |
3592 /* All cases after this match the empty string. These end with | 3644 /* All cases after this match the empty string. These end with |
3593 `continue'. */ | 3645 `continue'. */ |
3594 | 3646 |
3595 | 3647 |
3817 int total_size = size1 + size2; | 3869 int total_size = size1 + size2; |
3818 int endpos = startpos + range; | 3870 int endpos = startpos + range; |
3819 #ifdef REGEX_BEGLINE_CHECK | 3871 #ifdef REGEX_BEGLINE_CHECK |
3820 int anchored_at_begline = 0; | 3872 int anchored_at_begline = 0; |
3821 #endif | 3873 #endif |
3874 CONST unsigned char *d; | |
3875 Charcount d_size; | |
3822 | 3876 |
3823 /* Check for out-of-range STARTPOS. */ | 3877 /* Check for out-of-range STARTPOS. */ |
3824 if (startpos < 0 || startpos > total_size) | 3878 if (startpos < 0 || startpos > total_size) |
3825 return -1; | 3879 return -1; |
3826 | 3880 |
3827 /* Fix up RANGE if it might eventually take us outside | 3881 /* Fix up RANGE if it might eventually take us outside |
3828 the virtual concatenation of STRING1 and STRING2. */ | 3882 the virtual concatenation of STRING1 and STRING2. */ |
3829 #if 0 | |
3830 if (endpos < -1) | |
3831 range = -1 - startpos; | |
3832 #else | |
3833 if (endpos < 0) | 3883 if (endpos < 0) |
3834 range = 0 - startpos; | 3884 range = 0 - startpos; |
3835 #endif | |
3836 else if (endpos > total_size) | 3885 else if (endpos > total_size) |
3837 range = total_size - startpos; | 3886 range = total_size - startpos; |
3838 | 3887 |
3839 /* If the search isn't to be a backwards one, don't waste time in a | 3888 /* If the search isn't to be a backwards one, don't waste time in a |
3840 search for a pattern that must be anchored. */ | 3889 search for a pattern that must be anchored. */ |
3878 range > 0) | 3927 range > 0) |
3879 { | 3928 { |
3880 /* whose stupid idea was it anyway to make this | 3929 /* whose stupid idea was it anyway to make this |
3881 function take two strings to match?? */ | 3930 function take two strings to match?? */ |
3882 int lim = 0; | 3931 int lim = 0; |
3883 register CONST unsigned char *d; | |
3884 int irange = range; | 3932 int irange = range; |
3885 | 3933 |
3886 if (startpos < size1 && startpos + range >= size1) | 3934 if (startpos < size1 && startpos + range >= size1) |
3887 lim = range - (size1 - startpos); | 3935 lim = range - (size1 - startpos); |
3888 | 3936 |
3889 d = ((CONST unsigned char *) | 3937 d = ((CONST unsigned char *) |
3890 (startpos >= size1 ? string2 - size1 : string1) + startpos); | 3938 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
3891 DEC_CHARPTR(d); | 3939 DEC_CHARPTR(d); /* Ok, since startpos != size1. */ |
3940 d_size = charcount_to_bytecount (d, 1); | |
3892 | 3941 |
3893 if (translate) | 3942 if (translate) |
3894 #ifdef MULE | 3943 #ifdef MULE |
3895 while (range > lim && (*d >= 0x80 || translate[*d] != '\n')) | 3944 while (range > lim && (*d >= 0x80 || translate[*d] != '\n')) |
3896 #else | 3945 #else |
3897 while (range > lim && translate[*d] != '\n') | 3946 while (range > lim && translate[*d] != '\n') |
3898 #endif | 3947 #endif |
3899 { | 3948 { |
3900 INC_CHARPTR(d); | 3949 d += d_size; /* Speedier INC_CHARPTR(d) */ |
3901 range -= charcount_to_bytecount (d, 1); | 3950 d_size = charcount_to_bytecount (d, 1); |
3951 range -= d_size; | |
3902 } | 3952 } |
3903 else | 3953 else |
3904 while (range > lim && *d != '\n') | 3954 while (range > lim && *d != '\n') |
3905 { | 3955 { |
3906 INC_CHARPTR(d); | 3956 d += d_size; /* Speedier INC_CHARPTR(d) */ |
3907 range -= charcount_to_bytecount (d, 1); | 3957 d_size = charcount_to_bytecount (d, 1); |
3958 range -= d_size; | |
3908 } | 3959 } |
3909 | 3960 |
3910 startpos += irange - range; | 3961 startpos += irange - range; |
3911 } | 3962 } |
3912 #endif /* REGEX_BEGLINE_CHECK */ | 3963 #endif /* REGEX_BEGLINE_CHECK */ |
3917 the first null string. */ | 3968 the first null string. */ |
3918 if (fastmap && startpos < total_size && !bufp->can_be_null) | 3969 if (fastmap && startpos < total_size && !bufp->can_be_null) |
3919 { | 3970 { |
3920 if (range > 0) /* Searching forwards. */ | 3971 if (range > 0) /* Searching forwards. */ |
3921 { | 3972 { |
3922 register CONST unsigned char *d; | 3973 int lim = 0; |
3923 register int lim = 0; | |
3924 int irange = range; | 3974 int irange = range; |
3925 | 3975 |
3926 if (startpos < size1 && startpos + range >= size1) | 3976 if (startpos < size1 && startpos + range >= size1) |
3927 lim = range - (size1 - startpos); | 3977 lim = range - (size1 - startpos); |
3928 | 3978 |
3929 d = ((CONST unsigned char *) | 3979 d = ((CONST unsigned char *) |
3930 (startpos >= size1 ? string2 - size1 : string1) + startpos); | 3980 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
3931 | 3981 |
3932 /* Written out as an if-else to avoid testing `translate' | 3982 /* Written out as an if-else to avoid testing `translate' |
3933 inside the loop. */ | 3983 inside the loop. */ |
3934 if (translate) | 3984 if (translate) |
3935 #ifdef MULE | 3985 #ifdef MULE |
3936 while (range > lim && *d < 0x80 && !fastmap[translate[*d]]) | 3986 while (range > lim && *d < 0x80 && !fastmap[translate[*d]]) |
3937 #else | 3987 #else |
3938 while (range > lim && !fastmap[translate[*d]]) | 3988 while (range > lim && !fastmap[translate[*d]]) |
3939 #endif | 3989 #endif |
3940 { | 3990 { |
3941 range -= charcount_to_bytecount (d, 1); | 3991 d_size = charcount_to_bytecount (d, 1); |
3942 INC_CHARPTR(d); | 3992 range -= d_size; |
3993 d += d_size; /* Speedier INC_CHARPTR(d) */ | |
3943 } | 3994 } |
3944 else | 3995 else |
3945 while (range > lim && !fastmap[*d]) | 3996 while (range > lim && !fastmap[*d]) |
3946 { | 3997 { |
3947 range -= charcount_to_bytecount (d, 1); | 3998 d_size = charcount_to_bytecount (d, 1); |
3948 INC_CHARPTR(d); | 3999 range -= d_size; |
4000 d += d_size; /* Speedier INC_CHARPTR(d) */ | |
3949 } | 4001 } |
3950 | 4002 |
3951 startpos += irange - range; | 4003 startpos += irange - range; |
3952 } | 4004 } |
3953 else /* Searching backwards. */ | 4005 else /* Searching backwards. */ |
3954 { | 4006 { |
3955 register unsigned char c = (size1 == 0 || startpos >= size1 | 4007 unsigned char c = (size1 == 0 || startpos >= size1 |
3956 ? string2[startpos - size1] | 4008 ? string2[startpos - size1] |
3957 : string1[startpos]); | 4009 : string1[startpos]); |
3958 #ifdef MULE | 4010 #ifdef MULE |
3959 if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)]) | 4011 if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)]) |
3960 #else | 4012 #else |
3961 if (!fastmap[(unsigned char) TRANSLATE (c)]) | 4013 if (!fastmap[(unsigned char) TRANSLATE (c)]) |
3962 #endif | 4014 #endif |
3988 return -2; | 4040 return -2; |
3989 | 4041 |
3990 advance: | 4042 advance: |
3991 if (!range) | 4043 if (!range) |
3992 break; | 4044 break; |
3993 else { | 4045 else if (range > 0) |
3994 register CONST unsigned char *d; | 4046 { |
3995 Charcount d_size; | 4047 d = ((CONST unsigned char *) |
3996 | 4048 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
3997 d = ((CONST unsigned char *) | 4049 d_size = charcount_to_bytecount (d, 1); |
3998 (startpos >= size1 ? string2 - size1 : string1) + startpos); | 4050 range -= d_size; |
3999 | 4051 startpos += d_size; |
4000 if (range > 0) | 4052 } |
4001 { | 4053 else |
4002 d_size = charcount_to_bytecount (d, 1); | 4054 { |
4003 range -= d_size; | 4055 /* Note startpos > size1 not >=. If we are on the |
4004 startpos += d_size; | 4056 string1/string2 boundary, we want to backup into string1. */ |
4005 } | 4057 d = ((CONST unsigned char *) |
4006 else | 4058 (startpos > size1 ? string2 - size1 : string1) + startpos); |
4007 { | 4059 DEC_CHARPTR(d); |
4008 DEC_CHARPTR(d); | 4060 d_size = charcount_to_bytecount (d, 1); |
4009 d_size = charcount_to_bytecount (d, 1); | 4061 range += d_size; |
4010 range += d_size; | 4062 startpos -= d_size; |
4011 startpos -= d_size; | 4063 } |
4012 } | |
4013 } | |
4014 } | 4064 } |
4015 return -1; | 4065 return -1; |
4016 } /* re_search_2 */ | 4066 } /* re_search_2 */ |
4017 | 4067 |
4018 /* Declarations and macros for re_match_2. */ | 4068 /* Declarations and macros for re_match_2. */ |
5122 #ifdef DEBUG | 5172 #ifdef DEBUG |
5123 register unsigned char c | 5173 register unsigned char c |
5124 = *p2 == (unsigned char) endline ? '\n' : p2[2]; | 5174 = *p2 == (unsigned char) endline ? '\n' : p2[2]; |
5125 #endif | 5175 #endif |
5126 | 5176 |
5127 #if 1 | |
5128 /* dmoore@ucsd.edu - emacs 19.34 uses this: */ | |
5129 | |
5130 if ((re_opcode_t) p1[3] == exactn | 5177 if ((re_opcode_t) p1[3] == exactn |
5131 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] | 5178 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] |
5132 && (p2[2 + p1[5] / BYTEWIDTH] | 5179 && (p2[2 + p1[5] / BYTEWIDTH] |
5133 & (1 << (p1[5] % BYTEWIDTH))))) | 5180 & (1 << (p1[5] % BYTEWIDTH))))) |
5134 #else | |
5135 if ((re_opcode_t) p1[3] == exactn | |
5136 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4] | |
5137 && (p2[1 + p1[4] / BYTEWIDTH] | |
5138 & (1 << (p1[4] % BYTEWIDTH))))) | |
5139 #endif | |
5140 { | 5181 { |
5141 p[-3] = (unsigned char) pop_failure_jump; | 5182 p[-3] = (unsigned char) pop_failure_jump; |
5142 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", | 5183 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", |
5143 c, p1[5]); | 5184 c, p1[5]); |
5144 } | 5185 } |
5464 mcnt = (int) Sword; | 5505 mcnt = (int) Sword; |
5465 matchnotsyntax: | 5506 matchnotsyntax: |
5466 should_succeed = 0; | 5507 should_succeed = 0; |
5467 goto matchornotsyntax; | 5508 goto matchornotsyntax; |
5468 | 5509 |
5510 #ifdef MULE | |
5511 /* 97/2/17 jhod Mule category code patch */ | |
5512 case categoryspec: | |
5513 should_succeed = 1; | |
5514 matchornotcategory: | |
5515 { | |
5516 Emchar emch; | |
5517 | |
5518 mcnt = *p++; | |
5519 PREFETCH (); | |
5520 emch = charptr_emchar ((CONST Bufbyte *) d); | |
5521 INC_CHARPTR (d); | |
5522 if (check_category_at(emch, regex_emacs_buffer->category_table, | |
5523 mcnt, should_succeed)) | |
5524 goto fail; | |
5525 SET_REGS_MATCHED (); | |
5526 } | |
5527 break; | |
5528 | |
5529 case notcategoryspec: | |
5530 should_succeed = 0; | |
5531 goto matchornotcategory; | |
5532 /* end of category patch */ | |
5533 #endif /* MULE */ | |
5469 #else /* not emacs */ | 5534 #else /* not emacs */ |
5470 case wordchar: | 5535 case wordchar: |
5471 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); | 5536 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); |
5472 PREFETCH (); | 5537 PREFETCH (); |
5473 if (!WORDCHAR_P_UNSAFE ((int) (*d))) | 5538 if (!WORDCHAR_P_UNSAFE ((int) (*d))) |