Mercurial > hg > xemacs-beta
comparison src/regex.c @ 102:a145efe76779 r20-1b3
Import from CVS: tag r20-1b3
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:15:49 +0200 |
parents | 6a378aca36af |
children | cf808b4c4290 |
comparison
equal
deleted
inserted
replaced
101:a0ec055d74dd | 102:a145efe76779 |
---|---|
3769 { | 3769 { |
3770 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, | 3770 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, |
3771 regs, size); | 3771 regs, size); |
3772 } | 3772 } |
3773 | 3773 |
3774 #ifndef emacs | |
3775 /* Snarfed from src/lisp.h, needed for compiling [ce]tags. */ | |
3776 # define bytecount_to_charcount(ptr, len) (len) | |
3777 # define charcount_to_bytecount(ptr, len) (len) | |
3778 typedef int Charcount; | |
3779 #endif | |
3774 | 3780 |
3775 /* Using the compiled pattern in BUFP->buffer, first tries to match the | 3781 /* Using the compiled pattern in BUFP->buffer, first tries to match the |
3776 virtual concatenation of STRING1 and STRING2, starting first at index | 3782 virtual concatenation of STRING1 and STRING2, starting first at index |
3777 STARTPOS, then at STARTPOS + 1, and so on. | 3783 STARTPOS, then at STARTPOS + 1, and so on. |
3784 | |
3785 With MULE, STARTPOS is a byte position, not a char position, and the | |
3786 search steps STARTPOS by the byte width of a whole character rather | |
3787 than by a single byte. | |
3778 | 3788 |
3779 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. | 3789 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. |
3780 | 3790 |
3781 RANGE is how far to scan while trying to match. RANGE = 0 means try | 3791 RANGE is how far to scan while trying to match. RANGE = 0 means try |
3782 only at STARTPOS; in general, the last start tried is STARTPOS + | 3792 only at STARTPOS; in general, the last start tried is STARTPOS + |
3783 RANGE. | 3793 RANGE. |
3794 | |
3795 With MULE, RANGE is a byte count, not a char count. The last | |
3796 start tried is the last character that begins at or before STARTPOS + RANGE. | |
3784 | 3797 |
3785 In REGS, return the indices of the virtual concatenation of STRING1 | 3798 In REGS, return the indices of the virtual concatenation of STRING1 |
3786 and STRING2 that matched the entire BUFP->buffer and its contained | 3799 and STRING2 that matched the entire BUFP->buffer and its contained |
3787 subexpressions. | 3800 subexpressions. |
3788 | 3801 |
3811 if (startpos < 0 || startpos > total_size) | 3824 if (startpos < 0 || startpos > total_size) |
3812 return -1; | 3825 return -1; |
3813 | 3826 |
3814 /* Fix up RANGE if it might eventually take us outside | 3827 /* Fix up RANGE if it might eventually take us outside |
3815 the virtual concatenation of STRING1 and STRING2. */ | 3828 the virtual concatenation of STRING1 and STRING2. */ |
3829 #if 0 | |
3816 if (endpos < -1) | 3830 if (endpos < -1) |
3817 range = -1 - startpos; | 3831 range = -1 - startpos; |
3832 #else | |
3833 if (endpos < 0) | |
3834 range = 0 - startpos; | |
3835 #endif | |
3818 else if (endpos > total_size) | 3836 else if (endpos > total_size) |
3819 range = total_size - startpos; | 3837 range = total_size - startpos; |
3820 | 3838 |
3821 /* If the search isn't to be a backwards one, don't waste time in a | 3839 /* If the search isn't to be a backwards one, don't waste time in a |
3822 search for a pattern that must be anchored. */ | 3840 search for a pattern that must be anchored. */ |
3860 range > 0) | 3878 range > 0) |
3861 { | 3879 { |
3862 /* whose stupid idea was it anyway to make this | 3880 /* whose stupid idea was it anyway to make this |
3863 function take two strings to match?? */ | 3881 function take two strings to match?? */ |
3864 int lim = 0; | 3882 int lim = 0; |
3865 unsigned char *p; | 3883 register CONST unsigned char *d; |
3866 int irange = range; | 3884 int irange = range; |
3885 | |
3867 if (startpos < size1 && startpos + range >= size1) | 3886 if (startpos < size1 && startpos + range >= size1) |
3868 lim = range - (size1 - startpos); | 3887 lim = range - (size1 - startpos); |
3869 | 3888 |
3870 p = ((unsigned char *) | 3889 d = ((CONST unsigned char *) |
3871 &(startpos >= size1 ? string2 - size1 : string1)[startpos]); | 3890 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
3872 p--; | 3891 DEC_CHARPTR(d); |
3873 | 3892 |
3874 if (translate) | 3893 if (translate) |
3875 { | 3894 #ifdef MULE |
3876 while (range > lim && translate[*p++] != '\n') | 3895 while (range > lim && (*d >= 0x80 || translate[*d] != '\n')) |
3877 range--; | 3896 #else |
3878 } | 3897 while (range > lim && translate[*d] != '\n') |
3898 #endif | |
3899 { | |
3900 INC_CHARPTR(d); | |
3901 range -= charcount_to_bytecount (d, 1); | |
3902 } | |
3879 else | 3903 else |
3880 { | 3904 while (range > lim && *d != '\n') |
3881 while (range > lim && *p++ != '\n') | 3905 { |
3882 range--; | 3906 INC_CHARPTR(d); |
3883 } | 3907 range -= charcount_to_bytecount (d, 1); |
3908 } | |
3909 | |
3884 startpos += irange - range; | 3910 startpos += irange - range; |
3885 } | 3911 } |
3886 #endif /* REGEX_BEGLINE_CHECK */ | 3912 #endif /* REGEX_BEGLINE_CHECK */ |
3887 | 3913 |
3888 /* If a fastmap is supplied, skip quickly over characters that | 3914 /* If a fastmap is supplied, skip quickly over characters that |
3891 the first null string. */ | 3917 the first null string. */ |
3892 if (fastmap && startpos < total_size && !bufp->can_be_null) | 3918 if (fastmap && startpos < total_size && !bufp->can_be_null) |
3893 { | 3919 { |
3894 if (range > 0) /* Searching forwards. */ | 3920 if (range > 0) /* Searching forwards. */ |
3895 { | 3921 { |
3896 register CONST char *d; | 3922 register CONST unsigned char *d; |
3897 register int lim = 0; | 3923 register int lim = 0; |
3898 int irange = range; | 3924 int irange = range; |
3899 | 3925 |
3900 if (startpos < size1 && startpos + range >= size1) | 3926 if (startpos < size1 && startpos + range >= size1) |
3901 lim = range - (size1 - startpos); | 3927 lim = range - (size1 - startpos); |
3902 | 3928 |
3903 d = (startpos >= size1 ? string2 - size1 : string1) + startpos; | 3929 d = ((CONST unsigned char *) |
3930 (startpos >= size1 ? string2 - size1 : string1) + startpos); | |
3904 | 3931 |
3905 /* Written out as an if-else to avoid testing `translate' | 3932 /* Written out as an if-else to avoid testing `translate' |
3906 inside the loop. */ | 3933 inside the loop. */ |
3907 if (translate) | 3934 if (translate) |
3908 while (range > lim | 3935 #ifdef MULE |
3909 && !fastmap[(unsigned char) | 3936 while (range > lim && *d < 0x80 && !fastmap[translate[*d]]) |
3910 translate[(unsigned char) *d++]]) | 3937 #else |
3911 range--; | 3938 while (range > lim && !fastmap[translate[*d]]) |
3939 #endif | |
3940 { | |
3941 range -= charcount_to_bytecount (d, 1); | |
3942 INC_CHARPTR(d); | |
3943 } | |
3912 else | 3944 else |
3913 while (range > lim && !fastmap[(unsigned char) *d++]) | 3945 while (range > lim && !fastmap[*d]) |
3914 range--; | 3946 { |
3947 range -= charcount_to_bytecount (d, 1); | |
3948 INC_CHARPTR(d); | |
3949 } | |
3915 | 3950 |
3916 startpos += irange - range; | 3951 startpos += irange - range; |
3917 } | 3952 } |
3918 else /* Searching backwards. */ | 3953 else /* Searching backwards. */ |
3919 { | 3954 { |
3920 register char c = (size1 == 0 || startpos >= size1 | 3955 register unsigned char c = (size1 == 0 || startpos >= size1 |
3921 ? string2[startpos - size1] | 3956 ? string2[startpos - size1] |
3922 : string1[startpos]); | 3957 : string1[startpos]); |
3923 | 3958 #ifdef MULE |
3959 if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)]) | |
3960 #else | |
3924 if (!fastmap[(unsigned char) TRANSLATE (c)]) | 3961 if (!fastmap[(unsigned char) TRANSLATE (c)]) |
3962 #endif | |
3925 goto advance; | 3963 goto advance; |
3926 } | 3964 } |
3927 } | 3965 } |
3928 | 3966 |
3929 /* If can't match the null string, and that's all we have left, fail. */ | 3967 /* If can't match the null string, and that's all we have left, fail. */ |
3949 if (val == -2) | 3987 if (val == -2) |
3950 return -2; | 3988 return -2; |
3951 | 3989 |
3952 advance: | 3990 advance: |
3953 if (!range) | 3991 if (!range) |
3954 break; | 3992 break; |
3955 else if (range > 0) | 3993 else { |
3956 { | 3994 register CONST unsigned char *d; |
3957 range--; | 3995 Charcount d_size; |
3958 startpos++; | 3996 |
3959 } | 3997 d = ((CONST unsigned char *) |
3960 else | 3998 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
3961 { | 3999 |
3962 range++; | 4000 if (range > 0) |
3963 startpos--; | 4001 { |
3964 } | 4002 d_size = charcount_to_bytecount (d, 1); |
4003 range -= d_size; | |
4004 startpos += d_size; | |
4005 } | |
4006 else | |
4007 { | |
4008 DEC_CHARPTR(d); | |
4009 d_size = charcount_to_bytecount (d, 1); | |
4010 range += d_size; | |
4011 startpos -= d_size; | |
4012 } | |
4013 } | |
3965 } | 4014 } |
3966 return -1; | 4015 return -1; |
3967 } /* re_search_2 */ | 4016 } /* re_search_2 */ |
3968 | 4017 |
3969 /* Declarations and macros for re_match_2. */ | 4018 /* Declarations and macros for re_match_2. */ |
5073 #ifdef DEBUG | 5122 #ifdef DEBUG |
5074 register unsigned char c | 5123 register unsigned char c |
5075 = *p2 == (unsigned char) endline ? '\n' : p2[2]; | 5124 = *p2 == (unsigned char) endline ? '\n' : p2[2]; |
5076 #endif | 5125 #endif |
5077 | 5126 |
5127 #if 1 | |
5128 /* dmoore@ucsd.edu - emacs 19.34 uses this: */ | |
5129 | |
5078 if ((re_opcode_t) p1[3] == exactn | 5130 if ((re_opcode_t) p1[3] == exactn |
5079 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4] | 5131 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] |
5080 && (p2[1 + p1[4] / BYTEWIDTH] | 5132 && (p2[2 + p1[5] / BYTEWIDTH] |
5081 & (1 << (p1[4] % BYTEWIDTH))))) | 5133 & (1 << (p1[5] % BYTEWIDTH))))) |
5134 #else | |
5135 if ((re_opcode_t) p1[3] == exactn | |
5136 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4] | |
5137 && (p2[1 + p1[4] / BYTEWIDTH] | |
5138 & (1 << (p1[4] % BYTEWIDTH))))) | |
5139 #endif | |
5082 { | 5140 { |
5083 p[-3] = (unsigned char) pop_failure_jump; | 5141 p[-3] = (unsigned char) pop_failure_jump; |
5084 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", | 5142 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", |
5085 c, p1[5]); | 5143 c, p1[5]); |
5086 } | 5144 } |