comparison src/regex.c @ 102:a145efe76779 r20-1b3

Import from CVS: tag r20-1b3
author cvs
date Mon, 13 Aug 2007 09:15:49 +0200
parents 6a378aca36af
children cf808b4c4290
comparison
equal deleted inserted replaced
101:a0ec055d74dd 102:a145efe76779
3769 { 3769 {
3770 return re_search_2 (bufp, NULL, 0, string, size, startpos, range, 3770 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3771 regs, size); 3771 regs, size);
3772 } 3772 }
3773 3773
3774 #ifndef emacs
3775 /* Taken from src/lisp.h; needed when `emacs' is not defined, e.g. when compiling [ce]tags. */
3776 # define bytecount_to_charcount(ptr, len) (len)
3777 # define charcount_to_bytecount(ptr, len) (len)
3778 typedef int Charcount;
3779 #endif
3774 3780
3775 /* Using the compiled pattern in BUFP->buffer, first tries to match the 3781 /* Using the compiled pattern in BUFP->buffer, first tries to match the
3776 virtual concatenation of STRING1 and STRING2, starting first at index 3782 virtual concatenation of STRING1 and STRING2, starting first at index
3777 STARTPOS, then at STARTPOS + 1, and so on. 3783 STARTPOS, then at STARTPOS + 1, and so on.
3784
3785 With MULE, STARTPOS is a byte position, not a char position, and
3786 each step of the search moves STARTPOS by the byte width of one
3787 whole character.
3778 3788
3779 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. 3789 STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3780 3790
3781 RANGE is how far to scan while trying to match. RANGE = 0 means try 3791 RANGE is how far to scan while trying to match. RANGE = 0 means try
3782 only at STARTPOS; in general, the last start tried is STARTPOS + 3792 only at STARTPOS; in general, the last start tried is STARTPOS +
3783 RANGE. 3793 RANGE.
3794
3795 With MULE, RANGE is a byte count, not a char count. The last
3796 start tried is the last character that starts at or before STARTPOS + RANGE.
3784 3797
3785 In REGS, return the indices of the virtual concatenation of STRING1 3798 In REGS, return the indices of the virtual concatenation of STRING1
3786 and STRING2 that matched the entire BUFP->buffer and its contained 3799 and STRING2 that matched the entire BUFP->buffer and its contained
3787 subexpressions. 3800 subexpressions.
3788 3801
3811 if (startpos < 0 || startpos > total_size) 3824 if (startpos < 0 || startpos > total_size)
3812 return -1; 3825 return -1;
3813 3826
3814 /* Fix up RANGE if it might eventually take us outside 3827 /* Fix up RANGE if it might eventually take us outside
3815 the virtual concatenation of STRING1 and STRING2. */ 3828 the virtual concatenation of STRING1 and STRING2. */
3829 #if 0
3816 if (endpos < -1) 3830 if (endpos < -1)
3817 range = -1 - startpos; 3831 range = -1 - startpos;
3832 #else
3833 if (endpos < 0)
3834 range = 0 - startpos;
3835 #endif
3818 else if (endpos > total_size) 3836 else if (endpos > total_size)
3819 range = total_size - startpos; 3837 range = total_size - startpos;
3820 3838
3821 /* If the search isn't to be a backwards one, don't waste time in a 3839 /* If the search isn't to be a backwards one, don't waste time in a
3822 search for a pattern that must be anchored. */ 3840 search for a pattern that must be anchored. */
3860 range > 0) 3878 range > 0)
3861 { 3879 {
3862 /* whose stupid idea was it anyway to make this 3880 /* whose stupid idea was it anyway to make this
3863 function take two strings to match?? */ 3881 function take two strings to match?? */
3864 int lim = 0; 3882 int lim = 0;
3865 unsigned char *p; 3883 register CONST unsigned char *d;
3866 int irange = range; 3884 int irange = range;
3885
3867 if (startpos < size1 && startpos + range >= size1) 3886 if (startpos < size1 && startpos + range >= size1)
3868 lim = range - (size1 - startpos); 3887 lim = range - (size1 - startpos);
3869 3888
3870 p = ((unsigned char *) 3889 d = ((CONST unsigned char *)
3871 &(startpos >= size1 ? string2 - size1 : string1)[startpos]); 3890 (startpos >= size1 ? string2 - size1 : string1) + startpos);
3872 p--; 3891 DEC_CHARPTR(d);
3873 3892
3874 if (translate) 3893 if (translate)
3875 { 3894 #ifdef MULE
3876 while (range > lim && translate[*p++] != '\n') 3895 while (range > lim && (*d >= 0x80 || translate[*d] != '\n'))
3877 range--; 3896 #else
3878 } 3897 while (range > lim && translate[*d] != '\n')
3898 #endif
3899 {
3900 INC_CHARPTR(d);
3901 range -= charcount_to_bytecount (d, 1);
3902 }
3879 else 3903 else
3880 { 3904 while (range > lim && *d != '\n')
3881 while (range > lim && *p++ != '\n') 3905 {
3882 range--; 3906 INC_CHARPTR(d);
3883 } 3907 range -= charcount_to_bytecount (d, 1);
3908 }
3909
3884 startpos += irange - range; 3910 startpos += irange - range;
3885 } 3911 }
3886 #endif /* REGEX_BEGLINE_CHECK */ 3912 #endif /* REGEX_BEGLINE_CHECK */
3887 3913
3888 /* If a fastmap is supplied, skip quickly over characters that 3914 /* If a fastmap is supplied, skip quickly over characters that
3891 the first null string. */ 3917 the first null string. */
3892 if (fastmap && startpos < total_size && !bufp->can_be_null) 3918 if (fastmap && startpos < total_size && !bufp->can_be_null)
3893 { 3919 {
3894 if (range > 0) /* Searching forwards. */ 3920 if (range > 0) /* Searching forwards. */
3895 { 3921 {
3896 register CONST char *d; 3922 register CONST unsigned char *d;
3897 register int lim = 0; 3923 register int lim = 0;
3898 int irange = range; 3924 int irange = range;
3899 3925
3900 if (startpos < size1 && startpos + range >= size1) 3926 if (startpos < size1 && startpos + range >= size1)
3901 lim = range - (size1 - startpos); 3927 lim = range - (size1 - startpos);
3902 3928
3903 d = (startpos >= size1 ? string2 - size1 : string1) + startpos; 3929 d = ((CONST unsigned char *)
3930 (startpos >= size1 ? string2 - size1 : string1) + startpos);
3904 3931
3905 /* Written out as an if-else to avoid testing `translate' 3932 /* Written out as an if-else to avoid testing `translate'
3906 inside the loop. */ 3933 inside the loop. */
3907 if (translate) 3934 if (translate)
3908 while (range > lim 3935 #ifdef MULE
3909 && !fastmap[(unsigned char) 3936 while (range > lim && *d < 0x80 && !fastmap[translate[*d]])
3910 translate[(unsigned char) *d++]]) 3937 #else
3911 range--; 3938 while (range > lim && !fastmap[translate[*d]])
3939 #endif
3940 {
3941 range -= charcount_to_bytecount (d, 1);
3942 INC_CHARPTR(d);
3943 }
3912 else 3944 else
3913 while (range > lim && !fastmap[(unsigned char) *d++]) 3945 while (range > lim && !fastmap[*d])
3914 range--; 3946 {
3947 range -= charcount_to_bytecount (d, 1);
3948 INC_CHARPTR(d);
3949 }
3915 3950
3916 startpos += irange - range; 3951 startpos += irange - range;
3917 } 3952 }
3918 else /* Searching backwards. */ 3953 else /* Searching backwards. */
3919 { 3954 {
3920 register char c = (size1 == 0 || startpos >= size1 3955 register unsigned char c = (size1 == 0 || startpos >= size1
3921 ? string2[startpos - size1] 3956 ? string2[startpos - size1]
3922 : string1[startpos]); 3957 : string1[startpos]);
3923 3958 #ifdef MULE
3959 if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)])
3960 #else
3924 if (!fastmap[(unsigned char) TRANSLATE (c)]) 3961 if (!fastmap[(unsigned char) TRANSLATE (c)])
3962 #endif
3925 goto advance; 3963 goto advance;
3926 } 3964 }
3927 } 3965 }
3928 3966
3929 /* If can't match the null string, and that's all we have left, fail. */ 3967 /* If can't match the null string, and that's all we have left, fail. */
3949 if (val == -2) 3987 if (val == -2)
3950 return -2; 3988 return -2;
3951 3989
3952 advance: 3990 advance:
3953 if (!range) 3991 if (!range)
3954 break; 3992 break;
3955 else if (range > 0) 3993 else {
3956 { 3994 register CONST unsigned char *d;
3957 range--; 3995 Charcount d_size;
3958 startpos++; 3996
3959 } 3997 d = ((CONST unsigned char *)
3960 else 3998 (startpos >= size1 ? string2 - size1 : string1) + startpos);
3961 { 3999
3962 range++; 4000 if (range > 0)
3963 startpos--; 4001 {
3964 } 4002 d_size = charcount_to_bytecount (d, 1);
4003 range -= d_size;
4004 startpos += d_size;
4005 }
4006 else
4007 {
4008 DEC_CHARPTR(d);
4009 d_size = charcount_to_bytecount (d, 1);
4010 range += d_size;
4011 startpos -= d_size;
4012 }
4013 }
3965 } 4014 }
3966 return -1; 4015 return -1;
3967 } /* re_search_2 */ 4016 } /* re_search_2 */
3968 4017
3969 /* Declarations and macros for re_match_2. */ 4018 /* Declarations and macros for re_match_2. */
5073 #ifdef DEBUG 5122 #ifdef DEBUG
5074 register unsigned char c 5123 register unsigned char c
5075 = *p2 == (unsigned char) endline ? '\n' : p2[2]; 5124 = *p2 == (unsigned char) endline ? '\n' : p2[2];
5076 #endif 5125 #endif
5077 5126
5127 #if 1
5128 /* dmoore@ucsd.edu - emacs 19.34 uses this: */
5129
5078 if ((re_opcode_t) p1[3] == exactn 5130 if ((re_opcode_t) p1[3] == exactn
5079 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4] 5131 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
5080 && (p2[1 + p1[4] / BYTEWIDTH] 5132 && (p2[2 + p1[5] / BYTEWIDTH]
5081 & (1 << (p1[4] % BYTEWIDTH))))) 5133 & (1 << (p1[5] % BYTEWIDTH)))))
5134 #else
5135 if ((re_opcode_t) p1[3] == exactn
5136 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
5137 && (p2[1 + p1[4] / BYTEWIDTH]
5138 & (1 << (p1[4] % BYTEWIDTH)))))
5139 #endif
5082 { 5140 {
5083 p[-3] = (unsigned char) pop_failure_jump; 5141 p[-3] = (unsigned char) pop_failure_jump;
5084 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 5142 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
5085 c, p1[5]); 5143 c, p1[5]);
5086 } 5144 }