comparison src/regex.c @ 104:cf808b4c4290 r20-1b4

Import from CVS: tag r20-1b4
author cvs
date Mon, 13 Aug 2007 09:16:51 +0200
parents a145efe76779
children fe104dbd9147
comparison
equal deleted inserted replaced
103:30eda07fe280 104:cf808b4c4290
532 a byte which contains a syntax code, e.g., Sword. */ 532 a byte which contains a syntax code, e.g., Sword. */
533 syntaxspec, 533 syntaxspec,
534 534
535 /* Matches any character whose syntax is not that specified. */ 535 /* Matches any character whose syntax is not that specified. */
536 notsyntaxspec 536 notsyntaxspec
537
537 #endif /* emacs */ 538 #endif /* emacs */
538 539
539 #ifdef MULE 540 #ifdef MULE
540 /* need extra stuff to be able to properly work with XEmacs/Mule 541 /* need extra stuff to be able to properly work with XEmacs/Mule
541 characters (which may take up more than one byte) */ 542 characters (which may take up more than one byte) */
545 format"; see rangetab.c. Unlike the `charset' 546 format"; see rangetab.c. Unlike the `charset'
546 opcode, this can handle arbitrary characters. */ 547 opcode, this can handle arbitrary characters. */
547 548
548 charset_mule_not /* Same parameters as charset_mule, but match any 549 charset_mule_not /* Same parameters as charset_mule, but match any
549 character that is not one of those specified. */ 550 character that is not one of those specified. */
551
552 /* 97/2/17 jhod: The following two were merged back in from the Mule
553 2.3 code to enable some language specific processing */
554 ,categoryspec, /* Matches entries in the character category tables */
555 notcategoryspec /* The opposite of the above */
550 #endif 556 #endif
551 557
552 } re_opcode_t; 558 } re_opcode_t;
553 559
554 /* Common operations on the compiled pattern. */ 560 /* Common operations on the compiled pattern. */
917 case notsyntaxspec: 923 case notsyntaxspec:
918 printf ("/notsyntaxspec"); 924 printf ("/notsyntaxspec");
919 mcnt = *p++; 925 mcnt = *p++;
920 printf ("/%d", mcnt); 926 printf ("/%d", mcnt);
921 break; 927 break;
928
929 #ifdef MULE
930 /* 97/2/17 jhod Mule category patch */
931 case categoryspec:
932 printf ("/categoryspec");
933 mcnt = *p++;
934 printf ("/%d", mcnt);
935 break;
936
937 case notcategoryspec:
938 printf ("/notcategoryspec");
939 mcnt = *p++;
940 printf ("/%d", mcnt);
941 break;
942 /* end of category patch */
943 #endif /* MULE */
922 #endif /* emacs */ 944 #endif /* emacs */
923 945
924 case wordchar: 946 case wordchar:
925 printf ("/wordchar"); 947 printf ("/wordchar");
926 break; 948 break;
970 printf ("no_sub: %d\t", bufp->no_sub); 992 printf ("no_sub: %d\t", bufp->no_sub);
971 printf ("not_bol: %d\t", bufp->not_bol); 993 printf ("not_bol: %d\t", bufp->not_bol);
972 printf ("not_eol: %d\t", bufp->not_eol); 994 printf ("not_eol: %d\t", bufp->not_eol);
973 printf ("syntax: %d\n", bufp->syntax); 995 printf ("syntax: %d\n", bufp->syntax);
974 /* Perhaps we should print the translate table? */ 996 /* Perhaps we should print the translate table? */
997 /* and maybe the category table? */
975 } 998 }
976 999
977 1000
978 static void 1001 static void
979 print_double_string (CONST char *where, CONST char *string1, int size1, 1002 print_double_string (CONST char *where, CONST char *string1, int size1,
1063 #ifdef emacs 1086 #ifdef emacs
1064 "Invalid syntax designator", /* REG_ESYNTAX */ 1087 "Invalid syntax designator", /* REG_ESYNTAX */
1065 #endif 1088 #endif
1066 #ifdef MULE 1089 #ifdef MULE
1067 "Ranges may not span charsets", /* REG_ERANGESPAN */ 1090 "Ranges may not span charsets", /* REG_ERANGESPAN */
1091 "Invalid category designator", /* REG_ECATEGORY */
1068 #endif 1092 #endif
1069 }; 1093 };
1070 1094
1071 /* Avoiding alloca during matching, to placate r_alloc. */ 1095 /* Avoiding alloca during matching, to placate r_alloc. */
1072 1096
2833 /* XEmacs addition */ 2857 /* XEmacs addition */
2834 if (c >= 0x80 || syntax_spec_code[c] == 0377) 2858 if (c >= 0x80 || syntax_spec_code[c] == 0377)
2835 FREE_STACK_RETURN (REG_ESYNTAX); 2859 FREE_STACK_RETURN (REG_ESYNTAX);
2836 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); 2860 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2837 break; 2861 break;
2862
2863 #ifdef MULE
2864 /* 97/2/17 jhod: merged into XEmacs from mule-2.3 */
2865 case 'c':
2866 laststart = b;
2867 PATFETCH_RAW (c);
2868 if (c < 32 || c > 127)
2869 FREE_STACK_RETURN (REG_ECATEGORY);
2870 BUF_PUSH_2 (categoryspec, c);
2871 break;
2872
2873 case 'C':
2874 laststart = b;
2875 PATFETCH_RAW (c);
2876 if (c < 32 || c > 127)
2877 FREE_STACK_RETURN (REG_ECATEGORY);
2878 BUF_PUSH_2 (notcategoryspec, c);
2879 break;
2880 /* end of category patch */
2881 #endif /* MULE */
2838 #endif /* emacs */ 2882 #endif /* emacs */
2839 2883
2840 2884
2841 case 'w': 2885 case 'w':
2842 laststart = b; 2886 laststart = b;
3586 (enum syntaxcode) k) 3630 (enum syntaxcode) k)
3587 fastmap[j] = 1; 3631 fastmap[j] = 1;
3588 #endif /* ! MULE */ 3632 #endif /* ! MULE */
3589 break; 3633 break;
3590 3634
3635 #ifdef MULE
3636 /* 97/2/17 jhod category patch */
3637 case categoryspec:
3638 case notcategoryspec:
3639 bufp->can_be_null = 1;
3640 return;
3641 /* end of category patch */
3642 #endif /* MULE */
3591 3643
3592 /* All cases after this match the empty string. These end with 3644 /* All cases after this match the empty string. These end with
3593 `continue'. */ 3645 `continue'. */
3594 3646
3595 3647
3817 int total_size = size1 + size2; 3869 int total_size = size1 + size2;
3818 int endpos = startpos + range; 3870 int endpos = startpos + range;
3819 #ifdef REGEX_BEGLINE_CHECK 3871 #ifdef REGEX_BEGLINE_CHECK
3820 int anchored_at_begline = 0; 3872 int anchored_at_begline = 0;
3821 #endif 3873 #endif
3874 CONST unsigned char *d;
3875 Charcount d_size;
3822 3876
3823 /* Check for out-of-range STARTPOS. */ 3877 /* Check for out-of-range STARTPOS. */
3824 if (startpos < 0 || startpos > total_size) 3878 if (startpos < 0 || startpos > total_size)
3825 return -1; 3879 return -1;
3826 3880
3827 /* Fix up RANGE if it might eventually take us outside 3881 /* Fix up RANGE if it might eventually take us outside
3828 the virtual concatenation of STRING1 and STRING2. */ 3882 the virtual concatenation of STRING1 and STRING2. */
3829 #if 0
3830 if (endpos < -1)
3831 range = -1 - startpos;
3832 #else
3833 if (endpos < 0) 3883 if (endpos < 0)
3834 range = 0 - startpos; 3884 range = 0 - startpos;
3835 #endif
3836 else if (endpos > total_size) 3885 else if (endpos > total_size)
3837 range = total_size - startpos; 3886 range = total_size - startpos;
3838 3887
3839 /* If the search isn't to be a backwards one, don't waste time in a 3888 /* If the search isn't to be a backwards one, don't waste time in a
3840 search for a pattern that must be anchored. */ 3889 search for a pattern that must be anchored. */
3878 range > 0) 3927 range > 0)
3879 { 3928 {
3880 /* whose stupid idea was it anyway to make this 3929 /* whose stupid idea was it anyway to make this
3881 function take two strings to match?? */ 3930 function take two strings to match?? */
3882 int lim = 0; 3931 int lim = 0;
3883 register CONST unsigned char *d;
3884 int irange = range; 3932 int irange = range;
3885 3933
3886 if (startpos < size1 && startpos + range >= size1) 3934 if (startpos < size1 && startpos + range >= size1)
3887 lim = range - (size1 - startpos); 3935 lim = range - (size1 - startpos);
3888 3936
3889 d = ((CONST unsigned char *) 3937 d = ((CONST unsigned char *)
3890 (startpos >= size1 ? string2 - size1 : string1) + startpos); 3938 (startpos >= size1 ? string2 - size1 : string1) + startpos);
3891 DEC_CHARPTR(d); 3939 DEC_CHARPTR(d); /* Ok, since startpos != size1. */
3940 d_size = charcount_to_bytecount (d, 1);
3892 3941
3893 if (translate) 3942 if (translate)
3894 #ifdef MULE 3943 #ifdef MULE
3895 while (range > lim && (*d >= 0x80 || translate[*d] != '\n')) 3944 while (range > lim && (*d >= 0x80 || translate[*d] != '\n'))
3896 #else 3945 #else
3897 while (range > lim && translate[*d] != '\n') 3946 while (range > lim && translate[*d] != '\n')
3898 #endif 3947 #endif
3899 { 3948 {
3900 INC_CHARPTR(d); 3949 d += d_size; /* Speedier INC_CHARPTR(d) */
3901 range -= charcount_to_bytecount (d, 1); 3950 d_size = charcount_to_bytecount (d, 1);
3951 range -= d_size;
3902 } 3952 }
3903 else 3953 else
3904 while (range > lim && *d != '\n') 3954 while (range > lim && *d != '\n')
3905 { 3955 {
3906 INC_CHARPTR(d); 3956 d += d_size; /* Speedier INC_CHARPTR(d) */
3907 range -= charcount_to_bytecount (d, 1); 3957 d_size = charcount_to_bytecount (d, 1);
3958 range -= d_size;
3908 } 3959 }
3909 3960
3910 startpos += irange - range; 3961 startpos += irange - range;
3911 } 3962 }
3912 #endif /* REGEX_BEGLINE_CHECK */ 3963 #endif /* REGEX_BEGLINE_CHECK */
3917 the first null string. */ 3968 the first null string. */
3918 if (fastmap && startpos < total_size && !bufp->can_be_null) 3969 if (fastmap && startpos < total_size && !bufp->can_be_null)
3919 { 3970 {
3920 if (range > 0) /* Searching forwards. */ 3971 if (range > 0) /* Searching forwards. */
3921 { 3972 {
3922 register CONST unsigned char *d; 3973 int lim = 0;
3923 register int lim = 0;
3924 int irange = range; 3974 int irange = range;
3925 3975
3926 if (startpos < size1 && startpos + range >= size1) 3976 if (startpos < size1 && startpos + range >= size1)
3927 lim = range - (size1 - startpos); 3977 lim = range - (size1 - startpos);
3928 3978
3929 d = ((CONST unsigned char *) 3979 d = ((CONST unsigned char *)
3930 (startpos >= size1 ? string2 - size1 : string1) + startpos); 3980 (startpos >= size1 ? string2 - size1 : string1) + startpos);
3931 3981
3932 /* Written out as an if-else to avoid testing `translate' 3982 /* Written out as an if-else to avoid testing `translate'
3933 inside the loop. */ 3983 inside the loop. */
3934 if (translate) 3984 if (translate)
3935 #ifdef MULE 3985 #ifdef MULE
3936 while (range > lim && *d < 0x80 && !fastmap[translate[*d]]) 3986 while (range > lim && *d < 0x80 && !fastmap[translate[*d]])
3937 #else 3987 #else
3938 while (range > lim && !fastmap[translate[*d]]) 3988 while (range > lim && !fastmap[translate[*d]])
3939 #endif 3989 #endif
3940 { 3990 {
3941 range -= charcount_to_bytecount (d, 1); 3991 d_size = charcount_to_bytecount (d, 1);
3942 INC_CHARPTR(d); 3992 range -= d_size;
3993 d += d_size; /* Speedier INC_CHARPTR(d) */
3943 } 3994 }
3944 else 3995 else
3945 while (range > lim && !fastmap[*d]) 3996 while (range > lim && !fastmap[*d])
3946 { 3997 {
3947 range -= charcount_to_bytecount (d, 1); 3998 d_size = charcount_to_bytecount (d, 1);
3948 INC_CHARPTR(d); 3999 range -= d_size;
4000 d += d_size; /* Speedier INC_CHARPTR(d) */
3949 } 4001 }
3950 4002
3951 startpos += irange - range; 4003 startpos += irange - range;
3952 } 4004 }
3953 else /* Searching backwards. */ 4005 else /* Searching backwards. */
3954 { 4006 {
3955 register unsigned char c = (size1 == 0 || startpos >= size1 4007 unsigned char c = (size1 == 0 || startpos >= size1
3956 ? string2[startpos - size1] 4008 ? string2[startpos - size1]
3957 : string1[startpos]); 4009 : string1[startpos]);
3958 #ifdef MULE 4010 #ifdef MULE
3959 if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)]) 4011 if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)])
3960 #else 4012 #else
3961 if (!fastmap[(unsigned char) TRANSLATE (c)]) 4013 if (!fastmap[(unsigned char) TRANSLATE (c)])
3962 #endif 4014 #endif
3988 return -2; 4040 return -2;
3989 4041
3990 advance: 4042 advance:
3991 if (!range) 4043 if (!range)
3992 break; 4044 break;
3993 else { 4045 else if (range > 0)
3994 register CONST unsigned char *d; 4046 {
3995 Charcount d_size; 4047 d = ((CONST unsigned char *)
3996 4048 (startpos >= size1 ? string2 - size1 : string1) + startpos);
3997 d = ((CONST unsigned char *) 4049 d_size = charcount_to_bytecount (d, 1);
3998 (startpos >= size1 ? string2 - size1 : string1) + startpos); 4050 range -= d_size;
3999 4051 startpos += d_size;
4000 if (range > 0) 4052 }
4001 { 4053 else
4002 d_size = charcount_to_bytecount (d, 1); 4054 {
4003 range -= d_size; 4055 /* Note startpos > size1 not >=. If we are on the
4004 startpos += d_size; 4056 string1/string2 boundary, we want to back up into string1. */
4005 } 4057 d = ((CONST unsigned char *)
4006 else 4058 (startpos > size1 ? string2 - size1 : string1) + startpos);
4007 { 4059 DEC_CHARPTR(d);
4008 DEC_CHARPTR(d); 4060 d_size = charcount_to_bytecount (d, 1);
4009 d_size = charcount_to_bytecount (d, 1); 4061 range += d_size;
4010 range += d_size; 4062 startpos -= d_size;
4011 startpos -= d_size; 4063 }
4012 }
4013 }
4014 } 4064 }
4015 return -1; 4065 return -1;
4016 } /* re_search_2 */ 4066 } /* re_search_2 */
4017 4067
4018 /* Declarations and macros for re_match_2. */ 4068 /* Declarations and macros for re_match_2. */
5122 #ifdef DEBUG 5172 #ifdef DEBUG
5123 register unsigned char c 5173 register unsigned char c
5124 = *p2 == (unsigned char) endline ? '\n' : p2[2]; 5174 = *p2 == (unsigned char) endline ? '\n' : p2[2];
5125 #endif 5175 #endif
5126 5176
5127 #if 1
5128 /* dmoore@ucsd.edu - emacs 19.34 uses this: */
5129
5130 if ((re_opcode_t) p1[3] == exactn 5177 if ((re_opcode_t) p1[3] == exactn
5131 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] 5178 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
5132 && (p2[2 + p1[5] / BYTEWIDTH] 5179 && (p2[2 + p1[5] / BYTEWIDTH]
5133 & (1 << (p1[5] % BYTEWIDTH))))) 5180 & (1 << (p1[5] % BYTEWIDTH)))))
5134 #else
5135 if ((re_opcode_t) p1[3] == exactn
5136 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
5137 && (p2[1 + p1[4] / BYTEWIDTH]
5138 & (1 << (p1[4] % BYTEWIDTH)))))
5139 #endif
5140 { 5181 {
5141 p[-3] = (unsigned char) pop_failure_jump; 5182 p[-3] = (unsigned char) pop_failure_jump;
5142 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 5183 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
5143 c, p1[5]); 5184 c, p1[5]);
5144 } 5185 }
5464 mcnt = (int) Sword; 5505 mcnt = (int) Sword;
5465 matchnotsyntax: 5506 matchnotsyntax:
5466 should_succeed = 0; 5507 should_succeed = 0;
5467 goto matchornotsyntax; 5508 goto matchornotsyntax;
5468 5509
5510 #ifdef MULE
5511 /* 97/2/17 jhod Mule category code patch */
5512 case categoryspec:
5513 should_succeed = 1;
5514 matchornotcategory:
5515 {
5516 Emchar emch;
5517
5518 mcnt = *p++;
5519 PREFETCH ();
5520 emch = charptr_emchar ((CONST Bufbyte *) d);
5521 INC_CHARPTR (d);
5522 if (check_category_at(emch, regex_emacs_buffer->category_table,
5523 mcnt, should_succeed))
5524 goto fail;
5525 SET_REGS_MATCHED ();
5526 }
5527 break;
5528
5529 case notcategoryspec:
5530 should_succeed = 0;
5531 goto matchornotcategory;
5532 /* end of category patch */
5533 #endif /* MULE */
5469 #else /* not emacs */ 5534 #else /* not emacs */
5470 case wordchar: 5535 case wordchar:
5471 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); 5536 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5472 PREFETCH (); 5537 PREFETCH ();
5473 if (!WORDCHAR_P_UNSAFE ((int) (*d))) 5538 if (!WORDCHAR_P_UNSAFE ((int) (*d)))