Mercurial > hg > xemacs-beta
comparison src/search.c @ 5653:3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
src/ChangeLog addition:
2012-05-04 Aidan Kehoe <kehoea@parhasard.net>
* regex.c:
Move various #defines and enums to regex.h, since we need them
when implementing #'skip-chars-{backward,forward}.
* regex.c (re_wctype):
* regex.c (re_iswctype):
Be more robust about case insensitivity here.
* regex.c (regex_compile):
* regex.h:
* regex.h (RE_ISWCTYPE_ARG_DECL):
* regex.h (CHAR_CLASS_MAX_LENGTH):
* search.c (skip_chars):
Implement support for the predefined character classes in this
function.
tests/ChangeLog addition:
2012-05-04 Aidan Kehoe <kehoea@parhasard.net>
* automated/regexp-tests.el (equal):
* automated/regexp-tests.el (Assert-char-class):
Correct a stray parenthesis; add tests for the predefined
character classes with #'skip-chars-{forward,backward}; update the
tests to reflect some changed design decisions on my part.
man/ChangeLog addition:
2012-05-04 Aidan Kehoe <kehoea@parhasard.net>
* lispref/searching.texi (Regular Expressions):
* lispref/searching.texi (Syntax of Regexps):
* lispref/searching.texi (Char Classes):
* lispref/searching.texi (Regexp Example):
Document the predefined character classes in this file.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Fri, 04 May 2012 21:12:02 +0100 |
parents | d026b665014f |
children | 6e5a7278f9bf |
comparison legend: equal | deleted | inserted | replaced
5649:d026b665014f | 5653:3df910176b6a |
---|---|
885 REGISTER Ichar c; | 885 REGISTER Ichar c; |
886 /* We store the first 256 chars in an array here and the rest in | 886 /* We store the first 256 chars in an array here and the rest in |
887 a range table. */ | 887 a range table. */ |
888 unsigned char fastmap[0400]; | 888 unsigned char fastmap[0400]; |
889 int negate = 0; | 889 int negate = 0; |
890 REGISTER int i; | |
891 Charbpos limit; | 890 Charbpos limit; |
892 struct syntax_cache *scache; | 891 struct syntax_cache *scache; |
892 Bitbyte class_bits = 0; | |
893 | 893 |
894 if (NILP (lim)) | 894 if (NILP (lim)) |
895 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf); | 895 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf); |
896 else | 896 else |
897 { | 897 { |
955 if (c <= cend) | 955 if (c <= cend) |
956 Fput_range_table (make_fixnum (c), make_fixnum (cend), Qt, | 956 Fput_range_table (make_fixnum (c), make_fixnum (cend), Qt, |
957 Vskip_chars_range_table); | 957 Vskip_chars_range_table); |
958 INC_IBYTEPTR (p); | 958 INC_IBYTEPTR (p); |
959 } | 959 } |
960 else if ('[' == c && p != pend && *p == ':') | |
961 { | |
962 Ibyte *colonp; | |
963 Extbyte *classname; | |
964 int ch = 0; | |
965 re_wctype_t cc; | |
966 | |
967 INC_IBYTEPTR (p); | |
968 | |
969 if (p == pend) | |
970 { | |
971 fastmap ['['] = fastmap[':'] = 1; | |
972 break; | |
973 } | |
974 | |
975 colonp = memchr (p, ':', pend - p); | |
976 if (NULL == colonp || (colonp + 1) == pend || colonp[1] != ']') | |
977 { | |
978 fastmap ['['] = fastmap[':'] = 1; | |
979 continue; | |
980 } | |
981 | |
982 classname = alloca_extbytes (colonp - p + 1); | |
983 memmove (classname, p, colonp - p); | |
984 classname[colonp - p] = '\0'; | |
985 cc = re_wctype (classname); | |
986 | |
987 if (cc == RECC_ERROR) | |
988 { | |
989 invalid_argument ("Invalid character class", | |
990 build_extstring (classname, Qbinary)); | |
991 } | |
992 | |
993 for (ch = 0; ch < countof (fastmap); ++ch) | |
994 { | |
995 if (re_iswctype (ch, cc, buf)) | |
996 { | |
997 fastmap[ch] = 1; | |
998 } | |
999 } | |
1000 | |
1001 compile_char_class (cc, Vskip_chars_range_table, &class_bits); | |
1002 | |
1003 p = colonp + 2; | |
1004 } | |
960 else | 1005 else |
961 { | 1006 { |
962 if (c < 0400) | 1007 if (c < 0400) |
963 fastmap[c] = 1; | 1008 fastmap[c] = 1; |
964 else | 1009 else |
969 } | 1014 } |
970 | 1015 |
971 /* #### Not in FSF 21.1 */ | 1016 /* #### Not in FSF 21.1 */ |
972 if (syntaxp && fastmap['-'] != 0) | 1017 if (syntaxp && fastmap['-'] != 0) |
973 fastmap[' '] = 1; | 1018 fastmap[' '] = 1; |
974 | |
975 /* If ^ was the first character, complement the fastmap. | |
976 We don't complement the range table, however; we just use negate | |
977 in the comparisons below. */ | |
978 | |
979 if (negate) | |
980 for (i = 0; i < (int) (sizeof (fastmap)); i++) | |
981 fastmap[i] ^= 1; | |
982 | 1019 |
983 { | 1020 { |
984 Charbpos start_point = BUF_PT (buf); | 1021 Charbpos start_point = BUF_PT (buf); |
985 Charbpos pos = start_point; | 1022 Charbpos pos = start_point; |
986 Charbpos pos_byte = BYTE_BUF_PT (buf); | 1023 Charbpos pos_byte = BYTE_BUF_PT (buf); |
994 { | 1031 { |
995 if (pos < limit) | 1032 if (pos < limit) |
996 while (fastmap[(unsigned char) | 1033 while (fastmap[(unsigned char) |
997 syntax_code_spec | 1034 syntax_code_spec |
998 [(int) SYNTAX_FROM_CACHE | 1035 [(int) SYNTAX_FROM_CACHE |
999 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) | 1036 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]] |
1037 != negate) | |
1000 { | 1038 { |
1001 pos++; | 1039 pos++; |
1002 INC_BYTEBPOS (buf, pos_byte); | 1040 INC_BYTEBPOS (buf, pos_byte); |
1003 if (pos >= limit) | 1041 if (pos >= limit) |
1004 break; | 1042 break; |
1011 { | 1049 { |
1012 Charbpos savepos = pos_byte; | 1050 Charbpos savepos = pos_byte; |
1013 pos--; | 1051 pos--; |
1014 DEC_BYTEBPOS (buf, pos_byte); | 1052 DEC_BYTEBPOS (buf, pos_byte); |
1015 UPDATE_SYNTAX_CACHE_BACKWARD (scache, pos); | 1053 UPDATE_SYNTAX_CACHE_BACKWARD (scache, pos); |
1016 if (!fastmap[(unsigned char) | 1054 if (fastmap[(unsigned char) |
1017 syntax_code_spec | 1055 syntax_code_spec |
1018 [(int) SYNTAX_FROM_CACHE | 1056 [(int) SYNTAX_FROM_CACHE |
1019 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) | 1057 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]] |
1058 == negate) | |
1020 { | 1059 { |
1021 pos++; | 1060 pos++; |
1022 pos_byte = savepos; | 1061 pos_byte = savepos; |
1023 break; | 1062 break; |
1024 } | 1063 } |
1025 } | 1064 } |
1026 } | 1065 } |
1027 } | 1066 } |
1028 else | 1067 else |
1029 { | 1068 { |
1069 struct buffer *lispbuf = buf; | |
1070 | |
1071 #define CLASS_BIT_CHECK(c) \ | |
1072 (class_bits && ((class_bits & BIT_ALPHA && ISALPHA (c)) \ | |
1073 || (class_bits & BIT_SPACE && ISSPACE (c)) \ | |
1074 || (class_bits & BIT_PUNCT && ISPUNCT (c)) \ | |
1075 || (class_bits & BIT_WORD && ISWORD (c)) \ | |
1076 || (NILP (buf->case_fold_search) ? \ | |
1077 ((class_bits & BIT_UPPER && ISUPPER (c)) \ | |
1078 || (class_bits & BIT_LOWER && ISLOWER (c))) \ | |
1079 : (class_bits & (BIT_UPPER | BIT_LOWER) \ | |
1080 && !NOCASEP (buf, c))))) | |
1030 if (forwardp) | 1081 if (forwardp) |
1031 { | 1082 { |
1032 while (pos < limit) | 1083 while (pos < limit) |
1033 { | 1084 { |
1034 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); | 1085 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
1035 if ((ch < 0400) ? fastmap[ch] : | 1086 |
1036 (NILP (Fget_range_table (make_fixnum (ch), | 1087 if ((ch < countof (fastmap) ? fastmap[ch] |
1037 Vskip_chars_range_table, | 1088 : (CLASS_BIT_CHECK (ch) || |
1038 Qnil)) | 1089 (EQ (Qt, Fget_range_table (make_fixnum (ch), |
1039 == negate)) | 1090 Vskip_chars_range_table, |
1091 Qnil))))) | |
1092 != negate) | |
1040 { | 1093 { |
1041 pos++; | 1094 pos++; |
1042 INC_BYTEBPOS (buf, pos_byte); | 1095 INC_BYTEBPOS (buf, pos_byte); |
1043 } | 1096 } |
1044 else | 1097 else |
1052 Charbpos prev_pos_byte = pos_byte; | 1105 Charbpos prev_pos_byte = pos_byte; |
1053 Ichar ch; | 1106 Ichar ch; |
1054 | 1107 |
1055 DEC_BYTEBPOS (buf, prev_pos_byte); | 1108 DEC_BYTEBPOS (buf, prev_pos_byte); |
1056 ch = BYTE_BUF_FETCH_CHAR (buf, prev_pos_byte); | 1109 ch = BYTE_BUF_FETCH_CHAR (buf, prev_pos_byte); |
1057 if ((ch < 0400) ? fastmap[ch] : | 1110 if ((ch < countof (fastmap) ? fastmap[ch] |
1058 (NILP (Fget_range_table (make_fixnum (ch), | 1111 : (CLASS_BIT_CHECK (ch) || |
1059 Vskip_chars_range_table, | 1112 (EQ (Qt, Fget_range_table (make_fixnum (ch), |
1060 Qnil)) | 1113 Vskip_chars_range_table, |
1061 == negate)) | 1114 Qnil))))) |
1115 != negate) | |
1062 { | 1116 { |
1063 pos--; | 1117 pos--; |
1064 pos_byte = prev_pos_byte; | 1118 pos_byte = prev_pos_byte; |
1065 } | 1119 } |
1066 else | 1120 else |