comparison src/search.c @ 5653:3df910176b6a

Support predefined character classes in #'skip-chars-{forward,backward}, too

src/ChangeLog addition:

2012-05-04 Aidan Kehoe <kehoea@parhasard.net>

  * regex.c: Move various #defines and enums to regex.h, since we need
  them when implementing #'skip-chars-{backward,forward}.
  * regex.c (re_wctype):
  * regex.c (re_iswctype):
  Be more robust about case insensitivity here.
  * regex.c (regex_compile):
  * regex.h:
  * regex.h (RE_ISWCTYPE_ARG_DECL):
  * regex.h (CHAR_CLASS_MAX_LENGTH):
  * search.c (skip_chars):
  Implement support for the predefined character classes in this function.

tests/ChangeLog addition:

2012-05-04 Aidan Kehoe <kehoea@parhasard.net>

  * automated/regexp-tests.el (equal):
  * automated/regexp-tests.el (Assert-char-class):
  Correct a stray parenthesis; add tests for the predefined character
  classes with #'skip-chars-{forward,backward}; update the tests to
  reflect some changed design decisions on my part.

man/ChangeLog addition:

2012-05-04 Aidan Kehoe <kehoea@parhasard.net>

  * lispref/searching.texi (Regular Expressions):
  * lispref/searching.texi (Syntax of Regexps):
  * lispref/searching.texi (Char Classes):
  * lispref/searching.texi (Regexp Example):
  Document the predefined character classes in this file.
author Aidan Kehoe <kehoea@parhasard.net>
date Fri, 04 May 2012 21:12:02 +0100
parents d026b665014f
children 6e5a7278f9bf
comparison
equal deleted inserted replaced
5649:d026b665014f 5653:3df910176b6a
885 REGISTER Ichar c; 885 REGISTER Ichar c;
886 /* We store the first 256 chars in an array here and the rest in 886 /* We store the first 256 chars in an array here and the rest in
887 a range table. */ 887 a range table. */
888 unsigned char fastmap[0400]; 888 unsigned char fastmap[0400];
889 int negate = 0; 889 int negate = 0;
890 REGISTER int i;
891 Charbpos limit; 890 Charbpos limit;
892 struct syntax_cache *scache; 891 struct syntax_cache *scache;
892 Bitbyte class_bits = 0;
893 893
894 if (NILP (lim)) 894 if (NILP (lim))
895 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf); 895 limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
896 else 896 else
897 { 897 {
955 if (c <= cend) 955 if (c <= cend)
956 Fput_range_table (make_fixnum (c), make_fixnum (cend), Qt, 956 Fput_range_table (make_fixnum (c), make_fixnum (cend), Qt,
957 Vskip_chars_range_table); 957 Vskip_chars_range_table);
958 INC_IBYTEPTR (p); 958 INC_IBYTEPTR (p);
959 } 959 }
960 else if ('[' == c && p != pend && *p == ':')
961 {
962 Ibyte *colonp;
963 Extbyte *classname;
964 int ch = 0;
965 re_wctype_t cc;
966
967 INC_IBYTEPTR (p);
968
969 if (p == pend)
970 {
971 fastmap ['['] = fastmap[':'] = 1;
972 break;
973 }
974
975 colonp = memchr (p, ':', pend - p);
976 if (NULL == colonp || (colonp + 1) == pend || colonp[1] != ']')
977 {
978 fastmap ['['] = fastmap[':'] = 1;
979 continue;
980 }
981
982 classname = alloca_extbytes (colonp - p + 1);
983 memmove (classname, p, colonp - p);
984 classname[colonp - p] = '\0';
985 cc = re_wctype (classname);
986
987 if (cc == RECC_ERROR)
988 {
989 invalid_argument ("Invalid character class",
990 build_extstring (classname, Qbinary));
991 }
992
993 for (ch = 0; ch < countof (fastmap); ++ch)
994 {
995 if (re_iswctype (ch, cc, buf))
996 {
997 fastmap[ch] = 1;
998 }
999 }
1000
1001 compile_char_class (cc, Vskip_chars_range_table, &class_bits);
1002
1003 p = colonp + 2;
1004 }
960 else 1005 else
961 { 1006 {
962 if (c < 0400) 1007 if (c < 0400)
963 fastmap[c] = 1; 1008 fastmap[c] = 1;
964 else 1009 else
969 } 1014 }
970 1015
971 /* #### Not in FSF 21.1 */ 1016 /* #### Not in FSF 21.1 */
972 if (syntaxp && fastmap['-'] != 0) 1017 if (syntaxp && fastmap['-'] != 0)
973 fastmap[' '] = 1; 1018 fastmap[' '] = 1;
974
975 /* If ^ was the first character, complement the fastmap.
976 We don't complement the range table, however; we just use negate
977 in the comparisons below. */
978
979 if (negate)
980 for (i = 0; i < (int) (sizeof (fastmap)); i++)
981 fastmap[i] ^= 1;
982 1019
983 { 1020 {
984 Charbpos start_point = BUF_PT (buf); 1021 Charbpos start_point = BUF_PT (buf);
985 Charbpos pos = start_point; 1022 Charbpos pos = start_point;
986 Charbpos pos_byte = BYTE_BUF_PT (buf); 1023 Charbpos pos_byte = BYTE_BUF_PT (buf);
994 { 1031 {
995 if (pos < limit) 1032 if (pos < limit)
996 while (fastmap[(unsigned char) 1033 while (fastmap[(unsigned char)
997 syntax_code_spec 1034 syntax_code_spec
998 [(int) SYNTAX_FROM_CACHE 1035 [(int) SYNTAX_FROM_CACHE
999 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) 1036 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]
1037 != negate)
1000 { 1038 {
1001 pos++; 1039 pos++;
1002 INC_BYTEBPOS (buf, pos_byte); 1040 INC_BYTEBPOS (buf, pos_byte);
1003 if (pos >= limit) 1041 if (pos >= limit)
1004 break; 1042 break;
1011 { 1049 {
1012 Charbpos savepos = pos_byte; 1050 Charbpos savepos = pos_byte;
1013 pos--; 1051 pos--;
1014 DEC_BYTEBPOS (buf, pos_byte); 1052 DEC_BYTEBPOS (buf, pos_byte);
1015 UPDATE_SYNTAX_CACHE_BACKWARD (scache, pos); 1053 UPDATE_SYNTAX_CACHE_BACKWARD (scache, pos);
1016 if (!fastmap[(unsigned char) 1054 if (fastmap[(unsigned char)
1017 syntax_code_spec 1055 syntax_code_spec
1018 [(int) SYNTAX_FROM_CACHE 1056 [(int) SYNTAX_FROM_CACHE
1019 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) 1057 (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]
1058 == negate)
1020 { 1059 {
1021 pos++; 1060 pos++;
1022 pos_byte = savepos; 1061 pos_byte = savepos;
1023 break; 1062 break;
1024 } 1063 }
1025 } 1064 }
1026 } 1065 }
1027 } 1066 }
1028 else 1067 else
1029 { 1068 {
1069 struct buffer *lispbuf = buf;
1070
1071 #define CLASS_BIT_CHECK(c) \
1072 (class_bits && ((class_bits & BIT_ALPHA && ISALPHA (c)) \
1073 || (class_bits & BIT_SPACE && ISSPACE (c)) \
1074 || (class_bits & BIT_PUNCT && ISPUNCT (c)) \
1075 || (class_bits & BIT_WORD && ISWORD (c)) \
1076 || (NILP (buf->case_fold_search) ? \
1077 ((class_bits & BIT_UPPER && ISUPPER (c)) \
1078 || (class_bits & BIT_LOWER && ISLOWER (c))) \
1079 : (class_bits & (BIT_UPPER | BIT_LOWER) \
1080 && !NOCASEP (buf, c)))))
1030 if (forwardp) 1081 if (forwardp)
1031 { 1082 {
1032 while (pos < limit) 1083 while (pos < limit)
1033 { 1084 {
1034 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); 1085 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte);
1035 if ((ch < 0400) ? fastmap[ch] : 1086
1036 (NILP (Fget_range_table (make_fixnum (ch), 1087 if ((ch < countof (fastmap) ? fastmap[ch]
1037 Vskip_chars_range_table, 1088 : (CLASS_BIT_CHECK (ch) ||
1038 Qnil)) 1089 (EQ (Qt, Fget_range_table (make_fixnum (ch),
1039 == negate)) 1090 Vskip_chars_range_table,
1091 Qnil)))))
1092 != negate)
1040 { 1093 {
1041 pos++; 1094 pos++;
1042 INC_BYTEBPOS (buf, pos_byte); 1095 INC_BYTEBPOS (buf, pos_byte);
1043 } 1096 }
1044 else 1097 else
1052 Charbpos prev_pos_byte = pos_byte; 1105 Charbpos prev_pos_byte = pos_byte;
1053 Ichar ch; 1106 Ichar ch;
1054 1107
1055 DEC_BYTEBPOS (buf, prev_pos_byte); 1108 DEC_BYTEBPOS (buf, prev_pos_byte);
1056 ch = BYTE_BUF_FETCH_CHAR (buf, prev_pos_byte); 1109 ch = BYTE_BUF_FETCH_CHAR (buf, prev_pos_byte);
1057 if ((ch < 0400) ? fastmap[ch] : 1110 if ((ch < countof (fastmap) ? fastmap[ch]
1058 (NILP (Fget_range_table (make_fixnum (ch), 1111 : (CLASS_BIT_CHECK (ch) ||
1059 Vskip_chars_range_table, 1112 (EQ (Qt, Fget_range_table (make_fixnum (ch),
1060 Qnil)) 1113 Vskip_chars_range_table,
1061 == negate)) 1114 Qnil)))))
1115 != negate)
1062 { 1116 {
1063 pos--; 1117 pos--;
1064 pos_byte = prev_pos_byte; 1118 pos_byte = prev_pos_byte;
1065 } 1119 }
1066 else 1120 else