Mercurial > hg > xemacs-beta
diff src/search.c @ 5653:3df910176b6a
Support predefined character classes in #'skip-chars-{forward,backward}, too
src/ChangeLog addition:
2012-05-04 Aidan Kehoe <kehoea@parhasard.net>
* regex.c:
Move various #defines and enums to regex.h, since we need them
when implementing #'skip-chars-{backward,forward}.
* regex.c (re_wctype):
* regex.c (re_iswctype):
Be more robust about case insensitivity here.
* regex.c (regex_compile):
* regex.h:
* regex.h (RE_ISWCTYPE_ARG_DECL):
* regex.h (CHAR_CLASS_MAX_LENGTH):
* search.c (skip_chars):
Implement support for the predefined character classes in
skip_chars.
tests/ChangeLog addition:
2012-05-04 Aidan Kehoe <kehoea@parhasard.net>
* automated/regexp-tests.el (equal):
* automated/regexp-tests.el (Assert-char-class):
Correct a stray parenthesis; add tests for the predefined
character classes with #'skip-chars-{forward,backward}; update the
tests to reflect some changed design decisions on my part.
man/ChangeLog addition:
2012-05-04 Aidan Kehoe <kehoea@parhasard.net>
* lispref/searching.texi (Regular Expressions):
* lispref/searching.texi (Syntax of Regexps):
* lispref/searching.texi (Char Classes):
* lispref/searching.texi (Regexp Example):
Document the predefined character classes in this file.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Fri, 04 May 2012 21:12:02 +0100 |
parents | d026b665014f |
children | 6e5a7278f9bf |
line wrap: on
line diff
--- a/src/search.c Wed Apr 25 20:25:33 2012 +0100 +++ b/src/search.c Fri May 04 21:12:02 2012 +0100 @@ -887,9 +887,9 @@ a range table. */ unsigned char fastmap[0400]; int negate = 0; - REGISTER int i; Charbpos limit; struct syntax_cache *scache; + Bitbyte class_bits = 0; if (NILP (lim)) limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf); @@ -957,6 +957,51 @@ Vskip_chars_range_table); INC_IBYTEPTR (p); } + else if ('[' == c && p != pend && *p == ':') + { + Ibyte *colonp; + Extbyte *classname; + int ch = 0; + re_wctype_t cc; + + INC_IBYTEPTR (p); + + if (p == pend) + { + fastmap ['['] = fastmap[':'] = 1; + break; + } + + colonp = memchr (p, ':', pend - p); + if (NULL == colonp || (colonp + 1) == pend || colonp[1] != ']') + { + fastmap ['['] = fastmap[':'] = 1; + continue; + } + + classname = alloca_extbytes (colonp - p + 1); + memmove (classname, p, colonp - p); + classname[colonp - p] = '\0'; + cc = re_wctype (classname); + + if (cc == RECC_ERROR) + { + invalid_argument ("Invalid character class", + build_extstring (classname, Qbinary)); + } + + for (ch = 0; ch < countof (fastmap); ++ch) + { + if (re_iswctype (ch, cc, buf)) + { + fastmap[ch] = 1; + } + } + + compile_char_class (cc, Vskip_chars_range_table, &class_bits); + + p = colonp + 2; + } else { if (c < 0400) @@ -972,14 +1017,6 @@ if (syntaxp && fastmap['-'] != 0) fastmap[' '] = 1; - /* If ^ was the first character, complement the fastmap. - We don't complement the range table, however; we just use negate - in the comparisons below. 
*/ - - if (negate) - for (i = 0; i < (int) (sizeof (fastmap)); i++) - fastmap[i] ^= 1; - { Charbpos start_point = BUF_PT (buf); Charbpos pos = start_point; @@ -996,7 +1033,8 @@ while (fastmap[(unsigned char) syntax_code_spec [(int) SYNTAX_FROM_CACHE - (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) + (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]] + != negate) { pos++; INC_BYTEBPOS (buf, pos_byte); @@ -1013,10 +1051,11 @@ pos--; DEC_BYTEBPOS (buf, pos_byte); UPDATE_SYNTAX_CACHE_BACKWARD (scache, pos); - if (!fastmap[(unsigned char) - syntax_code_spec - [(int) SYNTAX_FROM_CACHE - (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) + if (fastmap[(unsigned char) + syntax_code_spec + [(int) SYNTAX_FROM_CACHE + (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]] + == negate) { pos++; pos_byte = savepos; @@ -1027,16 +1066,30 @@ } else { + struct buffer *lispbuf = buf; + +#define CLASS_BIT_CHECK(c) \ + (class_bits && ((class_bits & BIT_ALPHA && ISALPHA (c)) \ + || (class_bits & BIT_SPACE && ISSPACE (c)) \ + || (class_bits & BIT_PUNCT && ISPUNCT (c)) \ + || (class_bits & BIT_WORD && ISWORD (c)) \ + || (NILP (buf->case_fold_search) ? \ + ((class_bits & BIT_UPPER && ISUPPER (c)) \ + || (class_bits & BIT_LOWER && ISLOWER (c))) \ + : (class_bits & (BIT_UPPER | BIT_LOWER) \ + && !NOCASEP (buf, c))))) if (forwardp) { while (pos < limit) { Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); - if ((ch < 0400) ? fastmap[ch] : - (NILP (Fget_range_table (make_fixnum (ch), - Vskip_chars_range_table, - Qnil)) - == negate)) + + if ((ch < countof (fastmap) ? fastmap[ch] + : (CLASS_BIT_CHECK (ch) || + (EQ (Qt, Fget_range_table (make_fixnum (ch), + Vskip_chars_range_table, + Qnil))))) + != negate) { pos++; INC_BYTEBPOS (buf, pos_byte); @@ -1054,11 +1107,12 @@ DEC_BYTEBPOS (buf, prev_pos_byte); ch = BYTE_BUF_FETCH_CHAR (buf, prev_pos_byte); - if ((ch < 0400) ? fastmap[ch] : - (NILP (Fget_range_table (make_fixnum (ch), - Vskip_chars_range_table, - Qnil)) - == negate)) + if ((ch < countof (fastmap) ? 
fastmap[ch] + : (CLASS_BIT_CHECK (ch) || + (EQ (Qt, Fget_range_table (make_fixnum (ch), + Vskip_chars_range_table, + Qnil))))) + != negate) { pos--; pos_byte = prev_pos_byte;