diff src/search.c @ 5653:3df910176b6a

Support predefined character classes in #'skip-chars-{forward,backward}, too src/ChangeLog addition: 2012-05-04 Aidan Kehoe <kehoea@parhasard.net> * regex.c: Move various #defines and enums to regex.h, since we need them when implementing #'skip-chars-{backward,forward}. * regex.c (re_wctype): * regex.c (re_iswctype): Be more robust about case insensitivity here. * regex.c (regex_compile): * regex.h: * regex.h (RE_ISWCTYPE_ARG_DECL): * regex.h (CHAR_CLASS_MAX_LENGTH): * search.c (skip_chars): Implement support for the predefined character classes in this function. tests/ChangeLog addition: 2012-05-04 Aidan Kehoe <kehoea@parhasard.net> * automated/regexp-tests.el (equal): * automated/regexp-tests.el (Assert-char-class): Correct a stray parenthesis; add tests for the predefined character classes with #'skip-chars-{forward,backward}; update the tests to reflect some changed design decisions on my part. man/ChangeLog addition: 2012-05-04 Aidan Kehoe <kehoea@parhasard.net> * lispref/searching.texi (Regular Expressions): * lispref/searching.texi (Syntax of Regexps): * lispref/searching.texi (Char Classes): * lispref/searching.texi (Regexp Example): Document the predefined character classes in this file.
author Aidan Kehoe <kehoea@parhasard.net>
date Fri, 04 May 2012 21:12:02 +0100
parents d026b665014f
children 6e5a7278f9bf
line wrap: on
line diff
--- a/src/search.c	Wed Apr 25 20:25:33 2012 +0100
+++ b/src/search.c	Fri May 04 21:12:02 2012 +0100
@@ -887,9 +887,9 @@
      a range table. */
   unsigned char fastmap[0400];
   int negate = 0;
-  REGISTER int i;
   Charbpos limit;
   struct syntax_cache *scache;
+  Bitbyte class_bits = 0;
   
   if (NILP (lim))
     limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
@@ -957,6 +957,51 @@
 				  Vskip_chars_range_table);
 	      INC_IBYTEPTR (p);
 	    }
+          else if ('[' == c && p != pend && *p == ':')
+            {
+              Ibyte *colonp;
+              Extbyte *classname;
+              int ch = 0;
+              re_wctype_t cc;
+
+              INC_IBYTEPTR (p);
+
+              if (p == pend)
+                {
+                  fastmap ['['] = fastmap[':'] = 1;
+                  break;
+                }
+
+              colonp = memchr (p, ':', pend - p);
+              if (NULL == colonp || (colonp + 1) == pend || colonp[1] != ']')
+                {
+                  fastmap ['['] = fastmap[':'] = 1;
+                  continue;
+                }
+
+              classname = alloca_extbytes (colonp - p + 1);
+              memmove (classname, p, colonp - p);
+              classname[colonp - p] = '\0';
+              cc = re_wctype (classname);
+                  
+              if (cc == RECC_ERROR)
+                {
+                  invalid_argument ("Invalid character class",
+                                    build_extstring (classname, Qbinary));
+                }
+
+              for (ch = 0; ch < countof (fastmap); ++ch)
+                {
+                  if (re_iswctype (ch, cc, buf))
+                    {
+                      fastmap[ch] = 1;
+                    }
+                }
+
+              compile_char_class (cc, Vskip_chars_range_table, &class_bits);
+
+              p = colonp + 2;
+            }
 	  else
 	    {
 	      if (c < 0400)
@@ -972,14 +1017,6 @@
   if (syntaxp && fastmap['-'] != 0)
     fastmap[' '] = 1;
 
-  /* If ^ was the first character, complement the fastmap.
-     We don't complement the range table, however; we just use negate
-     in the comparisons below. */
-
-  if (negate)
-    for (i = 0; i < (int) (sizeof (fastmap)); i++)
-      fastmap[i] ^= 1;
-
   {
     Charbpos start_point = BUF_PT (buf);
     Charbpos pos = start_point;
@@ -996,7 +1033,8 @@
 	      while (fastmap[(unsigned char)
 			     syntax_code_spec
 			     [(int) SYNTAX_FROM_CACHE
-			      (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]])
+			      (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]
+                     != negate)
 		{
 		  pos++;
 		  INC_BYTEBPOS (buf, pos_byte);
@@ -1013,10 +1051,11 @@
 		pos--;
 		DEC_BYTEBPOS (buf, pos_byte);
 		UPDATE_SYNTAX_CACHE_BACKWARD (scache, pos);
-		if (!fastmap[(unsigned char)
-			     syntax_code_spec
-			     [(int) SYNTAX_FROM_CACHE
-			      (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]])
+		if (fastmap[(unsigned char)
+                            syntax_code_spec
+                            [(int) SYNTAX_FROM_CACHE
+                             (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]
+                    == negate)
 		  {
 		    pos++;
 		    pos_byte = savepos;
@@ -1027,16 +1066,30 @@
       }
     else
       {
+        struct buffer *lispbuf = buf;
+
+#define CLASS_BIT_CHECK(c)                                              \
+        (class_bits && ((class_bits & BIT_ALPHA && ISALPHA (c))         \
+                        || (class_bits & BIT_SPACE && ISSPACE (c))      \
+                        || (class_bits & BIT_PUNCT && ISPUNCT (c))      \
+                        || (class_bits & BIT_WORD && ISWORD (c))        \
+                        || (NILP (buf->case_fold_search) ?              \
+                            ((class_bits & BIT_UPPER && ISUPPER (c))    \
+                             || (class_bits & BIT_LOWER && ISLOWER (c))) \
+                            : (class_bits & (BIT_UPPER | BIT_LOWER)     \
+                               && !NOCASEP (buf, c)))))
 	if (forwardp)
 	  {
 	    while (pos < limit)
 	      {
 		Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte);
-		if ((ch < 0400) ? fastmap[ch] :
-		    (NILP (Fget_range_table (make_fixnum (ch),
-					     Vskip_chars_range_table,
-					     Qnil))
-		     == negate))
+
+                if ((ch < countof (fastmap) ? fastmap[ch]
+                     : (CLASS_BIT_CHECK (ch) ||
+                        (EQ (Qt, Fget_range_table (make_fixnum (ch),
+                                                   Vskip_chars_range_table,
+                                                   Qnil)))))
+                    != negate)
 		  {
 		    pos++;
 		    INC_BYTEBPOS (buf, pos_byte);
@@ -1054,11 +1107,12 @@
 
 		DEC_BYTEBPOS (buf, prev_pos_byte);
 		ch = BYTE_BUF_FETCH_CHAR (buf, prev_pos_byte);
-		if ((ch < 0400) ? fastmap[ch] :
-		    (NILP (Fget_range_table (make_fixnum (ch),
-					     Vskip_chars_range_table,
-					     Qnil))
-		     == negate))
+                if ((ch < countof (fastmap) ? fastmap[ch]
+                     : (CLASS_BIT_CHECK (ch) ||
+                        (EQ (Qt, Fget_range_table (make_fixnum (ch),
+                                                   Vskip_chars_range_table,
+                                                   Qnil)))))
+                    != negate)
 		  {
 		    pos--;
 		    pos_byte = prev_pos_byte;