changeset 5647:1d9f603e9125

Turn on character classes in regex.c by default; test them in regexp-tests.el 2012-04-21 Aidan Kehoe <kehoea@parhasard.net> * regex.h (RE_SYNTAX_EMACS): Turn on character classes ([:alnum:] and friends) by default. This implementation is incomplete, am working on a version that handles non-ASCII characters correctly. tests/ChangeLog addition: 2012-04-21 Aidan Kehoe <kehoea@parhasard.net> * automated/regexp-tests.el: * automated/regexp-tests.el (Assert-char-class): Test the character classes functionality that was always in regex.c but that has only just been turned on. These tests pass on GNU Emacs 24.0.94.2.
author Aidan Kehoe <kehoea@parhasard.net>
date Sat, 21 Apr 2012 09:41:27 +0100
parents 7aa144d1404b
children 3f4a234f4672
files src/ChangeLog src/regex.h tests/ChangeLog tests/automated/regexp-tests.el
diffstat 4 files changed, 178 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Sat Apr 14 21:18:11 2012 +0100
+++ b/src/ChangeLog	Sat Apr 21 09:41:27 2012 +0100
@@ -1,3 +1,10 @@
+2012-04-21  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* regex.h (RE_SYNTAX_EMACS):
+	Turn on character classes ([:alnum:] and friends) by default. This
+	implementation is incomplete, am working on a version that handles
+	non-ASCII characters correctly.
+
 2012-02-12  Vin Shelton  <acs@xemacs.org>
 
 	* sysproc.h: As of Cygwin 1.7.10, /usr/include/process.h has moved
--- a/src/regex.h	Sat Apr 14 21:18:11 2012 +0100
+++ b/src/regex.h	Sat Apr 21 09:41:27 2012 +0100
@@ -193,7 +193,7 @@
    (The [[[ comments delimit what gets put into the Texinfo file, so
    don't delete them!)  */
 /* [[[begin syntaxes]]] */
-#define RE_SYNTAX_EMACS RE_INTERVALS
+#define RE_SYNTAX_EMACS (RE_INTERVALS | RE_CHAR_CLASSES)
 
 #define RE_SYNTAX_AWK							\
   (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL			\
--- a/tests/ChangeLog	Sat Apr 14 21:18:11 2012 +0100
+++ b/tests/ChangeLog	Sat Apr 21 09:41:27 2012 +0100
@@ -1,3 +1,11 @@
+2012-04-21  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* automated/regexp-tests.el:
+	* automated/regexp-tests.el (Assert-char-class):
+	Test the character classes functionality that was always in
+	regex.c but that has only just been turned on. These tests pass on
+	GNU Emacs 24.0.94.2.
+
 2012-01-14  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* automated/lisp-tests.el:
--- a/tests/automated/regexp-tests.el	Sat Apr 14 21:18:11 2012 +0100
+++ b/tests/automated/regexp-tests.el	Sat Apr 21 09:41:27 2012 +0100
@@ -596,3 +596,165 @@
 (Assert (eql (string-match "[\x7f\x80\x9f]" "\x80") 0))
 (Assert (eql (string-match "[\x7e\x80-\x9f]" "\x80") 0))
 (Assert (eql (string-match "[\x7f\x81-\x9f]" "\x81") 0))
+
+;; Test character classes ([:alpha:] and friends) inside [...] and [^...].
+(macrolet
+    ((Assert-char-class (class matching-char non-matching-char)
+       (if (and (not (featurep 'mule))
+                (or (eq (car-safe matching-char) 'decode-char)
+                    (eq (car-safe non-matching-char) 'decode-char)))
+           ;; A char argument written as a (decode-char ...) form needs Mule;
+           ;; without Mule, expand to nil so no assertions are generated for it.
+           (return-from Assert-char-class nil)
+         (setq matching-char (eval matching-char) ; evaluate the char forms now,
+               non-matching-char (eval non-matching-char))) ; at expansion time
+       `(progn
+         (Assert (eql (string-match ,(concat "[" class "]") ; class hits at start
+                                      ,(concat (string matching-char)
+                                               (string non-matching-char)))
+                      0))
+         (Assert (eql (string-match ,(concat "[" class class class "]") ; repeated
+                                      ,(concat (string matching-char)
+                                               (string non-matching-char)))
+                      0))
+         (Assert (eql (string-match ,(concat "[^" class "]") ; negation at start
+                                      ,(concat (string non-matching-char)
+                                               (string matching-char)))
+                      0))
+         (Assert (eql (string-match ,(concat "[^" class class class "]")
+                                      ,(concat (string non-matching-char)
+                                               (string matching-char)))
+                      0))
+         (Assert (eql (string-match ,(concat "[" class "]") ; skips non-matching
+                                      ,(concat (string non-matching-char)
+                                               (string matching-char)))
+                      1))
+         (Assert (eql (string-match ,(concat "[" class class class "]")
+                                      ,(concat (string non-matching-char)
+                                               (string matching-char)))
+                      1))
+         (Assert (eql (string-match ,(concat "[^" class "]") ; skips matching
+                                      ,(concat (string matching-char)
+                                               (string non-matching-char)))
+                      1))
+         (Assert (eql (string-match ,(concat "[^" class class class "]")
+                                      ,(concat (string matching-char)
+                                               (string non-matching-char)))
+                      1))
+         (Assert (null (string-match ,(concat "[" class "]") ; no match at all
+                                     ,(string non-matching-char))))
+         (Assert (null (string-match ,(concat "[^" class "]") ; no match at all
+                                     ,(string matching-char))))
+         (Assert (null (string-match ,(concat "[^" class ; class and char excluded
+                                              (string non-matching-char) "]")
+                                     ,(concat (string matching-char)
+                                              (string non-matching-char))))))))
+  (Assert-char-class "[:alpha:]" ?a ?0)
+  (Assert-char-class "[:alpha:]" ?z ?9)
+  (Assert-char-class "[:alpha:]" ?A ?0)
+  (Assert-char-class "[:alpha:]" ?Z ?9)
+  (Assert-char-class "[:alpha:]" ?b ?\x00)
+  (Assert-char-class "[:alpha:]" ?c ?\x09)
+  (Assert-char-class "[:alpha:]" ?d ?\ )
+  (Assert-char-class "[:alpha:]" ?e ?\x7f)
+
+  (Assert-char-class "[:alnum:]" ?a ?.)
+  (Assert-char-class "[:alnum:]" ?z ?')
+  (Assert-char-class "[:alnum:]" ?A ?/)
+  (Assert-char-class "[:alnum:]" ?Z ?!)
+  (Assert-char-class "[:alnum:]" ?0 ?,)
+  (Assert-char-class "[:alnum:]" ?9 ?$)
+
+  (Assert-char-class "[:alnum:]" ?b ?\x00)
+  (Assert-char-class "[:alnum:]" ?c ?\x09)
+  (Assert-char-class "[:alnum:]" ?d ?\   )
+  (Assert-char-class "[:alnum:]" ?e ?\x7f)
+
+  (let ((case-fold-search nil))           ; case-sensitive matching
+    (Assert-char-class "[:upper:]" ?A ?a)
+    (Assert-char-class "[:upper:]" ?Z ?z)
+    (Assert-char-class "[:upper:]" ?B ?0)
+    (Assert-char-class "[:upper:]" ?C ?9)
+    (Assert-char-class "[:upper:]" ?D ?\x00)
+    (Assert-char-class "[:upper:]" ?E ?\x09)
+    (Assert-char-class "[:upper:]" ?F ?\ )
+    (Assert-char-class "[:upper:]" ?G ?\x7f)
+
+    (Assert-char-class "[:lower:]" ?a ?A)
+    (Assert-char-class "[:lower:]" ?z ?Z)
+    (Assert-char-class "[:lower:]" ?b ?0)
+    (Assert-char-class "[:lower:]" ?c ?9)
+    (Assert-char-class "[:lower:]" ?d ?\x00)
+    (Assert-char-class "[:lower:]" ?e ?\x09)
+    (Assert-char-class "[:lower:]" ?f ? )
+    (Assert-char-class "[:lower:]" ?g ?\x7f))
+
+  (let ((case-fold-search t))             ; case-insensitive matching
+    ;; Under case folding, [:upper:] and [:lower:] should match either case.
+    ;; These currently fail: the buffer's case table is not taken into account.
+    (Assert-char-class "[:upper:]" ?a ?\x00)
+    (Assert-char-class "[:upper:]" ?z ?\x01)
+    (Assert-char-class "[:upper:]" ?b ?{)
+    (Assert-char-class "[:upper:]" ?c ?})
+    (Assert-char-class "[:upper:]" ?d ?<)
+    (Assert-char-class "[:upper:]" ?e ?>)
+    (Assert-char-class "[:upper:]" ?f ?\ )
+    (Assert-char-class "[:upper:]" ?g ?\x7f)
+
+    (Assert-char-class "[:lower:]" ?A ?\x00)
+    (Assert-char-class "[:lower:]" ?Z ?\x01)
+    (Assert-char-class "[:lower:]" ?B ?{)
+    (Assert-char-class "[:lower:]" ?C ?})
+    (Assert-char-class "[:lower:]" ?D ?<)
+    (Assert-char-class "[:lower:]" ?E ?>)
+    (Assert-char-class "[:lower:]" ?F ?\ )
+    (Assert-char-class "[:lower:]" ?G ?\x7F))
+
+  (Assert-char-class "[:digit:]" ?0 ?a)
+  (Assert-char-class "[:digit:]" ?9 ?z)
+  (Assert-char-class "[:digit:]" ?1 ?A)
+  (Assert-char-class "[:digit:]" ?2 ?Z)
+  (Assert-char-class "[:digit:]" ?3 ?\x00)
+  (Assert-char-class "[:digit:]" ?4 ?\x09)
+  (Assert-char-class "[:digit:]" ?5 ? )
+  (Assert-char-class "[:digit:]" ?6 ?\x7f)
+
+  (Assert-char-class "[:xdigit:]" ?0 ?g)
+  (Assert-char-class "[:xdigit:]" ?9 ?G)
+  (Assert-char-class "[:xdigit:]" ?A ?{)
+  (Assert-char-class "[:xdigit:]" ?a ?})
+  (Assert-char-class "[:xdigit:]" ?1 ? )
+  (Assert-char-class "[:xdigit:]" ?2 ?Z)
+  (Assert-char-class "[:xdigit:]" ?3 ?\x00)
+  (Assert-char-class "[:xdigit:]" ?4 ?\x09)
+  (Assert-char-class "[:xdigit:]" ?5 ?\x7f)
+  (Assert-char-class "[:xdigit:]" ?6 ?z)
+
+  (Assert-char-class "[:space:]" ?\  ?0)
+  (Assert-char-class "[:space:]" ?\t ?9)
+  (Assert-char-class "[:space:]" ?\  ?A)
+  (Assert-char-class "[:space:]" ?\t ?Z)
+  (Assert-char-class "[:space:]" ?\  ?\x00)
+  (Assert-char-class "[:space:]" ?\  ?\x7f)
+  (Assert-char-class "[:space:]" ?\t ?a)
+  (Assert-char-class "[:space:]" ?\  ?z)
+
+  (Assert-char-class "[:print:]" ?\  ?\x00)
+  (Assert-char-class "[:print:]" ?0 ?\x09)
+  (Assert-char-class "[:print:]" ?9 ?\x7f)
+  (Assert-char-class "[:print:]" ?A ?\x01)
+  (Assert-char-class "[:print:]" ?Z ?\x02)
+  (Assert-char-class "[:print:]" ?B ?\t)
+  (Assert-char-class "[:print:]" ?a ?\x03)
+  (Assert-char-class "[:print:]" ?z ?\x04)
+
+  (Assert-char-class "[:punct:]" ?\( ?0)
+  (Assert-char-class "[:punct:]" ?. ?9)
+  (Assert-char-class "[:punct:]" ?{ ?A)
+  (Assert-char-class "[:punct:]" ?} ?Z)
+  (Assert-char-class "[:punct:]" ?: ?\t)
+  (Assert-char-class "[:punct:]" ?\; ?\x00)
+  (Assert-char-class "[:punct:]" ?< ?\x09)
+  (Assert-char-class "[:punct:]" ?> ?\x7f)
+  (Assert-char-class "[:punct:]" ?= ?a)
+  (Assert-char-class "[:punct:]" ?\? ?z))