Mercurial > hg > xemacs-beta

--- a/man/ChangeLog	Tue May 01 16:17:42 2012 +0100
+++ b/man/ChangeLog	Fri May 04 21:12:51 2012 +0100
@@ -1,3 +1,11 @@
+2012-05-04  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* lispref/searching.texi (Regular Expressions):
+	* lispref/searching.texi (Syntax of Regexps):
+	* lispref/searching.texi (Char Classes):
+	* lispref/searching.texi (Regexp Example):
+	Document the predefined character classes in this file.
+
 2011-12-30  Aidan Kehoe  <kehoea@parhasard.net>

 	* cl.texi (Top):
--- a/man/lispref/searching.texi	Tue May 01 16:17:42 2012 +0100
+++ b/man/lispref/searching.texi	Fri May 04 21:12:51 2012 +0100
@@ -180,6 +180,7 @@

 @menu
 * Syntax of Regexps::       Rules for writing regular expressions.
+* Char Classes::            Predefined character classes for searching.
 * Regexp Example::          Illustrates regular expression syntax.
 @end menu

@@ -335,6 +336,11 @@
 To include @samp{^} in a set, put it anywhere but at the beginning of
 the set.

+It is also possible to specify named character classes as part of your
+character set; for example, @samp{[:xdigit:]} will match hexadecimal
+digits, @samp{[:nonascii:]} will match characters outside the basic
+ASCII set.  These are documented elsewhere, @pxref{Char Classes}.
+
 @item [^ @dots{} ]
 @cindex @samp{^} in regexp
 @samp{[^} begins a @dfn{complement character set}, which matches any
@@ -604,6 +610,61 @@
 @end example
 @end defun

+@node Char Classes
+@subsection Char Classes
+
+These are the predefined character classes available within regular
+expression character sets, and within @samp{skip-chars-forward} and
+@samp{skip-chars-backward}, @xref{Skipping Characters}.
+
+@table @samp
+@item [:alnum:]
+This matches any ASCII letter or digit, or any non-ASCII character
+with word syntax.
+@item [:alpha:]
+This matches any ASCII letter, or any non-ASCII character with word syntax.
+@item [:ascii:]
+This matches any character with a numeric value below @samp{?\x80}.
+@item [:blank:]
+This matches space or tab.
+@item [:cntrl:]
+This matches any character with a numeric value below @samp{?\x20},
+the code for space; these are the ASCII control characters.
+@item [:digit:]
+This matches the characters @samp{?0} to @samp{?9}, inclusive.
+@item [:graph:]
+This matches ``graphic'' characters, with numeric values greater than
+@samp{?\x20}, exclusive of @samp{?\x7f}, the delete character.
+@item [:lower:]
+This matches minuscule characters, or any character with case
+information if @samp{case-fold-search} is non-nil.
+@item [:multibyte:]
+This matches non-ASCII characters, that is, any character with a
+numeric value above @samp{?\x7f}.
+@item [:nonascii:]
+This is equivalent to @samp{[:multibyte:]}.
+@item [:print:]
+This is equivalent to [:graph:], but also matches the space character,
+@samp{?\x20}.
+@item [:punct:]
+This matches non-control, non-alphanumeric ASCII characters, or any
+non-ASCII character without word syntax.
+@item [:space:]
+This matches any character with whitespace syntax.
+@item [:unibyte:]
+This is a GNU Emacs extension; in XEmacs it is equivalent to
+@samp{[:ascii:]}. Note that this means it is not equivalent to
+@samp{"\x00-\xff"}, which one might have assumed to be the case.
+@item [:upper:]
+This matches majuscule characters, or any character with case
+information if @samp{case-fold-search} is non-nil.
+@item [:word:]
+This matches any character with word syntax.
+@item [:xdigit:]
+This matches hexadecimal digits, so the decimal digits @samp{0-9} and the
+letters @samp{a-F} and @samp{A-F}.
+@end table
+
 @node Regexp Example
 @subsection Complex Regexp Example
--- a/src/ChangeLog	Tue May 01 16:17:42 2012 +0100
+++ b/src/ChangeLog	Fri May 04 21:12:51 2012 +0100
@@ -1,3 +1,19 @@
+2012-05-04  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* regex.c:
+	Move various #defines and enums to regex.h, since we need them
+	when implementing #'skip-chars-{backward,forward}.
+	* regex.c (re_wctype):
+	* regex.c (re_iswctype):
+	Be more robust about case insensitivity here.
+	* regex.c (regex_compile):
+	* regex.h:
+	* regex.h (RE_ISWCTYPE_ARG_DECL):
+	* regex.h (CHAR_CLASS_MAX_LENGTH):
+	* search.c (skip_chars):
+	Implement support for the predefined character classes in this
+	function.
+
 2012-04-25  Aidan Kehoe  <kehoea@parhasard.net>

 	* search.c (string_match_1): Actually use the POSIX argument here,
--- a/src/regex.c	Tue May 01 16:17:42 2012 +0100
+++ b/src/regex.c	Fri May 04 21:12:51 2012 +0100
@@ -178,51 +178,7 @@
 /* isalpha etc. are used for the character classes.  */
 #include <ctype.h>

-#ifdef emacs
-
-/* 1 if C is an ASCII character.  */
-#define ISASCII(c) ((c) < 0x80)
-
-/* 1 if C is a unibyte character.  */
-#define ISUNIBYTE(c) 0
-
-/* The Emacs definitions should not be directly affected by locales.  */
-
-/* In Emacs, these are only used for single-byte characters.  */
-#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
-#define ISCNTRL(c) ((c) < ' ')
-#define ISXDIGIT(c) (ISDIGIT (c) || ((c) >= 'a' && (c) <= 'f')	\
-		     || ((c) >= 'A' && (c) <= 'F'))
-
-/* This is only used for single-byte characters.  */
-#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
-
-/* The rest must handle multibyte characters.  */
-
-#define ISGRAPH(c) ((c) > ' ' && (c) != 0x7f)
-#define ISPRINT(c) ((c) == ' ' || ISGRAPH (c))
-#define ISALPHA(c) (ISASCII (c) ? (((c) >= 'a' && (c) <= 'z')		\
-				   || ((c) >= 'A' && (c) <= 'Z'))	\
-		    : ISWORD (c))
-#define ISALNUM(c) (ISALPHA (c) || ISDIGIT (c))
-
-#define ISLOWER(c) LOWERCASEP (lispbuf, c)
-
-#define ISPUNCT(c) (ISASCII (c)                                 \
-		    ? ((c) > ' ' && (c) < 0x7F			\
-		       && !(((c) >= 'a' && (c) <= 'z')		\
-		            || ((c) >= 'A' && (c) <= 'Z')	\
-		            || ((c) >= '0' && (c) <= '9')))	\
-		    : !ISWORD (c))
-
-#define ISSPACE(c) \
-	(SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Swhitespace)
-
-#define ISUPPER(c) UPPERCASEP (lispbuf, c)
-
-#define ISWORD(c) (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Sword)
-
-#else /* not emacs */
+#ifndef emacs /* For the emacs build, we need these in the header. */

 /* 1 if C is an ASCII character.  */
 #define ISASCII(c) ((c) < 0200)
@@ -2013,23 +1969,6 @@
 /* The next available element.  */
 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])

-/* Bits used to implement the multibyte-part of the various character
-   classes such as [:alnum:] in a charset's range table. XEmacs; use an
-   enum, so they're visible in the debugger. */
-enum
-{
-  BIT_WORD = (1 << 0),
-  BIT_LOWER = (1 << 1),
-  BIT_PUNCT = (1 << 2),
-  BIT_SPACE = (1 << 3),
-  BIT_UPPER = (1 << 4),
-  /* XEmacs; we need this, because we unify treatment of ASCII and non-ASCII
-     (possible matches) in charset_mule. [:alpha:] matches all characters
-     with word syntax, with the exception of [0-9]. We don't need
-     BIT_MULTIBYTE. */
-  BIT_ALPHA = (1 << 5)
-};
-
 /* Set the bit for character C in a bit vector.  */
 #define SET_LIST_BIT(c)				\
   (buf_end[((unsigned char) (c)) / BYTEWIDTH]	\
@@ -2059,10 +1998,8 @@
        } 								\
     }

-#define CHAR_CLASS_MAX_LENGTH  9 /* Namely, `multibyte'.  */
-
 /* Map a string to the char class it names (if any).  */
-static re_wctype_t
+re_wctype_t
 re_wctype (const char *string)
 {
   if      (STREQ (string, "alnum"))	return RECC_ALNUM;
@@ -2086,17 +2023,10 @@
 }

 /* True if CH is in the char class CC.  */
-static re_bool
-re_iswctype (int ch, re_wctype_t cc)
+int
+re_iswctype (int ch, re_wctype_t cc
+             RE_ISWCTYPE_ARG_DECL)
 {
-#ifdef emacs
-  /* This is cheesy, lispbuf isn't available to us when compiling the
-     pattern. It's effectively only called (on Mule builds) when the current
-     buffer doesn't matter (e.g. for RECC_ASCII, RECC_CNTRL), so it's not a
-     big deal. */
-  struct buffer *lispbuf = current_buffer;
-#endif
-
   switch (cc)
     {
     case RECC_ALNUM: return ISALNUM (ch) != 0;
@@ -2105,11 +2035,20 @@
     case RECC_CNTRL: return ISCNTRL (ch) != 0;
     case RECC_DIGIT: return ISDIGIT (ch) != 0;
     case RECC_GRAPH: return ISGRAPH (ch) != 0;
-    case RECC_LOWER: return ISLOWER (ch) != 0;
     case RECC_PRINT: return ISPRINT (ch) != 0;
     case RECC_PUNCT: return ISPUNCT (ch) != 0;
     case RECC_SPACE: return ISSPACE (ch) != 0;
+#ifdef emacs
+    case RECC_UPPER:
+      return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0
+        : !NOCASEP (lispbuf, ch);
+    case RECC_LOWER:
+      return NILP (lispbuf->case_fold_search) ? ISLOWER (ch) != 0
+        : !NOCASEP (lispbuf, ch);
+#else
     case RECC_UPPER: return ISUPPER (ch) != 0;
+    case RECC_LOWER: return ISLOWER (ch) != 0;
+#endif
     case RECC_XDIGIT: return ISXDIGIT (ch) != 0;
     case RECC_ASCII: return ISASCII (ch) != 0;
     case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch);
@@ -2140,6 +2079,10 @@
     }
 }

+#endif /* MULE */
+
+#ifdef emacs
+
 /* Return a bit-pattern to use in the range-table bits to match multibyte
    chars of class CC.  */
 static unsigned char
@@ -2158,7 +2101,8 @@
     case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
     case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
     default:
-      abort ();
+      ABORT ();
+      return 0;
     }
 }

@@ -2185,9 +2129,12 @@
 					     RE_TRANSLATE_TYPE translate,
 					     reg_syntax_t syntax,
 					     Lisp_Object rtab);
-static reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab,
-                                         Bitbyte *flags_out);
 #endif /* MULE */
+#ifdef emacs
+reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab,
+                                  Bitbyte *flags_out);
+#endif
+
 static re_bool group_match_null_string_p (unsigned char **p,
 					  unsigned char *end,
 					  register_info_type *reg_info);
@@ -2814,7 +2761,8 @@
 #endif /* MULE */
 			for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
 			  {
-			    if (re_iswctype (ch, cc))
+			    if (re_iswctype (ch, cc
+                                             RE_ISWCTYPE_ARG (current_buffer)))
 			      {
 				SET_LIST_BIT (ch);
 			      }
@@ -3938,7 +3886,11 @@
   return REG_NOERROR;
 }

-static reg_errcode_t
+#endif /* MULE */
+
+#ifdef emacs
+
+reg_errcode_t
 compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out)
 {
   *flags_out |= re_wctype_to_bit (cc);
--- a/src/regex.h	Tue May 01 16:17:42 2012 +0100
+++ b/src/regex.h	Fri May 04 21:12:51 2012 +0100
@@ -30,6 +30,8 @@
 #define RE_LISP_CONTEXT_ARGS_DECL , Lisp_Object lispobj, struct buffer *lispbuf, struct syntax_cache *scache
 #define RE_LISP_CONTEXT_ARGS_MULE_DECL , Lisp_Object lispobj, struct buffer *USED_IF_MULE (lispbuf), struct syntax_cache *scache
 #define RE_LISP_CONTEXT_ARGS , lispobj, lispbuf, scache
+#define RE_ISWCTYPE_ARG_DECL , struct buffer *lispbuf
+#define RE_ISWCTYPE_ARG(varname) , varname
 #else
 #define RE_TRANSLATE_TYPE char *
 #define RE_LISP_SHORT_CONTEXT_ARGS_DECL
@@ -37,6 +39,8 @@
 #define RE_LISP_CONTEXT_ARGS_DECL
 #define RE_LISP_CONTEXT_ARGS_MULE_DECL
 #define RE_LISP_CONTEXT_ARGS
+#define RE_ISWCTYPE_ARG_DECL
+#define RE_ISWCTYPE_ARG(varname)
 #define Elemcount ssize_t
 #define Bytecount ssize_t
 #endif /* emacs */
@@ -559,6 +563,86 @@
     RECC_ASCII, RECC_UNIBYTE
 } re_wctype_t;

+#define CHAR_CLASS_MAX_LENGTH  9 /* Namely, `multibyte'.  */
+
+/* Map a string to the char class it names (if any).  */
+re_wctype_t re_wctype (const char *);
+
+/* Is character CH a member of the character class CC? */
+int re_iswctype (int ch, re_wctype_t cc RE_ISWCTYPE_ARG_DECL);
+
+/* Bits used to implement the multibyte-part of the various character
+   classes such as [:alnum:] in a charset's range table. XEmacs; use an
+   enum, so they're visible in the debugger. */
+enum
+{
+  BIT_WORD = (1 << 0),
+  BIT_LOWER = (1 << 1),
+  BIT_PUNCT = (1 << 2),
+  BIT_SPACE = (1 << 3),
+  BIT_UPPER = (1 << 4),
+  /* XEmacs; we need this, because we unify treatment of ASCII and non-ASCII
+     (possible matches) in charset_mule. [:alpha:] matches all characters
+     with word syntax, with the exception of [0-9]. We don't need
+     BIT_MULTIBYTE. */
+  BIT_ALPHA = (1 << 5)
+};
+
+#ifdef emacs
+reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab,
+                                  Bitbyte *flags_out);
+
+#endif
+
+/* isalpha etc. are used for the character classes.  */
+#include <ctype.h>
+
+#ifdef emacs
+
+/* 1 if C is an ASCII character.  */
+#define ISASCII(c) ((c) < 0x80)
+
+/* 1 if C is a unibyte character.  */
+#define ISUNIBYTE ISASCII
+
+/* The Emacs definitions should not be directly affected by locales.  */
+
+/* In Emacs, these are only used for single-byte characters.  */
+#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
+#define ISCNTRL(c) ((c) < ' ')
+#define ISXDIGIT(c) (ISDIGIT (c) || ((c) >= 'a' && (c) <= 'f')	\
+		     || ((c) >= 'A' && (c) <= 'F'))
+
+/* This is only used for single-byte characters.  */
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+
+/* The rest must handle multibyte characters.  */
+
+#define ISGRAPH(c) ((c) > ' ' && (c) != 0x7f)
+#define ISPRINT(c) ((c) == ' ' || ISGRAPH (c))
+#define ISALPHA(c) (ISASCII (c) ? (((c) >= 'a' && (c) <= 'z')		\
+				   || ((c) >= 'A' && (c) <= 'Z'))	\
+		    : ISWORD (c))
+#define ISALNUM(c) (ISALPHA (c) || ISDIGIT (c))
+
+#define ISLOWER(c) LOWERCASEP (lispbuf, c)
+
+#define ISPUNCT(c) (ISASCII (c)                                 \
+		    ? ((c) > ' ' && (c) < 0x7F			\
+		       && !(((c) >= 'a' && (c) <= 'z')		\
+		            || ((c) >= 'A' && (c) <= 'Z')	\
+		            || ((c) >= '0' && (c) <= '9')))	\
+		    : !ISWORD (c))
+
+#define ISSPACE(c) \
+	(SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Swhitespace)
+
+#define ISUPPER(c) UPPERCASEP (lispbuf, c)
+
+#define ISWORD(c) (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Sword)
+
+#endif
+
 END_C_DECLS

 #endif /* INCLUDED_regex_h_ */
--- a/src/search.c	Tue May 01 16:17:42 2012 +0100
+++ b/src/search.c	Fri May 04 21:12:51 2012 +0100
@@ -887,9 +887,9 @@
      a range table. */
   unsigned char fastmap[0400];
   int negate = 0;
-  REGISTER int i;
   Charbpos limit;
   struct syntax_cache *scache;
+  Bitbyte class_bits = 0;

   if (NILP (lim))
     limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf);
@@ -957,6 +957,51 @@
 				  Vskip_chars_range_table);
 	      INC_IBYTEPTR (p);
 	    }
+          else if ('[' == c && p != pend && *p == ':')
+            {
+              Ibyte *colonp;
+              Extbyte *classname;
+              int ch = 0;
+              re_wctype_t cc;
+
+              INC_IBYTEPTR (p);
+
+              if (p == pend)
+                {
+                  fastmap ['['] = fastmap[':'] = 1;
+                  break;
+                }
+
+              colonp = memchr (p, ':', pend - p);
+              if (NULL == colonp || (colonp + 1) == pend || colonp[1] != ']')
+                {
+                  fastmap ['['] = fastmap[':'] = 1;
+                  continue;
+                }
+
+              classname = alloca_extbytes (colonp - p + 1);
+              memmove (classname, p, colonp - p);
+              classname[colonp - p] = '\0';
+              cc = re_wctype (classname);
+
+              if (cc == RECC_ERROR)
+                {
+                  invalid_argument ("Invalid character class",
+                                    build_extstring (classname, Qbinary));
+                }
+
+              for (ch = 0; ch < countof (fastmap); ++ch)
+                {
+                  if (re_iswctype (ch, cc, buf))
+                    {
+                      fastmap[ch] = 1;
+                    }
+                }
+
+              compile_char_class (cc, Vskip_chars_range_table, &class_bits);
+
+              p = colonp + 2;
+            }
 	  else
 	    {
 	      if (c < 0400)
@@ -972,14 +1017,6 @@
   if (syntaxp && fastmap['-'] != 0)
     fastmap[' '] = 1;

-  /* If ^ was the first character, complement the fastmap.
-     We don't complement the range table, however; we just use negate
-     in the comparisons below. */
-
-  if (negate)
-    for (i = 0; i < (int) (sizeof (fastmap)); i++)
-      fastmap[i] ^= 1;
-
   {
     Charbpos start_point = BUF_PT (buf);
     Charbpos pos = start_point;
@@ -996,7 +1033,8 @@
 	      while (fastmap[(unsigned char)
 			     syntax_code_spec
 			     [(int) SYNTAX_FROM_CACHE
-			      (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]])
+			      (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]
+                     != negate)
 		{
 		  pos++;
 		  INC_BYTEBPOS (buf, pos_byte);
@@ -1013,10 +1051,11 @@
 		pos--;
 		DEC_BYTEBPOS (buf, pos_byte);
 		UPDATE_SYNTAX_CACHE_BACKWARD (scache, pos);
-		if (!fastmap[(unsigned char)
-			     syntax_code_spec
-			     [(int) SYNTAX_FROM_CACHE
-			      (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]])
+		if (fastmap[(unsigned char)
+                            syntax_code_spec
+                            [(int) SYNTAX_FROM_CACHE
+                             (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]
+                    == negate)
 		  {
 		    pos++;
 		    pos_byte = savepos;
@@ -1027,16 +1066,30 @@
       }
     else
       {
+        struct buffer *lispbuf = buf;
+
+#define CLASS_BIT_CHECK(c)                                              \
+        (class_bits && ((class_bits & BIT_ALPHA && ISALPHA (c))         \
+                        || (class_bits & BIT_SPACE && ISSPACE (c))      \
+                        || (class_bits & BIT_PUNCT && ISPUNCT (c))      \
+                        || (class_bits & BIT_WORD && ISWORD (c))        \
+                        || (NILP (buf->case_fold_search) ?              \
+                            ((class_bits & BIT_UPPER && ISUPPER (c))    \
+                             || (class_bits & BIT_LOWER && ISLOWER (c))) \
+                            : (class_bits & (BIT_UPPER | BIT_LOWER)     \
+                               && !NOCASEP (buf, c)))))
 	if (forwardp)
 	  {
 	    while (pos < limit)
 	      {
 		Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte);
-		if ((ch < 0400) ? fastmap[ch] :
-		    (NILP (Fget_range_table (make_fixnum (ch),
-					     Vskip_chars_range_table,
-					     Qnil))
-		     == negate))
+
+                if ((ch < countof (fastmap) ? fastmap[ch]
+                     : (CLASS_BIT_CHECK (ch) ||
+                        (EQ (Qt, Fget_range_table (make_fixnum (ch),
+                                                   Vskip_chars_range_table,
+                                                   Qnil)))))
+                    != negate)
 		  {
 		    pos++;
 		    INC_BYTEBPOS (buf, pos_byte);
@@ -1054,11 +1107,12 @@

 		DEC_BYTEBPOS (buf, prev_pos_byte);
 		ch = BYTE_BUF_FETCH_CHAR (buf, prev_pos_byte);
-		if ((ch < 0400) ? fastmap[ch] :
-		    (NILP (Fget_range_table (make_fixnum (ch),
-					     Vskip_chars_range_table,
-					     Qnil))
-		     == negate))
+                if ((ch < countof (fastmap) ? fastmap[ch]
+                     : (CLASS_BIT_CHECK (ch) ||
+                        (EQ (Qt, Fget_range_table (make_fixnum (ch),
+                                                   Vskip_chars_range_table,
+                                                   Qnil)))))
+                    != negate)
 		  {
 		    pos--;
 		    pos_byte = prev_pos_byte;
--- a/tests/ChangeLog	Tue May 01 16:17:42 2012 +0100
+++ b/tests/ChangeLog	Fri May 04 21:12:51 2012 +0100
@@ -1,3 +1,11 @@
+2012-05-04  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* automated/regexp-tests.el (equal):
+	* automated/regexp-tests.el (Assert-char-class):
+	Correct a stray parenthesis; add tests for the predefined
+	character classes with #'skip-chars-{forward,backward}; update the
+	tests to reflect some changed design decisions on my part.
+
 2012-04-25  Aidan Kehoe  <kehoea@parhasard.net>

 	* automated/regexp-tests.el: Check that #'posix-string-match
--- a/tests/automated/regexp-tests.el	Tue May 01 16:17:42 2012 +0100
+++ b/tests/automated/regexp-tests.el	Fri May 04 21:12:51 2012 +0100
@@ -76,7 +76,7 @@
   (save-match-data
     (progn (posix-string-match "i\\|ii" "ii") (match-data)))
   '(0 2))
- "checking #'posix-string-match actually returns the longest match"))
+ "checking #'posix-string-match actually returns the longest match")

 ;; looking-at
 (with-temp-buffer
@@ -665,7 +665,25 @@
          (Assert (null (string-match ,(concat "[^" class
                                               (string non-matching-char) "]")
                                      ,(concat (string matching-char)
-                                              (string non-matching-char)))))))
+                                              (string non-matching-char)))))
+         (let ((old-case-fold-search case-fold-search))
+           (with-temp-buffer
+             (setq case-fold-search old-case-fold-search)
+             (insert-char ,matching-char 20)
+             (insert-char ,non-matching-char 20)
+             (goto-char (point-min))
+             (Assert (eql (skip-chars-forward ,class) 20)
+                     ,(format "making sure %s skips %S forward"
+                              class matching-char))
+             (Assert (eql (skip-chars-forward ,(concat "^" class)) 20)
+                     ,(format "making sure ^%s skips %S forward"
+                              class non-matching-char))
+             (Assert (eql (skip-chars-backward ,(concat "^" class)) -20)
+                     ,(format "making sure ^%s skips %S backward"
+                              class non-matching-char))
+             (Assert (eql (skip-chars-backward ,class) -20)
+                     ,(format "making sure %s skips %S backward"
+                              class matching-char))))))
      (Assert-never-matching (class &rest characters)
        (cons
         'progn
@@ -706,7 +724,7 @@
   (Assert-char-class "[:alnum:]" ?A ?/)
   (Assert-char-class "[:alnum:]" ?Z ?!)
   (Assert-char-class "[:alnum:]" ?0 ?,)
-  (Assert-char-class "[:alnum:]" ?9 ?$)
+  (Assert-char-class "[:alnum:]" ?9 ?\t)
   (Assert-char-class "[:alnum:]" ?b ?\x00)
   (Assert-char-class "[:alnum:]" ?c ?\x09)
   (Assert-char-class "[:alnum:]" ?d ?\   )
@@ -724,13 +742,12 @@
    (decode-char 'ucs #x03B2)  ;; GREEK SMALL LETTER BETA
    (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS

-  ;; Word is equivalent to alnum in this implementation.
   (Assert-char-class "[:word:]" ?a ?.)
   (Assert-char-class "[:word:]" ?z ?')
   (Assert-char-class "[:word:]" ?A ?/)
   (Assert-char-class "[:word:]" ?Z ?!)
   (Assert-char-class "[:word:]" ?0 ?,)
-  (Assert-char-class "[:word:]" ?9 ?$)
+  (Assert-char-class "[:word:]" ?9 ?\t)
   (Assert-char-class "[:word:]" ?b ?\x00)
   (Assert-char-class "[:word:]" ?c ?\x09)
   (Assert-char-class "[:word:]" ?d ?\   )
@@ -1083,7 +1100,7 @@

   (Assert-never-matching
    "[:unibyte:]"
-   ?\x01 ?\t ?A ?B ?C ?\x7f
+   ?\x80 ?\xe4 ?\xdf ?\xf8
    (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA
    (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A
    (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A