Mercurial > hg > xemacs-beta
changeset 5654:ddf56c45634e
Automated merge with file:///Sources/xemacs-21.5-checked-out
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Fri, 04 May 2012 21:12:51 +0100 |
parents | cc6f0266bc36 (current diff) 3df910176b6a (diff) |
children | b7ae5f44b950 |
files | |
diffstat | 8 files changed, 311 insertions(+), 111 deletions(-) [+] |
line wrap: on
line diff
--- a/man/ChangeLog Tue May 01 16:17:42 2012 +0100 +++ b/man/ChangeLog Fri May 04 21:12:51 2012 +0100 @@ -1,3 +1,11 @@ +2012-05-04 Aidan Kehoe <kehoea@parhasard.net> + + * lispref/searching.texi (Regular Expressions): + * lispref/searching.texi (Syntax of Regexps): + * lispref/searching.texi (Char Classes): + * lispref/searching.texi (Regexp Example): + Document the predefined character classes in this file. + 2011-12-30 Aidan Kehoe <kehoea@parhasard.net> * cl.texi (Top):
--- a/man/lispref/searching.texi Tue May 01 16:17:42 2012 +0100 +++ b/man/lispref/searching.texi Fri May 04 21:12:51 2012 +0100 @@ -180,6 +180,7 @@ @menu * Syntax of Regexps:: Rules for writing regular expressions. +* Char Classes:: Predefined character classes for searching. * Regexp Example:: Illustrates regular expression syntax. @end menu @@ -335,6 +336,11 @@ To include @samp{^} in a set, put it anywhere but at the beginning of the set. +It is also possible to specify named character classes as part of your +character set; for example, @samp{[:xdigit:]} will match hexadecimal +digits, @samp{[:nonascii:]} will match characters outside the basic +ASCII set. These are documented elsewhere, @pxref{Char Classes}. + @item [^ @dots{} ] @cindex @samp{^} in regexp @samp{[^} begins a @dfn{complement character set}, which matches any @@ -604,6 +610,61 @@ @end example @end defun +@node Char Classes +@subsection Char Classes + +These are the predefined character classes available within regular +expression character sets, and within @samp{skip-chars-forward} and +@samp{skip-chars-backward}. @xref{Skipping Characters}. + +@table @samp +@item [:alnum:] +This matches any ASCII letter or digit, or any non-ASCII character +with word syntax. +@item [:alpha:] +This matches any ASCII letter, or any non-ASCII character with word syntax. +@item [:ascii:] +This matches any character with a numeric value below @samp{?\x80}. +@item [:blank:] +This matches space or tab. +@item [:cntrl:] +This matches any character with a numeric value below @samp{?\x20}, +the code for space; these are the ASCII control characters. +@item [:digit:] +This matches the characters @samp{?0} to @samp{?9}, inclusive. +@item [:graph:] +This matches ``graphic'' characters, with numeric values greater than +@samp{?\x20}, exclusive of @samp{?\x7f}, the delete character. +@item [:lower:] +This matches minuscule characters, or any character with case +information if @samp{case-fold-search} is non-nil.
+@item [:multibyte:] +This matches non-ASCII characters, that is, any character with a +numeric value above @samp{?\x7f}. +@item [:nonascii:] +This is equivalent to @samp{[:multibyte:]}. +@item [:print:] +This is equivalent to @samp{[:graph:]}, but also matches the space character, +@samp{?\x20}. +@item [:punct:] +This matches non-control, non-alphanumeric ASCII characters, or any +non-ASCII character without word syntax. +@item [:space:] +This matches any character with whitespace syntax. +@item [:unibyte:] +This is a GNU Emacs extension; in XEmacs it is equivalent to +@samp{[:ascii:]}. Note that this means it is not equivalent to +@samp{"\x00-\xff"}, which one might have assumed to be the case. +@item [:upper:] +This matches majuscule characters, or any character with case +information if @samp{case-fold-search} is non-nil. +@item [:word:] +This matches any character with word syntax. +@item [:xdigit:] +This matches hexadecimal digits, so the decimal digits @samp{0-9} and the +letters @samp{a-f} and @samp{A-F}. +@end table + @node Regexp Example @subsection Complex Regexp Example
--- a/src/ChangeLog Tue May 01 16:17:42 2012 +0100 +++ b/src/ChangeLog Fri May 04 21:12:51 2012 +0100 @@ -1,3 +1,19 @@ +2012-05-04 Aidan Kehoe <kehoea@parhasard.net> + + * regex.c: + Move various #defines and enums to regex.h, since we need them + when implementing #'skip-chars-{backward,forward}. + * regex.c (re_wctype): + * regex.c (re_iswctype): + Be more robust about case insensitivity here. + * regex.c (regex_compile): + * regex.h: + * regex.h (RE_ISWCTYPE_ARG_DECL): + * regex.h (CHAR_CLASS_MAX_LENGTH): + * search.c (skip_chars): + Implement support for the predefined character classes in this + function. + 2012-04-25 Aidan Kehoe <kehoea@parhasard.net> * search.c (string_match_1): Actually use the POSIX argument here,
--- a/src/regex.c Tue May 01 16:17:42 2012 +0100 +++ b/src/regex.c Fri May 04 21:12:51 2012 +0100 @@ -178,51 +178,7 @@ /* isalpha etc. are used for the character classes. */ #include <ctype.h> -#ifdef emacs - -/* 1 if C is an ASCII character. */ -#define ISASCII(c) ((c) < 0x80) - -/* 1 if C is a unibyte character. */ -#define ISUNIBYTE(c) 0 - -/* The Emacs definitions should not be directly affected by locales. */ - -/* In Emacs, these are only used for single-byte characters. */ -#define ISDIGIT(c) ((c) >= '0' && (c) <= '9') -#define ISCNTRL(c) ((c) < ' ') -#define ISXDIGIT(c) (ISDIGIT (c) || ((c) >= 'a' && (c) <= 'f') \ - || ((c) >= 'A' && (c) <= 'F')) - -/* This is only used for single-byte characters. */ -#define ISBLANK(c) ((c) == ' ' || (c) == '\t') - -/* The rest must handle multibyte characters. */ - -#define ISGRAPH(c) ((c) > ' ' && (c) != 0x7f) -#define ISPRINT(c) ((c) == ' ' || ISGRAPH (c)) -#define ISALPHA(c) (ISASCII (c) ? (((c) >= 'a' && (c) <= 'z') \ - || ((c) >= 'A' && (c) <= 'Z')) \ - : ISWORD (c)) -#define ISALNUM(c) (ISALPHA (c) || ISDIGIT (c)) - -#define ISLOWER(c) LOWERCASEP (lispbuf, c) - -#define ISPUNCT(c) (ISASCII (c) \ - ? ((c) > ' ' && (c) < 0x7F \ - && !(((c) >= 'a' && (c) <= 'z') \ - || ((c) >= 'A' && (c) <= 'Z') \ - || ((c) >= '0' && (c) <= '9'))) \ - : !ISWORD (c)) - -#define ISSPACE(c) \ - (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Swhitespace) - -#define ISUPPER(c) UPPERCASEP (lispbuf, c) - -#define ISWORD(c) (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Sword) - -#else /* not emacs */ +#ifndef emacs /* For the emacs build, we need these in the header. */ /* 1 if C is an ASCII character. */ #define ISASCII(c) ((c) < 0200) @@ -2013,23 +1969,6 @@ /* The next available element. */ #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) -/* Bits used to implement the multibyte-part of the various character - classes such as [:alnum:] in a charset's range table. 
XEmacs; use an - enum, so they're visible in the debugger. */ -enum -{ - BIT_WORD = (1 << 0), - BIT_LOWER = (1 << 1), - BIT_PUNCT = (1 << 2), - BIT_SPACE = (1 << 3), - BIT_UPPER = (1 << 4), - /* XEmacs; we need this, because we unify treatment of ASCII and non-ASCII - (possible matches) in charset_mule. [:alpha:] matches all characters - with word syntax, with the exception of [0-9]. We don't need - BIT_MULTIBYTE. */ - BIT_ALPHA = (1 << 5) -}; - /* Set the bit for character C in a bit vector. */ #define SET_LIST_BIT(c) \ (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ @@ -2059,10 +1998,8 @@ } \ } -#define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */ - /* Map a string to the char class it names (if any). */ -static re_wctype_t +re_wctype_t re_wctype (const char *string) { if (STREQ (string, "alnum")) return RECC_ALNUM; @@ -2086,17 +2023,10 @@ } /* True if CH is in the char class CC. */ -static re_bool -re_iswctype (int ch, re_wctype_t cc) +int +re_iswctype (int ch, re_wctype_t cc + RE_ISWCTYPE_ARG_DECL) { -#ifdef emacs - /* This is cheesy, lispbuf isn't available to us when compiling the - pattern. It's effectively only called (on Mule builds) when the current - buffer doesn't matter (e.g. for RECC_ASCII, RECC_CNTRL), so it's not a - big deal. */ - struct buffer *lispbuf = current_buffer; -#endif - switch (cc) { case RECC_ALNUM: return ISALNUM (ch) != 0; @@ -2105,11 +2035,20 @@ case RECC_CNTRL: return ISCNTRL (ch) != 0; case RECC_DIGIT: return ISDIGIT (ch) != 0; case RECC_GRAPH: return ISGRAPH (ch) != 0; - case RECC_LOWER: return ISLOWER (ch) != 0; case RECC_PRINT: return ISPRINT (ch) != 0; case RECC_PUNCT: return ISPUNCT (ch) != 0; case RECC_SPACE: return ISSPACE (ch) != 0; +#ifdef emacs + case RECC_UPPER: + return NILP (lispbuf->case_fold_search) ? ISUPPER (ch) != 0 + : !NOCASEP (lispbuf, ch); + case RECC_LOWER: + return NILP (lispbuf->case_fold_search) ? 
ISLOWER (ch) != 0 + : !NOCASEP (lispbuf, ch); +#else case RECC_UPPER: return ISUPPER (ch) != 0; + case RECC_LOWER: return ISLOWER (ch) != 0; +#endif case RECC_XDIGIT: return ISXDIGIT (ch) != 0; case RECC_ASCII: return ISASCII (ch) != 0; case RECC_NONASCII: case RECC_MULTIBYTE: return !ISASCII (ch); @@ -2140,6 +2079,10 @@ } } +#endif /* MULE */ + +#ifdef emacs + /* Return a bit-pattern to use in the range-table bits to match multibyte chars of class CC. */ static unsigned char @@ -2158,7 +2101,8 @@ case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL: case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0; default: - abort (); + ABORT (); + return 0; } } @@ -2185,9 +2129,12 @@ RE_TRANSLATE_TYPE translate, reg_syntax_t syntax, Lisp_Object rtab); -static reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, - Bitbyte *flags_out); #endif /* MULE */ +#ifdef emacs +reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, + Bitbyte *flags_out); +#endif + static re_bool group_match_null_string_p (unsigned char **p, unsigned char *end, register_info_type *reg_info); @@ -2814,7 +2761,8 @@ #endif /* MULE */ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) { - if (re_iswctype (ch, cc)) + if (re_iswctype (ch, cc + RE_ISWCTYPE_ARG (current_buffer))) { SET_LIST_BIT (ch); } @@ -3938,7 +3886,11 @@ return REG_NOERROR; } -static reg_errcode_t +#endif /* MULE */ + +#ifdef emacs + +reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, Bitbyte *flags_out) { *flags_out |= re_wctype_to_bit (cc);
--- a/src/regex.h Tue May 01 16:17:42 2012 +0100 +++ b/src/regex.h Fri May 04 21:12:51 2012 +0100 @@ -30,6 +30,8 @@ #define RE_LISP_CONTEXT_ARGS_DECL , Lisp_Object lispobj, struct buffer *lispbuf, struct syntax_cache *scache #define RE_LISP_CONTEXT_ARGS_MULE_DECL , Lisp_Object lispobj, struct buffer *USED_IF_MULE (lispbuf), struct syntax_cache *scache #define RE_LISP_CONTEXT_ARGS , lispobj, lispbuf, scache +#define RE_ISWCTYPE_ARG_DECL , struct buffer *lispbuf +#define RE_ISWCTYPE_ARG(varname) , varname #else #define RE_TRANSLATE_TYPE char * #define RE_LISP_SHORT_CONTEXT_ARGS_DECL @@ -37,6 +39,8 @@ #define RE_LISP_CONTEXT_ARGS_DECL #define RE_LISP_CONTEXT_ARGS_MULE_DECL #define RE_LISP_CONTEXT_ARGS +#define RE_ISWCTYPE_ARG_DECL +#define RE_ISWCTYPE_ARG(varname) #define Elemcount ssize_t #define Bytecount ssize_t #endif /* emacs */ @@ -559,6 +563,86 @@ RECC_ASCII, RECC_UNIBYTE } re_wctype_t; +#define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */ + +/* Map a string to the char class it names (if any). */ +re_wctype_t re_wctype (const char *); + +/* Is character CH a member of the character class CC? */ +int re_iswctype (int ch, re_wctype_t cc RE_ISWCTYPE_ARG_DECL); + +/* Bits used to implement the multibyte-part of the various character + classes such as [:alnum:] in a charset's range table. XEmacs; use an + enum, so they're visible in the debugger. */ +enum +{ + BIT_WORD = (1 << 0), + BIT_LOWER = (1 << 1), + BIT_PUNCT = (1 << 2), + BIT_SPACE = (1 << 3), + BIT_UPPER = (1 << 4), + /* XEmacs; we need this, because we unify treatment of ASCII and non-ASCII + (possible matches) in charset_mule. [:alpha:] matches all characters + with word syntax, with the exception of [0-9]. We don't need + BIT_MULTIBYTE. */ + BIT_ALPHA = (1 << 5) +}; + +#ifdef emacs +reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab, + Bitbyte *flags_out); + +#endif + +/* isalpha etc. are used for the character classes. 
*/ +#include <ctype.h> + +#ifdef emacs + +/* 1 if C is an ASCII character. */ +#define ISASCII(c) ((c) < 0x80) + +/* 1 if C is a unibyte character. */ +#define ISUNIBYTE ISASCII + +/* The Emacs definitions should not be directly affected by locales. */ + +/* In Emacs, these are only used for single-byte characters. */ +#define ISDIGIT(c) ((c) >= '0' && (c) <= '9') +#define ISCNTRL(c) ((c) < ' ') +#define ISXDIGIT(c) (ISDIGIT (c) || ((c) >= 'a' && (c) <= 'f') \ + || ((c) >= 'A' && (c) <= 'F')) + +/* This is only used for single-byte characters. */ +#define ISBLANK(c) ((c) == ' ' || (c) == '\t') + +/* The rest must handle multibyte characters. */ + +#define ISGRAPH(c) ((c) > ' ' && (c) != 0x7f) +#define ISPRINT(c) ((c) == ' ' || ISGRAPH (c)) +#define ISALPHA(c) (ISASCII (c) ? (((c) >= 'a' && (c) <= 'z') \ + || ((c) >= 'A' && (c) <= 'Z')) \ + : ISWORD (c)) +#define ISALNUM(c) (ISALPHA (c) || ISDIGIT (c)) + +#define ISLOWER(c) LOWERCASEP (lispbuf, c) + +#define ISPUNCT(c) (ISASCII (c) \ + ? ((c) > ' ' && (c) < 0x7F \ + && !(((c) >= 'a' && (c) <= 'z') \ + || ((c) >= 'A' && (c) <= 'Z') \ + || ((c) >= '0' && (c) <= '9'))) \ + : !ISWORD (c)) + +#define ISSPACE(c) \ + (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Swhitespace) + +#define ISUPPER(c) UPPERCASEP (lispbuf, c) + +#define ISWORD(c) (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Sword) + +#endif + END_C_DECLS #endif /* INCLUDED_regex_h_ */
--- a/src/search.c Tue May 01 16:17:42 2012 +0100 +++ b/src/search.c Fri May 04 21:12:51 2012 +0100 @@ -887,9 +887,9 @@ a range table. */ unsigned char fastmap[0400]; int negate = 0; - REGISTER int i; Charbpos limit; struct syntax_cache *scache; + Bitbyte class_bits = 0; if (NILP (lim)) limit = forwardp ? BUF_ZV (buf) : BUF_BEGV (buf); @@ -957,6 +957,51 @@ Vskip_chars_range_table); INC_IBYTEPTR (p); } + else if ('[' == c && p != pend && *p == ':') + { + Ibyte *colonp; + Extbyte *classname; + int ch = 0; + re_wctype_t cc; + + INC_IBYTEPTR (p); + + if (p == pend) + { + fastmap ['['] = fastmap[':'] = 1; + break; + } + + colonp = memchr (p, ':', pend - p); + if (NULL == colonp || (colonp + 1) == pend || colonp[1] != ']') + { + fastmap ['['] = fastmap[':'] = 1; + continue; + } + + classname = alloca_extbytes (colonp - p + 1); + memmove (classname, p, colonp - p); + classname[colonp - p] = '\0'; + cc = re_wctype (classname); + + if (cc == RECC_ERROR) + { + invalid_argument ("Invalid character class", + build_extstring (classname, Qbinary)); + } + + for (ch = 0; ch < countof (fastmap); ++ch) + { + if (re_iswctype (ch, cc, buf)) + { + fastmap[ch] = 1; + } + } + + compile_char_class (cc, Vskip_chars_range_table, &class_bits); + + p = colonp + 2; + } else { if (c < 0400) @@ -972,14 +1017,6 @@ if (syntaxp && fastmap['-'] != 0) fastmap[' '] = 1; - /* If ^ was the first character, complement the fastmap. - We don't complement the range table, however; we just use negate - in the comparisons below. 
*/ - - if (negate) - for (i = 0; i < (int) (sizeof (fastmap)); i++) - fastmap[i] ^= 1; - { Charbpos start_point = BUF_PT (buf); Charbpos pos = start_point; @@ -996,7 +1033,8 @@ while (fastmap[(unsigned char) syntax_code_spec [(int) SYNTAX_FROM_CACHE - (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) + (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]] + != negate) { pos++; INC_BYTEBPOS (buf, pos_byte); @@ -1013,10 +1051,11 @@ pos--; DEC_BYTEBPOS (buf, pos_byte); UPDATE_SYNTAX_CACHE_BACKWARD (scache, pos); - if (!fastmap[(unsigned char) - syntax_code_spec - [(int) SYNTAX_FROM_CACHE - (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]]) + if (fastmap[(unsigned char) + syntax_code_spec + [(int) SYNTAX_FROM_CACHE + (scache, BYTE_BUF_FETCH_CHAR (buf, pos_byte))]] + == negate) { pos++; pos_byte = savepos; @@ -1027,16 +1066,30 @@ } else { + struct buffer *lispbuf = buf; + +#define CLASS_BIT_CHECK(c) \ + (class_bits && ((class_bits & BIT_ALPHA && ISALPHA (c)) \ + || (class_bits & BIT_SPACE && ISSPACE (c)) \ + || (class_bits & BIT_PUNCT && ISPUNCT (c)) \ + || (class_bits & BIT_WORD && ISWORD (c)) \ + || (NILP (buf->case_fold_search) ? \ + ((class_bits & BIT_UPPER && ISUPPER (c)) \ + || (class_bits & BIT_LOWER && ISLOWER (c))) \ + : (class_bits & (BIT_UPPER | BIT_LOWER) \ + && !NOCASEP (buf, c))))) if (forwardp) { while (pos < limit) { Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); - if ((ch < 0400) ? fastmap[ch] : - (NILP (Fget_range_table (make_fixnum (ch), - Vskip_chars_range_table, - Qnil)) - == negate)) + + if ((ch < countof (fastmap) ? fastmap[ch] + : (CLASS_BIT_CHECK (ch) || + (EQ (Qt, Fget_range_table (make_fixnum (ch), + Vskip_chars_range_table, + Qnil))))) + != negate) { pos++; INC_BYTEBPOS (buf, pos_byte); @@ -1054,11 +1107,12 @@ DEC_BYTEBPOS (buf, prev_pos_byte); ch = BYTE_BUF_FETCH_CHAR (buf, prev_pos_byte); - if ((ch < 0400) ? fastmap[ch] : - (NILP (Fget_range_table (make_fixnum (ch), - Vskip_chars_range_table, - Qnil)) - == negate)) + if ((ch < countof (fastmap) ? 
fastmap[ch] + : (CLASS_BIT_CHECK (ch) || + (EQ (Qt, Fget_range_table (make_fixnum (ch), + Vskip_chars_range_table, + Qnil))))) + != negate) { pos--; pos_byte = prev_pos_byte;
--- a/tests/ChangeLog Tue May 01 16:17:42 2012 +0100 +++ b/tests/ChangeLog Fri May 04 21:12:51 2012 +0100 @@ -1,3 +1,11 @@ +2012-05-04 Aidan Kehoe <kehoea@parhasard.net> + + * automated/regexp-tests.el (equal): + * automated/regexp-tests.el (Assert-char-class): + Correct a stray parenthesis; add tests for the predefined + character classes with #'skip-chars-{forward,backward}; update the + tests to reflect some changed design decisions on my part. + 2012-04-25 Aidan Kehoe <kehoea@parhasard.net> * automated/regexp-tests.el: Check that #'posix-string-match
--- a/tests/automated/regexp-tests.el Tue May 01 16:17:42 2012 +0100 +++ b/tests/automated/regexp-tests.el Fri May 04 21:12:51 2012 +0100 @@ -76,7 +76,7 @@ (save-match-data (progn (posix-string-match "i\\|ii" "ii") (match-data))) '(0 2)) - "checking #'posix-string-match actually returns the longest match")) + "checking #'posix-string-match actually returns the longest match") ;; looking-at (with-temp-buffer @@ -665,7 +665,25 @@ (Assert (null (string-match ,(concat "[^" class (string non-matching-char) "]") ,(concat (string matching-char) - (string non-matching-char))))))) + (string non-matching-char))))) + (let ((old-case-fold-search case-fold-search)) + (with-temp-buffer + (setq case-fold-search old-case-fold-search) + (insert-char ,matching-char 20) + (insert-char ,non-matching-char 20) + (goto-char (point-min)) + (Assert (eql (skip-chars-forward ,class) 20) + ,(format "making sure %s skips %S forward" + class matching-char)) + (Assert (eql (skip-chars-forward ,(concat "^" class)) 20) + ,(format "making sure ^%s skips %S forward" + class non-matching-char)) + (Assert (eql (skip-chars-backward ,(concat "^" class)) -20) + ,(format "making sure ^%s skips %S backward" + class non-matching-char)) + (Assert (eql (skip-chars-backward ,class) -20) + ,(format "making sure %s skips %S backward" + class matching-char)))))) (Assert-never-matching (class &rest characters) (cons 'progn @@ -706,7 +724,7 @@ (Assert-char-class "[:alnum:]" ?A ?/) (Assert-char-class "[:alnum:]" ?Z ?!) (Assert-char-class "[:alnum:]" ?0 ?,) - (Assert-char-class "[:alnum:]" ?9 ?$) + (Assert-char-class "[:alnum:]" ?9 ?\t) (Assert-char-class "[:alnum:]" ?b ?\x00) (Assert-char-class "[:alnum:]" ?c ?\x09) (Assert-char-class "[:alnum:]" ?d ?\ ) @@ -724,13 +742,12 @@ (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA (decode-char 'ucs #x0385)) ;; GREEK DIALYTIKA TONOS - ;; Word is equivalent to alnum in this implementation. (Assert-char-class "[:word:]" ?a ?.) 
(Assert-char-class "[:word:]" ?z ?') (Assert-char-class "[:word:]" ?A ?/) (Assert-char-class "[:word:]" ?Z ?!) (Assert-char-class "[:word:]" ?0 ?,) - (Assert-char-class "[:word:]" ?9 ?$) + (Assert-char-class "[:word:]" ?9 ?\t) (Assert-char-class "[:word:]" ?b ?\x00) (Assert-char-class "[:word:]" ?c ?\x09) (Assert-char-class "[:word:]" ?d ?\ ) @@ -1083,7 +1100,7 @@ (Assert-never-matching "[:unibyte:]" - ?\x01 ?\t ?A ?B ?C ?\x7f + ?\x80 ?\xe4 ?\xdf ?\xf8 (decode-char 'ucs #x03B2) ;; GREEK SMALL LETTER BETA (decode-char 'ucs #x0410) ;; CYRILLIC CAPITAL LETTER A (decode-char 'ucs #x0430) ;; CYRILLIC SMALL LETTER A