Mercurial > hg > xemacs-beta
view src/casetab.h @ 5648:3f4a234f4672
Support non-ASCII correctly in character classes, test this.
src/ChangeLog addition:
2012-04-21 Aidan Kehoe <kehoea@parhasard.net>
Support non-ASCII correctly in character classes ([:alnum:] and
friends).
* regex.c:
* regex.c (ISBLANK, ISUNIBYTE): New. Make these and friends
independent of the locale, since we want them to be consistent in
XEmacs.
* regex.c (print_partial_compiled_pattern): Print the flags for
charset_mule; don't print non-ASCII as the character values in
ranges, this breaks with locales.
* regex.c (enum):
Define various flags the charset_mule and charset_mule_not opcodes
can now take.
* regex.c (CHAR_CLASS_MAX_LENGTH): Update this.
* regex.c (re_iswctype, re_wctype): New, from GNU.
* regex.c (re_wctype_can_match_non_ascii): New; used when deciding
on whether to use charset_mule or the ASCII-only regex character
set opcode.
* regex.c (regex_compile):
Error correctly on long, non-existent character class names.
Break out the handling of charsets that can match non-ASCII into a
separate clause. Use compile_char_class when compiling character
classes.
* regex.c (compile_char_class): New. Used in regex_compile when
compiling character sets that may match non-ASCII.
* regex.c (re_compile_fastmap):
If there are flags set for charset_mule or charset_mule_not, we
can't use the fastmap (since we need to check syntax table values
that aren't available there).
* regex.c (re_match_2_internal):
Check the new flags passed to the charset_mule{,_not} opcode,
observe them if appropriate.
* regex.h:
* regex.h (enum):
Expose re_wctype_t here, imported from GNU.
tests/ChangeLog addition:
2012-04-21 Aidan Kehoe <kehoea@parhasard.net>
* automated/regexp-tests.el:
* automated/regexp-tests.el (Assert-char-class):
Check that #'string-match errors correctly with an over-long
character class name.
Add tests for character class functionality that supports
non-ASCII characters. These tests expose bugs in GNU Emacs
24.0.94.2, but pass under current XEmacs.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Sat, 21 Apr 2012 18:58:28 +0100 |
parents | 308d34e9f07d |
children |
line wrap: on
line source
/* XEmacs routines to deal with case tables. Copyright (C) 2000 Yoshiki Hayashi. Copyright (C) 2002 Ben Wing. This file is part of XEmacs. XEmacs is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. XEmacs is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with XEmacs. If not, see <http://www.gnu.org/licenses/>. */ /* Synched up with: Not in FSF. */ #ifndef INCLUDED_casetab_h_ #define INCLUDED_casetab_h_ struct Lisp_Case_Table { NORMAL_LISP_OBJECT_HEADER header; Lisp_Object downcase_table; Lisp_Object upcase_table; Lisp_Object case_canon_table; Lisp_Object case_eqv_table; int dirty; }; typedef struct Lisp_Case_Table Lisp_Case_Table; DECLARE_LISP_OBJECT (case_table, Lisp_Case_Table); #define XCASE_TABLE(x) XRECORD (x, case_table, Lisp_Case_Table) #define wrap_case_table(p) wrap_record (p, case_table) #define CASE_TABLEP(x) RECORDP (x, case_table) #define CHECK_CASE_TABLE(x) CHECK_RECORD (x, case_table) #define CONCHECK_CASE_TABLE(x) CONCHECK_RECORD (x, case_table) void recompute_case_table (Lisp_Object casetab); DECLARE_INLINE_HEADER ( Lisp_Case_Table * XCASE_TABLE_UPDATE (Lisp_Object table) ) { Lisp_Case_Table *ct = XCASE_TABLE (table); /* If the table is dirty (changes have been made without ancillary structures updated), recompute first. */ if (ct->dirty) recompute_case_table (table); return ct; } #define CASE_TABLE_DOWNCASE(ct) ((ct)->downcase_table) #define CASE_TABLE_UPCASE(ct) ((ct)->upcase_table) #define CASE_TABLE_CANON(ct) ((ct)->case_canon_table) #define CASE_TABLE_EQV(ct) ((ct)->case_eqv_table) #define XCASE_TABLE_DOWNCASE(ct) (XCASE_TABLE (ct)->downcase_table) #define XCASE_TABLE_UPCASE(ct) (XCASE_TABLE (ct)->upcase_table) /* Only do automatic updating for canon and eqv, which are the two that are automatically computed and that are not up to date. These are not normally used by the simple case routines. canon is used by compare-buffer-substrings when case-insensitive and by the regex routines, and eqv is used only by the Boyer-Moore search routines. */ #define XCASE_TABLE_CANON(ct) (XCASE_TABLE_UPDATE (ct)->case_canon_table) #define XCASE_TABLE_EQV(ct) (XCASE_TABLE_UPDATE (ct)->case_eqv_table) #define SET_CASE_TABLE_DOWNCASE(ct, p) ((ct)->downcase_table = p) #define SET_CASE_TABLE_UPCASE(ct, p) ((ct)->upcase_table = p) #define SET_CASE_TABLE_CANON(ct, p) ((ct)->case_canon_table = p) #define SET_CASE_TABLE_EQV(ct, p) ((ct)->case_eqv_table = p) #define XSET_CASE_TABLE_DOWNCASE(ct, p) \ SET_CASE_TABLE_DOWNCASE (XCASE_TABLE (ct), p) #define XSET_CASE_TABLE_UPCASE(ct, p) \ SET_CASE_TABLE_UPCASE (XCASE_TABLE (ct), p) #define XSET_CASE_TABLE_CANON(ct, p) \ SET_CASE_TABLE_CANON (XCASE_TABLE (ct), p) #define XSET_CASE_TABLE_EQV(ct, p) \ SET_CASE_TABLE_EQV (XCASE_TABLE (ct), p) extern Lisp_Object Vstandard_case_table; #endif /* INCLUDED_casetab_h_ */