annotate src/casetab.h @ 5648:3f4a234f4672

Support non-ASCII correctly in character classes, test this. src/ChangeLog addition: 2012-04-21 Aidan Kehoe <kehoea@parhasard.net> Support non-ASCII correctly in character classes ([:alnum:] and friends). * regex.c: * regex.c (ISBLANK, ISUNIBYTE): New. Make these and friends independent of the locale, since we want them to be consistent in XEmacs. * regex.c (print_partial_compiled_pattern): Print the flags for charset_mule; don't print non-ASCII as the character values in ranges, this breaks with locales. * regex.c (enum): Define various flags the charset_mule and charset_mule_not opcodes can now take. * regex.c (CHAR_CLASS_MAX_LENGTH): Update this. * regex.c (re_iswctype, re_wctype): New, from GNU. * regex.c (re_wctype_can_match_non_ascii): New; used when deciding on whether to use charset_mule or the ASCII-only regex character set opcode. * regex.c (regex_compile): Error correctly on long, non-existent character class names. Break out the handling of charsets that can match non-ASCII into a separate clause. Use compile_char_class when compiling character classes. * regex.c (compile_char_class): New. Used in regex_compile when compiling character sets that may match non-ASCII. * regex.c (re_compile_fastmap): If there are flags set for charset_mule or charset_mule_not, we can't use the fastmap (since we need to check syntax table values that aren't available there). * regex.c (re_match_2_internal): Check the new flags passed to the charset_mule{,_not} opcode, observe them if appropriate. * regex.h: * regex.h (enum): Expose re_wctype_t here, imported from GNU. tests/ChangeLog addition: 2012-04-21 Aidan Kehoe <kehoea@parhasard.net> * automated/regexp-tests.el: * automated/regexp-tests.el (Assert-char-class): Check that #'string-match errors correctly with an over-long character class name. Add tests for character class functionality that supports non-ASCII characters. These tests expose bugs in GNU Emacs 24.0.94.2, but pass under current XEmacs.
author Aidan Kehoe <kehoea@parhasard.net>
date Sat, 21 Apr 2012 18:58:28 +0100
parents 308d34e9f07d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
446
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
1 /* XEmacs routines to deal with case tables.
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
2 Copyright (C) 2000 Yoshiki Hayashi.
826
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
3 Copyright (C) 2002 Ben Wing.
446
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
4 This file is part of XEmacs.
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
5
5402
308d34e9f07d Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents: 5127
diff changeset
6 XEmacs is free software: you can redistribute it and/or modify it
446
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
7 under the terms of the GNU General Public License as published by the
5402
308d34e9f07d Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents: 5127
diff changeset
8 Free Software Foundation, either version 3 of the License, or (at your
308d34e9f07d Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents: 5127
diff changeset
9 option) any later version.
446
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
10
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
11 XEmacs is distributed in the hope that it will be useful, but WITHOUT
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
14 for more details.
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
15
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
16 You should have received a copy of the GNU General Public License
5402
308d34e9f07d Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents: 5127
diff changeset
17 along with XEmacs. If not, see <http://www.gnu.org/licenses/>. */
446
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
18
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
19 /* Synched up with: Not in FSF. */
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
20
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
21 #ifndef INCLUDED_casetab_h_
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
22 #define INCLUDED_casetab_h_
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
23
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
/* A case table object: a Lisp object header followed by four tables
   (downcase, upcase, canon, eqv) and a dirty flag. */
24 struct Lisp_Case_Table
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
25 {
5127
a9c41067dd88 more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents: 5120
diff changeset
26 NORMAL_LISP_OBJECT_HEADER header;
446
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
27 Lisp_Object downcase_table;
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
28 Lisp_Object upcase_table;
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
/* canon and eqv are the two tables read through XCASE_TABLE_UPDATE
   (see XCASE_TABLE_CANON and XCASE_TABLE_EQV). */
29 Lisp_Object case_canon_table;
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
30 Lisp_Object case_eqv_table;
826
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
/* Nonzero when changes have been made without the ancillary structures
   being updated; XCASE_TABLE_UPDATE tests it and calls
   recompute_case_table when it is set. */
31 int dirty;
446
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
32 };
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
33 typedef struct Lisp_Case_Table Lisp_Case_Table;
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
34
5118
e0db3c197671 merge up to latest default branch, doesn't compile yet
Ben Wing <ben@xemacs.org>
parents: 3017
diff changeset
/* Declare case_table as a Lisp object type backed by Lisp_Case_Table, and
   define the case-table wrappers around the generic record macros.
   NOTE(review): XRECORD, wrap_record, RECORDP, CHECK_RECORD and
   CONCHECK_RECORD are defined elsewhere; presumably they extract the
   struct pointer, wrap a pointer as a Lisp_Object, test the type, and
   signal on a type mismatch -- confirm against their definitions. */
35 DECLARE_LISP_OBJECT (case_table, Lisp_Case_Table);
446
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
36 #define XCASE_TABLE(x) XRECORD (x, case_table, Lisp_Case_Table)
617
af57a77cbc92 [xemacs-hg @ 2001-06-18 07:09:50 by ben]
ben
parents: 446
diff changeset
37 #define wrap_case_table(p) wrap_record (p, case_table)
446
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
38 #define CASE_TABLEP(x) RECORDP (x, case_table)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
39 #define CHECK_CASE_TABLE(x) CHECK_RECORD (x, case_table)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
40 #define CONCHECK_CASE_TABLE(x) CONCHECK_RECORD (x, case_table)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
41
826
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
/* Defined elsewhere.  XCASE_TABLE_UPDATE calls this on a case table whose
   dirty flag is set, before handing out the table.
   NOTE(review): presumably this rebuilds the canon and eqv tables and
   clears the dirty flag -- confirm in the implementation. */
42 void recompute_case_table (Lisp_Object casetab);
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
43
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
/* Return the Lisp_Case_Table pointer for TABLE (via XCASE_TABLE), first
   calling recompute_case_table (table) if the dirty flag of the table is
   set.  The pointer is fetched before the recomputation and the same
   pointer is returned afterwards.  XCASE_TABLE_CANON and XCASE_TABLE_EQV
   are built on this function. */
44 DECLARE_INLINE_HEADER (
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
45 Lisp_Case_Table *
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
46 XCASE_TABLE_UPDATE (Lisp_Object table)
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
47 )
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
48 {
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
49 Lisp_Case_Table *ct = XCASE_TABLE (table);
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
50 /* If the table is dirty (changes have been made without ancillary
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
51 structures updated), recompute first. */
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
52 if (ct->dirty)
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
53 recompute_case_table (table);
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
54 return ct;
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
55 }
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
56
446
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
/* Field accessors.  The CASE_TABLE_* forms take a Lisp_Case_Table pointer.
   XCASE_TABLE_DOWNCASE and XCASE_TABLE_UPCASE take a Lisp_Object and go
   through XCASE_TABLE directly, so they do not test the dirty flag. */
57 #define CASE_TABLE_DOWNCASE(ct) ((ct)->downcase_table)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
58 #define CASE_TABLE_UPCASE(ct) ((ct)->upcase_table)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
59 #define CASE_TABLE_CANON(ct) ((ct)->case_canon_table)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
60 #define CASE_TABLE_EQV(ct) ((ct)->case_eqv_table)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
61 #define XCASE_TABLE_DOWNCASE(ct) (XCASE_TABLE (ct)->downcase_table)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
62 #define XCASE_TABLE_UPCASE(ct) (XCASE_TABLE (ct)->upcase_table)
826
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
63 /* Only do automatic updating for canon and eqv, which are the two that are
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
64 automatically computed and so may not be up to date. These are not
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
65 normally used by the simple case routines. canon is used by
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
66 compare-buffer-substrings when case-insensitive and by the regex
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
67 routines, and eqv is used only by the Boyer-Moore search routines. */
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
/* Both take a Lisp_Object and go through XCASE_TABLE_UPDATE, so a table
   whose dirty flag is set is recomputed before the field is read. */
68 #define XCASE_TABLE_CANON(ct) (XCASE_TABLE_UPDATE (ct)->case_canon_table)
6728e641994e [xemacs-hg @ 2002-05-05 11:30:15 by ben]
ben
parents: 793
diff changeset
69 #define XCASE_TABLE_EQV(ct) (XCASE_TABLE_UPDATE (ct)->case_eqv_table)
446
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
70
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
/* Field setters.  The SET_CASE_TABLE_* forms take a Lisp_Case_Table
   pointer and expand to a plain assignment expression; P is substituted
   without extra parentheses.  The XSET_CASE_TABLE_* forms take a
   Lisp_Object and forward to the SET_ form through XCASE_TABLE.  None of
   these macros reads or writes the dirty flag. */
71 #define SET_CASE_TABLE_DOWNCASE(ct, p) ((ct)->downcase_table = p)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
72 #define SET_CASE_TABLE_UPCASE(ct, p) ((ct)->upcase_table = p)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
73 #define SET_CASE_TABLE_CANON(ct, p) ((ct)->case_canon_table = p)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
74 #define SET_CASE_TABLE_EQV(ct, p) ((ct)->case_eqv_table = p)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
75 #define XSET_CASE_TABLE_DOWNCASE(ct, p) \
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
76 SET_CASE_TABLE_DOWNCASE (XCASE_TABLE (ct), p)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
77 #define XSET_CASE_TABLE_UPCASE(ct, p) \
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
78 SET_CASE_TABLE_UPCASE (XCASE_TABLE (ct), p)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
79 #define XSET_CASE_TABLE_CANON(ct, p) \
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
80 SET_CASE_TABLE_CANON (XCASE_TABLE (ct), p)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
81 #define XSET_CASE_TABLE_EQV(ct, p) \
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
82 SET_CASE_TABLE_EQV (XCASE_TABLE (ct), p)
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
83
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
/* Defined elsewhere.  NOTE(review): presumably the default case table
   object -- confirm at the definition. */
84 extern Lisp_Object Vstandard_case_table;
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
85
1ccc32a20af4 Import from CVS: tag r21-2-38
cvs
parents:
diff changeset
86 #endif /* INCLUDED_casetab_h_ */