Mercurial > hg > xemacs-beta
view src/lisp-union.h @ 5648:3f4a234f4672
Support non-ASCII correctly in character classes, test this.
src/ChangeLog addition:
2012-04-21 Aidan Kehoe <kehoea@parhasard.net>
Support non-ASCII correctly in character classes ([:alnum:] and
friends).
* regex.c:
* regex.c (ISBLANK, ISUNIBYTE): New. Make these and friends
independent of the locale, since we want them to be consistent in
XEmacs.
* regex.c (print_partial_compiled_pattern): Print the flags for
charset_mule; don't print non-ASCII as the character values in
ranges, this breaks with locales.
* regex.c (enum):
Define various flags the charset_mule and charset_mule_not opcodes
can now take.
* regex.c (CHAR_CLASS_MAX_LENGTH): Update this.
* regex.c (re_iswctype, re_wctype): New, from GNU.
* regex.c (re_wctype_can_match_non_ascii): New; used when deciding
on whether to use charset_mule or the ASCII-only regex character
set opcode.
* regex.c (regex_compile):
Error correctly on long, non-existent character class names.
Break out the handling of charsets that can match non-ASCII into a
separate clause. Use compile_char_class when compiling character
classes.
* regex.c (compile_char_class): New. Used in regex_compile when
compiling character sets that may match non-ASCII.
* regex.c (re_compile_fastmap):
If there are flags set for charset_mule or charset_mule_not, we
can't use the fastmap (since we need to check syntax table values
that aren't available there).
* regex.c (re_match_2_internal):
Check the new flags passed to the charset_mule{,_not} opcode,
observe them if appropriate.
* regex.h:
* regex.h (enum):
Expose re_wctype_t here, imported from GNU.
tests/ChangeLog addition:
2012-04-21 Aidan Kehoe <kehoea@parhasard.net>
* automated/regexp-tests.el:
* automated/regexp-tests.el (Assert-char-class):
Check that #'string-match errors correctly with an over-long
character class name.
Add tests for character class functionality that supports
non-ASCII characters. These tests expose bugs in GNU Emacs
24.0.94.2, but pass under current XEmacs.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Sat, 21 Apr 2012 18:58:28 +0100 (2012-04-21) |
parents | 56144c8593a8 |
children |
line wrap: on
line source
/* Fundamental definitions for XEmacs Lisp interpreter -- union objects. Copyright (C) 1985, 1986, 1987, 1992, 1993, 1994 Free Software Foundation, Inc. Copyright (C) 2002, 2005, 2010 Ben Wing. This file is part of XEmacs. XEmacs is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. XEmacs is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with XEmacs. If not, see <http://www.gnu.org/licenses/>. */ /* Divergent from FSF. */ /* Definition of Lisp_Object type as a union. The declaration order of the objects within the struct members of the union is dependent on ENDIAN-ness. See lisp-disunion.h for more details. */ typedef union Lisp_Object { /* if non-valbits are at lower addresses */ #ifdef WORDS_BIGENDIAN struct { EMACS_UINT val : VALBITS; enum_field (Lisp_Type) type : GCTYPEBITS; } gu; struct { signed EMACS_INT val : FIXNUM_VALBITS; unsigned int bits : FIXNUM_GCBITS; } s; struct { EMACS_UINT val : FIXNUM_VALBITS; unsigned int bits : FIXNUM_GCBITS; } u; #else /* non-valbits are at higher addresses */ struct { enum_field (Lisp_Type) type : GCTYPEBITS; EMACS_UINT val : VALBITS; } gu; struct { unsigned int bits : FIXNUM_GCBITS; signed EMACS_INT val : FIXNUM_VALBITS; } s; struct { unsigned int bits : FIXNUM_GCBITS; EMACS_UINT val : FIXNUM_VALBITS; } u; #endif /* non-valbits are at higher addresses */ EMACS_UINT ui; signed EMACS_INT i; /* This was formerly declared `void *v' etc. but that causes GCC to accept any (yes, any) pointer as the argument of a function declared to accept a Lisp_Object. */ struct nosuchstruct *v; } Lisp_Object; #define XCHARVAL(x) ((EMACS_INT)(x).gu.val) #define XPNTRVAL(x) ((x).ui) #define XREALFIXNUM(x) ((EMACS_INT)(x).s.val) #define XUINT(x) ((EMACS_UINT)(x).u.val) #define XTYPE(x) ((x).gu.type) #define EQ(x,y) ((x).v == (y).v) DECLARE_INLINE_HEADER ( Lisp_Object make_fixnum_verify (EMACS_INT val) ) { Lisp_Object obj; obj.s.bits = 1; obj.s.val = val; type_checking_assert (XREALFIXNUM (obj) == val); return obj; } DECLARE_INLINE_HEADER ( Lisp_Object make_fixnum (EMACS_INT val) ) { Lisp_Object obj; obj.s.bits = 1; obj.s.val = val; return obj; } DECLARE_INLINE_HEADER ( Lisp_Object make_char_1 (Ichar val) ) { Lisp_Object obj; obj.gu.type = Lisp_Type_Char; obj.gu.val = val; return obj; } DECLARE_INLINE_HEADER ( Lisp_Object wrap_pointer_1 (const void *ptr) ) { Lisp_Object obj; obj.ui = (EMACS_UINT) ptr; return obj; } extern MODULE_API Lisp_Object Qnull_pointer, Qzero; #define FIXNUMP(x) ((x).s.bits) #define FIXNUM_PLUS(x,y) make_fixnum (XFIXNUM (x) + XFIXNUM (y)) #define FIXNUM_MINUS(x,y) make_fixnum (XFIXNUM (x) - XFIXNUM (y)) #define FIXNUM_PLUS1(x) make_fixnum (XFIXNUM (x) + 1) #define FIXNUM_MINUS1(x) make_fixnum (XFIXNUM (x) - 1) /* WARNING!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! You can only GET_LISP_FROM_VOID something that had previously been STORE_LISP_IN_VOID'd. If you want to go the other way, use STORE_VOID_IN_LISP and GET_VOID_FROM_LISP, or use make_opaque_ptr(). */ /* Convert a Lisp object to a void * pointer, as when it needs to be passed to a toolkit callback function */ #define STORE_LISP_IN_VOID(larg) ((void *) ((larg).v)) /* Convert a void * pointer back into a Lisp object, assuming that the pointer was generated by STORE_LISP_IN_VOID. */ DECLARE_INLINE_HEADER ( Lisp_Object GET_LISP_FROM_VOID (const void *arg) ) { Lisp_Object larg; larg.v = (struct nosuchstruct *) arg; return larg; } /* Convert a Lisp_Object into something that can't be used as an lvalue. Useful for type-checking. */ #if (__GNUC__ > 1) #define NON_LVALUE(larg) ({ (larg); }) #else /* Well, you can't really do it without using a function call, and there's no real point in that; no-union-type is the rule, and that will catch errors. */ #define NON_LVALUE(larg) (larg) #endif