view src/rangetab.h @ 5648:3f4a234f4672

Support non-ASCII correctly in character classes, test this. src/ChangeLog addition: 2012-04-21 Aidan Kehoe <kehoea@parhasard.net> Support non-ASCII correctly in character classes ([:alnum:] and friends). * regex.c: * regex.c (ISBLANK, ISUNIBYTE): New. Make these and friends independent of the locale, since we want them to be consistent in XEmacs. * regex.c (print_partial_compiled_pattern): Print the flags for charset_mule; don't print non-ASCII as the character values in ranges, this breaks with locales. * regex.c (enum): Define various flags the charset_mule and charset_mule_not opcodes can now take. * regex.c (CHAR_CLASS_MAX_LENGTH): Update this. * regex.c (re_iswctype, re_wctype): New, from GNU. * regex.c (re_wctype_can_match_non_ascii): New; used when deciding on whether to use charset_mule or the ASCII-only regex character set opcode. * regex.c (regex_compile): Error correctly on long, non-existent character class names. Break out the handling of charsets that can match non-ASCII into a separate clause. Use compile_char_class when compiling character classes. * regex.c (compile_char_class): New. Used in regex_compile when compiling character sets that may match non-ASCII. * regex.c (re_compile_fastmap): If there are flags set for charset_mule or charset_mule_not, we can't use the fastmap (since we need to check syntax table values that aren't available there). * regex.c (re_match_2_internal): Check the new flags passed to the charset_mule{,_not} opcode, observe them if appropriate. * regex.h: * regex.h (enum): Expose re_wctype_t here, imported from GNU. tests/ChangeLog addition: 2012-04-21 Aidan Kehoe <kehoea@parhasard.net> * automated/regexp-tests.el: * automated/regexp-tests.el (Assert-char-class): Check that #'string-match errors correctly with an over-long character class name. Add tests for character class functionality that supports non-ASCII characters. These tests expose bugs in GNU Emacs 24.0.94.2, but pass under current XEmacs.
author Aidan Kehoe <kehoea@parhasard.net>
date Sat, 21 Apr 2012 18:58:28 +0100
parents 308d34e9f07d
children
line wrap: on
line source

/* XEmacs routines to deal with range tables.
   Copyright (C) 1995 Sun Microsystems, Inc.
   Copyright (C) 1995, 2004, 2010 Ben Wing.

This file is part of XEmacs.

XEmacs is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.

XEmacs is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with XEmacs.  If not, see <http://www.gnu.org/licenses/>. */

/* Synched up with: Not in FSF. */

/* Extracted from rangetab.c by O. Galibert, 1998. */

#ifndef INCLUDED_rangetab_h_
#define INCLUDED_rangetab_h_

typedef struct range_table_entry range_table_entry;
struct range_table_entry
{
#ifdef NEW_GC
  NORMAL_LISP_OBJECT_HEADER header;
#endif /* NEW_GC */
  EMACS_INT first;
  EMACS_INT last;
  Lisp_Object val;
};

typedef struct
{
  Dynarr_declare (range_table_entry);
} range_table_entry_dynarr;

enum range_table_type
{
  RANGE_START_CLOSED_END_OPEN,
  RANGE_START_CLOSED_END_CLOSED,
  RANGE_START_OPEN_END_CLOSED,
  RANGE_START_OPEN_END_OPEN
};

struct Lisp_Range_Table
{
  NORMAL_LISP_OBJECT_HEADER header;
  Gap_Array *entries;
  enum range_table_type type;
};
typedef struct Lisp_Range_Table Lisp_Range_Table;

DECLARE_LISP_OBJECT (range_table, Lisp_Range_Table);
#define XRANGE_TABLE(x) XRECORD (x, range_table, Lisp_Range_Table)
#define wrap_range_table(p) wrap_record (p, range_table)
#define RANGE_TABLEP(x) RECORDP (x, range_table)
#define CHECK_RANGE_TABLE(x) CHECK_RECORD (x, range_table)

#define rangetab_gap_array_at(ga, pos) \
  gap_array_at (ga, pos, struct range_table_entry)
#define rangetab_gap_array_atp(ga, pos) \
  gap_array_atp (ga, pos, struct range_table_entry)
#endif /* INCLUDED_rangetab_h_ */