diff src/regex.h @ 5654:ddf56c45634e

Automated merge with file:///Sources/xemacs-21.5-checked-out
author Aidan Kehoe <kehoea@parhasard.net>
date Fri, 04 May 2012 21:12:51 +0100
parents 3df910176b6a
children
line wrap: on
line diff
--- a/src/regex.h	Tue May 01 16:17:42 2012 +0100
+++ b/src/regex.h	Fri May 04 21:12:51 2012 +0100
@@ -30,6 +30,8 @@
 #define RE_LISP_CONTEXT_ARGS_DECL , Lisp_Object lispobj, struct buffer *lispbuf, struct syntax_cache *scache
 #define RE_LISP_CONTEXT_ARGS_MULE_DECL , Lisp_Object lispobj, struct buffer *USED_IF_MULE (lispbuf), struct syntax_cache *scache
 #define RE_LISP_CONTEXT_ARGS , lispobj, lispbuf, scache
+#define RE_ISWCTYPE_ARG_DECL , struct buffer *lispbuf /* appended to re_iswctype's parameter list */
+#define RE_ISWCTYPE_ARG(varname) , varname /* expands to a trailing ", VARNAME" argument */
 #else
 #define RE_TRANSLATE_TYPE char *
 #define RE_LISP_SHORT_CONTEXT_ARGS_DECL
@@ -37,6 +39,8 @@
 #define RE_LISP_CONTEXT_ARGS_DECL
 #define RE_LISP_CONTEXT_ARGS_MULE_DECL
 #define RE_LISP_CONTEXT_ARGS
+#define RE_ISWCTYPE_ARG_DECL /* empty in this branch: no extra parameter */
+#define RE_ISWCTYPE_ARG(varname) /* empty in this branch: VARNAME is dropped */
 #define Elemcount ssize_t
 #define Bytecount ssize_t
 #endif /* emacs */
@@ -559,6 +563,86 @@
     RECC_ASCII, RECC_UNIBYTE
 } re_wctype_t;
 
+#define CHAR_CLASS_MAX_LENGTH  9 /* Length of the longest name, `multibyte'.  */
+
+/* Map a string to the re_wctype_t it names (unknown-name result: see definition).  */
+re_wctype_t re_wctype (const char *);
+
+/* Is character CH in class CC?  RE_ISWCTYPE_ARG_DECL may add a buffer parameter.  */
+int re_iswctype (int ch, re_wctype_t cc RE_ISWCTYPE_ARG_DECL);
+
+/* Bits used to implement the multibyte part of the various character
+   classes such as [:alnum:] in a charset's range table.  Each value is a
+   distinct power of two.  XEmacs: an enum, so they show in the debugger. */
+enum
+{
+  BIT_WORD = (1 << 0),
+  BIT_LOWER = (1 << 1),
+  BIT_PUNCT = (1 << 2),
+  BIT_SPACE = (1 << 3),
+  BIT_UPPER = (1 << 4),
+  /* XEmacs: BIT_ALPHA is needed because treatment of ASCII and non-ASCII
+     (possible matches) is unified in charset_mule.  [:alpha:] matches all
+     characters with word syntax, with the exception of [0-9].  There is no
+     need for BIT_MULTIBYTE. */
+  BIT_ALPHA = (1 << 5)
+};
+
+#ifdef emacs /* The prototype below uses Lisp_Object and Bitbyte.  */
+reg_errcode_t compile_char_class (re_wctype_t cc, Lisp_Object rtab,
+                                  Bitbyte *flags_out);
+/* NOTE(review): presumably adds class CC to RTAB, flags via FLAGS_OUT.  */
+#endif /* emacs */
+
+/* isalpha etc. are used for the character classes.  */
+#include <ctype.h>
+
+#ifdef emacs
+
+/* 1 if C is below 0x80 (ASCII).  There is no lower-bound check.  */
+#define ISASCII(c) ((c) < 0x80)
+
+/* 1 if C is a unibyte character; here simply an alias for ISASCII.  */
+#define ISUNIBYTE ISASCII
+
+/* The Emacs definitions should not be directly affected by locales.  */
+
+/* Only used for single-byte characters.  ISDIGIT/ISXDIGIT evaluate C twice+.  */
+#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
+#define ISCNTRL(c) ((c) < ' ') /* DEL (0x7f) is not included.  */
+#define ISXDIGIT(c) (ISDIGIT (c) || ((c) >= 'a' && (c) <= 'f')	\
+		     || ((c) >= 'A' && (c) <= 'F'))
+
+/* Space or horizontal tab only; used for single-byte characters.  */
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+
+/* The rest must handle multibyte characters.  */
+/* ISALPHA and ISALNUM expand ISWORD, so they need `lispbuf' in scope.  */
+#define ISGRAPH(c) ((c) > ' ' && (c) != 0x7f)
+#define ISPRINT(c) ((c) == ' ' || ISGRAPH (c))
+#define ISALPHA(c) (ISASCII (c) ? (((c) >= 'a' && (c) <= 'z')		\
+				   || ((c) >= 'A' && (c) <= 'Z'))	\
+		    : ISWORD (c))
+#define ISALNUM(c) (ISALPHA (c) || ISDIGIT (c))
+
+#define ISLOWER(c) LOWERCASEP (lispbuf, c) /* uses `lispbuf' from the expansion site */
+/* ASCII: graphic (between ' ' and DEL) but no letter/digit.  Else: not word syntax.  */
+#define ISPUNCT(c) (ISASCII (c)                                 \
+		    ? ((c) > ' ' && (c) < 0x7F			\
+		       && !(((c) >= 'a' && (c) <= 'z')		\
+		            || ((c) >= 'A' && (c) <= 'Z')	\
+		            || ((c) >= '0' && (c) <= '9')))	\
+		    : !ISWORD (c))
+/* ISSPACE and ISWORD consult the mirror syntax table of `lispbuf'.  */
+#define ISSPACE(c) \
+	(SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Swhitespace)
+
+#define ISUPPER(c) UPPERCASEP (lispbuf, c) /* uses `lispbuf' from the expansion site */
+
+#define ISWORD(c) (SYNTAX (BUFFER_MIRROR_SYNTAX_TABLE (lispbuf), c) == Sword)
+
+#endif /* emacs */
+
 END_C_DECLS
 
 #endif /* INCLUDED_regex_h_ */