diff src/syntax.h @ 428:3ecd8885ac67 r21-2-22

Import from CVS: tag r21-2-22
author cvs
date Mon, 13 Aug 2007 11:28:15 +0200
parents
children 8de8e3f6228a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/syntax.h	Mon Aug 13 11:28:15 2007 +0200
@@ -0,0 +1,254 @@
+/* Declarations having to do with XEmacs syntax tables.
+   Copyright (C) 1985, 1992, 1993 Free Software Foundation, Inc.
+
+This file is part of XEmacs.
+
+XEmacs is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+XEmacs is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with XEmacs; see the file COPYING.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* Synched up with: FSF 19.28. */
+
+#ifndef _XEMACS_SYNTAX_H_
+#define _XEMACS_SYNTAX_H_
+
+#include "chartab.h"
+
+/* A syntax table is a type of char table.
+
+The low 7 bits of each integer are a syntax code (enum syntaxcode, below).
+The 8th bit (bit 7) is used as the prefix flag (see SYNTAX_PREFIX).
+
+The values in a syntax table are either integers or conses of
+integers and chars.  The lowest 7 bits of the integer are the syntax
+class.  If this is Sinherit, then the actual syntax value needs to
+be retrieved from the standard syntax table.
+
+Since the logic involved in finding the actual integer isn't very
+complex, you'd think the time required to retrieve it is not a
+factor.  If you thought that, however, you'd be wrong, due to the
+high number of times (many per character) that the syntax value is
+accessed in functions such as scan_lists().  To speed this up,
+we maintain a mirror syntax table that contains the actual
+integers.  We can do this successfully because syntax tables are
+now an abstract type, where we control all access.
+*/
+/* Syntax classes.  A class occupies the low 7 bits of a syntax value. */
+enum syntaxcode
+{
+  Swhitespace,	/* whitespace character */
+  Spunct,	/* random punctuation character */
+  Sword,	/* word constituent */
+  Ssymbol,	/* symbol constituent but not word constituent */
+  Sopen,	/* a beginning delimiter */
+  Sclose,	/* an ending delimiter */
+  Squote,	/* a prefix character like Lisp ' */
+  Sstring,	/* a string-grouping character like Lisp " */
+  Smath,	/* delimiters like $ in TeX. */
+  Sescape,	/* a character that begins a C-style escape */
+  Scharquote,	/* a character that quotes the following character */
+  Scomment,	/* a comment-starting character */
+  Sendcomment,	/* a comment-ending character */
+  Sinherit,	/* look this character up in the standard syntax table */
+  Smax	 /* exclusive upper bound: one past the last meaningful code */
+};
+/* Defined elsewhere.  NOTE(review): *multi_p_out looks like an output flag -- confirm. */
+enum syntaxcode charset_syntax (struct buffer *buf, Lisp_Object charset,
+				int *multi_p_out);
+
+/* Return the whole integer syntax value stored for C in mirror table TABLE. */
+
+#define SYNTAX_CODE_UNSAFE(table, c) \
+   XINT (CHAR_TABLE_VALUE_UNSAFE (table, c))
+/* Function form of SYNTAX_CODE_UNSAFE, so each argument is evaluated once. */
+INLINE int SYNTAX_CODE (struct Lisp_Char_Table *table, Emchar c);
+INLINE int
+SYNTAX_CODE (struct Lisp_Char_Table *table, Emchar c)
+{
+  return SYNTAX_CODE_UNSAFE (table, c);
+}
+/* Syntax class of C: its syntax value masked with 0177 (octal, low 7 bits). */
+#define SYNTAX_UNSAFE(table, c) \
+  ((enum syntaxcode) (SYNTAX_CODE_UNSAFE (table, c) & 0177))
+/* SYNTAX_FROM_CODE applies the same mask to an already-fetched value CODE. */
+#define SYNTAX_FROM_CODE(code) ((enum syntaxcode) ((code) & 0177))
+#define SYNTAX(table, c) SYNTAX_FROM_CODE (SYNTAX_CODE (table, c))
+/* Return 1 if the syntax class of C in mirror table TABLE is Sword, else 0. */
+INLINE int WORD_SYNTAX_P (struct Lisp_Char_Table *table, Emchar c);
+INLINE int
+WORD_SYNTAX_P (struct Lisp_Char_Table *table, Emchar c)
+{
+  return SYNTAX (table, c) == Sword;
+}
+
+/* OK, here's a graphic diagram of the format of the syntax values:
+
+   Bit number:
+
+ [ 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 ]
+ [ 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 ]
+
+   <-----> <-----> <-------------> <-------------> ^  <----------->
+    ELisp  unused  |comment bits |     unused      |   syntax code
+     tag           | | | | | | | |                 |
+    stuff          | | | | | | | |                 |
+                   | | | | | | | |                 |
+                   | | | | | | | |                 `--> prefix flag
+                   | | | | | | | |
+                   | | | | | | | `--> comment end style B, second char
+                   | | | | | | `----> comment end style A, second char
+                   | | | | | `------> comment end style B, first char
+                   | | | | `--------> comment end style A, first char
+                   | | | `----------> comment start style B, second char
+                   | | `------------> comment start style A, second char
+                   | `--------------> comment start style B, first char
+                   `----------------> comment start style A, first char
+
+  In a 64-bit integer, there would be 32 more unused bits between
+  the tag and the comment bits.
+
+  Clearly, storing the matching paren in the syntax value itself will
+  not work for Mule, because the matching paren could be any character
+  and as such requires 19 bits, which we don't have.
+
+  Remember that under Mule we use char tables instead of vectors.
+  So what we do is use another char table for the matching paren
+  and store a pointer to it in the first char table. (This frees
+  code from having to worry about passing two tables around.)
+*/
+
+
+/* The prefix flag bit for backward-prefix-chars is now put into bit 7. */
+/* Both macros below shift that bit down and mask it, yielding 0 or 1. */
+#define SYNTAX_PREFIX_UNSAFE(table, c) \
+  ((SYNTAX_CODE_UNSAFE (table, c) >> 7) & 1)
+#define SYNTAX_PREFIX(table, c) \
+  ((SYNTAX_CODE (table, c) >> 7) & 1)
+
+/* Bits 23-16 are used to implement up to two comment styles
+   in a single buffer. Listed from bit 23 down to bit 16, they mean:
+
+  1. first of a one or two character comment-start sequence of style a.
+  2. first of a one or two character comment-start sequence of style b.
+  3. second of a two-character comment-start sequence of style a.
+  4. second of a two-character comment-start sequence of style b.
+  5. first of a one or two character comment-end sequence of style a.
+  6. first of a one or two character comment-end sequence of style b.
+  7. second of a two-character comment-end sequence of style a.
+  8. second of a two-character comment-end sequence of style b.
+ */
+/* SYNTAX_COMMENT_BITS moves them to bits 7-0: item 1 is 0x80, item 8 is 0x01. */
+#define SYNTAX_COMMENT_BITS(table, c) \
+  ((SYNTAX_CODE (table, c) >> 16) &0xff)
+
+#define SYNTAX_FIRST_OF_START_A  0x80	/* single-bit masks, one per role, */
+#define SYNTAX_FIRST_OF_START_B  0x40	/* applied to SYNTAX_COMMENT_BITS */
+#define SYNTAX_SECOND_OF_START_A 0x20
+#define SYNTAX_SECOND_OF_START_B 0x10
+#define SYNTAX_FIRST_OF_END_A    0x08
+#define SYNTAX_FIRST_OF_END_B    0x04
+#define SYNTAX_SECOND_OF_END_A   0x02
+#define SYNTAX_SECOND_OF_END_B   0x01
+/* Unions of the bits above: by style (A, B) and by position in the sequence. */
+#define SYNTAX_COMMENT_STYLE_A   0xaa	/* all four style-A bits */
+#define SYNTAX_COMMENT_STYLE_B   0x55	/* all four style-B bits */
+#define SYNTAX_FIRST_CHAR_START  0xc0
+#define SYNTAX_FIRST_CHAR_END    0x0c
+#define SYNTAX_FIRST_CHAR        0xcc	/* FIRST_CHAR_START | FIRST_CHAR_END */
+#define SYNTAX_SECOND_CHAR_START 0x30
+#define SYNTAX_SECOND_CHAR_END   0x03
+#define SYNTAX_SECOND_CHAR       0x33	/* SECOND_CHAR_START | SECOND_CHAR_END */
+/* True if A has a first-char and B a second-char comment-start bit. */
+#define SYNTAX_START_P(table, a, b)					\
+  ((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_START)		\
+   && (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_START))
+/* Same for comment-end bits.  Neither macro checks that the styles agree. */
+#define SYNTAX_END_P(table, a, b)					\
+  ((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_END)		\
+   && (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_END))
+/* As SYNTAX_START_P, but only bits also set in MASK count, for A and for B. */
+#define SYNTAX_STYLES_MATCH_START_P(table, a, b, mask)			    \
+  ((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_START & (mask))	    \
+   && (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_START & (mask)))
+/* As SYNTAX_END_P, but only bits also set in MASK count, for A and for B. */
+#define SYNTAX_STYLES_MATCH_END_P(table, a, b, mask)			  \
+  ((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_END & (mask))	  \
+   && (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_END & (mask)))
+/* A's comment bits that are also in MASK: nonzero if any, not just 0/1. */
+#define SYNTAX_STYLES_MATCH_1CHAR_P(table, a, mask)	\
+  ((SYNTAX_COMMENT_BITS (table, a) & (mask)))
+/* STARTP ? SYNTAX_STYLES_MATCH_START_P : ..._END_P, with STYLE as the mask. */
+#define STYLE_FOUND_P(table, a, b, startp, style)	\
+  ((SYNTAX_COMMENT_BITS (table, a) &			\
+    ((startp) ? SYNTAX_FIRST_CHAR_START :		\
+     SYNTAX_FIRST_CHAR_END) & (style))			\
+   && (SYNTAX_COMMENT_BITS (table, b) &			\
+    ((startp) ? SYNTAX_SECOND_CHAR_START : 		\
+     SYNTAX_SECOND_CHAR_END) & (style)))
+/* Style mask of the comment start A,B: style A is tried first; 0 if neither. */
+#define SYNTAX_COMMENT_MASK_START(table, a, b)			\
+  ((STYLE_FOUND_P (table, a, b, 1, SYNTAX_COMMENT_STYLE_A)	\
+    ? SYNTAX_COMMENT_STYLE_A					\
+    : (STYLE_FOUND_P (table, a, b, 1, SYNTAX_COMMENT_STYLE_B)	\
+         ? SYNTAX_COMMENT_STYLE_B				\
+	 : 0)))
+/* Same for a comment end.  Both macros may evaluate A and B more than once. */
+#define SYNTAX_COMMENT_MASK_END(table, a, b)			\
+  ((STYLE_FOUND_P (table, a, b, 0, SYNTAX_COMMENT_STYLE_A)	\
+   ? SYNTAX_COMMENT_STYLE_A					\
+   : (STYLE_FOUND_P (table, a, b, 0, SYNTAX_COMMENT_STYLE_B)	\
+      ? SYNTAX_COMMENT_STYLE_B					\
+      : 0)))
+/* Same expansion as SYNTAX_STYLES_MATCH_1CHAR_P: A's comment bits in STYLE. */
+#define STYLE_FOUND_1CHAR_P(table, a, style)	\
+  ((SYNTAX_COMMENT_BITS (table, a) & (style)))
+/* SYNTAX_COMMENT_STYLE_A if A has any style-A bit, else _B likewise, else 0. */
+#define SYNTAX_COMMENT_1CHAR_MASK(table, a)			\
+  ((STYLE_FOUND_1CHAR_P (table, a, SYNTAX_COMMENT_STYLE_A)	\
+   ? SYNTAX_COMMENT_STYLE_A					\
+   : (STYLE_FOUND_1CHAR_P (table, a, SYNTAX_COMMENT_STYLE_B)	\
+      ? SYNTAX_COMMENT_STYLE_B					\
+	 : 0)))
+/* NOTE(review): EXFUN is defined elsewhere; presumably declares Lisp primitives -- confirm. */
+EXFUN (Fchar_syntax, 2);
+EXFUN (Fforward_word, 2);
+
+/* The standard syntax table is stored where it will automatically
+   be used in all new buffers.  Sinherit entries are resolved from it.  */
+extern Lisp_Object Vstandard_syntax_table;
+
+/* This array, indexed by a character, contains the syntax code which
+   that character signifies (as a char).  0400 is octal: 256 entries.
+   For example, (enum syntaxcode) syntax_spec_code['w'] is Sword. */
+
+extern CONST unsigned char syntax_spec_code[0400];
+
+/* Indexed by syntax code, give the letter that describes it. */
+/* Declared without a size; the length comes from the definition. */
+extern CONST unsigned char syntax_code_spec[];
+/* Scanning routines; declarations only, defined outside this header. */
+Lisp_Object scan_lists (struct buffer *buf, Bufpos from, int count,
+			int depth, int sexpflag, int no_error);
+int char_quoted (struct buffer *buf, Bufpos pos);
+
+/* NOTE: This does not refer to the mirror table, but to the
+   syntax table itself; TABLE is passed as a Lisp_Object. */
+Lisp_Object syntax_match (Lisp_Object table, Emchar ch);
+/* NOTE(review): both externs are defined elsewhere; purpose not visible here. */
+extern int no_quit_in_re_search;
+extern struct buffer *regex_emacs_buffer;
+/* NOTE(review): presumably refreshes the mirror table for CT -- confirm. */
+void update_syntax_table (struct Lisp_Char_Table *ct);
+
+#endif /* _XEMACS_SYNTAX_H_ */