xemacs-beta: src/syntax.h comparison

comparison src/syntax.h @ 70:131b0175ea99 r20-0b30

Import from CVS: tag r20-0b30

author	cvs
date	Mon, 13 Aug 2007 09:02:59 +0200
parents	ac2d302a0011
children	c5d627a313b1

comparison

equal deleted inserted replaced

-:804d1389bcd6
+:131b0175ea99
 /* Synched up with: FSF 19.28. */
 #ifndef _XEMACS_SYNTAX_H_
 #define _XEMACS_SYNTAX_H_
+#include "chartab.h"
 /* The standard syntax table is stored where it will automatically
 be used in all new buffers.  */
 extern Lisp_Object Vstandard_syntax_table;
-/* A syntax table is a Lisp vector of length 0400, whose elements are integers.
+/* A syntax table is a type of char table.
 The low 7 bits of the integer is a code, as follows. The 8th bit is
 used as the prefix bit flag (see below).
+The values in a syntax table are either integers or conses of
+integers and chars.  The lowest 7 bits of the integer are the syntax
+class.  If this is Sinherit, then the actual syntax value needs to
+be retrieved from the standard syntax table.
+Since the logic involved in finding the actual integer isn't very
+complex, you'd think the time required to retrieve it is not a
+factor.  If you thought that, however, you'd be wrong, due to the
+high number of times (many per character) that the syntax value is
+accessed in functions such as scan_lists().  To speed this up,
+we maintain a mirror syntax table that contains the actual
+integers.  We can do this successfully because syntax tables are
+now an abstract type, where we control all access.
 */
 enum syntaxcode
 {
 Swhitespace,	/* whitespace character */
 Sescape,	/* a character that begins a C-style escape */
 Scharquote,	/* a character that quotes the following character */
 Scomment,	/* a comment-starting character */
 Sendcomment,	/* a comment-ending character */
 Sinherit,	/* use the standard syntax table for this character */
-Sextword,	/* extended word; works mostly like a word constituent.
-		   See the comment in syntax.c. */
 Smax	 /* Upper bound on codes that are meaningful */
 };
 extern Lisp_Object Qsyntax_table_p;
 Lisp_Object Fsyntax_table_p (Lisp_Object);
 Lisp_Object Fsyntax_table   (Lisp_Object);
 Lisp_Object Fset_syntax_table (Lisp_Object, Lisp_Object);
+enum syntaxcode charset_syntax (struct buffer *buf, Lisp_Object charset,
-/* Return the raw syntax code for a particular character and table */
+				int *multi_p_out);
-#define RAW_SYNTAX_CODE_UNSAFE(table, c) \
-(XINT (vector_data (XVECTOR (table))[(unsigned char) (c)]))
+/* Return the syntax code for a particular character and mirror table. */
-/* Return the syntax code for a particular character and table, taking
+#define SYNTAX_CODE_UNSAFE(table, c) \
-into account inheritance. */
+XINT (CHAR_TABLE_VALUE_UNSAFE (table, c))
-/* Unfortunately, we cannot write SYNTAX_CODE() as a safe macro in
+INLINE int SYNTAX_CODE (struct Lisp_Char_Table *table, Emchar c);
-general.  I tried just using an inline function but that causes
-significant slowdown (esp. in regex routines) because this macro
-is called so many millions of times.  So instead we resort to
-SYNTAX_CODE_UNSAFE(), which is used most of the time.  Under
-GCC we can actually write this as a safe macro, and we do because
-it's likely to lead to speedups. */
-#ifdef __GNUC__
-#define SYNTAX_CODE_UNSAFE(table, c)					 \
-({ Emchar _ch_ = (c);							 \
-int _rawcode_ = RAW_SYNTAX_CODE_UNSAFE (table, _ch_);		 \
-if ((enum syntaxcode) (_rawcode_ & 0177) == Sinherit)		 \
-_rawcode_ = RAW_SYNTAX_CODE_UNSAFE (Vstandard_syntax_table, _ch_); \
-_rawcode_; })
-#else
-#define SYNTAX_CODE_UNSAFE(table, c)			\
-(RAW_SYNTAX_CODE_UNSAFE (table, c) == Sinherit	\
-? RAW_SYNTAX_CODE_UNSAFE (Vstandard_syntax_table, c)	\
-: RAW_SYNTAX_CODE_UNSAFE (table, c))
-#endif
-INLINE int SYNTAX_CODE (Lisp_Object table, Emchar c);
 INLINE int
-SYNTAX_CODE (Lisp_Object table, Emchar c)
+SYNTAX_CODE (struct Lisp_Char_Table *table, Emchar c)
 {
 return SYNTAX_CODE_UNSAFE (table, c);
 }
 #define SYNTAX_UNSAFE(table, c) \
 ((enum syntaxcode) (SYNTAX_CODE_UNSAFE (table, c) & 0177))
 #define SYNTAX_FROM_CODE(code) ((enum syntaxcode) ((code) & 0177))
 #define SYNTAX(table, c) SYNTAX_FROM_CODE (SYNTAX_CODE (table, c))
-INLINE int WORD_SYNTAX_P (Lisp_Object table, Emchar c);
+INLINE int WORD_SYNTAX_P (struct Lisp_Char_Table *table, Emchar c);
 INLINE int
-WORD_SYNTAX_P (Lisp_Object table, Emchar c)
+WORD_SYNTAX_P (struct Lisp_Char_Table *table, Emchar c)
 {
 int syncode = SYNTAX (table, c);
-return syncode == Sword || syncode == Sextword;
+return syncode == Sword;
 }
+/* OK, here's a graphic diagram of the format of the syntax values:
+Bit number:
+[ 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 ]
+[ 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 ]
+<-----> <-----> <-------------> <-------------> ^  <----------->
+ELisp  unused  |comment bits |     unused      |   syntax code
+tag           | | | | | | | |                 |
+stuff          | | | | | | | |                 |
+| | | | | | | |                 |
+| | | | | | | |                 `--> prefix flag
+| | | | | | | |
+| | | | | | | `--> comment end style B, second char
+| | | | | | `----> comment end style A, second char
+| | | | | `------> comment end style B, first char
+| | | | `--------> comment end style A, first char
+| | | `----------> comment start style B, second char
+| | `------------> comment start style A, second char
+| `--------------> comment start style B, first char
+`----------------> comment start style A, first char
+In a 64-bit integer, there would be 32 more unused bits between
+the tag and the comment bits.
+Clearly, such a scheme will not work for Mule, because the matching
+paren could be any character and as such requires 19 bits, which
+we don't got.
+Remember that under Mule we use char tables instead of vectors.
+So what we do is use another char table for the matching paren
+and store a pointer to it in the first char table. (This frees
+code from having to worry about passing two tables around.)
+*/
 /* The prefix flag bit for backward-prefix-chars is now put into bit 7. */
 #define SYNTAX_PREFIX_UNSAFE(table, c) \
 ((SYNTAX_CODE_UNSAFE (table, c) >> 7) & 1)
 #define SYNTAX_PREFIX(table, c) \
 ((SYNTAX_CODE (table, c) >> 7) & 1)
-/* The next 8 bits of the number is a character,
+/* Bits 23-16 are used to implement up to two comment styles
-the matching delimiter in the case of Sopen or Sclose. */
-#define SYNTAX_MATCH(table, c) \
-((SYNTAX_CODE (table, c) >> 8) & 0377)
-/* The next 8 bits are used to implement up to two comment styles
 in a single buffer. They have the following meanings:
 1. first of a one or two character comment-start sequence of style a.
 2. first of a one or two character comment-start sequence of style b.
 3. second of a two-character comment-start sequence of style a.
 Lisp_Object syntax_match (Lisp_Object table, Emchar ch);
 extern int no_quit_in_re_search;
 extern struct buffer *regex_emacs_buffer;
+void update_syntax_table (struct Lisp_Char_Table *ct);
 #endif /* _XEMACS_SYNTAX_H_ */

Mercurial > hg > xemacs-beta

comparison src/syntax.h @ 70:131b0175ea99 r20-0b30