Mercurial > hg > xemacs-beta
diff src/syntax.h @ 70:131b0175ea99 r20-0b30
Import from CVS: tag r20-0b30
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:02:59 +0200 |
parents | ac2d302a0011 |
children | c5d627a313b1 |
line wrap: on
line diff
--- a/src/syntax.h Mon Aug 13 09:00:04 2007 +0200 +++ b/src/syntax.h Mon Aug 13 09:02:59 2007 +0200 @@ -23,14 +23,30 @@ #ifndef _XEMACS_SYNTAX_H_ #define _XEMACS_SYNTAX_H_ +#include "chartab.h" + /* The standard syntax table is stored where it will automatically be used in all new buffers. */ extern Lisp_Object Vstandard_syntax_table; -/* A syntax table is a Lisp vector of length 0400, whose elements are integers. +/* A syntax table is a type of char table. The low 7 bits of the integer is a code, as follows. The 8th bit is used as the prefix bit flag (see below). + +The values in a syntax table are either integers or conses of +integers and chars. The lowest 7 bits of the integer are the syntax +class. If this is Sinherit, then the actual syntax value needs to +be retrieved from the standard syntax table. + +Since the logic involved in finding the actual integer isn't very +complex, you'd think the time required to retrieve it is not a +factor. If you thought that, however, you'd be wrong, due to the +high number of times (many per character) that the syntax value is +accessed in functions such as scan_lists(). To speed this up, +we maintain a mirror syntax table that contains the actual +integers. We can do this successfully because syntax tables are +now an abstract type, where we control all access. */ enum syntaxcode @@ -49,8 +65,6 @@ Scomment, /* a comment-starting character */ Sendcomment, /* a comment-ending character */ Sinherit, /* use the standard syntax table for this character */ - Sextword, /* extended word; works mostly like a word constituent. - See the comment in syntax.c. */ Smax /* Upper bound on codes that are meaningful */ }; @@ -58,39 +72,17 @@ Lisp_Object Fsyntax_table_p (Lisp_Object); Lisp_Object Fsyntax_table (Lisp_Object); Lisp_Object Fset_syntax_table (Lisp_Object, Lisp_Object); - -/* Return the raw syntax code for a particular character and table */ -#define RAW_SYNTAX_CODE_UNSAFE(table, c) \ - (XINT (vector_data (XVECTOR (table))[(unsigned char) (c)])) - -/* Return the syntax code for a particular character and table, taking - into account inheritance. */ +enum syntaxcode charset_syntax (struct buffer *buf, Lisp_Object charset, + int *multi_p_out); -/* Unfortunately, we cannot write SYNTAX_CODE() as a safe macro in - general. I tried just using an inline function but that causes - significant slowdown (esp. in regex routines) because this macro - is called so many millions of times. So instead we resort to - SYNTAX_CODE_UNSAFE(), which is used most of the time. Under - GCC we can actually write this as a safe macro, and we do because - it's likely to lead to speedups. */ +/* Return the syntax code for a particular character and mirror table. */ -#ifdef __GNUC__ -#define SYNTAX_CODE_UNSAFE(table, c) \ - ({ Emchar _ch_ = (c); \ - int _rawcode_ = RAW_SYNTAX_CODE_UNSAFE (table, _ch_); \ - if ((enum syntaxcode) (_rawcode_ & 0177) == Sinherit) \ - _rawcode_ = RAW_SYNTAX_CODE_UNSAFE (Vstandard_syntax_table, _ch_); \ - _rawcode_; }) -#else -#define SYNTAX_CODE_UNSAFE(table, c) \ - (RAW_SYNTAX_CODE_UNSAFE (table, c) == Sinherit \ - ? RAW_SYNTAX_CODE_UNSAFE (Vstandard_syntax_table, c) \ - : RAW_SYNTAX_CODE_UNSAFE (table, c)) -#endif +#define SYNTAX_CODE_UNSAFE(table, c) \ + XINT (CHAR_TABLE_VALUE_UNSAFE (table, c)) -INLINE int SYNTAX_CODE (Lisp_Object table, Emchar c); +INLINE int SYNTAX_CODE (struct Lisp_Char_Table *table, Emchar c); INLINE int -SYNTAX_CODE (Lisp_Object table, Emchar c) +SYNTAX_CODE (struct Lisp_Char_Table *table, Emchar c) { return SYNTAX_CODE_UNSAFE (table, c); } @@ -101,14 +93,51 @@ #define SYNTAX_FROM_CODE(code) ((enum syntaxcode) ((code) & 0177)) #define SYNTAX(table, c) SYNTAX_FROM_CODE (SYNTAX_CODE (table, c)) -INLINE int WORD_SYNTAX_P (Lisp_Object table, Emchar c); +INLINE int WORD_SYNTAX_P (struct Lisp_Char_Table *table, Emchar c); INLINE int -WORD_SYNTAX_P (Lisp_Object table, Emchar c) +WORD_SYNTAX_P (struct Lisp_Char_Table *table, Emchar c) { int syncode = SYNTAX (table, c); - return syncode == Sword || syncode == Sextword; + return syncode == Sword; } +/* OK, here's a graphic diagram of the format of the syntax values: + + Bit number: + + [ 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 ] + [ 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 ] + + <-----> <-----> <-------------> <-------------> ^ <-----------> + ELisp unused |comment bits | unused | syntax code + tag | | | | | | | | | + stuff | | | | | | | | | + | | | | | | | | | + | | | | | | | | `--> prefix flag + | | | | | | | | + | | | | | | | `--> comment end style B, second char + | | | | | | `----> comment end style A, second char + | | | | | `------> comment end style B, first char + | | | | `--------> comment end style A, first char + | | | `----------> comment start style B, second char + | | `------------> comment start style A, second char + | `--------------> comment start style B, first char + `----------------> comment start style A, first char + + In a 64-bit integer, there would be 32 more unused bits between + the tag and the comment bits. + + Clearly, such a scheme will not work for Mule, because the matching + paren could be any character and as such requires 19 bits, which + we don't got. + + Remember that under Mule we use char tables instead of vectors. + So what we do is use another char table for the matching paren + and store a pointer to it in the first char table. (This frees + code from having to worry about passing two tables around.) +*/ + + /* The prefix flag bit for backward-prefix-chars is now put into bit 7. */ #define SYNTAX_PREFIX_UNSAFE(table, c) \ @@ -116,13 +145,7 @@ #define SYNTAX_PREFIX(table, c) \ ((SYNTAX_CODE (table, c) >> 7) & 1) -/* The next 8 bits of the number is a character, - the matching delimiter in the case of Sopen or Sclose. */ - -#define SYNTAX_MATCH(table, c) \ - ((SYNTAX_CODE (table, c) >> 8) & 0377) - -/* The next 8 bits are used to implement up to two comment styles +/* Bits 23-16 are used to implement up to two comment styles in a single buffer. They have the following meanings: 1. first of a one or two character comment-start sequence of style a. @@ -228,4 +251,6 @@ extern int no_quit_in_re_search; extern struct buffer *regex_emacs_buffer; +void update_syntax_table (struct Lisp_Char_Table *ct); + #endif /* _XEMACS_SYNTAX_H_ */