comparison src/syntax.h @ 70:131b0175ea99 r20-0b30

Import from CVS: tag r20-0b30
author cvs
date Mon, 13 Aug 2007 09:02:59 +0200
parents ac2d302a0011
children c5d627a313b1
comparison
equal deleted inserted replaced
69:804d1389bcd6 70:131b0175ea99
21 /* Synched up with: FSF 19.28. */ 21 /* Synched up with: FSF 19.28. */
22 22
23 #ifndef _XEMACS_SYNTAX_H_ 23 #ifndef _XEMACS_SYNTAX_H_
24 #define _XEMACS_SYNTAX_H_ 24 #define _XEMACS_SYNTAX_H_
25 25
26 #include "chartab.h"
27
26 /* The standard syntax table is stored where it will automatically 28 /* The standard syntax table is stored where it will automatically
27 be used in all new buffers. */ 29 be used in all new buffers. */
28 extern Lisp_Object Vstandard_syntax_table; 30 extern Lisp_Object Vstandard_syntax_table;
29 31
30 /* A syntax table is a Lisp vector of length 0400, whose elements are integers. 32 /* A syntax table is a type of char table.
31 33
32 The low 7 bits of the integer is a code, as follows. The 8th bit is 34 The low 7 bits of the integer is a code, as follows. The 8th bit is
33 used as the prefix bit flag (see below). 35 used as the prefix bit flag (see below).
36
37 The values in a syntax table are either integers or conses of
38 integers and chars. The lowest 7 bits of the integer are the syntax
39 class. If this is Sinherit, then the actual syntax value needs to
40 be retrieved from the standard syntax table.
41
42 Since the logic involved in finding the actual integer isn't very
43 complex, you'd think the time required to retrieve it is not a
44 factor. If you thought that, however, you'd be wrong, due to the
45 high number of times (many per character) that the syntax value is
46 accessed in functions such as scan_lists(). To speed this up,
47 we maintain a mirror syntax table that contains the actual
48 integers. We can do this successfully because syntax tables are
49 now an abstract type, where we control all access.
34 */ 50 */
35 51
36 enum syntaxcode 52 enum syntaxcode
37 { 53 {
38 Swhitespace, /* whitespace character */ 54 Swhitespace, /* whitespace character */
47 Sescape, /* a character that begins a C-style escape */ 63 Sescape, /* a character that begins a C-style escape */
48 Scharquote, /* a character that quotes the following character */ 64 Scharquote, /* a character that quotes the following character */
49 Scomment, /* a comment-starting character */ 65 Scomment, /* a comment-starting character */
50 Sendcomment, /* a comment-ending character */ 66 Sendcomment, /* a comment-ending character */
51 Sinherit, /* use the standard syntax table for this character */ 67 Sinherit, /* use the standard syntax table for this character */
52 Sextword, /* extended word; works mostly like a word constituent.
53 See the comment in syntax.c. */
54 Smax /* Upper bound on codes that are meaningful */ 68 Smax /* Upper bound on codes that are meaningful */
55 }; 69 };
56 70
57 extern Lisp_Object Qsyntax_table_p; 71 extern Lisp_Object Qsyntax_table_p;
58 Lisp_Object Fsyntax_table_p (Lisp_Object); 72 Lisp_Object Fsyntax_table_p (Lisp_Object);
59 Lisp_Object Fsyntax_table (Lisp_Object); 73 Lisp_Object Fsyntax_table (Lisp_Object);
60 Lisp_Object Fset_syntax_table (Lisp_Object, Lisp_Object); 74 Lisp_Object Fset_syntax_table (Lisp_Object, Lisp_Object);
61 75 enum syntaxcode charset_syntax (struct buffer *buf, Lisp_Object charset,
62 /* Return the raw syntax code for a particular character and table */ 76 int *multi_p_out);
63 #define RAW_SYNTAX_CODE_UNSAFE(table, c) \ 77
64 (XINT (vector_data (XVECTOR (table))[(unsigned char) (c)])) 78 /* Return the syntax code for a particular character and mirror table. */
65 79
66 /* Return the syntax code for a particular character and table, taking 80 #define SYNTAX_CODE_UNSAFE(table, c) \
67 into account inheritance. */ 81 XINT (CHAR_TABLE_VALUE_UNSAFE (table, c))
68 82
69 /* Unfortunately, we cannot write SYNTAX_CODE() as a safe macro in 83 INLINE int SYNTAX_CODE (struct Lisp_Char_Table *table, Emchar c);
70 general. I tried just using an inline function but that causes
71 significant slowdown (esp. in regex routines) because this macro
72 is called so many millions of times. So instead we resort to
73 SYNTAX_CODE_UNSAFE(), which is used most of the time. Under
74 GCC we can actually write this as a safe macro, and we do because
75 it's likely to lead to speedups. */
76
77 #ifdef __GNUC__
78 #define SYNTAX_CODE_UNSAFE(table, c) \
79 ({ Emchar _ch_ = (c); \
80 int _rawcode_ = RAW_SYNTAX_CODE_UNSAFE (table, _ch_); \
81 if ((enum syntaxcode) (_rawcode_ & 0177) == Sinherit) \
82 _rawcode_ = RAW_SYNTAX_CODE_UNSAFE (Vstandard_syntax_table, _ch_); \
83 _rawcode_; })
84 #else
85 #define SYNTAX_CODE_UNSAFE(table, c) \
86 (RAW_SYNTAX_CODE_UNSAFE (table, c) == Sinherit \
87 ? RAW_SYNTAX_CODE_UNSAFE (Vstandard_syntax_table, c) \
88 : RAW_SYNTAX_CODE_UNSAFE (table, c))
89 #endif
90
91 INLINE int SYNTAX_CODE (Lisp_Object table, Emchar c);
92 INLINE int 84 INLINE int
93 SYNTAX_CODE (Lisp_Object table, Emchar c) 85 SYNTAX_CODE (struct Lisp_Char_Table *table, Emchar c)
94 { 86 {
95 return SYNTAX_CODE_UNSAFE (table, c); 87 return SYNTAX_CODE_UNSAFE (table, c);
96 } 88 }
97 89
98 #define SYNTAX_UNSAFE(table, c) \ 90 #define SYNTAX_UNSAFE(table, c) \
99 ((enum syntaxcode) (SYNTAX_CODE_UNSAFE (table, c) & 0177)) 91 ((enum syntaxcode) (SYNTAX_CODE_UNSAFE (table, c) & 0177))
100 92
101 #define SYNTAX_FROM_CODE(code) ((enum syntaxcode) ((code) & 0177)) 93 #define SYNTAX_FROM_CODE(code) ((enum syntaxcode) ((code) & 0177))
102 #define SYNTAX(table, c) SYNTAX_FROM_CODE (SYNTAX_CODE (table, c)) 94 #define SYNTAX(table, c) SYNTAX_FROM_CODE (SYNTAX_CODE (table, c))
103 95
104 INLINE int WORD_SYNTAX_P (Lisp_Object table, Emchar c); 96 INLINE int WORD_SYNTAX_P (struct Lisp_Char_Table *table, Emchar c);
105 INLINE int 97 INLINE int
106 WORD_SYNTAX_P (Lisp_Object table, Emchar c) 98 WORD_SYNTAX_P (struct Lisp_Char_Table *table, Emchar c)
107 { 99 {
108 int syncode = SYNTAX (table, c); 100 int syncode = SYNTAX (table, c);
109 return syncode == Sword || syncode == Sextword; 101 return syncode == Sword;
110 } 102 }
103
104 /* OK, here's a graphic diagram of the format of the syntax values:
105
106 Bit number:
107
108 [ 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 ]
109 [ 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 ]
110
111   <-----> <-----> <-------------> <-------------> ^ <----------->
112    ELisp  unused  |comment bits |     unused      |  syntax code
113     tag           | | | | | | | |                 |
114    stuff          | | | | | | | |                 |
115                   | | | | | | | |                 |
116                   | | | | | | | |                 `--> prefix flag
117                   | | | | | | | |
118                   | | | | | | | `--> comment end style B, second char
119                   | | | | | | `----> comment end style A, second char
120                   | | | | | `------> comment end style B, first char
121                   | | | | `--------> comment end style A, first char
122                   | | | `----------> comment start style B, second char
123                   | | `------------> comment start style A, second char
124                   | `--------------> comment start style B, first char
125                   `----------------> comment start style A, first char
126
127 In a 64-bit integer, there would be 32 more unused bits between
128 the tag and the comment bits.
129
130 Clearly, such a scheme will not work for Mule, because the matching
131 paren could be any character and as such requires 19 bits, which
132 we don't have.
133
134 Remember that under Mule we use char tables instead of vectors.
135 So what we do is use another char table for the matching paren
136 and store a pointer to it in the first char table. (This frees
137 code from having to worry about passing two tables around.)
138 */
139
111 140
112 /* The prefix flag bit for backward-prefix-chars is now put into bit 7. */ 141 /* The prefix flag bit for backward-prefix-chars is now put into bit 7. */
113 142
114 #define SYNTAX_PREFIX_UNSAFE(table, c) \ 143 #define SYNTAX_PREFIX_UNSAFE(table, c) \
115 ((SYNTAX_CODE_UNSAFE (table, c) >> 7) & 1) 144 ((SYNTAX_CODE_UNSAFE (table, c) >> 7) & 1)
116 #define SYNTAX_PREFIX(table, c) \ 145 #define SYNTAX_PREFIX(table, c) \
117 ((SYNTAX_CODE (table, c) >> 7) & 1) 146 ((SYNTAX_CODE (table, c) >> 7) & 1)
118 147
119 /* The next 8 bits of the number is a character, 148 /* Bits 23-16 are used to implement up to two comment styles
120 the matching delimiter in the case of Sopen or Sclose. */
121
122 #define SYNTAX_MATCH(table, c) \
123 ((SYNTAX_CODE (table, c) >> 8) & 0377)
124
125 /* The next 8 bits are used to implement up to two comment styles
126 in a single buffer. They have the following meanings: 149 in a single buffer. They have the following meanings:
127 150
128 1. first of a one or two character comment-start sequence of style a. 151 1. first of a one or two character comment-start sequence of style a.
129 2. first of a one or two character comment-start sequence of style b. 152 2. first of a one or two character comment-start sequence of style b.
130 3. second of a two-character comment-start sequence of style a. 153 3. second of a two-character comment-start sequence of style a.
226 Lisp_Object syntax_match (Lisp_Object table, Emchar ch); 249 Lisp_Object syntax_match (Lisp_Object table, Emchar ch);
227 250
228 extern int no_quit_in_re_search; 251 extern int no_quit_in_re_search;
229 extern struct buffer *regex_emacs_buffer; 252 extern struct buffer *regex_emacs_buffer;
230 253
254 void update_syntax_table (struct Lisp_Char_Table *ct);
255
231 #endif /* _XEMACS_SYNTAX_H_ */ 256 #endif /* _XEMACS_SYNTAX_H_ */