0
|
1 /* Declarations having to do with XEmacs syntax tables.
|
|
2 Copyright (C) 1985, 1992, 1993 Free Software Foundation, Inc.
|
|
3
|
|
4 This file is part of XEmacs.
|
|
5
|
|
6 XEmacs is free software; you can redistribute it and/or modify it
|
|
7 under the terms of the GNU General Public License as published by the
|
|
8 Free Software Foundation; either version 2, or (at your option) any
|
|
9 later version.
|
|
10
|
|
11 XEmacs is distributed in the hope that it will be useful, but WITHOUT
|
|
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with XEmacs; see the file COPYING. If not, write to
|
|
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
19 Boston, MA 02111-1307, USA. */
|
|
20
|
|
21 /* Synched up with: FSF 19.28. */
|
|
22
|
412
|
23 #ifndef _XEMACS_SYNTAX_H_
|
|
24 #define _XEMACS_SYNTAX_H_
|
0
|
25
|
70
|
26 #include "chartab.h"
|
|
27
|
|
28 /* A syntax table is a type of char table.
|
0
|
29
|
|
   The low 7 bits of each integer value are the syntax class code (one
   of the enum syntaxcode values listed below).  Bit 7, i.e. the 8th
   bit, is used as the prefix flag (see below).
|
70
|
32
|
|
33 The values in a syntax table are either integers or conses of
|
|
34 integers and chars. The lowest 7 bits of the integer are the syntax
|
|
35 class. If this is Sinherit, then the actual syntax value needs to
|
|
36 be retrieved from the standard syntax table.
|
|
37
|
|
38 Since the logic involved in finding the actual integer isn't very
|
|
39 complex, you'd think the time required to retrieve it is not a
|
|
40 factor. If you thought that, however, you'd be wrong, due to the
|
|
41 high number of times (many per character) that the syntax value is
|
|
42 accessed in functions such as scan_lists(). To speed this up,
|
|
43 we maintain a mirror syntax table that contains the actual
|
|
44 integers. We can do this successfully because syntax tables are
|
|
45 now an abstract type, where we control all access.
|
0
|
46 */
|
|
47
|
|
/* Syntax classes.  The numeric value of an enumerator is what is kept
   in the low 7 bits of a syntax value, so the values are spelled out
   explicitly here. */
enum syntaxcode
{
  Swhitespace = 0,	/* whitespace character */
  Spunct      = 1,	/* random punctuation character */
  Sword       = 2,	/* word constituent */
  Ssymbol     = 3,	/* symbol constituent that is not a word constituent */
  Sopen       = 4,	/* beginning delimiter */
  Sclose      = 5,	/* ending delimiter */
  Squote      = 6,	/* prefix character, like Lisp ' */
  Sstring     = 7,	/* string-grouping character, like Lisp " */
  Smath       = 8,	/* paired delimiter, like $ in TeX */
  Sescape     = 9,	/* character that begins a C-style escape */
  Scharquote  = 10,	/* character that quotes the following character */
  Scomment    = 11,	/* comment-starting character */
  Sendcomment = 12,	/* comment-ending character */
  Sinherit    = 13,	/* use the standard syntax table for this character */
  Smax        = 14	/* upper bound on codes that are meaningful */
};
|
0
|
66
|
70
|
67 enum syntaxcode charset_syntax (struct buffer *buf, Lisp_Object charset,
|
|
68 int *multi_p_out);
|
0
|
69
|
70
|
/* Yield the raw integer syntax value recorded for character C in the
   mirror table TABLE.  This is a plain macro around
   CHAR_TABLE_VALUE_UNSAFE; NOTE(review): the _UNSAFE suffix presumably
   warns about repeated evaluation of the arguments -- confirm against
   chartab.h. */

#define SYNTAX_CODE_UNSAFE(table, c)				\
  (XINT (CHAR_TABLE_VALUE_UNSAFE (table, c)))
|
0
|
74
|
412
|
75 INLINE int SYNTAX_CODE (struct Lisp_Char_Table *table, Emchar c);
|
|
76 INLINE int
|
|
77 SYNTAX_CODE (struct Lisp_Char_Table *table, Emchar c)
|
0
|
78 {
|
|
79 return SYNTAX_CODE_UNSAFE (table, c);
|
|
80 }
|
272
|
81
|
0
|
/* Extract the syntax class (the low 7 bits) from a raw syntax value
   CODE and present it as an enum syntaxcode. */
#define SYNTAX_FROM_CODE(code) ((enum syntaxcode) ((code) & 0177))

/* Syntax class of character C in mirror table TABLE.  SYNTAX fetches
   the value through the SYNTAX_CODE function, SYNTAX_UNSAFE through
   the SYNTAX_CODE_UNSAFE macro. */
#define SYNTAX(table, c)					\
  SYNTAX_FROM_CODE (SYNTAX_CODE (table, c))

#define SYNTAX_UNSAFE(table, c)					\
  SYNTAX_FROM_CODE (SYNTAX_CODE_UNSAFE (table, c))
|
|
87
|
412
|
88 INLINE int WORD_SYNTAX_P (struct Lisp_Char_Table *table, Emchar c);
|
|
89 INLINE int
|
|
90 WORD_SYNTAX_P (struct Lisp_Char_Table *table, Emchar c)
|
0
|
91 {
|
380
|
92 return SYNTAX (table, c) == Sword;
|
0
|
93 }
|
|
94
|
70
|
/* OK, here's a graphic diagram of the format of the syntax values:

   Bit number:

 [ 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 ]
 [ 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 ]

   <-----> <-----> <-------------> <-------------> ^ <----------->
    ELisp  unused  |comment bits |     unused      |  syntax code
     tag           | | | | | | | |                 |
    stuff          | | | | | | | |                 |
                   | | | | | | | |                 |
                   | | | | | | | |                 `--> prefix flag
                   | | | | | | | |
                   | | | | | | | `--> comment end style B, second char
                   | | | | | | `----> comment end style A, second char
                   | | | | | `------> comment end style B, first char
                   | | | | `--------> comment end style A, first char
                   | | | `----------> comment start style B, second char
                   | | `------------> comment start style A, second char
                   | `--------------> comment start style B, first char
                   `----------------> comment start style A, first char

   In a 64-bit integer, there would be 32 more unused bits between
   the tag and the comment bits.

   Clearly, a scheme that also stores the matching paren in this value
   will not work for Mule, because the matching paren could be any
   character and as such requires 19 bits, which we don't have.

   Remember that under Mule we use char tables instead of vectors.
   So what we do is use another char table for the matching paren
   and store a pointer to it in the first char table.  (This frees
   code from having to worry about passing two tables around.)
*/
|
|
130
|
|
131
|
0
|
/* Bit 7 of a syntax value holds the prefix flag used for
   backward-prefix-chars.  Both macros yield 1 when the flag is set for
   character C in mirror table TABLE and 0 otherwise; they differ only
   in fetching the value through SYNTAX_CODE_UNSAFE or SYNTAX_CODE. */

#define SYNTAX_PREFIX_UNSAFE(table, c)				\
  (1 & (SYNTAX_CODE_UNSAFE (table, c) >> 7))
#define SYNTAX_PREFIX(table, c)					\
  (1 & (SYNTAX_CODE (table, c) >> 7))
|
|
138
|
70
|
/* Bits 23-16 of a syntax value are used to implement up to two comment
   styles in a single buffer.  Going from bit 23 down to bit 16, a set
   bit marks the character as:

   bit 23: first of a one or two character comment-start sequence of style a.
   bit 22: first of a one or two character comment-start sequence of style b.
   bit 21: second of a two-character comment-start sequence of style a.
   bit 20: second of a two-character comment-start sequence of style b.
   bit 19: first of a one or two character comment-end sequence of style a.
   bit 18: first of a one or two character comment-end sequence of style b.
   bit 17: second of a two-character comment-end sequence of style a.
   bit 16: second of a two-character comment-end sequence of style b.

   SYNTAX_COMMENT_BITS extracts those eight bits for character C in
   mirror table TABLE as a value in the range 0..0xff, with bit 23
   ending up as 0x80 and bit 16 as 0x01.
*/

#define SYNTAX_COMMENT_BITS(table, c)				\
  (0xff & (SYNTAX_CODE (table, c) >> 16))
|
|
154
|
|
/* Individual flags within the 8 comment bits of a syntax value. */
#define SYNTAX_FIRST_OF_START_A  0x80
#define SYNTAX_FIRST_OF_START_B  0x40
#define SYNTAX_SECOND_OF_START_A 0x20
#define SYNTAX_SECOND_OF_START_B 0x10
#define SYNTAX_FIRST_OF_END_A    0x08
#define SYNTAX_FIRST_OF_END_B    0x04
#define SYNTAX_SECOND_OF_END_A   0x02
#define SYNTAX_SECOND_OF_END_B   0x01

/* All flags that belong to one comment style (0xaa and 0x55). */
#define SYNTAX_COMMENT_STYLE_A						\
  (SYNTAX_FIRST_OF_START_A | SYNTAX_SECOND_OF_START_A			\
   | SYNTAX_FIRST_OF_END_A | SYNTAX_SECOND_OF_END_A)
#define SYNTAX_COMMENT_STYLE_B						\
  (SYNTAX_FIRST_OF_START_B | SYNTAX_SECOND_OF_START_B			\
   | SYNTAX_FIRST_OF_END_B | SYNTAX_SECOND_OF_END_B)

/* Flags grouped by position in the sequence, for either style
   (0xc0, 0x0c, 0xcc, 0x30, 0x03 and 0x33 respectively). */
#define SYNTAX_FIRST_CHAR_START						\
  (SYNTAX_FIRST_OF_START_A | SYNTAX_FIRST_OF_START_B)
#define SYNTAX_FIRST_CHAR_END						\
  (SYNTAX_FIRST_OF_END_A | SYNTAX_FIRST_OF_END_B)
#define SYNTAX_FIRST_CHAR						\
  (SYNTAX_FIRST_CHAR_START | SYNTAX_FIRST_CHAR_END)
#define SYNTAX_SECOND_CHAR_START					\
  (SYNTAX_SECOND_OF_START_A | SYNTAX_SECOND_OF_START_B)
#define SYNTAX_SECOND_CHAR_END						\
  (SYNTAX_SECOND_OF_END_A | SYNTAX_SECOND_OF_END_B)
#define SYNTAX_SECOND_CHAR						\
  (SYNTAX_SECOND_CHAR_START | SYNTAX_SECOND_CHAR_END)
|
|
172
|
412
|
/* Nonzero (1) if character A carries a first-of-comment-start flag
   and character B carries a second-of-comment-start flag, in either
   style, according to mirror table TABLE.  B is not examined when A
   fails the test. */
#define SYNTAX_START_P(table, a, b)					\
  ((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_START) != 0	\
   && (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_START) != 0)

/* Likewise for the two characters of a comment-end sequence. */
#define SYNTAX_END_P(table, a, b)					\
  ((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_END) != 0	\
   && (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_END) != 0)
|
0
|
180
|
|
/* Like SYNTAX_START_P, but only flags that are also set in MASK
   count.  Yields 1 or 0. */
#define SYNTAX_STYLES_MATCH_START_P(table, a, b, mask)			\
  ((SYNTAX_COMMENT_BITS (table, a)					\
    & SYNTAX_FIRST_CHAR_START & (mask)) != 0				\
   && (SYNTAX_COMMENT_BITS (table, b)					\
       & SYNTAX_SECOND_CHAR_START & (mask)) != 0)

/* Like SYNTAX_END_P, but only flags that are also set in MASK
   count.  Yields 1 or 0. */
#define SYNTAX_STYLES_MATCH_END_P(table, a, b, mask)			\
  ((SYNTAX_COMMENT_BITS (table, a)					\
    & SYNTAX_FIRST_CHAR_END & (mask)) != 0				\
   && (SYNTAX_COMMENT_BITS (table, b)					\
       & SYNTAX_SECOND_CHAR_END & (mask)) != 0)

/* The comment bits of character A that are also set in MASK.  The
   result is the masked bit pattern itself, not a normalized 0/1. */
#define SYNTAX_STYLES_MATCH_1CHAR_P(table, a, mask)			\
  ((mask) & SYNTAX_COMMENT_BITS (table, a))
|
|
191
|
|
/* Yields 1 if characters A and B form a two-character comment
   delimiter of the style selected by the mask STYLE, else 0.  A
   nonzero STARTP tests for a comment-start sequence, a zero STARTP
   for a comment-end sequence.  B (and the second use of STARTP) is
   not evaluated when A fails the test. */
#define STYLE_FOUND_P(table, a, b, startp, style)			\
  ((SYNTAX_COMMENT_BITS (table, a)					\
    & ((startp) ? SYNTAX_FIRST_CHAR_START : SYNTAX_FIRST_CHAR_END)	\
    & (style)) != 0							\
   && (SYNTAX_COMMENT_BITS (table, b)					\
       & ((startp) ? SYNTAX_SECOND_CHAR_START : SYNTAX_SECOND_CHAR_END)	\
       & (style)) != 0)
|
|
199
|
|
/* Style mask of the comment-start sequence formed by characters A and
   B: SYNTAX_COMMENT_STYLE_A if they match in style A, otherwise
   SYNTAX_COMMENT_STYLE_B if they match in style B, otherwise 0.
   Style A is tried first. */
#define SYNTAX_COMMENT_MASK_START(table, a, b)				\
  (STYLE_FOUND_P (table, a, b, 1, SYNTAX_COMMENT_STYLE_A)		\
   ? SYNTAX_COMMENT_STYLE_A						\
   : STYLE_FOUND_P (table, a, b, 1, SYNTAX_COMMENT_STYLE_B)		\
   ? SYNTAX_COMMENT_STYLE_B						\
   : 0)

/* Same as SYNTAX_COMMENT_MASK_START, but for a comment-end sequence. */
#define SYNTAX_COMMENT_MASK_END(table, a, b)				\
  (STYLE_FOUND_P (table, a, b, 0, SYNTAX_COMMENT_STYLE_A)		\
   ? SYNTAX_COMMENT_STYLE_A						\
   : STYLE_FOUND_P (table, a, b, 0, SYNTAX_COMMENT_STYLE_B)		\
   ? SYNTAX_COMMENT_STYLE_B						\
   : 0)
|
|
213
|
|
/* The comment bits of character A that are also set in the mask
   STYLE; nonzero when A carries any flag of that style. */
#define STYLE_FOUND_1CHAR_P(table, a, style)				\
  ((style) & SYNTAX_COMMENT_BITS (table, a))

/* Style mask for a single-character comment delimiter A:
   SYNTAX_COMMENT_STYLE_A if A has any style A flag, otherwise
   SYNTAX_COMMENT_STYLE_B if it has any style B flag, otherwise 0.
   Style A is tried first. */
#define SYNTAX_COMMENT_1CHAR_MASK(table, a)				\
  (STYLE_FOUND_1CHAR_P (table, a, SYNTAX_COMMENT_STYLE_A)		\
   ? SYNTAX_COMMENT_STYLE_A						\
   : STYLE_FOUND_1CHAR_P (table, a, SYNTAX_COMMENT_STYLE_B)		\
   ? SYNTAX_COMMENT_STYLE_B						\
   : 0)
|
|
223
|
272
|
224 EXFUN (Fchar_syntax, 2);
|
|
225 EXFUN (Fforward_word, 2);
|
|
226
|
|
227 /* The standard syntax table is stored where it will automatically
|
|
228 be used in all new buffers. */
|
|
229 extern Lisp_Object Vstandard_syntax_table;
|
|
230
|
2
|
231 /* This array, indexed by a character, contains the syntax code which
|
|
232 that character signifies (as a char).
|
|
233 For example, (enum syntaxcode) syntax_spec_code['w'] is Sword. */
|
0
|
234
|
412
|
235 extern CONST unsigned char syntax_spec_code[0400];
|
0
|
236
|
|
237 /* Indexed by syntax code, give the letter that describes it. */
|
|
238
|
412
|
239 extern CONST unsigned char syntax_code_spec[];
|
0
|
240
|
424
|
241 Lisp_Object scan_lists (struct buffer *buf, Bufpos from, int count,
|
0
|
242 int depth, int sexpflag, int no_error);
|
424
|
243 int char_quoted (struct buffer *buf, Bufpos pos);
|
0
|
244
|
|
245 /* NOTE: This does not refer to the mirror table, but to the
|
|
246 syntax table itself. */
|
|
247 Lisp_Object syntax_match (Lisp_Object table, Emchar ch);
|
|
248
|
|
249 extern int no_quit_in_re_search;
|
|
250 extern struct buffer *regex_emacs_buffer;
|
|
251
|
412
|
252 void update_syntax_table (struct Lisp_Char_Table *ct);
|
70
|
253
|
412
|
254 #endif /* _XEMACS_SYNTAX_H_ */
|