428
|
1 /* Declarations having to do with XEmacs syntax tables.
|
|
2 Copyright (C) 1985, 1992, 1993 Free Software Foundation, Inc.
|
793
|
3 Copyright (C) 2002 Ben Wing.
|
428
|
4
|
|
5 This file is part of XEmacs.
|
|
6
|
|
7 XEmacs is free software; you can redistribute it and/or modify it
|
|
8 under the terms of the GNU General Public License as published by the
|
|
9 Free Software Foundation; either version 2, or (at your option) any
|
|
10 later version.
|
|
11
|
|
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
|
|
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
15 for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with XEmacs; see the file COPYING. If not, write to
|
|
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
20 Boston, MA 02111-1307, USA. */
|
|
21
|
|
22 /* Synched up with: FSF 19.28. */
|
|
23
|
440
|
24 #ifndef INCLUDED_syntax_h_
|
|
25 #define INCLUDED_syntax_h_
|
428
|
26
|
|
27 #include "chartab.h"
|
|
28
|
|
/* A syntax table is a type of char table.

   The low 7 bits of the integer are a code, as follows.  The 8th bit is
   used as the prefix bit flag (see below).

   The values in a syntax table are either integers or conses of
   integers and chars.  The lowest 7 bits of the integer are the syntax
   class.  If this is Sinherit, then the actual syntax value needs to
   be retrieved from the standard syntax table.

   Since the logic involved in finding the actual integer isn't very
   complex, you'd think the time required to retrieve it is not a
   factor.  If you thought that, however, you'd be wrong, due to the
   high number of times (many per character) that the syntax value is
   accessed in functions such as scan_lists().  To speed this up,
   we maintain a mirror syntax table that contains the actual
   integers.  We can do this successfully because syntax tables are
   now an abstract type, where we control all access.
*/
|
|
48
|
|
/* Syntax classes.  The class of a character is stored in the low 7 bits
   of its syntax value (see SYNTAX_FROM_CODE).  The enumerators are
   numbered consecutively from zero, so Smax equals the number of
   meaningful classes. */
enum syntaxcode
{
  Swhitespace,     /* whitespace character */
  Spunct,          /* random punctuation character */
  Sword,           /* word constituent */
  Ssymbol,         /* symbol constituent but not word constituent */
  Sopen,           /* a beginning delimiter */
  Sclose,          /* an ending delimiter */
  Squote,          /* a prefix character like Lisp ' */
  Sstring,         /* a string-grouping character like Lisp " */
  Smath,           /* delimiters like $ in TeX. */
  Sescape,         /* a character that begins a C-style escape */
  Scharquote,      /* a character that quotes the following character */
  Scomment,        /* a comment-starting character */
  Sendcomment,     /* a comment-ending character */
  Sinherit,        /* use the standard syntax table for this character */
  Scomment_fence,  /* Starts/ends comment which is delimited on the
                      other side by a char with the same syntaxcode.  */
  Sstring_fence,   /* Starts/ends string which is delimited on the
                      other side by a char with the same syntaxcode.  */
  Smax             /* Upper bound on codes that are meaningful */
};
|
|
71
|
|
72 enum syntaxcode charset_syntax (struct buffer *buf, Lisp_Object charset,
|
|
73 int *multi_p_out);
|
|
74
|
|
/* Return the syntax code for a particular character and mirror table.
   This is the whole integer stored for C in TABLE, i.e. the syntax class
   together with the flag bits above it. */

#define SYNTAX_CODE(table, c) XINT (get_char_table (c, table))

/* Extract the syntax class (the low 7 bits, mask 0177) from a syntax
   code and give it the type enum syntaxcode. */

#define SYNTAX_FROM_CODE(code) ((enum syntaxcode) ((code) & 0177))

/* Return the syntax class of character C in mirror table TABLE. */

#define SYNTAX(table, c) SYNTAX_FROM_CODE (SYNTAX_CODE (table, c))
|
|
82
|
826
|
83 DECLARE_INLINE_HEADER (
|
|
84 int
|
|
85 WORD_SYNTAX_P (Lisp_Object table, Emchar c)
|
|
86 )
|
428
|
87 {
|
|
88 return SYNTAX (table, c) == Sword;
|
|
89 }
|
|
90
|
|
/* OK, here's a graphic diagram of the format of the syntax values:

   Bit number:

 [ 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 ]
 [ 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 ]

   <-----> <-----> <-------------> <-------------> ^ <----------->
    ELisp  unused  |comment bits |     unused      |  syntax code
     tag           | | | | | | | |                 |
    stuff          | | | | | | | |                 |
                   | | | | | | | |                 |
                   | | | | | | | |                 `--> prefix flag
                   | | | | | | | |
                   | | | | | | | `--> comment end style B, second char
                   | | | | | | `----> comment end style A, second char
                   | | | | | `------> comment end style B, first char
                   | | | | `--------> comment end style A, first char
                   | | | `----------> comment start style B, second char
                   | | `------------> comment start style A, second char
                   | `--------------> comment start style B, first char
                   `----------------> comment start style A, first char

   In a 64-bit integer, there would be 32 more unused bits between
   the tag and the comment bits.

   Clearly, such a scheme will not work for Mule, because the matching
   paren could be any character and as such requires 19 bits, which
   we don't have.

   Remember that under Mule we use char tables instead of vectors.
   So what we do is use another char table for the matching paren
   and store a pointer to it in the first char table.  (This frees
   code from having to worry about passing two tables around.)
*/
|
|
126
|
|
127
|
|
/* The prefix flag bit for backward-prefix-chars is now put into bit 7.
   Evaluates to 1 if the flag is set for character C in TABLE, else 0. */

#define SYNTAX_PREFIX(table, c) \
  ((SYNTAX_CODE (table, c) >> 7) & 1)
|
|
132
|
|
/* Bits 23-16 are used to implement up to two comment styles
   in a single buffer.  They have the following meanings, listed from
   bit 23 (mask 0x80 within the extracted byte) down to bit 16 (0x01):

  1. first of a one or two character comment-start sequence of style a.
  2. first of a one or two character comment-start sequence of style b.
  3. second of a two-character comment-start sequence of style a.
  4. second of a two-character comment-start sequence of style b.
  5. first of a one or two character comment-end sequence of style a.
  6. first of a one or two character comment-end sequence of style b.
  7. second of a two-character comment-end sequence of style a.
  8. second of a two-character comment-end sequence of style b.
 */

/* Return the eight comment bits of character C in TABLE as a value in
   the range 0..0xff. */
#define SYNTAX_COMMENT_BITS(table, c) \
  ((SYNTAX_CODE (table, c) >> 16) & 0xff)
|
|
148
|
|
/* Individual comment bits, as found in the byte that the comment-bits
   accessors extract (syntax code shifted right by 16, masked with 0xff). */
#define SYNTAX_FIRST_OF_START_A  0x80
#define SYNTAX_FIRST_OF_START_B  0x40
#define SYNTAX_SECOND_OF_START_A 0x20
#define SYNTAX_SECOND_OF_START_B 0x10
#define SYNTAX_FIRST_OF_END_A    0x08
#define SYNTAX_FIRST_OF_END_B    0x04
#define SYNTAX_SECOND_OF_END_A   0x02
#define SYNTAX_SECOND_OF_END_B   0x01

/* Composite masks.  Each is the bitwise OR of the individual bits above
   that share the named property: all style-A bits, all style-B bits,
   all first-char bits (start, end, both), all second-char bits. */
#define SYNTAX_COMMENT_STYLE_A   0xaa
#define SYNTAX_COMMENT_STYLE_B   0x55
#define SYNTAX_FIRST_CHAR_START  0xc0
#define SYNTAX_FIRST_CHAR_END    0x0c
#define SYNTAX_FIRST_CHAR        0xcc
#define SYNTAX_SECOND_CHAR_START 0x30
#define SYNTAX_SECOND_CHAR_END   0x03
#define SYNTAX_SECOND_CHAR       0x33
|
|
166
|
826
|
#if 0

/* #### Entirely unused.  Should they be deleted? */

/* Everything up to the matching #endif is compiled out.  These are the
   table-based counterparts of the SYNTAX_CODE_... / SYNTAX_CODES_...
   macros: each takes a TABLE and characters rather than syntax codes. */

/*  #### These are now more or less equivalent to
    SYNTAX_COMMENT_MATCH_START ...*/
/* a and b must be first and second start chars for a common type */
#define SYNTAX_START_P(table, a, b)                                     \
  (((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_START) >> 2)    \
   & (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_START))

/* ... and  SYNTAX_COMMENT_MATCH_END */
/* a and b must be first and second end chars for a common type */
#define SYNTAX_END_P(table, a, b)                                       \
  (((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_END) >> 2)      \
   & (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_END))

#define SYNTAX_STYLES_MATCH_START_P(table, a, b, mask)                      \
  ((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_START & (mask))      \
   && (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_START & (mask)))

#define SYNTAX_STYLES_MATCH_END_P(table, a, b, mask)                        \
  ((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_END & (mask))        \
   && (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_END & (mask)))

#define SYNTAX_STYLES_MATCH_1CHAR_P(table, a, mask)     \
  ((SYNTAX_COMMENT_BITS (table, a) & (mask)))

#define STYLE_FOUND_P(table, a, b, startp, style)       \
  ((SYNTAX_COMMENT_BITS (table, a) &                    \
    ((startp) ? SYNTAX_FIRST_CHAR_START :               \
     SYNTAX_FIRST_CHAR_END) & (style))                  \
   && (SYNTAX_COMMENT_BITS (table, b) &                 \
    ((startp) ? SYNTAX_SECOND_CHAR_START :              \
     SYNTAX_SECOND_CHAR_END) & (style)))

#define SYNTAX_COMMENT_MASK_START(table, a, b)                  \
  ((STYLE_FOUND_P (table, a, b, 1, SYNTAX_COMMENT_STYLE_A)      \
    ? SYNTAX_COMMENT_STYLE_A                                    \
    : (STYLE_FOUND_P (table, a, b, 1, SYNTAX_COMMENT_STYLE_B)   \
       ? SYNTAX_COMMENT_STYLE_B                                 \
       : 0)))

#define SYNTAX_COMMENT_MASK_END(table, a, b)                    \
  ((STYLE_FOUND_P (table, a, b, 0, SYNTAX_COMMENT_STYLE_A)      \
    ? SYNTAX_COMMENT_STYLE_A                                    \
    : (STYLE_FOUND_P (table, a, b, 0, SYNTAX_COMMENT_STYLE_B)   \
       ? SYNTAX_COMMENT_STYLE_B                                 \
       : 0)))

#define STYLE_FOUND_1CHAR_P(table, a, style)    \
  ((SYNTAX_COMMENT_BITS (table, a) & (style)))

#define SYNTAX_COMMENT_1CHAR_MASK(table, a)                     \
  ((STYLE_FOUND_1CHAR_P (table, a, SYNTAX_COMMENT_STYLE_A)      \
    ? SYNTAX_COMMENT_STYLE_A                                    \
    : (STYLE_FOUND_1CHAR_P (table, a, SYNTAX_COMMENT_STYLE_B)   \
       ? SYNTAX_COMMENT_STYLE_B                                 \
       : 0)))

#endif /* 0 */
|
428
|
228
|
|
/* This array, indexed by a character, contains the syntax code which
   that character signifies (as a char).
   For example, (enum syntaxcode) syntax_spec_code['w'] is Sword.
   The array has 0400 (octal, i.e. 256) entries. */

extern const unsigned char syntax_spec_code[0400];

/* Indexed by syntax code, give the letter that describes it. */

extern const unsigned char syntax_code_spec[];
|
428
|
238
|
665
|
239 Lisp_Object scan_lists (struct buffer *buf, Charbpos from, int count,
|
428
|
240 int depth, int sexpflag, int no_error);
|
665
|
241 int char_quoted (struct buffer *buf, Charbpos pos);
|
428
|
242
|
|
243 /* NOTE: This does not refer to the mirror table, but to the
|
|
244 syntax table itself. */
|
|
245 Lisp_Object syntax_match (Lisp_Object table, Emchar ch);
|
|
246
|
|
247 extern int no_quit_in_re_search;
|
826
|
248
|
|
249 void update_syntax_table (Lisp_Object table);
|
460
|
250
|
826
|
251
|
|
252 /****************************** syntax caches ********************************/
|
460
|
253
|
|
254 extern int lookup_syntax_properties;
|
|
255
|
826
|
256 /* Now that the `syntax-table' property exists, and can override the syntax
|
|
257 table or directly specify the syntax, we cache the last place we
|
|
258 retrieved the syntax-table property. This is because, when moving
|
|
259 linearly through text (e.g. in the regex routines or the scanning
|
|
260 routines in syntax.c), we only need to recalculate at the next place the
|
|
261 syntax-table property changes (i.e. not every position), and when we do
|
|
262 need to recalculate, we can update the info from the previous info
|
|
263 faster than if we did the whole calculation from scratch. */
|
460
|
264 struct syntax_cache
|
|
265 {
|
826
|
266 int use_code; /* Whether to use syntax_code or
|
|
267 current_syntax_table. This is
|
|
268 set depending on whether the
|
|
269 syntax-table property is a
|
|
270 syntax table or a syntax
|
|
271 code. */
|
|
272 int no_syntax_table_prop; /* If non-zero, there was no
|
|
273 `syntax-table' property on the
|
|
274 current range, and so we're
|
|
275 using the buffer's syntax table.
|
|
276 This is important to note because
|
|
277 sometimes the buffer's syntax
|
|
278 table can be changed. */
|
460
|
279 Lisp_Object object; /* The buffer or string the current
|
826
|
280 syntax cache applies to, or
|
|
281 Qnil for a string of text not
|
|
282 coming from a buffer or string. */
|
|
283 struct buffer *buffer; /* The buffer that supplies the
|
|
284 syntax tables, or 0 for the
|
|
285 standard syntax table. If
|
|
286 OBJECT is a buffer, this will
|
|
287 always be the same buffer. */
|
460
|
288 int syntax_code; /* Syntax code of current char. */
|
|
289 Lisp_Object current_syntax_table; /* Syntax table for current pos. */
|
826
|
290 Lisp_Object start, end; /* Markers to keep track of the
|
|
291 known region in a buffer.
|
|
292 Formerly we used an internal
|
|
293 extent, but it seems that having
|
|
294 an extent over the entire buffer
|
|
295 causes serious slowdowns in
|
|
296 extent operations! Yuck! */
|
|
297 Charxpos next_change; /* Position of the next extent
|
460
|
298 change. */
|
826
|
299 Charxpos prev_change; /* Position of the previous extent
|
|
300 change. */
|
460
|
301 };
|
826
|
302
|
|
/* Note that the external interface to the syntax-cache uses charpos's, but
   internally we use bytepos's, for speed. */
|
460
|
305
|
826
|
306 void update_syntax_cache (struct syntax_cache *cache, Charxpos pos, int count);
|
|
307 struct syntax_cache *setup_syntax_cache (struct syntax_cache *cache,
|
|
308 Lisp_Object object,
|
|
309 struct buffer *buffer,
|
|
310 Charxpos from, int count);
|
|
311 struct syntax_cache *setup_buffer_syntax_cache (struct buffer *buffer,
|
|
312 Charxpos from, int count);
|
460
|
313
|
|
314 /* Make syntax cache state good for CHARPOS, assuming it is
|
|
315 currently good for a position before CHARPOS. */
|
826
|
316 DECLARE_INLINE_HEADER (
|
|
317 void
|
|
318 UPDATE_SYNTAX_CACHE_FORWARD (struct syntax_cache *cache, Charxpos pos)
|
|
319 )
|
|
320 {
|
|
321 if (!(pos >= cache->prev_change && pos < cache->next_change))
|
|
322 update_syntax_cache (cache, pos, 1);
|
|
323 }
|
460
|
324
|
|
325 /* Make syntax cache state good for CHARPOS, assuming it is
|
|
326 currently good for a position after CHARPOS. */
|
826
|
327 DECLARE_INLINE_HEADER (
|
|
328 void
|
|
329 UPDATE_SYNTAX_CACHE_BACKWARD (struct syntax_cache *cache, Charxpos pos)
|
|
330 )
|
|
331 {
|
|
332 if (!(pos >= cache->prev_change && pos < cache->next_change))
|
|
333 update_syntax_cache (cache, pos, -1);
|
|
334 }
|
460
|
335
|
|
336 /* Make syntax cache state good for CHARPOS */
|
826
|
337 DECLARE_INLINE_HEADER (
|
|
338 void
|
|
339 UPDATE_SYNTAX_CACHE (struct syntax_cache *cache, Charxpos pos)
|
|
340 )
|
|
341 {
|
|
342 if (!(pos >= cache->prev_change && pos < cache->next_change))
|
|
343 update_syntax_cache (cache, pos, 0);
|
|
344 }
|
460
|
345
|
826
|
/* Return the syntax class of character C according to CACHE. */
#define SYNTAX_FROM_CACHE(cache, c)                     \
  SYNTAX_FROM_CODE (SYNTAX_CODE_FROM_CACHE (cache, c))

/* Return the full syntax code of character C according to CACHE: the
   cached syntax_code when use_code is set, otherwise a lookup of C in
   the cache's current_syntax_table.  CACHE is evaluated more than once,
   so it must not have side effects. */
#define SYNTAX_CODE_FROM_CACHE(cache, c)                        \
  ((cache)->use_code ? (cache)->syntax_code                     \
   : SYNTAX_CODE ((cache)->current_syntax_table, c))
|
|
352
|
|
353
|
|
/***************************** syntax code macros ****************************/

/* These take a syntax code (the integer produced by SYNTAX_CODE or
   SYNTAX_CODE_FROM_CACHE) rather than a table and a character.  The
   argument is parenthesized in the expansion so that an expression such
   as `x | y' can be passed safely. */

/* 1 if the prefix flag (bit 7) is set in syntax code C, else 0. */
#define SYNTAX_CODE_PREFIX(c) \
  (((c) >> 7) & 1)

/* The eight comment bits (bits 23-16) of syntax code C, as 0..0xff. */
#define SYNTAX_CODE_COMMENT_BITS(c) \
  (((c) >> 16) & 0xff)
|
|
361
|
|
/* Non-zero if syntax codes A and B are, respectively, the first and
   second character of a comment-start sequence of the same style.  The
   first-char start bits of A are shifted right by 2 so that they line up
   with the second-char start bits of B. */
#define SYNTAX_CODES_START_P(a, b)                                      \
  (((SYNTAX_CODE_COMMENT_BITS (a) & SYNTAX_FIRST_CHAR_START) >> 2)      \
   & (SYNTAX_CODE_COMMENT_BITS (b) & SYNTAX_SECOND_CHAR_START))

/* Likewise for a two-character comment-end sequence. */
#define SYNTAX_CODES_END_P(a, b)                                        \
  (((SYNTAX_CODE_COMMENT_BITS (a) & SYNTAX_FIRST_CHAR_END) >> 2)        \
   & (SYNTAX_CODE_COMMENT_BITS (b) & SYNTAX_SECOND_CHAR_END))

/* Yield SYNTAX_COMMENT_STYLE_A or SYNTAX_COMMENT_STYLE_B according to
   which style A followed by B starts a comment in (style A is tried
   first), or 0 if neither. */
#define SYNTAX_CODES_COMMENT_MASK_START(a, b)                           \
  (SYNTAX_CODES_MATCH_START_P (a, b, SYNTAX_COMMENT_STYLE_A)            \
   ? SYNTAX_COMMENT_STYLE_A                                             \
   : (SYNTAX_CODES_MATCH_START_P (a, b, SYNTAX_COMMENT_STYLE_B)         \
      ? SYNTAX_COMMENT_STYLE_B                                          \
      : 0))
/* Likewise for a comment-end sequence. */
#define SYNTAX_CODES_COMMENT_MASK_END(a, b)                             \
  (SYNTAX_CODES_MATCH_END_P (a, b, SYNTAX_COMMENT_STYLE_A)              \
   ? SYNTAX_COMMENT_STYLE_A                                             \
   : (SYNTAX_CODES_MATCH_END_P (a, b, SYNTAX_COMMENT_STYLE_B)           \
      ? SYNTAX_COMMENT_STYLE_B                                          \
      : 0))

/* Non-zero if syntax code A has any first-char/second-char bit of a
   comment-start/comment-end sequence set, in either style. */
#define SYNTAX_CODE_START_FIRST_P(a) \
  (SYNTAX_CODE_COMMENT_BITS (a) & SYNTAX_FIRST_CHAR_START)

#define SYNTAX_CODE_START_SECOND_P(a) \
  (SYNTAX_CODE_COMMENT_BITS (a) & SYNTAX_SECOND_CHAR_START)

#define SYNTAX_CODE_END_FIRST_P(a) \
  (SYNTAX_CODE_COMMENT_BITS (a) & SYNTAX_FIRST_CHAR_END)

#define SYNTAX_CODE_END_SECOND_P(a) \
  (SYNTAX_CODE_COMMENT_BITS (a) & SYNTAX_SECOND_CHAR_END)


/* Non-zero if A is a first char and B a second char of a comment-start
   sequence, both within the style bits selected by MASK. */
#define SYNTAX_CODES_MATCH_START_P(a, b, mask)                          \
  ((SYNTAX_CODE_COMMENT_BITS (a) & SYNTAX_FIRST_CHAR_START & (mask))    \
   && (SYNTAX_CODE_COMMENT_BITS (b)                                     \
       & SYNTAX_SECOND_CHAR_START & (mask)))

/* Likewise for a comment-end sequence. */
#define SYNTAX_CODES_MATCH_END_P(a, b, mask)                            \
  ((SYNTAX_CODE_COMMENT_BITS (a) & SYNTAX_FIRST_CHAR_END & (mask))      \
   && (SYNTAX_CODE_COMMENT_BITS (b) & SYNTAX_SECOND_CHAR_END & (mask)))

/* The comment bits of A that fall within MASK. */
#define SYNTAX_CODE_MATCHES_1CHAR_P(a, mask)    \
  ((SYNTAX_CODE_COMMENT_BITS (a) & (mask)))

/* Yield SYNTAX_COMMENT_STYLE_A if A has any style-A comment bit set,
   else SYNTAX_COMMENT_STYLE_B if it has any style-B bit set, else 0. */
#define SYNTAX_CODE_COMMENT_1CHAR_MASK(a)                       \
  ((SYNTAX_CODE_MATCHES_1CHAR_P (a, SYNTAX_COMMENT_STYLE_A)     \
    ? SYNTAX_COMMENT_STYLE_A                                    \
    : (SYNTAX_CODE_MATCHES_1CHAR_P (a, SYNTAX_COMMENT_STYLE_B)  \
       ? SYNTAX_COMMENT_STYLE_B                                 \
       : 0)))
|
|
414
|
|
415
|
440
|
416 #endif /* INCLUDED_syntax_h_ */
|