Mercurial > hg > xemacs-beta
comparison src/syntax.c @ 428:3ecd8885ac67 r21-2-22
Import from CVS: tag r21-2-22
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:28:15 +0200 |
parents | |
children | 9d177e8d4150 |
comparison
equal
deleted
inserted
replaced
427:0a0253eac470 | 428:3ecd8885ac67 |
---|---|
1 /* XEmacs routines to deal with syntax tables; also word and list parsing. | |
2 Copyright (C) 1985-1994 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
4 | |
5 This file is part of XEmacs. | |
6 | |
7 XEmacs is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by the | |
9 Free Software Foundation; either version 2, or (at your option) any | |
10 later version. | |
11 | |
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with XEmacs; see the file COPYING. If not, write to | |
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
20 Boston, MA 02111-1307, USA. */ | |
21 | |
22 /* Synched up with: FSF 19.28. */ | |
23 | |
24 /* This file has been Mule-ized. */ | |
25 | |
26 #include <config.h> | |
27 #include "lisp.h" | |
28 | |
29 #include "buffer.h" | |
30 #include "syntax.h" | |
31 | |
32 /* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp> | |
33 explaining the purpose of the Sextword syntax category: | |
34 | |
35 Japanese words are not separated by spaces, which makes finding word | |
36 boundaries very difficult. Theoretically it's impossible without | |
37 using natural language processing techniques. But, by defining | |
38 pseudo-words as below (much simplified for letting you understand it | |
39 easily) for Japanese, we can have a convenient forward-word function | |
40 for Japanese. | |
41 | |
42 A Japanese word is a sequence of characters that consists of | |
43 zero or more Kanji characters followed by zero or more | |
44 Hiragana characters. | |
45 | |
46 Then, the problem is that now we can't say that a sequence of | |
47 word-constituents makes up a WORD. For instance, both Hiragana "A" | |
48 and Kanji "KAN" are word-constituents but the sequence of these two | |
49 letters can't be a single word. | |
50 | |
51 So, we introduced Sextword for Japanese letters. A character of | |
52 Sextword is a word-constituent but a word boundary may exist between | |
53 two such characters. */ | |
54 | |
55 /* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */ | |
56 /* Recovered by tomo */ | |
57 | |
58 Lisp_Object Qsyntax_table_p; | |
59 | |
60 int words_include_escapes; | |
61 | |
62 int parse_sexp_ignore_comments; | |
63 | |
64 /* The following two variables are provided to tell additional information | |
65 to the regex routines. We do it this way rather than change the | |
66 arguments to re_search_2() in an attempt to maintain some call | |
67 compatibility with other versions of the regex code. */ | |
68 | |
69 /* Tell the regex routines not to QUIT. Normally there is a QUIT | |
70 each iteration in re_search_2(). */ | |
71 int no_quit_in_re_search; | |
72 | |
73 /* Tell the regex routines which buffer to access for SYNTAX() lookups | |
74 and the like. */ | |
75 struct buffer *regex_emacs_buffer; | |
76 | |
77 Lisp_Object Vstandard_syntax_table; | |
78 | |
79 Lisp_Object Vsyntax_designator_chars_string; | |
80 | |
81 /* This is the internal form of the parse state used in parse-partial-sexp. */ | |
82 | |
83 struct lisp_parse_state | |
84 { | |
85 int depth; /* Depth at end of parsing */ | |
86 Emchar instring; /* -1 if not within string, else desired terminator */ | |
87 int incomment; /* Nonzero if within a comment at end of parsing */ | |
88 int comstyle; /* comment style a=0, or b=1 */ | |
89 int quoted; /* Nonzero if just after an escape char at end of | |
90 parsing */ | |
91 Bufpos thislevelstart;/* Char number of most recent start-of-expression | |
92 at current level */ | |
93 Bufpos prevlevelstart;/* Char number of start of containing expression */ | |
94 Bufpos location; /* Char number at which parsing stopped */ | |
95 int mindepth; /* Minimum depth seen while scanning */ | |
96 Bufpos comstart; /* Position just after last comment starter */ | |
97 }; | |
98 | |
99 /* These variables are a cache for finding the start of a defun. | |
100 find_start_pos is the place for which the defun start was found. | |
101 find_start_value is the defun start position found for it. | |
102 find_start_buffer is the buffer it was found in. | |
103 find_start_begv is the BEGV value when it was found. | |
104 find_start_modiff is the value of MODIFF when it was found. */ | |
105 | |
106 static Bufpos find_start_pos; | |
107 static Bufpos find_start_value; | |
108 static struct buffer *find_start_buffer; | |
109 static Bufpos find_start_begv; | |
110 static int find_start_modiff; | |
111 | |
112 /* Find a defun-start that is the last one before POS (or nearly the last). | |
113 We record what we find, so that another call in the same area | |
114 can return the same value right away. */ | |
115 | |
116 static Bufpos | |
117 find_defun_start (struct buffer *buf, Bufpos pos) | |
118 { | |
119 Bufpos tem; | |
120 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table); | |
121 | |
122 /* Use previous finding, if it's valid and applies to this inquiry. */ | |
123 if (buf == find_start_buffer | |
124 /* Reuse the defun-start even if POS is a little farther on. | |
125 POS might be in the next defun, but that's ok. | |
126 Our value may not be the best possible, but will still be usable. */ | |
127 && pos <= find_start_pos + 1000 | |
128 && pos >= find_start_value | |
129 && BUF_BEGV (buf) == find_start_begv | |
130 && BUF_MODIFF (buf) == find_start_modiff) | |
131 return find_start_value; | |
132 | |
133 /* Back up to start of line. */ | |
134 tem = find_next_newline (buf, pos, -1); | |
135 | |
136 while (tem > BUF_BEGV (buf)) | |
137 { | |
138 /* Open-paren at start of line means we found our defun-start. */ | |
139 if (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, tem)) == Sopen) | |
140 break; | |
141 /* Move to beg of previous line. */ | |
142 tem = find_next_newline (buf, tem, -2); | |
143 } | |
144 | |
145 /* Record what we found, for the next try. */ | |
146 find_start_value = tem; | |
147 find_start_buffer = buf; | |
148 find_start_modiff = BUF_MODIFF (buf); | |
149 find_start_begv = BUF_BEGV (buf); | |
150 find_start_pos = pos; | |
151 | |
152 return find_start_value; | |
153 } | |
154 | |
155 DEFUN ("syntax-table-p", Fsyntax_table_p, 1, 1, 0, /* | |
156 Return t if ARG is a syntax table. | |
157 Any vector of 256 elements will do. | |
158 */ | |
159 (obj)) | |
160 { | |
161 return CHAR_TABLEP (obj) && XCHAR_TABLE_TYPE (obj) == CHAR_TABLE_TYPE_SYNTAX | |
162 ? Qt : Qnil; | |
163 } | |
164 | |
165 static Lisp_Object | |
166 check_syntax_table (Lisp_Object obj, Lisp_Object default_) | |
167 { | |
168 if (NILP (obj)) | |
169 obj = default_; | |
170 while (NILP (Fsyntax_table_p (obj))) | |
171 obj = wrong_type_argument (Qsyntax_table_p, obj); | |
172 return obj; | |
173 } | |
174 | |
175 DEFUN ("syntax-table", Fsyntax_table, 0, 1, 0, /* | |
176 Return the current syntax table. | |
177 This is the one specified by the current buffer, or by BUFFER if it | |
178 is non-nil. | |
179 */ | |
180 (buffer)) | |
181 { | |
182 return decode_buffer (buffer, 0)->syntax_table; | |
183 } | |
184 | |
185 DEFUN ("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /* | |
186 Return the standard syntax table. | |
187 This is the one used for new buffers. | |
188 */ | |
189 ()) | |
190 { | |
191 return Vstandard_syntax_table; | |
192 } | |
193 | |
194 DEFUN ("copy-syntax-table", Fcopy_syntax_table, 0, 1, 0, /* | |
195 Construct a new syntax table and return it. | |
196 It is a copy of the TABLE, which defaults to the standard syntax table. | |
197 */ | |
198 (table)) | |
199 { | |
200 if (NILP (Vstandard_syntax_table)) | |
201 return Fmake_char_table (Qsyntax); | |
202 | |
203 table = check_syntax_table (table, Vstandard_syntax_table); | |
204 return Fcopy_char_table (table); | |
205 } | |
206 | |
207 DEFUN ("set-syntax-table", Fset_syntax_table, 1, 2, 0, /* | |
208 Select a new syntax table for BUFFER. | |
209 One argument, a syntax table. | |
210 BUFFER defaults to the current buffer if omitted. | |
211 */ | |
212 (table, buffer)) | |
213 { | |
214 struct buffer *buf = decode_buffer (buffer, 0); | |
215 table = check_syntax_table (table, Qnil); | |
216 buf->syntax_table = table; | |
217 buf->mirror_syntax_table = XCHAR_TABLE (table)->mirror_table; | |
218 /* Indicate that this buffer now has a specified syntax table. */ | |
219 buf->local_var_flags |= XINT (buffer_local_flags.syntax_table); | |
220 return table; | |
221 } | |
222 | |
223 /* Convert a letter which signifies a syntax code | |
224 into the code it signifies. | |
225 This is used by modify-syntax-entry, and other things. */ | |
226 | |
227 CONST unsigned char syntax_spec_code[0400] = | |
228 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, | |
229 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, | |
230 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, | |
231 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, | |
232 (char) Swhitespace, 0377, (char) Sstring, 0377, | |
233 (char) Smath, 0377, 0377, (char) Squote, | |
234 (char) Sopen, (char) Sclose, 0377, 0377, | |
235 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote, | |
236 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, | |
237 0377, 0377, 0377, 0377, | |
238 (char) Scomment, 0377, (char) Sendcomment, 0377, | |
239 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */ | |
240 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, | |
241 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword, | |
242 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol, | |
243 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */ | |
244 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, | |
245 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword, | |
246 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377 | |
247 }; | |
248 | |
249 CONST unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@"; | |
250 | |
251 DEFUN ("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /* | |
252 Return a string of the recognized syntax designator chars. | |
253 The chars are ordered by their internal syntax codes, which are | |
254 numbered starting at 0. | |
255 */ | |
256 ()) | |
257 { | |
258 return Vsyntax_designator_chars_string; | |
259 } | |
260 | |
261 DEFUN ("char-syntax", Fchar_syntax, 1, 2, 0, /* | |
262 Return the syntax code of CHAR, described by a character. | |
263 For example, if CHAR is a word constituent, the character `?w' is returned. | |
264 The characters that correspond to various syntax codes | |
265 are listed in the documentation of `modify-syntax-entry'. | |
266 Optional second argument TABLE defaults to the current buffer's | |
267 syntax table. | |
268 */ | |
269 (ch, table)) | |
270 { | |
271 struct Lisp_Char_Table *mirrortab; | |
272 | |
273 if (NILP(ch)) | |
274 { | |
275 ch = make_char('\000'); | |
276 } | |
277 CHECK_CHAR_COERCE_INT (ch); | |
278 table = check_syntax_table (table, current_buffer->syntax_table); | |
279 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table); | |
280 return make_char (syntax_code_spec[(int) SYNTAX (mirrortab, XCHAR (ch))]); | |
281 } | |
282 | |
283 #ifdef MULE | |
284 | |
285 enum syntaxcode | |
286 charset_syntax (struct buffer *buf, Lisp_Object charset, int *multi_p_out) | |
287 { | |
288 *multi_p_out = 1; | |
289 /* #### get this right */ | |
290 return Spunct; | |
291 } | |
292 | |
293 #endif | |
294 | |
295 Lisp_Object | |
296 syntax_match (Lisp_Object table, Emchar ch) | |
297 { | |
298 Lisp_Object code = CHAR_TABLE_VALUE_UNSAFE (XCHAR_TABLE (table), ch); | |
299 Lisp_Object code2 = code; | |
300 | |
301 if (CONSP (code)) | |
302 code2 = XCAR (code); | |
303 if (SYNTAX_FROM_CODE (XINT (code2)) == Sinherit) | |
304 code = CHAR_TABLE_VALUE_UNSAFE (XCHAR_TABLE (Vstandard_syntax_table), | |
305 ch); | |
306 | |
307 return CONSP (code) ? XCDR (code) : Qnil; | |
308 } | |
309 | |
310 DEFUN ("matching-paren", Fmatching_paren, 1, 2, 0, /* | |
311 Return the matching parenthesis of CHAR, or nil if none. | |
312 Optional second argument TABLE defaults to the current buffer's | |
313 syntax table. | |
314 */ | |
315 (ch, table)) | |
316 { | |
317 struct Lisp_Char_Table *mirrortab; | |
318 int code; | |
319 | |
320 CHECK_CHAR_COERCE_INT (ch); | |
321 table = check_syntax_table (table, current_buffer->syntax_table); | |
322 mirrortab = XCHAR_TABLE (XCHAR_TABLE (table)->mirror_table); | |
323 code = SYNTAX (mirrortab, XCHAR (ch)); | |
324 if (code == Sopen || code == Sclose || code == Sstring) | |
325 return syntax_match (table, XCHAR (ch)); | |
326 return Qnil; | |
327 } | |
328 | |
329 | |
330 | |
331 #ifdef MULE | |
332 /* Return 1 if there is a word boundary between two word-constituent | |
333 characters C1 and C2 if they appear in this order, else return 0. | |
334 There is no word boundary between two word-constituent ASCII | |
335 characters. */ | |
336 #define WORD_BOUNDARY_P(c1, c2) \ | |
337 (!(CHAR_ASCII_P (c1) && CHAR_ASCII_P (c2)) \ | |
338 && word_boundary_p (c1, c2)) | |
339 | |
340 extern int word_boundary_p (Emchar c1, Emchar c2); | |
341 #endif | |
342 | |
343 /* Return the position across COUNT words from FROM. | |
344 If that many words cannot be found before the end of the buffer, return 0. | |
345 COUNT negative means scan backward and stop at word beginning. */ | |
346 | |
347 Bufpos | |
348 scan_words (struct buffer *buf, Bufpos from, int count) | |
349 { | |
350 Bufpos limit = count > 0 ? BUF_ZV (buf) : BUF_BEGV (buf); | |
351 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table); | |
352 Emchar ch0, ch1; | |
353 enum syntaxcode code; | |
354 | |
355 /* #### is it really worth it to hand expand both cases? JV */ | |
356 while (count > 0) | |
357 { | |
358 QUIT; | |
359 | |
360 while (1) | |
361 { | |
362 if (from == limit) | |
363 return 0; | |
364 | |
365 ch0 = BUF_FETCH_CHAR (buf, from); | |
366 code = SYNTAX_UNSAFE (mirrortab, ch0); | |
367 | |
368 if (words_include_escapes | |
369 && (code == Sescape || code == Scharquote)) | |
370 break; | |
371 if (code == Sword) | |
372 break; | |
373 | |
374 from++; | |
375 } | |
376 | |
377 QUIT; | |
378 | |
379 while (from != limit) | |
380 { | |
381 ch1 = BUF_FETCH_CHAR (buf, from); | |
382 code = SYNTAX_UNSAFE (mirrortab, ch1); | |
383 if (!(words_include_escapes | |
384 && (code == Sescape || code == Scharquote))) | |
385 if (code != Sword | |
386 #ifdef MULE | |
387 || WORD_BOUNDARY_P (ch0, ch1) | |
388 #endif | |
389 ) | |
390 break; | |
391 #ifdef MULE | |
392 ch0 = ch1; | |
393 #endif | |
394 from++; | |
395 } | |
396 count--; | |
397 } | |
398 | |
399 while (count < 0) | |
400 { | |
401 QUIT; | |
402 | |
403 while (1) | |
404 { | |
405 if (from == limit) | |
406 return 0; | |
407 | |
408 ch1 = BUF_FETCH_CHAR (buf, from - 1); | |
409 code = SYNTAX_UNSAFE (mirrortab, ch1); | |
410 if (words_include_escapes | |
411 && (code == Sescape || code == Scharquote)) | |
412 break; | |
413 if (code == Sword) | |
414 break; | |
415 | |
416 from--; | |
417 } | |
418 | |
419 QUIT; | |
420 | |
421 while (from != limit) | |
422 { | |
423 ch0 = BUF_FETCH_CHAR (buf, from - 1); | |
424 code = SYNTAX_UNSAFE (mirrortab, ch0); | |
425 if (!(words_include_escapes | |
426 && (code == Sescape || code == Scharquote))) | |
427 if (code != Sword | |
428 #ifdef MULE | |
429 || WORD_BOUNDARY_P (ch0, ch1) | |
430 #endif | |
431 ) | |
432 break; | |
433 #ifdef MULE | |
434 ch1 = ch0; | |
435 #endif | |
436 from--; | |
437 } | |
438 count++; | |
439 } | |
440 | |
441 return from; | |
442 } | |
443 | |
444 DEFUN ("forward-word", Fforward_word, 1, 2, "_p", /* | |
445 Move point forward COUNT words (backward if COUNT is negative). | |
446 Normally returns t. | |
447 If an edge of the buffer is reached, point is left there | |
448 and nil is returned. | |
449 | |
450 Optional argument BUFFER defaults to the current buffer. | |
451 */ | |
452 (count, buffer)) | |
453 { | |
454 Bufpos val; | |
455 struct buffer *buf = decode_buffer (buffer, 0); | |
456 CHECK_INT (count); | |
457 | |
458 if (!(val = scan_words (buf, BUF_PT (buf), XINT (count)))) | |
459 { | |
460 BUF_SET_PT (buf, XINT (count) > 0 ? BUF_ZV (buf) : BUF_BEGV (buf)); | |
461 return Qnil; | |
462 } | |
463 BUF_SET_PT (buf, val); | |
464 return Qt; | |
465 } | |
466 | |
467 static void scan_sexps_forward (struct buffer *buf, | |
468 struct lisp_parse_state *, | |
469 Bufpos from, Bufpos end, | |
470 int targetdepth, int stopbefore, | |
471 Lisp_Object oldstate, | |
472 int commentstop); | |
473 | |
474 static int | |
475 find_start_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask) | |
476 { | |
477 Emchar c; | |
478 enum syntaxcode code; | |
479 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table); | |
480 | |
481 /* Look back, counting the parity of string-quotes, | |
482 and recording the comment-starters seen. | |
483 When we reach a safe place, assume that's not in a string; | |
484 then step the main scan to the earliest comment-starter seen | |
485 an even number of string quotes away from the safe place. | |
486 | |
487 OFROM[I] is position of the earliest comment-starter seen | |
488 which is I+2X quotes from the comment-end. | |
489 PARITY is current parity of quotes from the comment end. */ | |
490 int parity = 0; | |
491 Emchar my_stringend = 0; | |
492 int string_lossage = 0; | |
493 Bufpos comment_end = from; | |
494 Bufpos comstart_pos = 0; | |
495 int comstart_parity = 0; | |
496 int styles_match_p = 0; | |
497 | |
498 /* At beginning of range to scan, we're outside of strings; | |
499 that determines quote parity to the comment-end. */ | |
500 while (from != stop) | |
501 { | |
502 /* Move back and examine a character. */ | |
503 from--; | |
504 | |
505 c = BUF_FETCH_CHAR (buf, from); | |
506 code = SYNTAX_UNSAFE (mirrortab, c); | |
507 | |
508 /* is this a 1-char comment end sequence? if so, try | |
509 to see if style matches previously extracted mask */ | |
510 if (code == Sendcomment) | |
511 { | |
512 styles_match_p = SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask); | |
513 } | |
514 | |
515 /* otherwise, is this a 2-char comment end sequence? */ | |
516 else if (from >= stop | |
517 && SYNTAX_END_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1))) | |
518 { | |
519 code = Sendcomment; | |
520 styles_match_p = | |
521 SYNTAX_STYLES_MATCH_END_P (mirrortab, c, | |
522 BUF_FETCH_CHAR (buf, from+1), | |
523 mask); | |
524 } | |
525 | |
526 /* or are we looking at a 1-char comment start sequence | |
527 of the style matching mask? */ | |
528 else if (code == Scomment | |
529 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask)) | |
530 { | |
531 styles_match_p = 1; | |
532 } | |
533 | |
534 /* or possibly, a 2-char comment start sequence */ | |
535 else if (from >= stop | |
536 && SYNTAX_STYLES_MATCH_START_P (mirrortab, c, | |
537 BUF_FETCH_CHAR (buf, from+1), | |
538 mask)) | |
539 { | |
540 code = Scomment; | |
541 styles_match_p = 1; | |
542 } | |
543 | |
544 /* Ignore escaped characters. */ | |
545 if (char_quoted (buf, from)) | |
546 continue; | |
547 | |
548 /* Track parity of quotes. */ | |
549 if (code == Sstring) | |
550 { | |
551 parity ^= 1; | |
552 if (my_stringend == 0) | |
553 my_stringend = c; | |
554 /* If we have two kinds of string delimiters. | |
555 There's no way to grok this scanning backwards. */ | |
556 else if (my_stringend != c) | |
557 string_lossage = 1; | |
558 } | |
559 | |
560 /* Record comment-starters according to that | |
561 quote-parity to the comment-end. */ | |
562 if (code == Scomment && styles_match_p) | |
563 { | |
564 comstart_parity = parity; | |
565 comstart_pos = from; | |
566 } | |
567 | |
568 /* If we find another earlier comment-ender, | |
569 any comment-starts earlier than that don't count | |
570 (because they go with the earlier comment-ender). */ | |
571 if (code == Sendcomment && styles_match_p) | |
572 break; | |
573 | |
574 /* Assume a defun-start point is outside of strings. */ | |
575 if (code == Sopen | |
576 && (from == stop || BUF_FETCH_CHAR (buf, from - 1) == '\n')) | |
577 break; | |
578 } | |
579 | |
580 if (comstart_pos == 0) | |
581 from = comment_end; | |
582 /* If the earliest comment starter | |
583 is followed by uniform paired string quotes or none, | |
584 we know it can't be inside a string | |
585 since if it were then the comment ender would be inside one. | |
586 So it does start a comment. Skip back to it. */ | |
587 else if (comstart_parity == 0 && !string_lossage) | |
588 from = comstart_pos; | |
589 else | |
590 { | |
591 /* We had two kinds of string delimiters mixed up | |
592 together. Decode this going forwards. | |
593 Scan fwd from the previous comment ender | |
594 to the one in question; this records where we | |
595 last passed a comment starter. */ | |
596 | |
597 struct lisp_parse_state state; | |
598 scan_sexps_forward (buf, &state, find_defun_start (buf, comment_end), | |
599 comment_end - 1, -10000, 0, Qnil, 0); | |
600 if (state.incomment) | |
601 from = state.comstart; | |
602 else | |
603 /* We can't grok this as a comment; scan it normally. */ | |
604 from = comment_end; | |
605 } | |
606 return from; | |
607 } | |
608 | |
609 static Bufpos | |
610 find_end_of_comment (struct buffer *buf, Bufpos from, Bufpos stop, int mask) | |
611 { | |
612 int c; | |
613 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table); | |
614 | |
615 while (1) | |
616 { | |
617 if (from == stop) | |
618 { | |
619 return -1; | |
620 } | |
621 c = BUF_FETCH_CHAR (buf, from); | |
622 if (SYNTAX_UNSAFE (mirrortab, c) == Sendcomment | |
623 && SYNTAX_STYLES_MATCH_1CHAR_P (mirrortab, c, mask)) | |
624 /* we have encountered a comment end of the same style | |
625 as the comment sequence which began this comment | |
626 section */ | |
627 break; | |
628 | |
629 from++; | |
630 if (from < stop | |
631 && SYNTAX_STYLES_MATCH_END_P (mirrortab, c, | |
632 BUF_FETCH_CHAR (buf, from), mask)) | |
633 /* we have encountered a comment end of the same style | |
634 as the comment sequence which began this comment | |
635 section */ | |
636 { from++; break; } | |
637 } | |
638 return from; | |
639 } | |
640 | |
641 | |
642 /* #### between FSF 19.23 and 19.28 there are some changes to the logic | |
643 in this function (and minor changes to find_start_of_comment(), | |
644 above, which is part of Fforward_comment() in FSF). Attempts to port | |
645 that logic made this function break, so I'm leaving it out. If anyone | |
646 ever complains about this function not working properly, take a look | |
647 at those changes. --ben */ | |
648 | |
649 DEFUN ("forward-comment", Fforward_comment, 1, 2, 0, /* | |
650 Move forward across up to N comments. If N is negative, move backward. | |
651 Stop scanning if we find something other than a comment or whitespace. | |
652 Set point to where scanning stops. | |
653 If N comments are found as expected, with nothing except whitespace | |
654 between them, return t; otherwise return nil. | |
655 Point is set in either case. | |
656 Optional argument BUFFER defaults to the current buffer. | |
657 */ | |
658 (n, buffer)) | |
659 { | |
660 Bufpos from; | |
661 Bufpos stop; | |
662 Emchar c; | |
663 enum syntaxcode code; | |
664 EMACS_INT count; | |
665 struct buffer *buf = decode_buffer (buffer, 0); | |
666 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table); | |
667 | |
668 CHECK_INT (n); | |
669 count = XINT (n); | |
670 | |
671 from = BUF_PT (buf); | |
672 | |
673 while (count > 0) | |
674 { | |
675 QUIT; | |
676 | |
677 stop = BUF_ZV (buf); | |
678 while (from < stop) | |
679 { | |
680 int mask = 0; /* mask for finding matching comment style */ | |
681 | |
682 if (char_quoted (buf, from)) | |
683 { | |
684 from++; | |
685 continue; | |
686 } | |
687 | |
688 c = BUF_FETCH_CHAR (buf, from); | |
689 code = SYNTAX (mirrortab, c); | |
690 | |
691 if (code == Scomment) | |
692 { | |
693 /* we have encountered a single character comment start | |
694 sequence, and we are ignoring all text inside comments. | |
695 we must record the comment style this character begins | |
696 so that later, only a comment end of the same style actually | |
697 ends the comment section */ | |
698 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c); | |
699 } | |
700 | |
701 else if (from < stop | |
702 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from+1))) | |
703 { | |
704 /* we have encountered a 2char comment start sequence and we | |
705 are ignoring all text inside comments. we must record | |
706 the comment style this sequence begins so that later, | |
707 only a comment end of the same style actually ends | |
708 the comment section */ | |
709 code = Scomment; | |
710 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c, | |
711 BUF_FETCH_CHAR (buf, from+1)); | |
712 from++; | |
713 } | |
714 | |
715 if (code == Scomment) | |
716 { | |
717 Bufpos newfrom; | |
718 | |
719 newfrom = find_end_of_comment (buf, from, stop, mask); | |
720 if (newfrom < 0) | |
721 { | |
722 /* we stopped because from==stop */ | |
723 BUF_SET_PT (buf, stop); | |
724 return Qnil; | |
725 } | |
726 from = newfrom; | |
727 | |
728 /* We have skipped one comment. */ | |
729 break; | |
730 } | |
731 else if (code != Swhitespace | |
732 && code != Sendcomment | |
733 && code != Scomment ) | |
734 { | |
735 BUF_SET_PT (buf, from); | |
736 return Qnil; | |
737 } | |
738 from++; | |
739 } | |
740 | |
741 /* End of comment reached */ | |
742 count--; | |
743 } | |
744 | |
745 while (count < 0) | |
746 { | |
747 QUIT; | |
748 | |
749 stop = BUF_BEGV (buf); | |
750 while (from > stop) | |
751 { | |
752 int mask = 0; /* mask for finding matching comment style */ | |
753 | |
754 from--; | |
755 if (char_quoted (buf, from)) | |
756 { | |
757 from--; | |
758 continue; | |
759 } | |
760 | |
761 c = BUF_FETCH_CHAR (buf, from); | |
762 code = SYNTAX (mirrortab, c); | |
763 | |
764 if (code == Sendcomment) | |
765 { | |
766 /* we have found a single char end comment. we must record | |
767 the comment style encountered so that later, we can match | |
768 only the proper comment begin sequence of the same style */ | |
769 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c); | |
770 } | |
771 | |
772 else if (from > stop | |
773 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from - 1), c) | |
774 && !char_quoted (buf, from - 1)) | |
775 { | |
776 /* We must record the comment style encountered so that | |
777 later, we can match only the proper comment begin | |
778 sequence of the same style. */ | |
779 code = Sendcomment; | |
780 mask = SYNTAX_COMMENT_MASK_END (mirrortab, | |
781 BUF_FETCH_CHAR (buf, from - 1), | |
782 c); | |
783 from--; | |
784 } | |
785 | |
786 if (code == Sendcomment) | |
787 { | |
788 from = find_start_of_comment (buf, from, stop, mask); | |
789 break; | |
790 } | |
791 | |
792 else if (code != Swhitespace | |
793 && SYNTAX (mirrortab, c) != Scomment | |
794 && SYNTAX (mirrortab, c) != Sendcomment) | |
795 { | |
796 BUF_SET_PT (buf, from + 1); | |
797 return Qnil; | |
798 } | |
799 } | |
800 | |
801 count++; | |
802 } | |
803 | |
804 BUF_SET_PT (buf, from); | |
805 return Qt; | |
806 } | |
807 | |
808 | |
809 Lisp_Object | |
810 scan_lists (struct buffer *buf, Bufpos from, int count, int depth, | |
811 int sexpflag, int no_error) | |
812 { | |
813 Bufpos stop; | |
814 Emchar c; | |
815 int quoted; | |
816 int mathexit = 0; | |
817 enum syntaxcode code; | |
818 int min_depth = depth; /* Err out if depth gets less than this. */ | |
819 Lisp_Object syntaxtab = buf->syntax_table; | |
820 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table); | |
821 | |
822 if (depth > 0) min_depth = 0; | |
823 | |
824 while (count > 0) | |
825 { | |
826 QUIT; | |
827 | |
828 stop = BUF_ZV (buf); | |
829 while (from < stop) | |
830 { | |
831 int mask = 0; /* mask for finding matching comment style */ | |
832 | |
833 c = BUF_FETCH_CHAR (buf, from); | |
834 code = SYNTAX_UNSAFE (mirrortab, c); | |
835 from++; | |
836 | |
837 /* a 1-char comment start sequence */ | |
838 if (code == Scomment && parse_sexp_ignore_comments) | |
839 { | |
840 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c); | |
841 } | |
842 | |
843 /* else, a 2-char comment start sequence? */ | |
844 else if (from < stop | |
845 && SYNTAX_START_P (mirrortab, c, BUF_FETCH_CHAR (buf, from)) | |
846 && parse_sexp_ignore_comments) | |
847 { | |
848 /* we have encountered a comment start sequence and we | |
849 are ignoring all text inside comments. we must record | |
850 the comment style this sequence begins so that later, | |
851 only a comment end of the same style actually ends | |
852 the comment section */ | |
853 code = Scomment; | |
854 mask = SYNTAX_COMMENT_MASK_START (mirrortab, c, | |
855 BUF_FETCH_CHAR (buf, from)); | |
856 from++; | |
857 } | |
858 | |
859 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c)) | |
860 continue; | |
861 | |
862 switch (code) | |
863 { | |
864 case Sescape: | |
865 case Scharquote: | |
866 if (from == stop) goto lose; | |
867 from++; | |
868 /* treat following character as a word constituent */ | |
869 case Sword: | |
870 case Ssymbol: | |
871 if (depth || !sexpflag) break; | |
872 /* This word counts as a sexp; return at end of it. */ | |
873 while (from < stop) | |
874 { | |
875 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from))) | |
876 { | |
877 case Scharquote: | |
878 case Sescape: | |
879 from++; | |
880 if (from == stop) goto lose; | |
881 break; | |
882 case Sword: | |
883 case Ssymbol: | |
884 case Squote: | |
885 break; | |
886 default: | |
887 goto done; | |
888 } | |
889 from++; | |
890 } | |
891 goto done; | |
892 | |
893 case Scomment: | |
894 if (!parse_sexp_ignore_comments) | |
895 break; | |
896 { | |
897 Bufpos newfrom = find_end_of_comment (buf, from, stop, mask); | |
898 if (newfrom < 0) | |
899 { | |
900 /* we stopped because from == stop in search forward */ | |
901 from = stop; | |
902 if (depth == 0) | |
903 goto done; | |
904 goto lose; | |
905 } | |
906 from = newfrom; | |
907 } | |
908 break; | |
909 | |
910 case Smath: | |
911 if (!sexpflag) | |
912 break; | |
913 if (from != stop && c == BUF_FETCH_CHAR (buf, from)) | |
914 from++; | |
915 if (mathexit) | |
916 { | |
917 mathexit = 0; | |
918 goto close1; | |
919 } | |
920 mathexit = 1; | |
921 | |
922 case Sopen: | |
923 if (!++depth) goto done; | |
924 break; | |
925 | |
926 case Sclose: | |
927 close1: | |
928 if (!--depth) goto done; | |
929 if (depth < min_depth) | |
930 { | |
931 if (no_error) | |
932 return Qnil; | |
933 error ("Containing expression ends prematurely"); | |
934 } | |
935 break; | |
936 | |
937 case Sstring: | |
938 { | |
939 /* XEmacs change: call syntax_match on character */ | |
940 Emchar ch = BUF_FETCH_CHAR (buf, from - 1); | |
941 Lisp_Object stermobj = syntax_match (syntaxtab, ch); | |
942 Emchar stringterm; | |
943 | |
944 if (CHARP (stermobj)) | |
945 stringterm = XCHAR (stermobj); | |
946 else | |
947 stringterm = ch; | |
948 | |
949 while (1) | |
950 { | |
951 if (from >= stop) | |
952 goto lose; | |
953 if (BUF_FETCH_CHAR (buf, from) == stringterm) | |
954 break; | |
955 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from))) | |
956 { | |
957 case Scharquote: | |
958 case Sescape: | |
959 from++; | |
960 break; | |
961 default: | |
962 break; | |
963 } | |
964 from++; | |
965 } | |
966 from++; | |
967 if (!depth && sexpflag) goto done; | |
968 break; | |
969 } | |
970 | |
971 default: | |
972 break; | |
973 } | |
974 } | |
975 | |
976 /* Reached end of buffer. Error if within object, | |
977 return nil if between */ | |
978 if (depth) goto lose; | |
979 | |
980 return Qnil; | |
981 | |
982 /* End of object reached */ | |
983 done: | |
984 count--; | |
985 } | |
986 | |
987 while (count < 0) | |
988 { | |
989 QUIT; | |
990 | |
991 stop = BUF_BEGV (buf); | |
992 while (from > stop) | |
993 { | |
994 int mask = 0; /* mask for finding matching comment style */ | |
995 | |
996 from--; | |
997 quoted = char_quoted (buf, from); | |
998 if (quoted) | |
999 from--; | |
1000 | |
1001 c = BUF_FETCH_CHAR (buf, from); | |
1002 code = SYNTAX_UNSAFE (mirrortab, c); | |
1003 | |
1004 if (code == Sendcomment && parse_sexp_ignore_comments) | |
1005 { | |
1006 /* we have found a single char end comment. we must record | |
1007 the comment style encountered so that later, we can match | |
1008 only the proper comment begin sequence of the same style */ | |
1009 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, c); | |
1010 } | |
1011 | |
1012 else if (from > stop | |
1013 && SYNTAX_END_P (mirrortab, BUF_FETCH_CHAR (buf, from-1), c) | |
1014 && !char_quoted (buf, from - 1) | |
1015 && parse_sexp_ignore_comments) | |
1016 { | |
1017 /* we must record the comment style encountered so that | |
1018 later, we can match only the proper comment begin | |
1019 sequence of the same style */ | |
1020 code = Sendcomment; | |
1021 mask = SYNTAX_COMMENT_MASK_END (mirrortab, | |
1022 BUF_FETCH_CHAR (buf, from - 1), | |
1023 c); | |
1024 from--; | |
1025 } | |
1026 | |
1027 if (SYNTAX_PREFIX_UNSAFE (mirrortab, c)) | |
1028 continue; | |
1029 | |
1030 switch (((quoted) ? Sword : code)) | |
1031 { | |
1032 case Sword: | |
1033 case Ssymbol: | |
1034 if (depth || !sexpflag) break; | |
1035 /* This word counts as a sexp; count object finished after | |
1036 passing it. */ | |
1037 while (from > stop) | |
1038 { | |
1039 enum syntaxcode syncode; | |
1040 quoted = char_quoted (buf, from - 1); | |
1041 | |
1042 if (quoted) | |
1043 from--; | |
1044 if (! (quoted | |
1045 || (syncode = | |
1046 SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from - 1))) | |
1047 == Sword | |
1048 || syncode == Ssymbol | |
1049 || syncode == Squote)) | |
1050 goto done2; | |
1051 from--; | |
1052 } | |
1053 goto done2; | |
1054 | |
1055 case Smath: | |
1056 if (!sexpflag) | |
1057 break; | |
1058 if (from != stop && c == BUF_FETCH_CHAR (buf, from - 1)) | |
1059 from--; | |
1060 if (mathexit) | |
1061 { | |
1062 mathexit = 0; | |
1063 goto open2; | |
1064 } | |
1065 mathexit = 1; | |
1066 | |
1067 case Sclose: | |
1068 if (!++depth) goto done2; | |
1069 break; | |
1070 | |
1071 case Sopen: | |
1072 open2: | |
1073 if (!--depth) goto done2; | |
1074 if (depth < min_depth) | |
1075 { | |
1076 if (no_error) | |
1077 return Qnil; | |
1078 error ("Containing expression ends prematurely"); | |
1079 } | |
1080 break; | |
1081 | |
1082 case Sendcomment: | |
1083 if (parse_sexp_ignore_comments) | |
1084 from = find_start_of_comment (buf, from, stop, mask); | |
1085 break; | |
1086 | |
1087 case Sstring: | |
1088 { | |
1089 /* XEmacs change: call syntax_match() on character */ | |
1090 Emchar ch = BUF_FETCH_CHAR (buf, from); | |
1091 Lisp_Object stermobj = syntax_match (syntaxtab, ch); | |
1092 Emchar stringterm; | |
1093 | |
1094 if (CHARP (stermobj)) | |
1095 stringterm = XCHAR (stermobj); | |
1096 else | |
1097 stringterm = ch; | |
1098 | |
1099 while (1) | |
1100 { | |
1101 if (from == stop) goto lose; | |
1102 if (!char_quoted (buf, from - 1) | |
1103 && stringterm == BUF_FETCH_CHAR (buf, from - 1)) | |
1104 break; | |
1105 from--; | |
1106 } | |
1107 from--; | |
1108 if (!depth && sexpflag) goto done2; | |
1109 break; | |
1110 } | |
1111 } | |
1112 } | |
1113 | |
1114 /* Reached start of buffer. Error if within object, | |
1115 return nil if between */ | |
1116 if (depth) goto lose; | |
1117 | |
1118 return Qnil; | |
1119 | |
1120 done2: | |
1121 count++; | |
1122 } | |
1123 | |
1124 | |
1125 return (make_int (from)); | |
1126 | |
1127 lose: | |
1128 if (!no_error) | |
1129 error ("Unbalanced parentheses"); | |
1130 return Qnil; | |
1131 } | |
1132 | |
1133 int | |
1134 char_quoted (struct buffer *buf, Bufpos pos) | |
1135 { | |
1136 enum syntaxcode code; | |
1137 Bufpos beg = BUF_BEGV (buf); | |
1138 int quoted = 0; | |
1139 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table); | |
1140 | |
1141 while (pos > beg | |
1142 && ((code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1))) | |
1143 == Scharquote | |
1144 || code == Sescape)) | |
1145 pos--, quoted = !quoted; | |
1146 return quoted; | |
1147 } | |
1148 | |
1149 DEFUN ("scan-lists", Fscan_lists, 3, 5, 0, /* | |
1150 Scan from character number FROM by COUNT lists. | |
1151 Returns the character number of the position thus found. | |
1152 | |
1153 If DEPTH is nonzero, paren depth begins counting from that value, | |
1154 only places where the depth in parentheses becomes zero | |
1155 are candidates for stopping; COUNT such places are counted. | |
1156 Thus, a positive value for DEPTH means go out levels. | |
1157 | |
1158 Comments are ignored if `parse-sexp-ignore-comments' is non-nil. | |
1159 | |
1160 If the beginning or end of (the accessible part of) the buffer is reached | |
1161 and the depth is wrong, an error is signaled. | |
1162 If the depth is right but the count is not used up, nil is returned. | |
1163 | |
1164 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead | |
1165 of in the current buffer. | |
1166 | |
1167 If optional arg NOERROR is non-nil, scan-lists will return nil instead of | |
1168 signalling an error. | |
1169 */ | |
1170 (from, count, depth, buffer, no_error)) | |
1171 { | |
1172 struct buffer *buf; | |
1173 | |
1174 CHECK_INT (from); | |
1175 CHECK_INT (count); | |
1176 CHECK_INT (depth); | |
1177 buf = decode_buffer (buffer, 0); | |
1178 | |
1179 return scan_lists (buf, XINT (from), XINT (count), XINT (depth), 0, | |
1180 !NILP (no_error)); | |
1181 } | |
1182 | |
1183 DEFUN ("scan-sexps", Fscan_sexps, 2, 4, 0, /* | |
1184 Scan from character number FROM by COUNT balanced expressions. | |
1185 If COUNT is negative, scan backwards. | |
1186 Returns the character number of the position thus found. | |
1187 | |
1188 Comments are ignored if `parse-sexp-ignore-comments' is non-nil. | |
1189 | |
1190 If the beginning or end of (the accessible part of) the buffer is reached | |
1191 in the middle of a parenthetical grouping, an error is signaled. | |
1192 If the beginning or end is reached between groupings | |
1193 but before count is used up, nil is returned. | |
1194 | |
1195 If optional arg BUFFER is non-nil, scanning occurs in that buffer instead | |
1196 of in the current buffer. | |
1197 | |
1198 If optional arg NOERROR is non-nil, scan-sexps will return nil instead of | |
1199 signalling an error. | |
1200 */ | |
1201 (from, count, buffer, no_error)) | |
1202 { | |
1203 struct buffer *buf = decode_buffer (buffer, 0); | |
1204 CHECK_INT (from); | |
1205 CHECK_INT (count); | |
1206 | |
1207 return scan_lists (buf, XINT (from), XINT (count), 0, 1, !NILP (no_error)); | |
1208 } | |
1209 | |
1210 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /* | |
1211 Move point backward over any number of chars with prefix syntax. | |
1212 This includes chars with "quote" or "prefix" syntax (' or p). | |
1213 | |
1214 Optional arg BUFFER defaults to the current buffer. | |
1215 */ | |
1216 (buffer)) | |
1217 { | |
1218 struct buffer *buf = decode_buffer (buffer, 0); | |
1219 Bufpos beg = BUF_BEGV (buf); | |
1220 Bufpos pos = BUF_PT (buf); | |
1221 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table); | |
1222 | |
1223 while (pos > beg && !char_quoted (buf, pos - 1) | |
1224 && (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)) == Squote | |
1225 || SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, pos - 1)))) | |
1226 pos--; | |
1227 | |
1228 BUF_SET_PT (buf, pos); | |
1229 | |
1230 return Qnil; | |
1231 } | |
1232 | |
1233 /* Parse forward from FROM to END, | |
1234 assuming that FROM has state OLDSTATE (nil means FROM is start of function), | |
1235 and return a description of the state of the parse at END. | |
1236 If STOPBEFORE is nonzero, stop at the start of an atom. | |
1237 If COMMENTSTOP is nonzero, stop at the start of a comment. */ | |
1238 | |
1239 static void | |
1240 scan_sexps_forward (struct buffer *buf, struct lisp_parse_state *stateptr, | |
1241 Bufpos from, Bufpos end, | |
1242 int targetdepth, int stopbefore, | |
1243 Lisp_Object oldstate, | |
1244 int commentstop) | |
1245 { | |
1246 struct lisp_parse_state state; | |
1247 | |
1248 enum syntaxcode code; | |
1249 struct level { int last, prev; }; | |
1250 struct level levelstart[100]; | |
1251 struct level *curlevel = levelstart; | |
1252 struct level *endlevel = levelstart + 100; | |
1253 int depth; /* Paren depth of current scanning location. | |
1254 level - levelstart equals this except | |
1255 when the depth becomes negative. */ | |
1256 int mindepth; /* Lowest DEPTH value seen. */ | |
1257 int start_quoted = 0; /* Nonzero means starting after a char quote */ | |
1258 Lisp_Object tem; | |
1259 int mask; /* comment mask */ | |
1260 Lisp_Object syntaxtab = buf->syntax_table; | |
1261 struct Lisp_Char_Table *mirrortab = XCHAR_TABLE (buf->mirror_syntax_table); | |
1262 | |
1263 if (NILP (oldstate)) | |
1264 { | |
1265 depth = 0; | |
1266 state.instring = -1; | |
1267 state.incomment = 0; | |
1268 state.comstyle = 0; /* comment style a by default */ | |
1269 mask = SYNTAX_COMMENT_STYLE_A; | |
1270 } | |
1271 else | |
1272 { | |
1273 tem = Fcar (oldstate); /* elt 0, depth */ | |
1274 if (!NILP (tem)) | |
1275 depth = XINT (tem); | |
1276 else | |
1277 depth = 0; | |
1278 | |
1279 oldstate = Fcdr (oldstate); | |
1280 oldstate = Fcdr (oldstate); | |
1281 oldstate = Fcdr (oldstate); | |
1282 tem = Fcar (oldstate); /* elt 3, instring */ | |
1283 state.instring = !NILP (tem) ? XINT (tem) : -1; | |
1284 | |
1285 oldstate = Fcdr (oldstate); /* elt 4, incomment */ | |
1286 tem = Fcar (oldstate); | |
1287 state.incomment = !NILP (tem); | |
1288 | |
1289 oldstate = Fcdr (oldstate); | |
1290 tem = Fcar (oldstate); /* elt 5, follows-quote */ | |
1291 start_quoted = !NILP (tem); | |
1292 | |
1293 /* if the eighth element of the list is nil, we are in comment style | |
1294 a. if it is non-nil, we are in comment style b */ | |
1295 oldstate = Fcdr (oldstate); | |
1296 oldstate = Fcdr (oldstate); | |
1297 oldstate = Fcdr (oldstate); | |
1298 tem = Fcar (oldstate); /* elt 8, comment style a */ | |
1299 state.comstyle = !NILP (tem); | |
1300 mask = state.comstyle ? SYNTAX_COMMENT_STYLE_B : SYNTAX_COMMENT_STYLE_A; | |
1301 } | |
1302 state.quoted = 0; | |
1303 mindepth = depth; | |
1304 | |
1305 curlevel->prev = -1; | |
1306 curlevel->last = -1; | |
1307 | |
1308 /* Enter the loop at a place appropriate for initial state. */ | |
1309 | |
1310 if (state.incomment) goto startincomment; | |
1311 if (state.instring >= 0) | |
1312 { | |
1313 if (start_quoted) goto startquotedinstring; | |
1314 goto startinstring; | |
1315 } | |
1316 if (start_quoted) goto startquoted; | |
1317 | |
1318 while (from < end) | |
1319 { | |
1320 QUIT; | |
1321 | |
1322 code = SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from)); | |
1323 from++; | |
1324 | |
1325 if (code == Scomment) | |
1326 { | |
1327 /* record the comment style we have entered so that only the | |
1328 comment-ender sequence (or single char) of the same style | |
1329 actually terminates the comment section. */ | |
1330 mask = SYNTAX_COMMENT_1CHAR_MASK (mirrortab, | |
1331 BUF_FETCH_CHAR (buf, from-1)); | |
1332 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B); | |
1333 state.comstart = from - 1; | |
1334 } | |
1335 | |
1336 else if (from < end && | |
1337 SYNTAX_START_P (mirrortab, BUF_FETCH_CHAR (buf, from-1), | |
1338 BUF_FETCH_CHAR (buf, from))) | |
1339 { | |
1340 /* Record the comment style we have entered so that only | |
1341 the comment-end sequence of the same style actually | |
1342 terminates the comment section. */ | |
1343 code = Scomment; | |
1344 mask = SYNTAX_COMMENT_MASK_START (mirrortab, | |
1345 BUF_FETCH_CHAR (buf, from-1), | |
1346 BUF_FETCH_CHAR (buf, from)); | |
1347 state.comstyle = (mask == SYNTAX_COMMENT_STYLE_B); | |
1348 state.comstart = from-1; | |
1349 from++; | |
1350 } | |
1351 | |
1352 if (SYNTAX_PREFIX (mirrortab, BUF_FETCH_CHAR (buf, from - 1))) | |
1353 continue; | |
1354 switch (code) | |
1355 { | |
1356 case Sescape: | |
1357 case Scharquote: | |
1358 if (stopbefore) goto stop; /* this arg means stop at sexp start */ | |
1359 curlevel->last = from - 1; | |
1360 startquoted: | |
1361 if (from == end) goto endquoted; | |
1362 from++; | |
1363 goto symstarted; | |
1364 /* treat following character as a word constituent */ | |
1365 case Sword: | |
1366 case Ssymbol: | |
1367 if (stopbefore) goto stop; /* this arg means stop at sexp start */ | |
1368 curlevel->last = from - 1; | |
1369 symstarted: | |
1370 while (from < end) | |
1371 { | |
1372 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from))) | |
1373 { | |
1374 case Scharquote: | |
1375 case Sescape: | |
1376 from++; | |
1377 if (from == end) goto endquoted; | |
1378 break; | |
1379 case Sword: | |
1380 case Ssymbol: | |
1381 case Squote: | |
1382 break; | |
1383 default: | |
1384 goto symdone; | |
1385 } | |
1386 from++; | |
1387 } | |
1388 symdone: | |
1389 curlevel->prev = curlevel->last; | |
1390 break; | |
1391 | |
1392 case Scomment: | |
1393 state.incomment = 1; | |
1394 startincomment: | |
1395 if (commentstop) | |
1396 goto done; | |
1397 { | |
1398 Bufpos newfrom = find_end_of_comment (buf, from, end, mask); | |
1399 if (newfrom < 0) | |
1400 { | |
1401 /* we terminated search because from == end */ | |
1402 from = end; | |
1403 goto done; | |
1404 } | |
1405 from = newfrom; | |
1406 } | |
1407 state.incomment = 0; | |
1408 state.comstyle = 0; /* reset the comment style */ | |
1409 mask = 0; | |
1410 break; | |
1411 | |
1412 case Sopen: | |
1413 if (stopbefore) goto stop; /* this arg means stop at sexp start */ | |
1414 depth++; | |
1415 /* curlevel++->last ran into compiler bug on Apollo */ | |
1416 curlevel->last = from - 1; | |
1417 if (++curlevel == endlevel) | |
1418 error ("Nesting too deep for parser"); | |
1419 curlevel->prev = -1; | |
1420 curlevel->last = -1; | |
1421 if (targetdepth == depth) goto done; | |
1422 break; | |
1423 | |
1424 case Sclose: | |
1425 depth--; | |
1426 if (depth < mindepth) | |
1427 mindepth = depth; | |
1428 if (curlevel != levelstart) | |
1429 curlevel--; | |
1430 curlevel->prev = curlevel->last; | |
1431 if (targetdepth == depth) goto done; | |
1432 break; | |
1433 | |
1434 case Sstring: | |
1435 { | |
1436 Emchar ch; | |
1437 if (stopbefore) goto stop; /* this arg means stop at sexp start */ | |
1438 curlevel->last = from - 1; | |
1439 /* XEmacs change: call syntax_match() on character */ | |
1440 ch = BUF_FETCH_CHAR (buf, from - 1); | |
1441 { | |
1442 Lisp_Object stermobj = syntax_match (syntaxtab, ch); | |
1443 | |
1444 if (CHARP (stermobj)) | |
1445 state.instring = XCHAR (stermobj); | |
1446 else | |
1447 state.instring = ch; | |
1448 } | |
1449 } | |
1450 startinstring: | |
1451 while (1) | |
1452 { | |
1453 if (from >= end) goto done; | |
1454 if (BUF_FETCH_CHAR (buf, from) == state.instring) break; | |
1455 switch (SYNTAX (mirrortab, BUF_FETCH_CHAR (buf, from))) | |
1456 { | |
1457 case Scharquote: | |
1458 case Sescape: | |
1459 { | |
1460 from++; | |
1461 startquotedinstring: | |
1462 if (from >= end) goto endquoted; | |
1463 break; | |
1464 } | |
1465 default: | |
1466 break; | |
1467 } | |
1468 from++; | |
1469 } | |
1470 state.instring = -1; | |
1471 curlevel->prev = curlevel->last; | |
1472 from++; | |
1473 break; | |
1474 | |
1475 case Smath: | |
1476 break; | |
1477 | |
1478 case Swhitespace: | |
1479 case Spunct: | |
1480 case Squote: | |
1481 case Sendcomment: | |
1482 case Sinherit: | |
1483 case Smax: | |
1484 break; | |
1485 } | |
1486 } | |
1487 goto done; | |
1488 | |
1489 stop: /* Here if stopping before start of sexp. */ | |
1490 from--; /* We have just fetched the char that starts it; */ | |
1491 goto done; /* but return the position before it. */ | |
1492 | |
1493 endquoted: | |
1494 state.quoted = 1; | |
1495 done: | |
1496 state.depth = depth; | |
1497 state.mindepth = mindepth; | |
1498 state.thislevelstart = curlevel->prev; | |
1499 state.prevlevelstart | |
1500 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last; | |
1501 state.location = from; | |
1502 | |
1503 *stateptr = state; | |
1504 } | |
1505 | |
1506 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, 2, 7, 0, /* | |
1507 Parse Lisp syntax starting at FROM until TO; return status of parse at TO. | |
1508 Parsing stops at TO or when certain criteria are met; | |
1509 point is set to where parsing stops. | |
1510 If fifth arg STATE is omitted or nil, | |
1511 parsing assumes that FROM is the beginning of a function. | |
1512 Value is a list of eight elements describing final state of parsing: | |
1513 0. depth in parens. | |
1514 1. character address of start of innermost containing list; nil if none. | |
1515 2. character address of start of last complete sexp terminated. | |
1516 3. non-nil if inside a string. | |
1517 (It is the character that will terminate the string.) | |
1518 4. t if inside a comment. | |
1519 5. t if following a quote character. | |
1520 6. the minimum paren-depth encountered during this scan. | |
1521 7. nil if in comment style a, or not in a comment; t if in comment style b | |
1522 If third arg TARGETDEPTH is non-nil, parsing stops if the depth | |
1523 in parentheses becomes equal to TARGETDEPTH. | |
1524 Fourth arg STOPBEFORE non-nil means stop when come to | |
1525 any character that starts a sexp. | |
1526 Fifth arg STATE is an eight-element list like what this function returns. | |
1527 It is used to initialize the state of the parse. Its second and third | |
1528 elements are ignored. | |
1529 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment. | |
1530 */ | |
1531 (from, to, targetdepth, stopbefore, oldstate, commentstop, buffer)) | |
1532 { | |
1533 struct lisp_parse_state state; | |
1534 int target; | |
1535 Bufpos start, end; | |
1536 struct buffer *buf = decode_buffer (buffer, 0); | |
1537 Lisp_Object val; | |
1538 | |
1539 if (!NILP (targetdepth)) | |
1540 { | |
1541 CHECK_INT (targetdepth); | |
1542 target = XINT (targetdepth); | |
1543 } | |
1544 else | |
1545 target = -100000; /* We won't reach this depth */ | |
1546 | |
1547 get_buffer_range_char (buf, from, to, &start, &end, 0); | |
1548 scan_sexps_forward (buf, &state, start, end, | |
1549 target, !NILP (stopbefore), oldstate, | |
1550 !NILP (commentstop)); | |
1551 | |
1552 BUF_SET_PT (buf, state.location); | |
1553 | |
1554 /* reverse order */ | |
1555 val = Qnil; | |
1556 val = Fcons (state.comstyle ? Qt : Qnil, val); | |
1557 val = Fcons (make_int (state.mindepth), val); | |
1558 val = Fcons (state.quoted ? Qt : Qnil, val); | |
1559 val = Fcons (state.incomment ? Qt : Qnil, val); | |
1560 val = Fcons (state.instring < 0 ? Qnil : make_int (state.instring), val); | |
1561 val = Fcons (state.thislevelstart < 0 ? Qnil : make_int (state.thislevelstart), val); | |
1562 val = Fcons (state.prevlevelstart < 0 ? Qnil : make_int (state.prevlevelstart), val); | |
1563 val = Fcons (make_int (state.depth), val); | |
1564 | |
1565 return val; | |
1566 } | |
1567 | |
1568 | |
1569 /* Updating of the mirror syntax table. | |
1570 | |
1571 Each syntax table has a corresponding mirror table in it. | |
1572 Whenever we make a change to a syntax table, we call | |
1573 update_syntax_table() on it. | |
1574 | |
1575 #### We really only need to map over the changed range. | |
1576 | |
1577 If we change the standard syntax table, we need to map over | |
1578 all tables because any of them could be inheriting from the | |
1579 standard syntax table. | |
1580 | |
1581 When `set-syntax-table' is called, we set the buffer's mirror | |
1582 syntax table as well. | |
1583 */ | |
1584 | |
1585 struct cmst_arg | |
1586 { | |
1587 Lisp_Object mirrortab; | |
1588 int check_inherit; | |
1589 }; | |
1590 | |
1591 static int | |
1592 cmst_mapfun (struct chartab_range *range, Lisp_Object val, void *arg) | |
1593 { | |
1594 struct cmst_arg *closure = (struct cmst_arg *) arg; | |
1595 | |
1596 if (CONSP (val)) | |
1597 val = XCAR (val); | |
1598 if (SYNTAX_FROM_CODE (XINT (val)) == Sinherit | |
1599 && closure->check_inherit) | |
1600 { | |
1601 struct cmst_arg recursive; | |
1602 | |
1603 recursive.mirrortab = closure->mirrortab; | |
1604 recursive.check_inherit = 0; | |
1605 map_char_table (XCHAR_TABLE (Vstandard_syntax_table), range, | |
1606 cmst_mapfun, &recursive); | |
1607 } | |
1608 else | |
1609 put_char_table (XCHAR_TABLE (closure->mirrortab), range, val); | |
1610 return 0; | |
1611 } | |
1612 | |
1613 static void | |
1614 update_just_this_syntax_table (struct Lisp_Char_Table *ct) | |
1615 { | |
1616 struct chartab_range range; | |
1617 struct cmst_arg arg; | |
1618 | |
1619 arg.mirrortab = ct->mirror_table; | |
1620 arg.check_inherit = (CHAR_TABLEP (Vstandard_syntax_table) | |
1621 && ct != XCHAR_TABLE (Vstandard_syntax_table)); | |
1622 range.type = CHARTAB_RANGE_ALL; | |
1623 map_char_table (ct, &range, cmst_mapfun, &arg); | |
1624 } | |
1625 | |
1626 /* Called from chartab.c when a change is made to a syntax table. | |
1627 If this is the standard syntax table, we need to recompute | |
1628 *all* syntax tables (yuck). Otherwise we just recompute this | |
1629 one. */ | |
1630 | |
1631 void | |
1632 update_syntax_table (struct Lisp_Char_Table *ct) | |
1633 { | |
1634 /* Don't be stymied at startup. */ | |
1635 if (CHAR_TABLEP (Vstandard_syntax_table) | |
1636 && ct == XCHAR_TABLE (Vstandard_syntax_table)) | |
1637 { | |
1638 Lisp_Object syntab; | |
1639 | |
1640 for (syntab = Vall_syntax_tables; !NILP (syntab); | |
1641 syntab = XCHAR_TABLE (syntab)->next_table) | |
1642 update_just_this_syntax_table (XCHAR_TABLE (syntab)); | |
1643 } | |
1644 else | |
1645 update_just_this_syntax_table (ct); | |
1646 } | |
1647 | |
1648 | |
1649 /************************************************************************/ | |
1650 /* initialization */ | |
1651 /************************************************************************/ | |
1652 | |
1653 void | |
1654 syms_of_syntax (void) | |
1655 { | |
1656 defsymbol (&Qsyntax_table_p, "syntax-table-p"); | |
1657 | |
1658 DEFSUBR (Fsyntax_table_p); | |
1659 DEFSUBR (Fsyntax_table); | |
1660 DEFSUBR (Fstandard_syntax_table); | |
1661 DEFSUBR (Fcopy_syntax_table); | |
1662 DEFSUBR (Fset_syntax_table); | |
1663 DEFSUBR (Fsyntax_designator_chars); | |
1664 DEFSUBR (Fchar_syntax); | |
1665 DEFSUBR (Fmatching_paren); | |
1666 /* DEFSUBR (Fmodify_syntax_entry); now in Lisp. */ | |
1667 /* DEFSUBR (Fdescribe_syntax); now in Lisp. */ | |
1668 | |
1669 DEFSUBR (Fforward_word); | |
1670 | |
1671 DEFSUBR (Fforward_comment); | |
1672 DEFSUBR (Fscan_lists); | |
1673 DEFSUBR (Fscan_sexps); | |
1674 DEFSUBR (Fbackward_prefix_chars); | |
1675 DEFSUBR (Fparse_partial_sexp); | |
1676 } | |
1677 | |
1678 void | |
1679 vars_of_syntax (void) | |
1680 { | |
1681 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /* | |
1682 Non-nil means `forward-sexp', etc., should treat comments as whitespace. | |
1683 */ ); | |
1684 | |
1685 words_include_escapes = 0; | |
1686 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes /* | |
1687 Non-nil means `forward-word', etc., should treat escape chars part of words. | |
1688 */ ); | |
1689 | |
1690 no_quit_in_re_search = 0; | |
1691 } | |
1692 | |
1693 static void | |
1694 define_standard_syntax (CONST char *p, enum syntaxcode syn) | |
1695 { | |
1696 for (; *p; p++) | |
1697 Fput_char_table (make_char (*p), make_int (syn), Vstandard_syntax_table); | |
1698 } | |
1699 | |
1700 void | |
1701 complex_vars_of_syntax (void) | |
1702 { | |
1703 Emchar i; | |
1704 CONST char *p; | |
1705 /* Set this now, so first buffer creation can refer to it. */ | |
1706 /* Make it nil before calling copy-syntax-table | |
1707 so that copy-syntax-table will know not to try to copy from garbage */ | |
1708 Vstandard_syntax_table = Qnil; | |
1709 Vstandard_syntax_table = Fcopy_syntax_table (Qnil); | |
1710 staticpro (&Vstandard_syntax_table); | |
1711 | |
1712 Vsyntax_designator_chars_string = make_string_nocopy (syntax_code_spec, | |
1713 Smax); | |
1714 staticpro (&Vsyntax_designator_chars_string); | |
1715 | |
1716 fill_char_table (XCHAR_TABLE (Vstandard_syntax_table), make_int (Spunct)); | |
1717 | |
1718 for (i = 0; i <= 32; i++) /* Control 0 plus SPACE */ | |
1719 Fput_char_table (make_char (i), make_int (Swhitespace), | |
1720 Vstandard_syntax_table); | |
1721 for (i = 127; i <= 159; i++) /* DEL plus Control 1 */ | |
1722 Fput_char_table (make_char (i), make_int (Swhitespace), | |
1723 Vstandard_syntax_table); | |
1724 | |
1725 define_standard_syntax ("abcdefghijklmnopqrstuvwxyz" | |
1726 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
1727 "0123456789" | |
1728 "$%", Sword); | |
1729 define_standard_syntax ("\"", Sstring); | |
1730 define_standard_syntax ("\\", Sescape); | |
1731 define_standard_syntax ("_-+*/&|<>=", Ssymbol); | |
1732 define_standard_syntax (".,;:?!#@~^'`", Spunct); | |
1733 | |
1734 for (p = "()[]{}"; *p; p+=2) | |
1735 { | |
1736 Fput_char_table (make_char (p[0]), | |
1737 Fcons (make_int (Sopen), make_char (p[1])), | |
1738 Vstandard_syntax_table); | |
1739 Fput_char_table (make_char (p[1]), | |
1740 Fcons (make_int (Sclose), make_char (p[0])), | |
1741 Vstandard_syntax_table); | |
1742 } | |
1743 } |