Mercurial > hg > xemacs-beta
changeset 5544:c2301b2c88c8
Improve documentation of syntax table internals.
author | Stephen J. Turnbull <stephen@xemacs.org> |
---|---|
date | Mon, 08 Aug 2011 13:57:20 +0900 |
parents | fbe90e6f7a43 |
children | 69de75c48efa |
files | src/ChangeLog src/syntax.c src/syntax.h |
diffstat | 3 files changed, 199 insertions(+), 245 deletions(-) [+] |
line wrap: on
line diff
--- a/src/ChangeLog Mon Aug 08 13:57:20 2011 +0900 +++ b/src/ChangeLog Mon Aug 08 13:57:20 2011 +0900 @@ -1,3 +1,28 @@ +2011-08-06 Stephen J. Turnbull <stephen@xemacs.org> + + * syntax.c: + Remove some obsolete or unintelligible #### comments. + Improve many toplevel comments. + + (Fsyntax_designator_chars): + (Fchar_syntax): + (Fmatching_paren): + (Fforward_word): + (Fforward_comment): + (Fscan_lists): + (Fscan_sexps): + (Fbackward_prefix_chars): + (parse-sexp-ignore-comments): + (lookup-syntax-properties): + (words-include-escapes): + Improve docstrings. + + * syntax.h: + Document functions and restrictions of struct syntax_cache members. + Improve many toplevel comments. + Delete syntax code parsing macros unused for a decade or so. + (#if 0'd out since May 2002.) + 2011-08-06 Stephen J. Turnbull <stephen@xemacs.org> * syntax.c (reset_buffer_syntax_cache_range):
--- a/src/syntax.c Mon Aug 08 13:57:20 2011 +0900 +++ b/src/syntax.c Mon Aug 08 13:57:20 2011 +0900 @@ -41,17 +41,14 @@ int parse_sexp_ignore_comments; -/* The following two variables are provided to tell additional information - to the regex routines. We do it this way rather than change the - arguments to re_search_2() in an attempt to maintain some call - compatibility with other versions of the regex code. */ - -/* Tell the regex routines not to QUIT. Normally there is a QUIT - each iteration in re_search_2(). */ +/* Tell the regex routines not to QUIT. Normally there is a QUIT each + iteration in re_search_2(). We do it this way rather than change + the arguments to re_search_2() in an attempt to maintain some call + compatibility with other versions of the regex code. + #### Surely this is no longer a consideration. Last sync was 19.28! */ int no_quit_in_re_search; -/* The standard syntax table is stored where it will automatically - be used in all new buffers. */ +/* The standard syntax table is automatically used in all new buffers. */ Lisp_Object Vstandard_syntax_table; Lisp_Object Vsyntax_designator_chars_string; @@ -84,6 +81,7 @@ Lisp_Object levelstarts;/* Char numbers of starts-of-expression of levels (starting from outermost). */ }; + /* These variables are a cache for finding the start of a defun. find_start_pos is the place for which the defun start was found. @@ -165,9 +163,8 @@ } DEFUN ("syntax-table", Fsyntax_table, 0, 1, 0, /* -Return the current syntax table. -This is the one specified by the current buffer, or by BUFFER if it -is non-nil. +Return the current syntax table of BUFFER. +BUFFER defaults to the current buffer. */ (buffer)) { @@ -177,9 +174,8 @@ #ifdef DEBUG_XEMACS DEFUN ("mirror-syntax-table", Fmirror_syntax_table, 0, 1, 0, /* -Return the current mirror syntax table, for debugging purposes. -This is the one specified by the current buffer, or by BUFFER if it -is non-nil. +Return the current mirror syntax table of BUFFER. 
+BUFFER defaults to the current buffer. Only useful in debugging internals. */ (buffer)) { @@ -187,8 +183,12 @@ } DEFUN ("syntax-cache-info", Fsyntax_cache_info, 0, 1, 0, /* -Return info about the syntax cache in BUFFER. -BUFFER defaults to the current buffer if nil. +Return a list (START END PREV_CHANGE NEXT_CHANGE) for BUFFER's syntax cache. +BUFFER defaults to the current buffer. + +The elements correspond to members of struct syntax_cache of the same names. +START and END should be markers. PREV_CHANGE and NEXT_CHANGE are integers. +Only useful in debugging internals. */ (buffer)) { @@ -201,8 +201,7 @@ #endif /* DEBUG_XEMACS */ DEFUN ("standard-syntax-table", Fstandard_syntax_table, 0, 0, 0, /* -Return the standard syntax table. -This is the one used for new buffers. +Return the standard syntax table, used for new buffers. */ ()) { @@ -315,6 +314,11 @@ } } +/* init_syntax_cache + Arguments: + cache: pointer to a zero-ed struct syntax_cache + object: a Lisp string or buffer + buffer: NULL or the struct buffer of buffer */ static void init_syntax_cache (struct syntax_cache *cache, /* cache must be zero'ed */ Lisp_Object object, /* string or buffer */ @@ -333,11 +337,9 @@ /* external syntax cache API */ -/* #### This function and associated logic still needs work, and especially - documentation. */ struct syntax_cache * /* return CACHE or the cache of OBJECT */ -setup_syntax_cache (struct syntax_cache *cache, /* syntax cache, may be NULL - if OBJECT is a buffer */ +setup_syntax_cache (struct syntax_cache *cache, /* may be NULL only if + OBJECT is a buffer */ Lisp_Object object, /* the object (if any) cache is associated with */ struct buffer *buffer, /* the buffer to use as source @@ -372,21 +374,18 @@ } /* - Update syntax_cache to an appropriate setting for position POS + Update syntax_cache CACHE to an appropriate setting for position CPOS. 
- The sign of COUNT gives the relative position of POS wrt the + The sign of COUNT gives the relative position of CPOS wrt the previously valid interval. (not currently used) - `syntax_cache.*_change' are the next and previous positions at - which syntax_code and c_s_t will need to be recalculated. + `syntax_cache.*_change' are the next and previous positions at which + syntax_code and c_s_t will need to be recalculated. - #### Currently this code uses `get-char-property', which will - return the "last smallest" extent at a given position. In cases - where overlapping extents are defined, this code will simply use - whatever is returned by get-char-property. - - It might be worth it at some point to merge provided syntax tables - outward to the current buffer (#### rewrite in English please?!). */ + #### Currently this code uses `get-char-property', which will return + the "last smallest" extent at a given position. In cases where + overlapping extents are defined, this code will simply use whatever + is returned by get-char-property. */ void update_syntax_cache (struct syntax_cache *cache, Charxpos cpos, @@ -482,8 +481,7 @@ } /* buffer-specific APIs used in buffer.c - #### This is really unclean; - the syntax cache should just be a LISP object */ + #### Really unclean; the syntax cache should just be a LISP object. */ void mark_buffer_syntax_cache (struct buffer *buf) @@ -541,7 +539,6 @@ if (BUFFERP (buffer)) { struct syntax_cache *cache = XBUFFER (buffer)->syntax_cache; - /* #### would this be slower or less accurate in character terms? */ Bytexpos start = extent_endpoint_byte (extent, 0); Bytexpos end = extent_endpoint_byte (extent, 1); Bytexpos start2 = byte_marker_position (cache->start); @@ -569,9 +566,13 @@ -/* Convert a letter which signifies a syntax code - into the code it signifies. - This is used by modify-syntax-entry, and other things. */ +/* Convert an ASCII character which represents a syntax class + into the corresponding syntax code. 
+ This is used by (search.c) skip_chars and (regex.c) regex_compile. + regex_compile indexes with `c' of type int, but promises that it + is positive. + #### skip_chars indexes with an Ichar, a signed type. Presumably + this is guaranteed non-negative. */ const unsigned char syntax_spec_code[0200] = { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, @@ -595,12 +596,12 @@ 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377 }; +/* The inverse mapping for syntax_spec_code. */ const unsigned char syntax_code_spec[] = " .w_()'\"$\\/<>@!|"; DEFUN ("syntax-designator-chars", Fsyntax_designator_chars, 0, 0, 0, /* -Return a string of the recognized syntax designator chars. -The chars are ordered by their internal syntax codes, which are -numbered starting at 0. +Return the string of the recognized syntax designator chars. +The chars are indexed by their internal syntax codes, starting at 0. */ ()) { @@ -608,13 +609,9 @@ } DEFUN ("char-syntax", Fchar_syntax, 1, 2, 0, /* -Return the syntax code of CHARACTER, described by a character. -For example, if CHARACTER is a word constituent, -the character `?w' is returned. -The characters that correspond to various syntax codes -are listed in the documentation of `modify-syntax-entry'. -Optional second argument SYNTAX-TABLE defaults to the current buffer's -syntax table. +Return the syntax code of CHARACTER, designated by a character. +Optional SYNTAX-TABLE defaults to the current buffer's syntax table. +See `modify-syntax-entry' for the designators of the defined syntax codes. */ (character, syntax_table)) { @@ -639,12 +636,15 @@ int *multi_p_out) { *multi_p_out = 1; - /* !!#### get this right */ + /* !!#### get this right + Maybe not worth it until we have Unicode inside. */ return Sword; } #endif +/* #### Outside of this file, only used once, in font-lock.c. Make static? + If so, remove prototype from syntax.h. 
*/ Lisp_Object syntax_match (Lisp_Object syntax_table, Ichar ch) { @@ -661,8 +661,7 @@ DEFUN ("matching-paren", Fmatching_paren, 1, 2, 0, /* Return the matching parenthesis of CHARACTER, or nil if none. -Optional second argument SYNTAX-TABLE defaults to the current buffer's -syntax table. +Optional SYNTAX-TABLE defaults to the current buffer's syntax table. */ (character, syntax_table)) { @@ -797,16 +796,16 @@ } DEFUN ("forward-word", Fforward_word, 0, 2, "_p", /* -Move point forward COUNT words (backward if COUNT is negative). -Normally t is returned, but if an edge of the buffer is reached, -point is left there and nil is returned. +Move point forward COUNT words in BUFFER (backward if COUNT is negative). +COUNT defaults to 1, and BUFFER defaults to the current buffer. -The characters that are moved over may be added to the current selection -\(i.e. active region) if the Shift key is held down, a motion key is used -to invoke this command, and `shifted-motion-keys-select-region' is t; see +Return t if the motion successfully crosses COUNT words, otherwise nil (if +the motion would cross the buffer boundary). + +The characters that are moved over are added to the region if the region +is active, the Shift key is held down, a motion key is used to invoke this +command, and `shifted-motion-keys-select-region' is non-nil; see the documentation for this variable for more details. - -COUNT defaults to 1, and BUFFER defaults to the current buffer. */ (count, buffer)) { @@ -1111,13 +1110,13 @@ at those changes. --ben */ DEFUN ("forward-comment", Fforward_comment, 0, 2, 0, /* -Move forward across up to COUNT comments, or backwards if COUNT is negative. +Move forward across COUNT comments in BUFFER (backwards if COUNT is negative). +COUNT defaults to 1, and BUFFER defaults to the current buffer. + Stop scanning if we find something other than a comment or whitespace. Set point to where scanning stops. 
-If COUNT comments are found as expected, with nothing except whitespace -between them, return t; otherwise return nil. -Point is set in either case. -COUNT defaults to 1, and BUFFER defaults to the current buffer. +Return t if the motion successfully crosses COUNT comments, otherwise nil (if +the motion would cross the buffer boundary or encounters a noncomment token). */ (count, buffer)) { @@ -1729,8 +1728,11 @@ } DEFUN ("scan-lists", Fscan_lists, 3, 5, 0, /* -Scan from character number FROM by COUNT lists. -Returns the character number of the position thus found. +Scan from position FROM across COUNT lists starting from nesting DEPTH. +Returns the position thus found. +Optional BUFFER defaults to the current buffer. +If optional NOERROR is non-nil, scan-lists will return nil instead of +signalling an error when attempting to cross a buffer boundary. If DEPTH is nonzero, paren depth begins counting from that value, only places where the depth in parentheses becomes zero @@ -1743,11 +1745,6 @@ and the depth is wrong, an error is signaled. If the depth is right but the count is not used up, nil is returned. -If optional arg BUFFER is non-nil, scanning occurs in that buffer instead -of in the current buffer. - -If optional arg NOERROR is non-nil, scan-lists will return nil instead of -signalling an error. */ (from, count, depth, buffer, noerror)) { @@ -1763,9 +1760,13 @@ } DEFUN ("scan-sexps", Fscan_sexps, 2, 4, 0, /* -Scan from character number FROM by COUNT balanced expressions. +Scan from position FROM by COUNT balanced expressions. +Returns the position thus found. + If COUNT is negative, scan backwards. -Returns the character number of the position thus found. +Optional BUFFER defaults to the current buffer. +If optional NOERROR is non-nil, scan-sexps will return nil instead of +signalling an error. Comments are ignored if `parse-sexp-ignore-comments' is non-nil. @@ -1773,12 +1774,6 @@ in the middle of a parenthetical grouping, an error is signaled. 
If the beginning or end is reached between groupings but before count is used up, nil is returned. - -If optional arg BUFFER is non-nil, scanning occurs in that buffer instead -of in the current buffer. - -If optional arg NOERROR is non-nil, scan-sexps will return nil instead of -signalling an error. */ (from, count, buffer, noerror)) { @@ -1790,10 +1785,8 @@ } DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, 0, 1, 0, /* -Move point backward over any number of chars with prefix syntax. -This includes chars with "quote" or "prefix" syntax (' or p). - -Optional arg BUFFER defaults to the current buffer. +Move point backward over any number of chars with quote or prefix syntax. +Optional BUFFER defaults to the current buffer. */ (buffer)) { @@ -2335,8 +2328,7 @@ /* Called from chartab.c when a change is made to a syntax table. If this is the standard syntax table, we need to recompute - *all* syntax tables (yuck). Otherwise we just recompute this - one. */ + *all* syntax tables. Otherwise we just recompute this one. */ void update_syntax_table (Lisp_Object table) @@ -2399,12 +2391,12 @@ vars_of_syntax (void) { DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments /* -Non-nil means `forward-sexp', etc., should treat comments as whitespace. +If non-nil, `forward-sexp' etc. treat comments as whitespace. */ ); parse_sexp_ignore_comments = 0; DEFVAR_BOOL ("lookup-syntax-properties", &lookup_syntax_properties /* -Non-nil means `forward-sexp', etc., respect the `syntax-table' property. +If non-nil, `forward-sexp' etc. respect the `syntax-table' text property. This property can be placed on buffers or strings and can be used to explicitly specify the syntax table to be used for looking up the syntax of the chars having this property, or to directly specify the syntax of the chars. @@ -2412,12 +2404,12 @@ The value of this property should be either a syntax table, or a cons of the form (SYNTAXCODE . 
MATCHCHAR), SYNTAXCODE being the numeric syntax code, MATCHCHAR being nil or the character to match (which is -relevant only when the syntax code is open/close-type). +relevant only when the syntax code is of an open or close type). */ ); lookup_syntax_properties = 1; DEFVAR_BOOL ("words-include-escapes", &words_include_escapes /* -Non-nil means `forward-word', etc., should treat escape chars part of words. +If non-nil, `forward-word' etc. treat escape characters as parts of words. */ ); words_include_escapes = 0;
--- a/src/syntax.h Mon Aug 08 13:57:20 2011 +0900 +++ b/src/syntax.h Mon Aug 08 13:57:20 2011 +0900 @@ -26,22 +26,24 @@ /* A syntax table is a type of char table. -The low 7 bits of the integer is a code, as follows. The 8th bit is -used as the prefix bit flag (see below). - The values in a syntax table are either integers or conses of integers and chars. The lowest 7 bits of the integer are the syntax class. If this is Sinherit, then the actual syntax value needs to be retrieved from the standard syntax table. -Since the logic involved in finding the actual integer isn't very -complex, you'd think the time required to retrieve it is not a -factor. If you thought that, however, you'd be wrong, due to the -high number of times (many per character) that the syntax value is -accessed in functions such as scan_lists(). To speed this up, -we maintain a mirror syntax table that contains the actual -integers. We can do this successfully because syntax tables are -now an abstract type, where we control all access. +It turns out to be worth optimizing lookups of character syntax in two +ways. First, although the logic involved in finding the actual integer +isn't complex, the syntax value is accessed in functions such as +scan_lists() many times for each character scanned. A "mirror syntax +table" that contains the actual integers speeds this up. + +Second, due to the syntax-table text property, the table for looking up +syntax may change from character to character. Since looking up properties +is expensive, a "syntax cache" which contains the current syntax table and +the region where it is valid can speed up linear scans dramatically. + +The low 7 bits of the integer is a code, as follows. The 8th bit is +used as the prefix bit flag (see below). */ enum syntaxcode @@ -120,21 +122,23 @@ return SYNTAX (table, c) == Sword; } -/* OK, here's a graphic diagram of the format of the syntax values: +/* OK, here's a graphic diagram of the format of the syntax values. 
+ Here, the value has already been extracted from the Lisp integer, + so there are no tag bits to worry about. Bit number: [ 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 ] [ 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 ] - <-----> <-----> <-------------> <-------------> ^ <-----------> - ELisp unused |comment bits | unused | syntax code - tag | | | | | | | | | - stuff | | | | | | | | | - | | | | | | | | | - | | | | | | | | `--> prefix flag - | | | | | | | | - | | | | | | | `--> comment end style B, second char + | <-----------> <-------------> <-------------> ^ <-----------> + | unused |comment bits | unused | syntax code + v | | | | | | | | | + unusable | | | | | | | | | + due to | | | | | | | | | + type tag | | | | | | | | `--> prefix flag + in Lisp | | | | | | | | + integer | | | | | | | `--> comment end style B, second char | | | | | | `----> comment end style A, second char | | | | | `------> comment end style B, first char | | | | `--------> comment end style A, first char @@ -144,35 +148,35 @@ `----------------> comment start style A, first char In a 64-bit integer, there would be 32 more unused bits between - the tag and the comment bits. + the unusable bit and the comment bits. - Clearly, such a scheme will not work for Mule, because the matching - paren could be any character and as such requires 21 bits, which - we don't got. + In older versions of XEmacs, bits 8-14 contained the matching + character for parentheses. Such a scheme will not work for Mule, + because the matching parenthesis could be any character and + requires 21 bits, which we don't have on a 32-bit platform. - Remember that under Mule we use char tables instead of vectors. - So what we do is use another char table for the matching paren + What we do is use another char table for the matching parenthesis and store a pointer to it in the first char table. (This frees code from having to worry about passing two tables around.) 
*/ -/* The prefix flag bit for backward-prefix-chars is now put into bit 7. */ +/* The prefix flag bit for backward-prefix-chars is in bit 7. */ #define SYNTAX_PREFIX(table, c) \ ((SYNTAX_CODE (table, c) >> 7) & 1) /* Bits 23-16 are used to implement up to two comment styles in a single buffer. They have the following meanings: - - 1. first of a one or two character comment-start sequence of style a. - 2. first of a one or two character comment-start sequence of style b. - 3. second of a two-character comment-start sequence of style a. - 4. second of a two-character comment-start sequence of style b. - 5. first of a one or two character comment-end sequence of style a. - 6. first of a one or two character comment-end sequence of style b. - 7. second of a two-character comment-end sequence of style a. - 8. second of a two-character comment-end sequence of style b. + bit + 23 first of a one or two character comment-start sequence of style a. + 22 first of a one or two character comment-start sequence of style b. + 21 second of a two-character comment-start sequence of style a. + 20 second of a two-character comment-start sequence of style b. + 19 first of a one or two character comment-end sequence of style a. + 18 first of a one or two character comment-end sequence of style b. + 17 second of a two-character comment-end sequence of style a. + 16 second of a two-character comment-end sequence of style b. */ #define SYNTAX_COMMENT_BITS(table, c) \ @@ -196,84 +200,20 @@ #define SYNTAX_SECOND_CHAR_END 0x03 #define SYNTAX_SECOND_CHAR 0x33 -#if 0 - -/* #### Entirely unused. Should they be deleted? */ - -/* #### These are now more or less equivalent to - SYNTAX_COMMENT_MATCH_START ...*/ -/* a and b must be first and second start chars for a common type */ -#define SYNTAX_START_P(table, a, b) \ - (((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_START) >> 2) \ - & (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_START)) - -/* ... 
and SYNTAX_COMMENT_MATCH_END */ -/* a and b must be first and second end chars for a common type */ -#define SYNTAX_END_P(table, a, b) \ - (((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_END) >> 2) \ - & (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_END)) - -#define SYNTAX_STYLES_MATCH_START_P(table, a, b, mask) \ - ((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_START & (mask)) \ - && (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_START & (mask))) - -#define SYNTAX_STYLES_MATCH_END_P(table, a, b, mask) \ - ((SYNTAX_COMMENT_BITS (table, a) & SYNTAX_FIRST_CHAR_END & (mask)) \ - && (SYNTAX_COMMENT_BITS (table, b) & SYNTAX_SECOND_CHAR_END & (mask))) - -#define SYNTAX_STYLES_MATCH_1CHAR_P(table, a, mask) \ - ((SYNTAX_COMMENT_BITS (table, a) & (mask))) - -#define STYLE_FOUND_P(table, a, b, startp, style) \ - ((SYNTAX_COMMENT_BITS (table, a) & \ - ((startp) ? SYNTAX_FIRST_CHAR_START : \ - SYNTAX_FIRST_CHAR_END) & (style)) \ - && (SYNTAX_COMMENT_BITS (table, b) & \ - ((startp) ? SYNTAX_SECOND_CHAR_START : \ - SYNTAX_SECOND_CHAR_END) & (style))) - -#define SYNTAX_COMMENT_MASK_START(table, a, b) \ - ((STYLE_FOUND_P (table, a, b, 1, SYNTAX_COMMENT_STYLE_A) \ - ? SYNTAX_COMMENT_STYLE_A \ - : (STYLE_FOUND_P (table, a, b, 1, SYNTAX_COMMENT_STYLE_B) \ - ? SYNTAX_COMMENT_STYLE_B \ - : 0))) - -#define SYNTAX_COMMENT_MASK_END(table, a, b) \ - ((STYLE_FOUND_P (table, a, b, 0, SYNTAX_COMMENT_STYLE_A) \ - ? SYNTAX_COMMENT_STYLE_A \ - : (STYLE_FOUND_P (table, a, b, 0, SYNTAX_COMMENT_STYLE_B) \ - ? SYNTAX_COMMENT_STYLE_B \ - : 0))) - -#define STYLE_FOUND_1CHAR_P(table, a, style) \ - ((SYNTAX_COMMENT_BITS (table, a) & (style))) - -#define SYNTAX_COMMENT_1CHAR_MASK(table, a) \ - ((STYLE_FOUND_1CHAR_P (table, a, SYNTAX_COMMENT_STYLE_A) \ - ? SYNTAX_COMMENT_STYLE_A \ - : (STYLE_FOUND_1CHAR_P (table, a, SYNTAX_COMMENT_STYLE_B) \ - ? 
SYNTAX_COMMENT_STYLE_B \ - : 0))) - -#endif /* 0 */ - -/* This array, indexed by a character, contains the syntax code which - that character signifies (as a char). - For example, (enum syntaxcode) syntax_spec_code['w'] is Sword. */ - +/* Array of syntax codes, indexed by characters which designate them. + Designators must be ASCII characters (i.e., in the range 0x00-0x7F). + Bounds checking is the responsibility of calling code. */ extern const unsigned char syntax_spec_code[0200]; -/* Indexed by syntax code, give the letter that describes it. */ - +/* Array of designators indexed by syntax code. + Indices should be of type enum syntaxcode. */ extern const unsigned char syntax_code_spec[]; Lisp_Object scan_lists (struct buffer *buf, Charbpos from, int count, int depth, int sexpflag, int no_error); int char_quoted (struct buffer *buf, Charbpos pos); -/* NOTE: This does not refer to the mirror table, but to the - syntax table itself. */ +/* TABLE is a syntax table, not the mirror table. */ Lisp_Object syntax_match (Lisp_Object table, Ichar ch); extern int no_quit_in_re_search; @@ -283,55 +223,55 @@ extern int lookup_syntax_properties; -/* Now that the `syntax-table' property exists, and can override the syntax - table or directly specify the syntax, we cache the last place we - retrieved the syntax-table property. This is because, when moving - linearly through text (e.g. in the regex routines or the scanning - routines in syntax.c), we only need to recalculate at the next place the - syntax-table property changes (i.e. not every position), and when we do - need to recalculate, we can update the info from the previous info - faster than if we did the whole calculation from scratch. */ +/* The `syntax-table' property overrides the syntax table or directly + specifies the syntax. Since looking up properties is expensive, we cache + the information about the syntax-table property. When moving linearly + through text (e.g.
in the regex routines or the scanning routines in + syntax.c), recalculation is needed only when the syntax-table property + changes (i.e. not every position). + When we do need to recalculate, we can update the info from the previous + info faster than if we did the whole calculation from scratch. + #### sjt sez: I'm not sure I believe that last claim. That seems to + require that we use directional information, etc, but that is ignored in + the current implementation. */ struct syntax_cache { #ifdef NEW_GC NORMAL_LISP_OBJECT_HEADER header; #endif /* NEW_GC */ - int use_code; /* Whether to use syntax_code or - syntax_table. This is set - depending on whether the - syntax-table property is a - syntax table or a syntax - code. */ - int no_syntax_table_prop; /* If non-zero, there was no - `syntax-table' property on the - current range, and so we're - using the buffer's syntax table. - This is important to note because - sometimes the buffer's syntax - table can be changed. */ - Lisp_Object object; /* The buffer or string the current - syntax cache applies to, or - Qnil for a string of text not - coming from a buffer or string. */ - struct buffer *buffer; /* The buffer that supplies the - syntax tables, or 0 for the - standard syntax table. If - OBJECT is a buffer, this will - always be the same buffer. */ - int syntax_code; /* Syntax code of current char. */ - Lisp_Object syntax_table; /* Syntax table for current pos. */ - Lisp_Object mirror_table; /* Mirror table for this table. */ - Lisp_Object start, end; /* Markers to keep track of the - known region in a buffer. - Formerly we used an internal - extent, but it seems that having - an extent over the entire buffer - causes serious slowdowns in - extent operations! Yuck! */ - Charxpos next_change; /* Position of the next extent - change. */ - Charxpos prev_change; /* Position of the previous extent - change. */ + int use_code; /* Non-zero if a syntax-table property + specified a syntax code. 
When zero, the + syntax_code member is invalid. Otherwise + the syntax_table member is invalid. */ + int no_syntax_table_prop; /* If non-zero, there was no `syntax-table' + property on the current range, and so we're + using the buffer's syntax table. + Then we must invalidate the cache if the + buffer's syntax table is changed. */ + Lisp_Object object; /* The buffer or string the current syntax + cache applies to, or Qnil for a string of + text not coming from a buffer or string. */ + struct buffer *buffer; /* The buffer that supplies the syntax tables, + or NULL for the standard syntax table. If + OBJECT is a buffer, this will always be + the same buffer. */ + int syntax_code; /* Syntax code of current char. */ + Lisp_Object syntax_table; /* Syntax table for current pos. */ + Lisp_Object mirror_table; /* Mirror table for this table. */ + Lisp_Object start, end; /* Markers to keep track of the known region + in a buffer. + Normally these correspond to prev_change + and next_change, respectively, except when + insertions and deletions occur. Then + prev_change and next change will be + refreshed from these markers. See + signal_syntax_cache_extent_adjust(). + We'd like to use an extent, but it seems + that having an extent over the entire + buffer causes serious slowdowns in extent + operations! Yuck! */ + Charxpos next_change; /* Position of the next extent change. */ + Charxpos prev_change; /* Position of the previous extent change. */ }; #ifdef NEW_GC @@ -347,13 +287,10 @@ #define CONCHECK_SYNTAX_CACHE(x) CONCHECK_RECORD (x, syntax_cache) #endif /* NEW_GC */ - - extern const struct sized_memory_description syntax_cache_description; -/* Note that the external interface to the syntax-cache uses charpos's, but +/* Note that the external interface to the syntax cache uses charpos's, but internally we use bytepos's, for speed. 
*/ - void update_syntax_cache (struct syntax_cache *cache, Charxpos pos, int count); struct syntax_cache *setup_syntax_cache (struct syntax_cache *cache, Lisp_Object object,