Mercurial > hg > xemacs-beta
comparison lisp/syntax-ppss.el @ 5532:69a08906ad27
Introducing syntax-ppss.el from GNU.
author | Mats Lidell <mats.lidell@cag.se> |
---|---|
date | Wed, 20 Jul 2011 00:42:16 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5531:1b054bc2ac40 | 5532:69a08906ad27 |
---|---|
1 ;;; syntax-ppss.el --- helper functions to find syntactic context | |
2 | |
3 ;; Copyright (C) 2000, 2001, 2002, 2003, 2004, | |
4 ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. | |
5 | |
6 ;; Maintainer: XEmacs Development Team | |
7 ;; Keywords: internal | |
8 | |
9 ;; This file is part of XEmacs. | |
10 | |
11 ;; XEmacs is free software: you can redistribute it and/or modify it | |
12 ;; under the terms of the GNU General Public License as published by the | |
13 ;; Free Software Foundation, either version 3 of the License, or (at your | |
14 ;; option) any later version. | |
15 | |
16 ;; XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
17 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
18 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
19 ;; for more details. | |
20 | |
21 ;; You should have received a copy of the GNU General Public License | |
22 ;; along with XEmacs. If not, see <http://www.gnu.org/licenses/>. | |
23 | |
24 ;;; Synched up with: FSF 23.3, syntax.el. | |
25 | |
26 ;;; Commentary: | |
27 | |
28 ;; The main exported function is `syntax-ppss'. You might also need | |
29 ;; to call `syntax-ppss-flush-cache' or to add it to | |
30 ;; `before-change-functions' (although this is automatically done by | |
31 ;; syntax-ppss when needed, but that might fail if syntax-ppss is | |
32 ;; called in a context where before-change-functions is temporarily | |
33 ;; let-bound to nil). | |
34 | |
35 ;;; Todo: | |
36 | |
37 ;; - do something about the case where the syntax-table is changed. | |
38 ;; This typically happens with tex-mode and its `$' operator. | |
39 ;; - move font-lock-syntactic-keywords in here. Then again, maybe not. | |
40 ;; - new functions `syntax-state', ... to replace uses of parse-partial-state | |
41 ;; with something higher-level (similar to syntax-ppss-context). | |
42 ;; - interaction with mmm-mode. | |
43 | |
44 ;;; Code: | |
45 | |
46 ;; Note: PPSS stands for `parse-partial-sexp state' | |
47 | |
48 (eval-when-compile (require 'cl)) | |
49 | |
;; Declared here without a value (presumably to quiet the byte-compiler); | |
;; `syntax-ppss' checks `boundp' before reading this variable. | |
50 (defvar font-lock-beginning-of-syntax-function) | |
51 | |
;; Return element 0 of PPSS, which `parse-partial-sexp' documents as the | |
;; depth in parentheses. | |
52 (defsubst syntax-ppss-depth (ppss) | |
53 (nth 0 ppss)) | |
54 | |
55 (defun syntax-ppss-toplevel-pos (ppss) | |
56 "Get the latest syntactically outermost position found in a syntactic scan. | |
57 PPSS is a scan state, as returned by `parse-partial-sexp' or `syntax-ppss'. | |
58 An \"outermost position\" means one that is outside of any syntactic entity: | |
59 outside of any parentheses, comments, or strings encountered in the scan. | |
60 If no such position is recorded in PPSS (because the end of the scan was | |
61 itself at the outermost level), return nil." | |
62 ;; BEWARE! We rely on the undocumented 9th field. The 9th field currently | |
63 ;; contains the list of positions of the enclosing open-parens. | |
64 ;; I.e. those positions are outside of any string/comment and the first of | |
65 ;; those is outside of any paren (i.e. corresponds to a nil ppss). | |
66 ;; If this list is empty but we are in a string or comment, then the 8th | |
67 ;; field contains a similar "toplevel" position. | |
;; ("9th" and "8th" above are the zero-based indexes passed to `nth' below.) | |
68 (or (car (nth 9 ppss)) | |
69 (nth 8 ppss))) | |
70 | |
;; Classify PPSS: return the symbol `string' when element 3 is non-nil, | |
;; `comment' when element 3 is nil and element 4 is non-nil, and nil otherwise. | |
71 (defsubst syntax-ppss-context (ppss) | |
72 (cond | |
73 ((nth 3 ppss) 'string) | |
74 ((nth 4 ppss) 'comment) | |
75 (t nil))) | |
76 | |
;; `syntax-ppss' parses without caching when the distance to parse is below | |
;; this value, and adds intermediate cache entries while the remaining | |
;; distance exceeds twice this value. | |
77 (defvar syntax-ppss-max-span 20000 | |
78 "Threshold below which cache info is deemed unnecessary. | |
79 We try to make sure that cache entries are at least this far apart | |
80 from each other, to avoid keeping too much useless info.") | |
81 | |
;; When this is nil, `syntax-ppss' sets it buffer-locally from | |
;; `font-lock-beginning-of-syntax-function' if that variable is bound and non-nil. | |
82 (defvar syntax-begin-function nil | |
83 "Function to move back outside of any comment/string/paren. | |
84 This function should move the cursor back to some syntactically safe | |
85 point (where the PPSS is equivalent to nil).") | |
86 | |
87 (defvar syntax-ppss-cache nil | |
88 "List of (POS . PPSS) pairs, in decreasing POS order.") | |
89 (make-variable-buffer-local 'syntax-ppss-cache) | |
;; `syntax-ppss-flush-cache' may set the car (LAST-POS) to nil while keeping | |
;; LAST-PPSS, so LAST-POS is not guaranteed to be a position. | |
90 (defvar syntax-ppss-last nil | |
91 "Cache of (LAST-POS . LAST-PPSS).") | |
92 (make-variable-buffer-local 'syntax-ppss-last) | |
93 | |
;; `syntax-ppss-after-change-function' is an alias for the flush function below. | |
94 (defalias 'syntax-ppss-after-change-function 'syntax-ppss-flush-cache) | |
;; Any arguments after BEG are accepted and ignored.  `syntax-ppss' installs | |
;; this function on `before-change-functions'. | |
95 (defun syntax-ppss-flush-cache (beg &rest ignored) | |
96 "Flush the cache of `syntax-ppss' starting at position BEG." | |
97 ;; Flush invalid cache entries. | |
;; The cache is in decreasing POS order, so entries past BEG are at the front. | |
98 (while (and syntax-ppss-cache (> (caar syntax-ppss-cache) beg)) | |
99 (setq syntax-ppss-cache (cdr syntax-ppss-cache))) | |
100 ;; Throw away `last' value if made invalid. | |
101 (when (< beg (or (car syntax-ppss-last) 0)) | |
102 ;; If syntax-begin-function jumped to BEG, then the old state at BEG can | |
103 ;; depend on the text after BEG (which is presumably changed). So if | |
104 ;; BEG is at or before the toplevel position recorded in the last state, | |
105 ;; don't reuse that data: the assumed nil state there may not be valid any more. | |
;; `syntax-ppss-last' is (LAST-POS . LAST-PPSS), so (nth 3 syntax-ppss-last) | |
;; is element 2 of LAST-PPSS, the same fallback start `syntax-ppss' uses. | |
;; Otherwise only LAST-POS is cleared and LAST-PPSS is kept. | |
106 (if (<= beg (or (syntax-ppss-toplevel-pos (cdr syntax-ppss-last)) | |
107 (nth 3 syntax-ppss-last) | |
108 0)) | |
109 (setq syntax-ppss-last nil) | |
110 (setcar syntax-ppss-last nil))) | |
111 ;; Unregister if there's no cache left. Sadly this doesn't work | |
112 ;; because `before-change-functions' is temporarily bound to nil here. | |
113 ;; (unless syntax-ppss-cache | |
114 ;; (remove-hook 'before-change-functions 'syntax-ppss-flush-cache t)) | |
115 ) | |
116 | |
;; Statistics for `syntax-ppss': six (COUNT . TOTAL-DISTANCE) pairs that | |
;; `syntax-ppss' updates in place with `incf'.  Indexes as used there: | |
;; 0 parse forward from the last position; | |
;; 1 parse from the last state's toplevel position; | |
;; 2 short parse from the best known position; | |
;; 3 long parse that also fills the cache; | |
;; 4 last position preferred over the cache entry; | |
;; 5 position found by `syntax-begin-function' used. | |
;; Entry 5 also feeds the "close enough" test in `syntax-ppss'; with the | |
;; initial (1 . 2500.0) that threshold is 2 * 2500.0 / (1 + 1) = 2500.0. | |
117 (defvar syntax-ppss-stats | |
118 [(0 . 0.0) (0 . 0.0) (0 . 0.0) (0 . 0.0) (0 . 0.0) (1 . 2500.0)]) | |
;; Return a list with one element per statistics entry: (COUNT . AVERAGE), | |
;; where AVERAGE is TOTAL-DISTANCE / COUNT truncated to an integer, or nil | |
;; when computing that signals an error. | |
119 (defun syntax-ppss-stats () | |
120 (mapcar (lambda (x) | |
121 (condition-case nil | |
122 (cons (car x) (truncate (/ (cdr x) (car x)))) | |
123 (error nil))) | |
124 syntax-ppss-stats)) | |
125 | |
;; Strategy, in order of preference: | |
;; 1. If the last request was at or before POS and close enough, parse | |
;; forward from it (this path does not update `syntax-ppss-last'). | |
;; 2. Otherwise, if the last position is unusable but its state is still | |
;; available, and that state's toplevel position (or, failing that, its | |
;; element 2) lies within `syntax-ppss-max-span' before POS, parse from | |
;; there with a nil state. | |
;; 3. Otherwise pick the best start among the cache, the last position and | |
;; `syntax-begin-function', parse from it, and on long parses record | |
;; entries in `syntax-ppss-cache'. | |
;; An `args-out-of-range' error in any of these falls back to a plain parse | |
;; from `point-min'. | |
126 (defun syntax-ppss (&optional pos) | |
127 "Parse-Partial-Sexp State at POS, defaulting to point. | |
128 The returned value is the same as `parse-partial-sexp' except that | |
129 the 2nd and 6th values of the returned state cannot be relied upon. | |
130 Point is at POS when this function returns." | |
131 ;; Default values. | |
132 (unless pos (setq pos (point))) | |
133 ;; | |
134 (let ((old-ppss (cdr syntax-ppss-last)) | |
135 (old-pos (car syntax-ppss-last)) | |
136 (ppss nil) | |
137 (pt-min (point-min))) | |
138 (if (and old-pos (> old-pos pos)) (setq old-pos nil)) | |
139 ;; Use the OLD-POS if usable and close. Don't update the `last' cache. | |
140 (condition-case nil | |
141 (if (and old-pos (< (- pos old-pos) | |
142 ;; The time to use syntax-begin-function and | |
143 ;; find PPSS is assumed to be about 2 * distance. | |
144 (* 2 (/ (cdr (aref syntax-ppss-stats 5)) | |
145 (1+ (car (aref syntax-ppss-stats 5))))))) | |
146 (progn | |
147 (incf (car (aref syntax-ppss-stats 0))) | |
148 (incf (cdr (aref syntax-ppss-stats 0)) (- pos old-pos)) | |
149 (parse-partial-sexp old-pos pos nil nil old-ppss)) | |
150 | |
151 (cond | |
152 ;; Use OLD-PPSS if possible and close enough. | |
153 ((and (not old-pos) old-ppss | |
154 ;; If `pt-min' is too far from `pos', we could try to use | |
155 ;; other positions in (nth 9 old-ppss), but that doesn't | |
156 ;; seem to happen in practice and it would complicate this | |
157 ;; code (and the before-change-function code even more). | |
158 ;; But maybe it would be useful in "degenerate" cases such | |
159 ;; as when the whole file is wrapped in a set | |
160 ;; of parentheses. | |
161 (setq pt-min (or (syntax-ppss-toplevel-pos old-ppss) | |
162 (nth 2 old-ppss))) | |
163 (<= pt-min pos) (< (- pos pt-min) syntax-ppss-max-span)) | |
164 (incf (car (aref syntax-ppss-stats 1))) | |
165 (incf (cdr (aref syntax-ppss-stats 1)) (- pos pt-min)) | |
166 (setq ppss (parse-partial-sexp pt-min pos))) | |
167 ;; The OLD-* data can't be used. Consult the cache. | |
168 (t | |
169 (let ((cache-pred nil) | |
170 (cache syntax-ppss-cache) | |
171 (pt-min (point-min)) | |
172 ;; I differentiate between PT-MIN and PT-BEST because | |
173 ;; I feel like it might be important to ensure that the | |
174 ;; cache is only filled with 100% sure data (whereas | |
175 ;; syntax-begin-function might return incorrect data). | |
176 ;; Maybe that's just stupid. | |
177 (pt-best (point-min)) | |
178 (ppss-best nil)) | |
179 ;; look for a usable cache entry. | |
180 (while (and cache (< pos (caar cache))) | |
181 (setq cache-pred cache) | |
182 (setq cache (cdr cache))) | |
183 (if cache (setq pt-min (caar cache) ppss (cdar cache))) | |
184 | |
185 ;; Setup the before-change function if necessary. | |
186 (unless (or syntax-ppss-cache syntax-ppss-last) | |
187 (add-hook 'before-change-functions | |
188 'syntax-ppss-flush-cache t t)) | |
189 | |
190 ;; Use the best of OLD-POS and CACHE. | |
191 (if (or (not old-pos) (< old-pos pt-min)) | |
192 (setq pt-best pt-min ppss-best ppss) | |
193 (incf (car (aref syntax-ppss-stats 4))) | |
194 (incf (cdr (aref syntax-ppss-stats 4)) (- pos old-pos)) | |
195 (setq pt-best old-pos ppss-best old-ppss)) | |
196 | |
197 ;; Use the `syntax-begin-function' if available. | |
198 ;; We could try using that function earlier, but: | |
199 ;; - The result might not be 100% reliable, so it's better to use | |
200 ;; the cache if available. | |
201 ;; - The function might be slow. | |
202 ;; - If this function almost always finds a safe nearby spot, | |
203 ;; the cache won't be populated, so consulting it is cheap. | |
204 (when (and (not syntax-begin-function) | |
205 (boundp 'font-lock-beginning-of-syntax-function) | |
206 font-lock-beginning-of-syntax-function) | |
207 (set (make-local-variable 'syntax-begin-function) | |
208 font-lock-beginning-of-syntax-function)) | |
209 (when (and syntax-begin-function | |
210 (progn (goto-char pos) | |
211 (funcall syntax-begin-function) | |
212 ;; Make sure it's better. | |
213 (> (point) pt-best)) | |
214 ;; Simple sanity check. | |
215 (not (memq (get-text-property (point) 'face) | |
216 '(font-lock-string-face font-lock-doc-face | |
217 font-lock-comment-face)))) | |
218 (incf (car (aref syntax-ppss-stats 5))) | |
219 (incf (cdr (aref syntax-ppss-stats 5)) (- pos (point))) | |
220 (setq pt-best (point) ppss-best nil)) | |
221 | |
222 (cond | |
223 ;; Quick case when we found a nearby pos. | |
224 ((< (- pos pt-best) syntax-ppss-max-span) | |
225 (incf (car (aref syntax-ppss-stats 2))) | |
226 (incf (cdr (aref syntax-ppss-stats 2)) (- pos pt-best)) | |
227 (setq ppss (parse-partial-sexp pt-best pos nil nil ppss-best))) | |
228 ;; Slow case: compute the state from some known position and | |
229 ;; populate the cache so we won't need to do it again soon. | |
230 (t | |
231 (incf (car (aref syntax-ppss-stats 3))) | |
232 (incf (cdr (aref syntax-ppss-stats 3)) (- pos pt-min)) | |
233 | |
234 ;; If `pt-min' is too far, add a few intermediate entries. | |
235 (while (> (- pos pt-min) (* 2 syntax-ppss-max-span)) | |
236 (setq ppss (parse-partial-sexp | |
237 pt-min (setq pt-min (/ (+ pt-min pos) 2)) | |
238 nil nil ppss)) | |
239 (let ((pair (cons pt-min ppss))) | |
240 (if cache-pred | |
241 (push pair (cdr cache-pred)) | |
242 (push pair syntax-ppss-cache)))) | |
243 | |
244 ;; Compute the actual return value. | |
245 (setq ppss (parse-partial-sexp pt-min pos nil nil ppss)) | |
246 | |
247 ;; Debugging check. | |
248 ;; (let ((real-ppss (parse-partial-sexp (point-min) pos))) | |
249 ;; (setcar (last ppss 4) 0) | |
250 ;; (setcar (last real-ppss 4) 0) | |
251 ;; (setcar (last ppss 8) nil) | |
252 ;; (setcar (last real-ppss 8) nil) | |
253 ;; (unless (equal ppss real-ppss) | |
254 ;; (message "!!Syntax: %s != %s" ppss real-ppss) | |
255 ;; (setq ppss real-ppss))) | |
256 | |
257 ;; Store it in the cache. | |
258 (let ((pair (cons pos ppss))) | |
259 (if cache-pred | |
260 (if (> (- (caar cache-pred) pos) syntax-ppss-max-span) | |
261 (push pair (cdr cache-pred)) | |
262 (setcar cache-pred pair)) | |
263 (if (or (null syntax-ppss-cache) | |
264 (> (- (caar syntax-ppss-cache) pos) | |
265 syntax-ppss-max-span)) | |
266 (push pair syntax-ppss-cache) | |
267 (setcar syntax-ppss-cache pair))))))))) | |
268 | |
269 (setq syntax-ppss-last (cons pos ppss)) | |
270 ppss) | |
271 (args-out-of-range | |
272 ;; If the buffer is more narrowed than when we built the cache, | |
273 ;; we may end up calling parse-partial-sexp with a position before | |
274 ;; point-min. In that case, just parse from point-min assuming | |
275 ;; a nil state. | |
276 (parse-partial-sexp (point-min) pos))))) | |
277 | |
278 ;; Debugging functions | |
279 | |
;; Return the list of distances between consecutive positions in | |
;; `syntax-ppss-cache', with `point-min' appended as a final position. | |
;; Each distance is pushed onto the result as the cache is walked from its | |
;; first entry, so the distance nearest `point-min' comes first in the result. | |
280 (defun syntax-ppss-debug () | |
281 (let ((pt nil) | |
282 (min-diffs nil)) | |
283 (dolist (x (append syntax-ppss-cache (list (cons (point-min) nil)))) | |
284 (when pt (push (- pt (car x)) min-diffs)) | |
285 (setq pt (car x))) | |
286 min-diffs)) | |
287 | |
288 ;;; syntax-ppss.el ends here |