Mercurial > hg > xemacs-beta
comparison lisp/mule/mule-trex.el @ 70:131b0175ea99 r20-0b30
Import from CVS: tag r20-0b30
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:02:59 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
69:804d1389bcd6 | 70:131b0175ea99 |
---|---|
1 ;; TREX: Tools for Regluar EXpressions | |
2 ;; | |
3 ;; Regular Expression Compiler | |
4 ;; | |
5 ;; Coded by S.Tomura <tomura@etl.go.jp> | |
6 | |
7 ;; Copyright (C) 1992 Free Software Foundation, Inc. | |
8 | |
9 ;; This file is part of XEmacs. | |
10 ;; This file contains Japanese characters | |
11 | |
12 ;; XEmacs is free software; you can redistribute it and/or modify it | |
13 ;; under the terms of the GNU General Public License as published by | |
14 ;; the Free Software Foundation; either version 2, or (at your option) | |
15 ;; any later version. | |
16 | |
17 ;; XEmacs is distributed in the hope that it will be useful, but | |
18 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
20 ;; General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
23 ;; along with XEmacs; see the file COPYING. If not, write to the | |
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
25 ;; Boston, MA 02111-1307, USA. | |
26 | |
27 (defvar TREX-version "0.41") | |
28 ;;; Last modified date: Thu Jun 15 13:07:39 1995 | |
29 | |
30 ;;; 95.6.15 modified by S.Tomura <tomura@etl.go.jp> | |
31 ;;; | |
32 ;;; $BFbB"$N(Bre_compile_pattern $B$HF1MM$K(B case-fold-search $B$K$h$C$F!"(B | |
33 ;;; translate $B$9$k$h$&$KJQ99$7$?!#(B | |
34 ;;; | |
35 ;;; 95.6.14 modified by S.Tomura <tomura@etl.go.jp> | |
36 ;;; print-translate $B$rDI2C!#(B<0.38> | |
37 ;;; print-fastmap $B$rDI2C!#(B | |
38 ;;; | |
39 ;;; start_memory, end_memory $B$NBh(B2$B0z?t$r@8@.$9$k$?$a$K!"(B:mark $B$NFbIt9=(B | |
40 ;;; $BB$$rJQ99$7$?!#(B | |
41 ;;; | |
42 ;;; re-compile-and-dump, regexp-compile-and-dump $B$rDI2C!#(B | |
43 ;;; | |
44 ;;; 95.6.13 | |
45 ;;; regexp19.c $B$KBP1~$7$F(B start_memory, end_memory $B$N(B dump $B%k!<%A%s$r=$@5(B | |
46 ;;; | |
47 ;;; $B$9$Y$-$3$H!'(B | |
48 ;;; | |
49 ;;; (1) \(\)* | |
50 ;;; (2) $B;^$N=gHV(B | |
51 ;;; (3) $B0UL#$N$J$$%0%k!<%W;2>H$N8!=P(B "\(a\\)\\2"$B$J$I(B | |
52 | |
53 (defmacro TREX-inc (symbol &optional delta) | |
54 (list 'setq symbol (if delta (list '+ symbol delta) | |
55 (list '1+ symbol)))) | |
56 | |
57 (defmacro TREX-dec (symbol &optional delta) | |
58 (list 'setq symbol (if delta (list '- symbol delta) | |
59 (list '1- symbol)))) | |
60 | |
61 (defmacro num (sym) | |
62 (list 'num* (list 'quote sym))) | |
63 | |
64 (defun num* (sym) | |
65 (TREX-read-hexa (substring (symbol-name sym) 2))) | |
66 | |
67 (defun TREX-read-hexa (str) | |
68 (let ((result 0) (i 0) (max (length str))) | |
69 (while (< i max) | |
70 (let ((ch (aref str i))) | |
71 (cond((and (<= ?0 ch) (<= ch ?9)) | |
72 (setq result (+ (* result 16) (- ch ?0)))) | |
73 ((and (<= ?a ch) (<= ch ?f)) | |
74 (setq result (+ (* result 16) (+ (- ch ?a) 10)))) | |
75 ((and (<= ?A ch) (<= ch ?F)) | |
76 (setq result (+ (* result 16) (+ (- ch ?A) 10))))) | |
77 (TREX-inc i))) | |
78 result)) | |
79 | |
80 ;;; 1 bytes : 0x00 <= C11 <= 0x7F | |
81 ;;; n bytes : 0x80 == LCCMP | |
82 ;;; 2 bytes 0xA0 <= LC <= 0xAF | |
83 ;;; 3 bytes 0xB0 <= LC <= 0xBB | |
84 ;;; 4 bytes 0xBC <= LC <= 0xBE | |
85 ;;; 2 bytes : 0x81 <= LC <= 0x8F | |
86 ;;; 3 bytes : 0x90 <= LC <= 0x9B | |
87 ;;; 4 bytes : 0x9C <= LC <= 0x9E | |
88 | |
89 | |
90 (defun TREX-char-octets (str index) | |
91 (let ((max (length str))) | |
92 (if (or (< index 0) (<= max index)) 0 | |
93 (let ((ch (aref str index)) | |
94 (bytes)) | |
95 (setq bytes | |
96 (cond ((<= ch (num 0x7f)) 1) | |
97 ((= ch (num 0x80)) | |
98 (let ((max (length str)) | |
99 (i index)) | |
100 (while (and (< i max) | |
101 (<= (num 0xa0) (aref str i)) | |
102 (<= (aref str i) (num 0xbe))) | |
103 (setq ch (aref str i)) | |
104 (cond ((<= ch (num 0xaf)) (TREX-inc i 2)) | |
105 ((<= ch (num 0xbb)) (TREX-inc i 3)) | |
106 ((<= ch (num 0xbe)) (TREX-inc i 4)))) | |
107 (- i index))) | |
108 ((<= ch (num 0x8f)) 2) | |
109 ((<= ch (num 0x9b)) 3) | |
110 ((<= ch (num 0x9e)) 4) | |
111 (t 1))) | |
112 (if (<= (+ index bytes) max) bytes 1))))) | |
113 | |
114 (defun TREX-comp-charp (str index) | |
115 (= (aref str index) (num 0x80))) | |
116 | |
117 ;;; 0x00 <= C11 <= 0x7F : 1 bytes | |
118 ;;; Type 1-1 C11 | |
119 ;;; 0x80 == LCCMP : n bytes | |
120 ;;; Type N LCCMP LCN1 C11 ... LCN2 C21 ... LCNn Cn1 ... | |
121 ;;; 0xA0 <= LCN* <= 0xBE | |
122 ;;; LCN* = LC + 0x20 | |
123 ;;; LCN* = 0xA0 (ASCII) | |
124 ;;; 0x81 <= LC1 <= 0x8F : 2 bytes | |
125 ;;; Type 1-2 LC1 C11 : | |
126 ;;; 0xA0 <= C11 <= 0xFF | |
127 ;;; 0x90 <= LC2 <= 0x99 : 3 bytes | |
128 ;;; Type 2-3 LC2 C21 C22 | |
129 ;;; 0xA0 <= C21 <= 0xFF | |
130 ;;; 0xA0 <= C22 <= 0xFF | |
131 ;;; 0x9A == LCPRV1 : 3 bytes | |
132 ;;; Type 1-3 LCPRV1 LC12 C11 | |
133 ;;; 0xA0 <= LC12 <= 0xB7 | |
134 ;;; 0xA0 <= C11 <= 0xFF | |
135 ;;; 0x9B == LCPRV1 : 3 bytes | |
136 ;;; Type 1-3 LCPRV1 LC12 C11 | |
137 ;;; 0xB8 <= LC12 <= 0xBF | |
138 ;;; 0xA0 <= C11 <= 0xFF | |
139 ;;; 0x9C == LCPRV2 : 4 bytes | |
140 ;;; Type 2-4 LCPRV2 LC22 C21 C22 | |
141 ;;; 0xC0 <= LC22 <= 0xC7 | |
142 ;;; 0xA0 <= C21 <= 0xFF | |
143 ;;; 0xA0 <= C22 <= 0xFF | |
144 ;;; 0x9D == LCPRV2 : 4 bytes | |
145 ;;; Type 2-4 LCPRV2 LC22 C21 C22 | |
146 ;;; 0xC8 <= LC22 <= 0xDF | |
147 ;;; 0xA0 <= C21 <= 0xFF | |
148 ;;; 0xA0 <= C22 <= 0xFF | |
149 ;;; 0x9E == LCPRV3 : 4 bytes | |
150 ;;; Type 3-4 LCPRV3 C31 C32 C33 | |
151 ;;; 0xA0 <= C31 <= 0xBF | |
152 ;;; 0xA0 <= C32 <= 0xFF | |
153 ;;; 0xA0 <= C33 <= 0xFF | |
154 ;;; char = [0x00-0x7f]\| | |
155 ;;; 0x80 | |
156 ;;; \(0xa0[0xa0-0xff]\| | |
157 ;;; [0xa1-0xaf][0xa0-0xff]\| | |
158 ;;; [0xb0-0xb9][0xa0-0xff][0xa0-0xff]\| | |
159 ;;; 0xba[0xa0-0xb7][0xa0-0xff]\| | |
160 ;;; 0xbb[0xb8-0xbf][0xa0-0xff]\| | |
161 ;;; 0xbc[0xc0-0xc7][0xa0-0xff][0xa0-0xff]\| | |
162 ;;; 0xbd[0xc8-0xdf][0xa0-0xff][0xa0-0xff]\| | |
163 ;;; 0xbe[0xa0-0xbf][0xa0-0xff][0xa0-0xff] | |
164 ;;; \)*\| | |
165 ;;; [0x81-0x8f][0xa0-0xff]\| | |
166 ;;; [0x90-0x99][0xa0-0xff][0xa0-0xff]\| | |
167 ;;; 0x9a[0xa0-0xb7][0xa0-0xff]\| | |
168 ;;; 0x9b[0xb8-0xbf][0xa0-0xff]\| | |
169 ;;; 0x9c[0xc0-0xc7][0xa0-0xff][0xa0-0xff]\| | |
170 ;;; 0x9d[0xc8-0xdf][0xa0-0xff][0xa0-0xff]\| | |
171 ;;; 0x9e[0xa0-0xbf][0xa0-0xff][0xa0-0xff] | |
172 | |
173 (defun regexp-make-or (&rest body) | |
174 (cons ':or body)) | |
175 | |
176 (defun regexp-make-seq (&rest body) | |
177 (cons ':seq body)) | |
178 | |
179 (defun regexp-make-star (regexp) | |
180 (list ':star regexp)) | |
181 | |
182 (defun regexp-make-range (from to) | |
183 (list 'CHARSET (list ':range from to))) | |
184 | |
185 | |
186 (defvar regexp-allchar-regexp | |
187 (regexp-make-or | |
188 (regexp-make-range 0 (num 0x7f)) | |
189 (regexp-make-seq | |
190 (num 0x80) | |
191 (regexp-make-star | |
192 (regexp-make-or | |
193 (regexp-make-seq | |
194 (num 0xa0) | |
195 (regexp-make-range (num 0xa0) (num 0xff))) | |
196 (regexp-make-seq | |
197 (regexp-make-range (num 0xa1) (num 0xaf)) | |
198 (regexp-make-range (num 0xa0) (num 0xff))) | |
199 (regexp-make-seq | |
200 (regexp-make-range (num 0xb0) (num 0xb9)) | |
201 (regexp-make-range (num 0xa0) (num 0xff)) | |
202 (regexp-make-range (num 0xa0) (num 0xff))) | |
203 (regexp-make-seq | |
204 (num 0xba) | |
205 (regexp-make-range (num 0xa0) (num 0xb7)) | |
206 (regexp-make-range (num 0xa0) (num 0xff))) | |
207 (regexp-make-seq | |
208 (num 0xbb) | |
209 (regexp-make-range (num 0xb8) (num 0xbf)) | |
210 (regexp-make-range (num 0xa0) (num 0xff))) | |
211 (regexp-make-seq | |
212 (num 0xbc) | |
213 (regexp-make-range (num 0xc0) (num 0xc7)) | |
214 (regexp-make-range (num 0xa0) (num 0xff)) | |
215 (regexp-make-range (num 0xa0) (num 0xff))) | |
216 (regexp-make-seq | |
217 (num 0xbd) | |
218 (regexp-make-range (num 0xc8) (num 0xdf)) | |
219 (regexp-make-range (num 0xa0) (num 0xff)) | |
220 (regexp-make-range (num 0xa0) (num 0xff))) | |
221 (regexp-make-seq | |
222 (num 0xbe) | |
223 (regexp-make-range (num 0xa0) (num 0xbf)) | |
224 (regexp-make-range (num 0xa0) (num 0xff)) | |
225 (regexp-make-range (num 0xa0) (num 0xff)))))) | |
226 (regexp-make-seq | |
227 (regexp-make-range (num 0x81) (num 0x8f)) | |
228 (regexp-make-range (num 0xa0) (num 0xff))) | |
229 (regexp-make-seq | |
230 (regexp-make-range (num 0x90) (num 0x99)) | |
231 (regexp-make-range (num 0xa0) (num 0xff)) | |
232 (regexp-make-range (num 0xa0) (num 0xff))) | |
233 (regexp-make-seq | |
234 (num 0x9a) | |
235 (regexp-make-range (num 0xa0) (num 0xb7)) | |
236 (regexp-make-range (num 0xa0) (num 0xff))) | |
237 (regexp-make-seq | |
238 (num 0x9b) | |
239 (regexp-make-range (num 0xb8) (num 0xbf)) | |
240 (regexp-make-range (num 0xa0) (num 0xff))) | |
241 (regexp-make-seq | |
242 (num 0x9c) | |
243 (regexp-make-range (num 0xc0) (num 0xc7)) | |
244 (regexp-make-range (num 0xa0) (num 0xff)) | |
245 (regexp-make-range (num 0xa0) (num 0xff))) | |
246 (regexp-make-seq | |
247 (num 0x9d) | |
248 (regexp-make-range (num 0xc8) (num 0xdf)) | |
249 (regexp-make-range (num 0xa0) (num 0xff)) | |
250 (regexp-make-range (num 0xa0) (num 0xff))) | |
251 (regexp-make-seq | |
252 (num 0x9e) | |
253 (regexp-make-range (num 0xa0) (num 0xbf)) | |
254 (regexp-make-range (num 0xa0) (num 0xff)) | |
255 (regexp-make-range (num 0xa0) (num 0xff))))) | |
256 | |
257 ;;;; | |
258 ;;;; | |
259 ;;;; | |
260 | |
261 (defun TREX-string-reverse (str) | |
262 (if (<= (length str) 1) str | |
263 (let ((result (make-string (length str) 0)) | |
264 (i 0) | |
265 (j (1- (length str)))) | |
266 (while (<= 0 j) | |
267 (aset result i (aref str j)) | |
268 (TREX-inc i) | |
269 (TREX-dec j)) | |
270 result))) | |
271 | |
272 (defun TREX-string-forward-anychar (str start) | |
273 (and (stringp str) (numberp start) | |
274 (let ((max (length str))) | |
275 (and (<= 0 start) | |
276 (< start max) | |
277 (+ start (TREX-char-octets str start)))))) | |
278 | |
279 (defmacro TREX-init (symbol value) | |
280 (` (if (null (, symbol)) | |
281 (setq (, symbol) (, value))))) | |
282 | |
283 (defmacro TREX-push (val symbol) | |
284 (list 'setq symbol (list 'cons val symbol))) | |
285 | |
286 (defun TREX-member (elm list pred) | |
287 (while (and list (not (funcall pred elm (car list)))) | |
288 (setq list (cdr list))) | |
289 list) | |
290 | |
291 (defun TREX-memequal (elm list) | |
292 (while (and list (not (equal elm (car list)))) | |
293 (setq list (cdr list))) | |
294 list) | |
295 | |
296 (defun TREX-find (elm list) | |
297 (let ((pos 0)) | |
298 (while (and list (not (equal elm (car list)))) | |
299 (setq list (cdr list)) | |
300 (TREX-inc pos)) | |
301 (if list pos | |
302 nil))) | |
303 | |
304 (defun TREX-find-if (pred list) | |
305 (let ((pos 0)) | |
306 (while (and list (not (funcall pred (car list)))) | |
307 (TREX-inc pos) | |
308 (setq list (cdr list))) | |
309 (if list pos | |
310 nil))) | |
311 | |
312 (defun TREX-firstn (list n) | |
313 (if (or (<= n 0) (null list)) nil | |
314 (cons (car list) (TREX-firstn (cdr list) (1- n))))) | |
315 | |
316 (defun TREX-delete-duplicate (list) | |
317 (let ((result nil)) | |
318 (while list | |
319 (let ((elm (car list))) | |
320 (if (not (TREX-memequal elm result)) | |
321 (TREX-push elm result))) | |
322 (setq list (cdr list))) | |
323 (nreverse result))) | |
324 | |
325 (defun TREX-delete (elm list) | |
326 (let ((result nil)) | |
327 (while list | |
328 (if (not (equal elm (car list))) | |
329 (TREX-push (car list) result)) | |
330 (setq list (cdr list))) | |
331 (nreverse result))) | |
332 | |
333 (defun TREX-string-to-list (str) | |
334 (let ((result nil) | |
335 (i 0) | |
336 (max (length str))) | |
337 (while (< i max) | |
338 (TREX-push (aref str i) result) | |
339 (TREX-inc i)) | |
340 (nreverse result))) | |
341 | |
342 (defun TREX-sort (list lessp &optional key) | |
343 (if (null key) | |
344 (sort list lessp) | |
345 (sort list (function (lambda (x y) (funcall lessp (funcall key x) (funcall key y))))))) | |
346 | |
347 (defun TREX-key-lessp (x y) | |
348 (cond((symbolp x) | |
349 (cond ((symbolp y) | |
350 (string-lessp x y)) | |
351 (t;; (not (symbolp)) | |
352 t))) | |
353 ((numberp x) | |
354 (cond ((numberp y) | |
355 (< x y)) | |
356 ((and (consp y) (eq (car y) ':range)) | |
357 (< x (nth 1 y))) | |
358 (t nil))) | |
359 ((and (consp x) (eq (car x) ':range)) | |
360 (cond ((and (consp y) (eq (car y) ':range)) | |
361 (< (nth 2 x) (nth 1 y))) | |
362 ((numberp y) | |
363 (< (nth 2 x) y)) | |
364 (t nil))) | |
365 (t nil))) | |
366 | |
367 (defun TREX-lessp-car (x y) | |
368 (let ((x (car x)) | |
369 (y (car y))) | |
370 (TREX-key-lessp x y))) | |
371 | |
372 (defmacro TREX-define-enum (&rest list) | |
373 (list 'TREX-define-enum* (list 'quote list))) | |
374 | |
375 (defun TREX-define-enum* (list) | |
376 (let ((i 0)) | |
377 (while list | |
378 (set (car list) i) | |
379 (TREX-inc i) | |
380 (setq list (cdr list))))) | |
381 | |
382 ;;; | |
383 ;;; regexp-parse | |
384 ;;; | |
385 | |
386 ;;; | |
387 ;;; $B@55,I=8=(B(regular expression) | |
388 ;;; | |
389 ;;; . single character except a newline | |
390 ;;; REG* more than zero | |
391 ;;; REG+ at least once | |
392 ;;; REG? once or not at all | |
393 ;;; [...] character set | |
394 ;;; [^...] character not set | |
395 ;;; ^ beginning of line | |
396 ;;; $ end of line | |
397 ;;; \ quote | |
398 ;;; \| alternative | |
399 ;;; \( ... \) group and mark | |
400 ;;; \DIGIT | |
401 ;;; \` beginning of buffer | |
402 ;;; \' end of buffer | |
403 ;;; \b beginning of word or end of word | |
404 ;;; \B not \b | |
405 ;;; \< beginning of word | |
406 ;;; \> end of word | |
407 ;;; | |
408 ;;; \w word-constituent character | |
409 ;;; \W not \w | |
410 ;;; \sCODE syntax CODE character | |
411 ;;; \SCODE not \sCODE | |
412 | |
413 ;;; | |
414 ;;; REG0 ::= REG1 | | |
415 ;;; REG1 "\\|" REG0 | |
416 ;;; | |
417 ;;; REG1 ::= REG2 | | |
418 ;;; REG2 REG1 | |
419 ;;; | |
420 ;;; REG2 ::= REG3 | | |
421 ;;; REG2 "*" | | |
422 ;;; REG2 "+" | | |
423 ;;; REG2 "?" | | |
424 ;;; | |
425 ;;; REG3 ::= "." | | |
426 ;;; "[" ... "]" | | |
427 ;;; "[" "^" ... "]" | | |
428 ;;; "^" | | |
429 ;;; "$" | | |
430 ;;; "\\" DIGIT | | |
431 ;;; "\\(" REG0 "\\)" | |
432 | |
433 ;;; $B>H9g$O@55,I=8=$N:8$+$i1&$X9T$o$l$k!%(B | |
434 | |
435 (defvar *regexp-parse-translate* nil | |
436 "$B@55,I=8=$rFI$_9~$_Cf$K;HMQ$9$k(B translate table.\n | |
437 case-fold-search $B$NCM$K$h$C$F(B downcasetable $B$r@_Dj$9$k!#(B") | |
438 | |
439 (defun regexp-parse-translate-char-string (str) | |
440 (if (and *regexp-parse-translate* | |
441 (= (length str) 1)) | |
442 ;;; $BK\Ev$O(B destructive $B$G$b(B OK | |
443 (char-to-string (aref *regexp-parse-translate* (aref str 0))) | |
444 str)) | |
445 | |
446 (defvar *regexp-word-definition* nil) | |
447 | |
448 (defvar *regexp-parse-index* nil) | |
449 (defvar *regexp-parse-end* nil) | |
450 (defvar *regexp-parse-str* nil) | |
451 (defvar *regexp-parse-regno* 1) | |
452 | |
453 (defun regexp-error (&optional reason) | |
454 (if (null reason) (setq reason "Bad regexp")) | |
455 (error "Regexp-parse::%s \"%s\" * \"%s\"" reason (substring *regexp-parse-str* 0 *regexp-parse-index*) | |
456 (substring *regexp-parse-str* *regexp-parse-index*))) | |
457 | |
458 (defun word-parse (pattern) | |
459 (let ((*regexp-word-definition* t)) | |
460 (regexp-parse pattern))) | |
461 | |
462 (defun regexp-parse (pattern) | |
463 (let*((*regexp-parse-str* pattern) | |
464 (*regexp-parse-index* 0) | |
465 (*regexp-parse-end* (length pattern)) | |
466 (*regexp-parse-regno* 1) | |
467 (result (regexp-parse-0))) | |
468 (if (<= *regexp-parse-end* *regexp-parse-index*) | |
469 result | |
470 (regexp-error)))) | |
471 | |
472 (defun regexp-parse-0 () | |
473 (let* ((result (regexp-parse-1))) | |
474 (cond((<= *regexp-parse-end* *regexp-parse-index*) | |
475 result) | |
476 ((and (< (1+ *regexp-parse-index*) *regexp-parse-end*) | |
477 (= (aref *regexp-parse-str* *regexp-parse-index*) ?\\) | |
478 (= (aref *regexp-parse-str* (1+ *regexp-parse-index*)) ?|)) | |
479 (TREX-inc *regexp-parse-index* 2) | |
480 (list ':or result (regexp-parse-0))) | |
481 (t result)))) | |
482 | |
483 (defun regexp-parse-1 () | |
484 (let ((results nil) | |
485 (result2 nil)) | |
486 (while (setq result2 (regexp-parse-2)) | |
487 (TREX-push result2 results)) | |
488 (if results | |
489 (if (cdr results) | |
490 (cons ':seq (nreverse results)) | |
491 (car results)) | |
492 nil))) | |
493 | |
494 (defun regexp-parse-2 () | |
495 (let ((result (regexp-parse-3))) | |
496 (while (and (< *regexp-parse-index* *regexp-parse-end*) | |
497 (TREX-memequal (aref *regexp-parse-str* *regexp-parse-index*) | |
498 '(?* ?+ ??))) | |
499 (let ((ch (aref *regexp-parse-str* *regexp-parse-index*))) | |
500 (TREX-inc *regexp-parse-index*) | |
501 (setq result | |
502 (cond((= ch ?*) (list ':star result)) | |
503 ((= ch ?+) (list ':plus result)) | |
504 ((= ch ??) (list ':optional result)))))) | |
505 result)) | |
506 | |
507 (defun regexp-parse-3 () | |
508 (if (<= *regexp-parse-end* *regexp-parse-index*) | |
509 nil | |
510 (let* ((start *regexp-parse-index*) | |
511 (i *regexp-parse-index*) | |
512 (end *regexp-parse-end*) | |
513 (ch (aref *regexp-parse-str* i))) | |
514 (TREX-inc *regexp-parse-index*) | |
515 (cond ((= ch ?.) '(ANYCHAR)) | |
516 ((= ch ?^) '(BEGLINE)) | |
517 ((= ch ?$) '(ENDLINE)) | |
518 ((= ch ?\[) | |
519 (regexp-parse-charset)) | |
520 ((= ch ?\]) | |
521 (setq *regexp-parse-index* start) | |
522 nil) | |
523 ((= ch ?*) | |
524 (setq *regexp-parse-index* start) | |
525 nil) | |
526 ((= ch ?+) | |
527 (setq *regexp-parse-index* start) | |
528 nil) | |
529 ((= ch ??) | |
530 (setq *regexp-parse-index* start) | |
531 nil) | |
532 ((and (= ch ?\\) (< (1+ i) end)) | |
533 (setq ch (aref *regexp-parse-str* (1+ i))) | |
534 (TREX-inc i) | |
535 (TREX-inc *regexp-parse-index*) | |
536 (cond ((= ch ?| ) | |
537 (setq *regexp-parse-index* start) | |
538 nil) | |
539 ((= ch ?\( ) | |
540 (if (< 9 *regexp-parse-regno*) | |
541 (regexp-error "Too many parenth")) | |
542 (let ((regexp-parse-regno *regexp-parse-regno*)) | |
543 (TREX-inc *regexp-parse-regno*) | |
544 (let ((result (regexp-parse-0))) | |
545 | |
546 (cond((and (< (1+ *regexp-parse-index*) *regexp-parse-end*) | |
547 (= (aref *regexp-parse-str* *regexp-parse-index*) ?\\ ) | |
548 (= (aref *regexp-parse-str* (1+ *regexp-parse-index*)) ?\) )) | |
549 (TREX-inc *regexp-parse-index* 2) | |
550 (if *regexp-word-definition* | |
551 result | |
552 (list ':mark regexp-parse-regno | |
553 (- *regexp-parse-regno* regexp-parse-regno 1) | |
554 result))) | |
555 (t | |
556 (regexp-error)))))) | |
557 ((= ch ?\) ) | |
558 (setq *regexp-parse-index* start) | |
559 nil) | |
560 ((= ch ?` ) '(BEGBUF)) | |
561 ((= ch ?' ) '(ENDBUF)) | |
562 ((= ch ?b ) | |
563 (if *regexp-word-definition* (regexp-error) '(WORDBOUND))) | |
564 ((= ch ?B ) | |
565 (if *regexp-word-definition* (regexp-error) '(NOTWORDBOUND))) | |
566 ((= ch ?< ) | |
567 (if *regexp-word-definition* (regexp-error) '(WORDBEG))) | |
568 ((= ch ?> ) | |
569 (if *regexp-word-definition* (regexp-error) '(WORDEND))) | |
570 ((= ch ?w ) (list 'SYNTAXSPEC | |
571 (syntax-spec-code ?w))) ;;;WORDCHAR | |
572 ((= ch ?W ) (list 'NOTSYNTAXSPEC | |
573 (syntax-spec-code ?w))) ;;;NOTWORDCHAR | |
574 ;;; ((= ch ?=) 'AT_DOT) | |
575 ((and (<= ?1 ch) | |
576 (<= ch ?9)) | |
577 (if *regexp-word-definition* | |
578 (regexp-error) (list 'DUPLICATE (- ch ?0)))) | |
579 ((= ch ?0) | |
580 (regexp-error)) | |
581 ((and (= ch ?s ) | |
582 (< (1+ i) end)) | |
583 (TREX-inc *regexp-parse-index*) | |
584 (list 'SYNTAXSPEC (syntax-spec-code (aref *regexp-parse-str* (1+ i))))) | |
585 ((and (= ch ?S ) | |
586 (< (1+ i) end)) | |
587 (TREX-inc *regexp-parse-index*) | |
588 (list 'NOTSYNTAXSPEC (syntax-spec-code (aref *regexp-parse-str* (1+ i))))) | |
589 ((and (= ch ?c ) | |
590 (< (1+ i) end)) | |
591 (TREX-inc *regexp-parse-index*) | |
592 (list 'CATEGORYSPEC (aref *regexp-parse-str* (1+ i)))) | |
593 ((and (= ch ?C ) | |
594 (< (1+ i) end)) | |
595 (TREX-inc *regexp-parse-index*) | |
596 (list 'NOTCATEGORYSPEC (aref *regexp-parse-str* (1+ i)))) | |
597 (t | |
598 (regexp-parse-translate-char-string | |
599 (substring *regexp-parse-str* (1+ i) (+ i 2)))))) | |
600 (t | |
601 (let ((nextpos (TREX-string-forward-anychar *regexp-parse-str* i))) | |
602 (cond(nextpos | |
603 (setq *regexp-parse-index* nextpos) | |
604 (regexp-parse-translate-char-string | |
605 (substring *regexp-parse-str* i nextpos))) | |
606 (t (regexp-error))))))))) | |
607 | |
608 (defun regexp-parse-charset () | |
609 (if (< *regexp-parse-index* *regexp-parse-end*) | |
610 (cond((eq (aref *regexp-parse-str* *regexp-parse-index*) ?^) | |
611 (TREX-inc *regexp-parse-index*) | |
612 (regexp-parse-charset0 'CHARSET_NOT nil)) | |
613 (t (regexp-parse-charset0 'CHARSET ;; ':or | |
614 nil))) | |
615 (regexp-error))) | |
616 | |
617 (defun regexp-parse-charset0 (op list) | |
618 (if (< *regexp-parse-index* *regexp-parse-end*) | |
619 (cond ((eq (aref *regexp-parse-str* *regexp-parse-index*) ?\]) | |
620 (TREX-inc *regexp-parse-index*) | |
621 (regexp-parse-charset1 op '("\]"))) | |
622 (t | |
623 (regexp-parse-charset1 op nil))) | |
624 (regexp-error))) | |
625 | |
626 (defun regexp-parse-charset1 (op list) | |
627 (if (< *regexp-parse-index* *regexp-parse-end*) | |
628 (let* ((pos0 *regexp-parse-index*) | |
629 (pos1 (TREX-string-forward-anychar *regexp-parse-str* pos0)) | |
630 (pos2 (TREX-string-forward-anychar *regexp-parse-str* pos1)) | |
631 (pos3 (TREX-string-forward-anychar *regexp-parse-str* pos2))) | |
632 (if pos0 | |
633 ;;; ] | |
634 (cond((eq (aref *regexp-parse-str* pos0) ?\]) | |
635 (setq *regexp-parse-index* pos1) | |
636 ;;; returns charset form | |
637 (cons op (sort (nreverse list) 'TREX-charset-lessp))) | |
638 ;;; [^]] - [^]] | |
639 ((and pos1 pos2 pos3 | |
640 (eq (aref *regexp-parse-str* pos1) ?-) | |
641 (not (eq (aref *regexp-parse-str* pos2) ?\]))) | |
642 (let ((from (substring *regexp-parse-str* pos0 pos1)) | |
643 (to (substring *regexp-parse-str* pos2 pos3))) | |
644 (if (and (= (length from) (length to)) | |
645 (not (TREX-comp-charp from 0)) | |
646 (not (TREX-comp-charp to 0)) | |
647 (or (= (length from) 1) | |
648 (= (aref from 0) (aref to 0))) | |
649 (or (string-equal from to) ;;; by Enami 93.08.08 | |
650 (string-lessp from to))) | |
651 (if (string-equal from to) | |
652 (TREX-push from list) | |
653 (TREX-push (list ':range from to) list)) | |
654 (regexp-error))) | |
655 (setq *regexp-parse-index* pos3) | |
656 (regexp-parse-charset1 op list)) | |
657 ;;; [^]] - ] ;;; by Enami 93.08.08 | |
658 ((and pos1 pos2 | |
659 (eq (aref *regexp-parse-str* pos1) ?-) | |
660 (eq (aref *regexp-parse-str* pos2) ?\])) | |
661 (TREX-push (substring *regexp-parse-str* pos0 pos1) list) | |
662 (TREX-push (substring *regexp-parse-str* pos1 pos2) list) | |
663 (setq *regexp-parse-index* pos2) | |
664 (regexp-parse-charset1 op list)) | |
665 (t | |
666 (TREX-push (substring *regexp-parse-str* pos0 pos1) list) | |
667 (setq *regexp-parse-index* pos1) | |
668 (regexp-parse-charset1 op list))) | |
669 (regexp-error))) | |
670 (regexp-error))) | |
671 | |
672 (defun TREX-charset-lessp (ch1 ch2) | |
673 (cond((and (stringp ch1) (stringp ch2)) | |
674 (string-lessp ch1 ch2)) | |
675 ((and (consp ch1) (consp ch2)) | |
676 (string-lessp (nth 2 ch1) (nth 1 ch2))) | |
677 ((consp ch1) | |
678 (string-lessp (nth 2 ch1) ch2)) | |
679 ((consp ch2) | |
680 (string-lessp ch1 (nth 1 ch2))))) | |
681 | |
682 ;;; | |
683 ;;; define-regexp | |
684 ;;; | |
685 | |
686 (defmacro define-regexp (name &rest forms) | |
687 (` (define-regexp* '(, name) '(, forms)))) | |
688 | |
689 (defun define-regexp* (name forms) | |
690 (put name ':regexp-has-definition t) | |
691 (put name ':regexp-definition | |
692 (if (= (length forms) 1) | |
693 (nth 0 forms) | |
694 (` (:seq (,@ forms)))))) | |
695 | |
696 (defun regexp-get-definition (name) | |
697 (get name ':regexp-definition)) | |
698 | |
699 (defun regexp-define-specials (names) | |
700 (mapcar (function (lambda (name) | |
701 (put name ':regexp-special t))) | |
702 names)) | |
703 | |
704 (defun regexp-has-definition (name) | |
705 (get name ':regexp-has-definition)) | |
706 | |
707 (defun regexp-specialp (name) | |
708 (get name ':regexp-special)) | |
709 | |
710 (defun regexp-expand-definition (regexp &optional callers) | |
711 (cond | |
712 ((consp regexp) | |
713 (let ((op (car regexp))) | |
714 (cond((eq op ':mark) | |
715 (` (:mark (, (nth 1 regexp)) | |
716 (, (nth 2 regexp)) | |
717 (, (regexp-expand-definition (nth 3 regexp)))))) | |
718 ((eq op ':or) | |
719 (` (:or (,@ (mapcar 'regexp-expand-definition (cdr regexp)))))) | |
720 ((eq op ':seq) | |
721 (` (:seq (,@ (mapcar 'regexp-expand-definition (cdr regexp)))))) | |
722 ((eq op ':optional) | |
723 (` (:optional (, (regexp-expand-definition (nth 1 regexp)))))) | |
724 ((eq op ':star) | |
725 (` (:star (, (regexp-expand-definition (nth 1 regexp)))))) | |
726 ((eq op ':plus) | |
727 (` (:plus (, (regexp-expand-definition (nth 1 regexp)))))) | |
728 ;;;;**** | |
729 ((eq op ':range) | |
730 regexp) | |
731 ((regexp-specialp op) | |
732 regexp) | |
733 ((memq op callers) | |
734 (error "regexp defs(%s)" op)) | |
735 ((regexp-has-definition op) | |
736 (regexp-expand-definition (regexp-get-definition op) | |
737 (cons op callers))) | |
738 (t | |
739 (error "undefined regexp(%s)" op))))) | |
740 ((stringp regexp) | |
741 regexp) | |
742 ((null regexp) | |
743 regexp) | |
744 (t | |
745 regexp))) | |
746 | |
747 ;;; | |
748 ;;; regexp-*-lessp | |
749 ;;; $B@55,7A<0$NA4=g=x$rDj5A$9$k!%(B | |
750 ;;; | |
751 | |
752 ;;; nil < number < string < symbol < cons | |
753 | |
754 (defun regexp-lessp (exp1 exp2) | |
755 (cond((equal exp1 exp2) | |
756 nil) | |
757 ((null exp1) t) | |
758 ((numberp exp1) | |
759 (cond((null exp2) nil) | |
760 ((numberp exp2) | |
761 (< exp1 exp2)) | |
762 (t t))) | |
763 ((stringp exp1) | |
764 (cond((or (null exp2) | |
765 (numberp exp2)) | |
766 nil) | |
767 ((stringp exp2) | |
768 (string< exp1 exp2)) | |
769 (t t))) | |
770 ((symbolp exp1) | |
771 (cond((or (null exp2) | |
772 (numberp exp2) | |
773 (stringp exp2)) | |
774 nil) | |
775 ((symbolp exp2) | |
776 (string< exp1 exp2)) | |
777 (t t))) | |
778 ((consp exp1) | |
779 (cond ((not (consp exp2)) | |
780 nil) | |
781 ((< (length exp1) (length exp2)) | |
782 t) | |
783 ((= (length exp1) (length exp2)) | |
784 (regexp-lessp-list exp1 exp2)) | |
785 (t nil))))) | |
786 | |
787 (defun regexp-lessp-list (exp1 exp2) | |
788 (cond((null exp1) nil) | |
789 ((regexp-lessp (car exp1) (car exp2)) | |
790 t) | |
791 ((equal (car exp1) (car exp2)) | |
792 (regexp-lessp-list (cdr exp1) (cdr exp2))) | |
793 (t nil))) | |
794 | |
795 ;;; | |
796 ;;; item = list of seq-body(== list of regexp) | |
797 ;;; nil < cons | |
798 ;;; | |
799 | |
800 (defun regexp-item-lessp (item1 item2) | |
801 (cond((equal item1 item2) | |
802 nil) | |
803 ((null item2) t) | |
804 ((consp item1) | |
805 (cond((consp item2) | |
806 (cond ((regexp-key-lessp (car item1) (car item2)) | |
807 t) | |
808 ((equal (car item1) (car item2)) | |
809 (regexp-item-lessp (cdr item1) (cdr item2))) | |
810 (t nil))) | |
811 (t nil))))) | |
812 | |
813 | |
814 (defun regexp-key-lessp-list (sym1 sym2 list) | |
815 (< (TREX-find sym1 list) (TREX-find sym2 list))) | |
816 | |
817 (defun regexp-key-lessp (key1 key2) | |
818 (cond ((regexp-key-class0 key1) | |
819 (cond((regexp-key-class0 key2) | |
820 (regexp-key-lessp-list (car key1) (car key2) *regexp-key-class0*)) | |
821 (t t))) | |
822 ((regexp-key-class1 key1) | |
823 (cond((regexp-key-class1 key2) | |
824 (regexp-key-lessp-list key1 key2 *regexp-key-class1*)) | |
825 ((or (regexp-key-class2 key2) | |
826 (regexp-key-class3 key2) | |
827 (regexp-key-class4 key2) | |
828 (null key2)) | |
829 t))) | |
830 ((regexp-key-class2 key1) | |
831 (cond((regexp-key-class2 key2) | |
832 (regexp-key-lessp-list key1 key2 *regexp-key-class2*)) | |
833 ((or (regexp-key-class3 key2) | |
834 (regexp-key-class4 key2) | |
835 (null key2)) | |
836 t))) | |
837 ((regexp-key-class3 key1) | |
838 (cond((regexp-key-class3 key2) | |
839 (regexp-key-lessp-list (car key1) (car key2) *regexp-key-class3*)) | |
840 ((or (regexp-key-class4 key2) | |
841 (null key2)) | |
842 t))) | |
843 ((regexp-key-class4 key1) | |
844 (or (null key2) | |
845 (and (regexp-key-class4 key2) (< key1 key2)))) | |
846 (t nil))) | |
847 | |
848 (defun regexp-alist-lessp (pair1 pair2) | |
849 (regexp-key-lessp (car pair1) (car pair2))) | |
850 | |
851 ;;; | |
852 ;;; | |
853 ;;; | |
854 | |
855 (defvar *regexp-key-class0* '(START_MEMORY STOP_MEMORY)) | |
856 | |
857 (defvar *regexp-key-class1* '(BEGLINE ENDLINE | |
858 ;;; BEFORE_DOT AT_DOT AFTER_DOT | |
859 BEGBUF ENDBUF | |
860 WORDBEG WORDEND | |
861 WORDBOUND NOTWORDBOUND)) | |
862 | |
863 (defvar *regexp-key-class2* '(ANYCHAR | |
864 CHARSET | |
865 CHARSET_NOT | |
866 ;;;WORDCHAR NOTWORDCHAR | |
867 )) | |
868 | |
869 (defvar *regexp-key-class3* '(DUPLICATE | |
870 SYNTAXSPEC NOTSYNTAXSPEC | |
871 CATEGORYSPEC NOTCATEGORYSPEC | |
872 )) | |
873 | |
874 (regexp-define-specials *regexp-key-class0*) | |
875 (regexp-define-specials *regexp-key-class1*) | |
876 (regexp-define-specials *regexp-key-class2*) | |
877 (regexp-define-specials *regexp-key-class3*) | |
878 | |
879 (defun regexp-key-class0 (key) | |
880 (and (consp key) (TREX-memequal (car key) *regexp-key-class0*))) | |
881 | |
882 (defun regexp-key-class1 (key) | |
883 (and (consp key) | |
884 (TREX-memequal (car key) *regexp-key-class1*))) | |
885 | |
886 (defun regexp-key-class2 (key) | |
887 (and (consp key) (TREX-memequal (car key) *regexp-key-class2*))) | |
888 | |
889 (defun regexp-key-class3 (key) | |
890 (and (consp key) | |
891 (TREX-memequal (car key) *regexp-key-class3*))) | |
892 | |
893 (defun regexp-key-class4 (key) | |
894 (or (and (consp key) (eq (car key) ':range)) | |
895 (numberp key) (symbolp key))) | |
896 | |
897 (defun regexp-item-key-class0 (item) | |
898 (regexp-key-class0 (car item))) | |
899 | |
900 (defun regexp-item-key-class1 (item) | |
901 (regexp-key-class1 (car item))) | |
902 | |
903 (defun regexp-item-key-class2 (item) | |
904 (regexp-key-class2 (car item))) | |
905 | |
906 (defun regexp-item-key-class3 (item) | |
907 (regexp-key-class3 (car item))) | |
908 | |
909 (defun regexp-item-key-class4 (item) | |
910 (regexp-key-class4 (car item))) | |
911 | |
912 ;;; | |
913 ;;; regexp-sort | |
914 ;;; $B@55,I=8=$NI8=`7A<0$r5a$a$k$?$a$K@0Ns$r9T$&!%(B | |
915 ;;; | |
916 | |
917 (defvar *regexp-sort-flag* t) | |
918 (defvar *regexp-debug* nil) | |
919 | |
920 (defun regexp-sort (list pred) | |
921 (if *regexp-sort-flag* | |
922 (progn | |
923 (if *regexp-debug* (princ (format "(regexp-sort %s %s)\n" list pred))) | |
924 (let ((result (sort list pred))) | |
925 (if *regexp-debug* (princ (format "<== %s\n" result))) | |
926 result)) | |
927 list)) | |
928 | |
929 ;;; | |
930 ;;; regexp-inverse | |
931 ;;; | |
932 | |
933 (defun regexp-inverse (regexp) | |
934 (if (consp regexp) | |
935 (let ((op (car regexp))) | |
936 (cond((eq op ':mark) | |
937 (list ':mark (nth 1 regexp) (nth 2 regexp) | |
938 (regexp-inverse (nth 3 regexp)))) | |
939 ((eq op 'DUPLICATE) | |
940 regexp) | |
941 ((eq op ':or) | |
942 (cons ':or (mapcar 'regexp-inverse (cdr regexp)))) | |
943 ((eq op ':seq) | |
944 (cons ':seq (nreverse (mapcar 'regexp-inverse (cdr regexp))))) | |
945 ((eq op ':optional) | |
946 (list ':optional (regexp-inverse (nth 1 regexp)))) | |
947 ((eq op ':star) | |
948 (list ':star (regexp-inverse (nth 1 regexp)))) | |
949 ((eq op ':plus) | |
950 (list ':plus (regexp-inverse (nth 1 regexp)))) | |
951 (t regexp))) | |
952 (if (stringp regexp) | |
953 (TREX-string-reverse regexp) | |
954 regexp))) | |
955 | |
956 ;;; | |
957 ;;; regexp-remove-infinite-loop | |
958 ;;; | |
959 | |
960 (defun regexp-remove-infinite-loop (regexp) | |
961 (cond((consp regexp) | |
962 (let ((op (car regexp))) | |
963 (cond((eq op ':mark) | |
964 ) | |
965 ((eq op 'DUPLICATE) | |
966 regexp) | |
967 ((eq op ':or) | |
968 ) | |
969 ((eq op ':seq) | |
970 ) | |
971 ((eq op ':optional) | |
972 ) | |
973 ((eq op ':star) | |
974 ) | |
975 ((eq op ':plus) | |
976 ) | |
977 (t regexp)))) | |
978 ((stringp regexp) | |
979 ) | |
980 ((null regexp) | |
981 ) | |
982 (t | |
983 regexp))) | |
984 | |
985 | |
986 ;;; | |
987 ;;; regexp-reform | |
988 ;;; | |
989 | |
990 (defvar *regexp-register-definitions* nil) | |
991 (defvar *regexp-registers* nil) | |
992 | |
993 (defun regexp-reform-duplication (regexp) | |
994 (let* ((*regexp-register-definitions* nil) | |
995 (newregexp (regexp-reform-duplication-1 regexp))) | |
996 (let ((*regexp-registers* nil)) | |
997 (regexp-reform-duplication-2 newregexp)))) | |
998 | |
999 (defun regexp-reform-duplication-1 (regexp) | |
1000 (if (not (consp regexp)) regexp | |
1001 (let ((mop (car regexp))) | |
1002 (cond((eq mop ':or) | |
1003 (cons ':or (mapcar 'regexp-reform-duplication-1 | |
1004 (cdr regexp)))) | |
1005 ((eq mop ':seq) | |
1006 (cons ':seq (mapcar 'regexp-reform-duplication-1 | |
1007 (cdr regexp)))) | |
1008 ((TREX-memequal mop '(:star :plus :optional)) | |
1009 (list mop (regexp-reform-duplication-1 (nth 1 regexp)))) | |
1010 ((eq mop ':mark) | |
1011 (TREX-push (cdr regexp) | |
1012 *regexp-register-definitions*) | |
1013 (list 'DUPLICATE (nth 1 regexp))) | |
1014 (t regexp))))) | |
1015 | |
1016 (defun regexp-reform-duplication-2 (regexp) | |
1017 (if (not (consp regexp)) regexp | |
1018 (let ((mop (car regexp))) | |
1019 (cond((eq mop ':or) | |
1020 (let ((registers *regexp-registers*) | |
1021 (newregisters nil) | |
1022 (result nil) | |
1023 (or-body (cdr regexp))) | |
1024 (while or-body | |
1025 (setq *regexp-registers* registers) | |
1026 (TREX-push (regexp-reform-duplication-2 (car or-body)) result) | |
1027 (setq newregisters (TREX-delete-duplicate (append *regexp-registers* newregisters))) | |
1028 (setq or-body (cdr or-body))) | |
1029 (setq *regexp-registers* newregisters) | |
1030 (cons ':or (nreverse result)))) | |
1031 ((eq mop ':seq) | |
1032 (cons ':seq (mapcar 'regexp-reform-duplication-2 | |
1033 (cdr regexp)))) | |
1034 ((TREX-memequal mop '(:star :plus :optional)) | |
1035 (list mop (regexp-reform-duplication-2 (nth 1 regexp)))) | |
1036 ((eq mop 'DUPLICATE) | |
1037 (let ((regno (nth 1 regexp))) | |
1038 (if (TREX-memequal regno *regexp-registers*) | |
1039 regexp | |
1040 (let ((def (assoc regno *regexp-register-definitions*))) | |
1041 (TREX-push regno *regexp-registers*) | |
1042 ;;; $BBg>fIW!)(B | |
1043 (if def | |
1044 (cons ':mark def) | |
1045 regexp))))) | |
1046 (t regexp))))) | |
1047 | |
1048 ;;; | |
1049 ;;; regexp-expand | |
1050 ;;; | |
1051 | |
1052 ;;; | |
1053 ;;; <ISLAND> ::= ( <ITEM> ...) | |
1054 ;;; <ITEM> ::= ( <SEQ-BODY> ... ) | |
1055 ;;; | |
1056 | |
1057 (defun regexp-expand-regexp (regexp) | |
1058 ;;; returns island | |
1059 (if (consp regexp) | |
1060 (let ((mop (car regexp))) | |
1061 (cond | |
1062 ;;;((eq mop 'CHARSET) | |
1063 ;;; (regexp-expand-charset t (cdr regexp))) | |
1064 ;;;((eq mop 'CHARSET_NOT) | |
1065 ;;; (regexp-expand-charset nil (cdr regexp))) | |
1066 ((eq mop ':or) | |
1067 (regexp-expand-or (cdr regexp))) | |
1068 ((eq mop ':seq) | |
1069 (regexp-expand-seq (cdr regexp))) | |
1070 ((eq mop ':star) | |
1071 (let ((arg (nth 1 regexp))) | |
1072 (if arg | |
1073 (append (regexp-expand-seq (list arg regexp)) (list nil)) | |
1074 (list nil)))) | |
1075 ((eq mop ':plus) | |
1076 (let ((arg (nth 1 regexp))) | |
1077 (if arg | |
1078 (regexp-expand-seq (list arg (list ':star arg))) | |
1079 (list nil)))) | |
1080 ((eq mop ':optional) | |
1081 (append (regexp-expand-regexp (nth 1 regexp)) (list nil))) | |
1082 ((eq mop ':mark) | |
1083 (let ((regno (nth 1 regexp)) | |
1084 (groups (nth 2 regexp)) | |
1085 (arg (nth 3 regexp))) | |
1086 (if arg | |
1087 (list (list (list 'START_MEMORY regno groups) | |
1088 arg | |
1089 (list 'STOP_MEMORY regno groups))) | |
1090 (list (list (list 'START_MEMORY regno groups) | |
1091 (list 'STOP_MEMORY regno groups)))))) | |
1092 (t (list (list regexp))))) | |
1093 (cond((null regexp) (list nil)) | |
1094 ((symbolp regexp) (list (list regexp))) | |
1095 ((numberp regexp) (list (list regexp))) | |
1096 ((stringp regexp) | |
1097 (let ((result nil)) | |
1098 (let ((i 0) (max (length regexp))) | |
1099 (while (< i max) | |
1100 (TREX-push (aref regexp i) result) | |
1101 (TREX-inc i)) | |
1102 (list (nreverse result))))) | |
1103 (t (list (list regexp)))))) | |
1104 | |
1105 ;;; | |
1106 ;;; (CHARSET "abc" ... ) == (:or (:seq "a" "b" "c") .... ) | |
1107 ;;; | |
1108 ;;; (:range "abc" "ade") == (:seq "a" (:range "bc" "de")) | |
1109 ;;; (:range "bc" "de" ) == (:or (:seq "b" (:range "c" 0xFF)) | |
1110 ;;; (:seq (:range "b"+1 "d"-1) (:range 0xA0 0xFF)) | |
1111 ;;; (:seq "d" (:range 0xA0 "e"))) | |
1112 ;;; | |
1113 | |
1114 ;;; charset:: | |
1115 | |
1116 (defun charset-member-elt (ch elt) | |
1117 (if (consp elt) | |
1118 (if (eq (nth 0 elt) ':range) | |
1119 (and (<= ch (nth 1 elt)) | |
1120 (<= (nth 2 elt) ch)) | |
1121 nil) | |
1122 (equal ch elt))) | |
1123 | |
1124 (defun charset-member-P (ch or-form) | |
1125 (let ((result) (l (cdr or-form))) | |
1126 (while (and l (null result)) | |
1127 (if (charset-membership-elt ch (car l)) | |
1128 (setq result t)) | |
1129 (setq l (cdr l))) | |
1130 result)) | |
1131 | |
1132 (defun charset-member-N (ch nor-form) | |
1133 (not (charset-member+ ch nor-form))) | |
1134 | |
1135 (defun charset-norp (form) | |
1136 (and (consp form) (eq (car form) 'CHARSET_NOT))) | |
1137 | |
1138 (defun charset-and (form1 form2) | |
1139 (if (charset-norp form1) | |
1140 (if (charset-norp form2) | |
1141 (cons ':or (charset-or-PP (cdr form1) (cdr form2))) | |
1142 (charset-and-PN form2 form1)) | |
1143 (if (charset-norp form2) | |
1144 (charset-and-pn form1 form2) | |
1145 (charset-and-PP form1 form2)))) | |
1146 | |
1147 (defun charset-or-PP (or-body1 or-body2) | |
1148 (append or-body1 or-body2)) | |
1149 | |
1150 | |
1151 | |
1152 | |
1153 (defun regexp-charset-to-regexp (charsets) | |
1154 (cons ':or (mapcar 'regexp-charset-to-regexp* charsets))) | |
1155 | |
1156 (defun regexp-charset-to-regexp* (elm) | |
1157 (cond((consp elm) (regexp-charset-range-to-regexp (nth 1 elm) (nth 2 elm))) | |
1158 ((stringp elm) (cons ':seq (TREX-string-to-list elm))) | |
1159 (t elm))) | |
1160 | |
1161 (defun regexp-charset-range-to-regexp (str1 str2) | |
1162 (let ((result (regexp-charset-range-to-regexp* (TREX-string-to-list str1) | |
1163 (TREX-string-to-list str2)))) | |
1164 (if (= (length result) 1) (car result) (cons ':seq result)))) | |
1165 | |
1166 | |
1167 (defun regexp-charset-range-to-regexp* (nums1 nums2) | |
1168 (let ((len (length (cdr nums1))) | |
1169 (ch1 (car nums1)) | |
1170 (ch2 (car nums2))) | |
1171 (if (= len 0) | |
1172 (if (= ch1 ch2) (list ch1) | |
1173 (list (regexp-charset-range-1 ch1 ch2))) | |
1174 (if (= ch1 ch2) | |
1175 (cons ch1 (regexp-charset-range-to-regexp* (cdr nums1) (cdr nums2))) | |
1176 (let ((part1 (cons ch1 (regexp-charset-range-to-regexp* (cdr nums1) (make-list (length (cdr nums1)) 255)))) | |
1177 (part2 (if (<= (1+ ch1) (1- ch2)) | |
1178 (cons (regexp-charset-range-1 (1+ ch1) (1- ch2)) | |
1179 (regexp-charset-range-to-regexp* (make-list len 160) (make-list len 255))) | |
1180 nil)) | |
1181 (part3 (cons ch2 (regexp-charset-range-to-regexp* (make-list len 160) (cdr nums2))))) | |
1182 (if part2 | |
1183 (list (list ':or (cons ':seq part1) (cons ':seq part2) (cons ':seq part3))) | |
1184 (list (list ':or (cons ':seq part1) (cons ':seq part3))))))))) | |
1185 | |
1186 (defun regexp-charset-range-1 (from to) | |
1187 (let ((result nil)) | |
1188 (while (<= from to) | |
1189 (TREX-push to result) | |
1190 (TREX-dec to)) | |
1191 (cons ':or result))) | |
1192 | |
1193 (defun regexp-charset-range-1* (from to) | |
1194 (if (not (<= from to)) nil | |
1195 (cons from (regexp-charset-range-1* (1+ from) to)))) | |
1196 | |
1197 (defvar *regexp-charset-vector* nil) | |
1198 | |
1199 (defun regexp-expand-charset (mode charsets) | |
1200 (TREX-init *regexp-charset-vector* (make-vector 256 nil)) | |
1201 (let ((i 0)) | |
1202 (while (< i 256) | |
1203 (aset *regexp-charset-vector* i nil) | |
1204 (TREX-inc i))) | |
1205 (while charsets | |
1206 (cond((numberp (car charsets)) | |
1207 (aset *regexp-charset-vector* (car charsets) t)) | |
1208 ((stringp (car charsets)) | |
1209 (if (= (length (car charsets)) 1) | |
1210 (aset *regexp-charset-vector* (aref (car charsets) 0) t) | |
1211 (let ((list (TREX-string-to-list (car charsets)))) | |
1212 (aset *regexp-charset-vector* (car list) | |
1213 (regexp-expand-charset-set-mark (cdr list) | |
1214 (aref *regexp-charset-vector* (car list))))))) | |
1215 ((and (consp (car charsets)) | |
1216 (eq (car (car charsets)) ':range)) | |
1217 (let ((from (aref (nth 1 (car charsets)) 0)) | |
1218 (to (aref (nth 2 (car charsets)) 0))) | |
1219 (if (<= from to) | |
1220 (if (< to 128) | |
1221 (let ((char from)) | |
1222 (while (<= char to) | |
1223 (aset *regexp-charset-vector* char t) | |
1224 (TREX-inc char))) | |
1225 (let ((from-list (TREX-string-to-list (nth 1 (car charsets)))) | |
1226 (to-list (TREX-string-to-list (nth 2 (car charsets))))) | |
1227 ;;; $B$I$&$9$s$N!*(B | |
1228 )))))) | |
1229 (setq charsets (cdr charsets))) | |
1230 (let ((result nil) | |
1231 (i 0)) | |
1232 (while (< i 256) | |
1233 (if (eq (aref *regexp-charset-vector* i) mode) | |
1234 (TREX-push (list i) result)) | |
1235 (TREX-inc i)) | |
1236 (nreverse result))) | |
1237 | |
1238 | |
1239 (defun regexp-expand-charset-set-mark (chars alist) | |
1240 (if (null chars) t | |
1241 (let ((place (assoc (car chars) alist))) | |
1242 (cond((null place) | |
1243 (cons | |
1244 (cons (car chars) | |
1245 (regexp-expand-charset-set-mark (cdr chars) nil)) | |
1246 alist)) | |
1247 (t | |
1248 (setcdr place | |
1249 (regexp-expand-charset-set-mark (cdr chars) (cdr place))) | |
1250 alist))))) | |
1251 | |
1252 (defun regexp-expand-or (regexps) | |
1253 (if regexps | |
1254 (append (regexp-expand-regexp (car regexps)) | |
1255 (regexp-expand-or (cdr regexps))) | |
1256 nil)) | |
1257 | |
1258 (defun regexp-expand-seq (regexps) | |
1259 (if (null regexps) | |
1260 (list nil) | |
1261 (let ((result (regexp-expand-regexp (car regexps)))) | |
1262 (if (TREX-memequal nil result) | |
1263 (let ((newresult (regexp-expand-seq (cdr regexps)))) | |
1264 (setq result (TREX-delete nil result)) | |
1265 (while result | |
1266 (TREX-push (append (car result) (cdr regexps)) newresult) | |
1267 (setq result (cdr result))) | |
1268 newresult) | |
1269 (let ((newresult nil)) | |
1270 (while result | |
1271 (TREX-push (append (car result) (cdr regexps)) newresult) | |
1272 (setq result (cdr result))) | |
1273 newresult))))) | |
1274 | |
1275 (defun regexp-expand-items (items) | |
1276 (if items | |
1277 (append (regexp-expand-seq (car items)) | |
1278 (regexp-expand-items (cdr items))) | |
1279 nil)) | |
1280 | |
1281 ;;; | |
1282 ;;; regexp- | |
1283 ;;; | |
1284 | |
1285 (defun regexp-make-island (items) | |
1286 (let ((result (TREX-delete-duplicate (regexp-expand-items items)))) | |
1287 (let ((l result)) | |
1288 (while l | |
1289 (cond((null (car l)) | |
1290 (setcdr l nil) | |
1291 (setq l nil)) | |
1292 (t (setq l (cdr l)))))) | |
1293 result)) | |
1294 | |
1295 (defun regexp-make-island-parallel (items) | |
1296 (regexp-sort (TREX-delete-duplicate (regexp-expand-items items)) | |
1297 'regexp-item-lessp)) | |
1298 | |
1299 | |
1300 ;;; Finate state Automaton: | |
1301 ;;; | |
1302 ;;; FA : Non-deterministic FA | |
1303 ;;; EFFA : Epsilon Free FA | |
1304 ;;; DFA : Deterministic FA | |
1305 ;;; | |
1306 ;;; | |
1307 ;;; DFA-optimize <- DFA <- EFFA <- NDFA <- regexp | |
1308 | |
1309 | |
1310 ;;; | |
1311 ;;; Table structure | |
1312 ;;; <FA> ::= ( <START> . <TransTables> ) | |
1313 ;;; <TransTables> ::= ( <Node> . <TransTable> ) ... | |
1314 ;;; <TransTable> ::= ( <Key> . <Next> ) ... | |
1315 ;;; <Key> ::= <Char> | <Condition> | :epsilon | |
1316 ;;; | |
1317 | |
1318 (defvar *regexp-node-to-transtable* nil) | |
1319 (defvar *regexp-island-to-node* nil) | |
1320 (defvar *regexp-counter* 0) | |
1321 | |
1322 (defun FA-make (regexp) | |
1323 (setq *regexp-island-to-node* nil) | |
1324 (let ((*regexp-node-to-transtable* nil) | |
1325 ;;; (*regexp-island-to-node* nil) | |
1326 (*regexp-counter* 0)) | |
1327 (let ((island (regexp-make-island (regexp-expand-regexp regexp)))) | |
1328 (cons (FA-make-closure island) (nreverse *regexp-node-to-transtable*))))) | |
1329 | |
1330 (defun FA-make-closure (island) | |
1331 (if *regexp-debug* (princ (format "FA-make-closure %s\n" island))) | |
1332 (if (null island) nil | |
1333 (let ((place (assoc island *regexp-island-to-node*)) | |
1334 (pos nil)) | |
1335 (cond(place (cdr place)) | |
1336 ;;; START_MEMORY and STOP_MEMORY $B!JL5>r7o!$:GM%@h$GA+0\$9$k$b$N!K(B | |
1337 ((setq pos (TREX-find-if 'regexp-item-key-class0 island)) | |
1338 (let ((pre (TREX-firstn island pos)) | |
1339 (item (nth pos island)) | |
1340 (post (nthcdr (1+ pos) island))) | |
1341 (let* ((number (TREX-inc *regexp-counter*)) | |
1342 (pair (cons (car item) nil)) | |
1343 (alist (list pair)) | |
1344 (place (cons number alist))) | |
1345 (TREX-push (cons island number) *regexp-island-to-node*) | |
1346 (TREX-push place *regexp-node-to-transtable*) | |
1347 (setcdr pair | |
1348 (FA-make-closure | |
1349 (regexp-make-island (append pre (list (cdr item)) post)))) | |
1350 number))) | |
1351 ;;; BEGLINE, ENDLINE, WORDBEG, ....$B!JD9$5#0$N$b$N!K(B | |
1352 ;;; $BA+0\$O(B | |
1353 ;;; KEY --> TRUE+FALSE | |
1354 ;;; :epsilon --> FALSE $B$H$J$k!%(B | |
1355 ((setq pos (TREX-find-if 'regexp-item-key-class1 island)) | |
1356 (let((key (car (nth pos island))) | |
1357 (items island) | |
1358 (result-true nil) | |
1359 (result-false nil)) | |
1360 (while items | |
1361 (let ((item (car items))) | |
1362 (if (equal key (car item)) | |
1363 (TREX-push (cdr item) result-true) | |
1364 (progn | |
1365 (TREX-push item result-true) | |
1366 (TREX-push item result-false)))) | |
1367 (setq items (cdr items))) | |
1368 (setq result-true (nreverse result-true) | |
1369 result-false (nreverse result-false)) | |
1370 (if (null result-false) | |
1371 (let* ((number (TREX-inc *regexp-counter*)) | |
1372 (pair-true (cons key nil)) | |
1373 (alist (list pair-true)) | |
1374 (place (cons number alist))) | |
1375 (TREX-push (cons island number) *regexp-island-to-node*) | |
1376 (TREX-push place *regexp-node-to-transtable*) | |
1377 (setcdr pair-true (FA-make-closure (regexp-make-island result-true))) | |
1378 number) | |
1379 (let* ((number (TREX-inc *regexp-counter*)) | |
1380 (pair-true (cons key nil)) | |
1381 (pair-false (cons ':epsilon nil)) | |
1382 (alist (list pair-true pair-false)) | |
1383 (place (cons number alist))) | |
1384 (TREX-push (cons island number) *regexp-island-to-node*) | |
1385 (TREX-push place *regexp-node-to-transtable*) | |
1386 (setcdr pair-true (FA-make-closure (regexp-make-island result-true))) | |
1387 (setcdr pair-false (FA-make-closure (regexp-make-island result-false))) | |
1388 number)))) | |
1389 (t | |
1390 (FA-make-closure* island (FA-make-pre-alist island))))))) | |
1391 | |
1392 ;;; | |
1393 ;;; $B$3$3$G07$&$N$O(B class2,3,4 $B$N$_(B | |
1394 ;;; | |
1395 (defun FA-make-closure* (island pre-alist) | |
1396 (if *regexp-debug* (princ (format "\nregexp-make-clousre* %s" pre-alist))) | |
1397 (let* ((number (TREX-inc *regexp-counter*)) | |
1398 (place (cons number pre-alist))) | |
1399 (TREX-push (cons island number) *regexp-island-to-node*) | |
1400 (TREX-push place *regexp-node-to-transtable*) | |
1401 (while pre-alist | |
1402 (let ((pair (car pre-alist))) | |
1403 (setcdr pair | |
1404 (FA-make-closure (regexp-make-island (cdr pair))))) | |
1405 (setq pre-alist (cdr pre-alist))) | |
1406 number)) | |
1407 | |
1408 ;;; | |
1409 ;;; PRE-ALIST ::= ( (key . items) ... ) | |
1410 ;;; | |
1411 | |
1412 (defun FA-make-pre-alist (items) | |
1413 (let ((pre-alist nil)) | |
1414 (while items | |
1415 (let ((item (car items))) | |
1416 (cond((or (regexp-key-class2 (car item)) | |
1417 (regexp-key-class3 (car item))) | |
1418 (let ((key (car item)) | |
1419 (newitems nil)) | |
1420 (while (and items (equal key (car (car items)))) | |
1421 (TREX-push (cdr (car items)) newitems) | |
1422 (setq items (cdr items))) | |
1423 (setq newitems (nreverse newitems)) | |
1424 (TREX-push (cons key newitems) pre-alist))) | |
1425 ((null item) | |
1426 (TREX-push (list nil) pre-alist) | |
1427 (setq items (cdr items))) | |
1428 ((regexp-key-class4 (car item)) | |
1429 (let((alist nil)) | |
1430 (while (and items (regexp-key-class4 (car (car items)))) | |
1431 (let* ((newitem (car items)) | |
1432 (place (assoc (car newitem) alist))) | |
1433 (if place | |
1434 (setcdr place | |
1435 (cons (cdr newitem) (cdr place))) | |
1436 (TREX-push (cons (car newitem) (list (cdr newitem))) alist))) | |
1437 (setq items (cdr items))) | |
1438 (setq alist (sort alist 'TREX-lessp-car)) | |
1439 (let ((list alist)) | |
1440 (while list | |
1441 (setcdr (car list) (nreverse (cdr (car list)))) | |
1442 (setq list (cdr list))) | |
1443 (setq pre-alist (append alist pre-alist)) | |
1444 ))) | |
1445 (t (error "undefined items(%s)" item))))) | |
1446 (nreverse pre-alist))) | |
1447 | |
1448 ;;; | |
1449 ;;; FA-inverse | |
1450 ;;; | |
1451 | |
1452 (defun FA-inverse (FA) | |
1453 (let ((invFA nil) | |
1454 (start (car FA)) | |
1455 (table (cdr FA)) | |
1456 (minnode 10000) | |
1457 (maxnode 0) | |
1458 (newtable nil) | |
1459 (newstart nil) | |
1460 (newfinal nil)) | |
1461 (let ((l table)) | |
1462 (while l | |
1463 (let ((n (car (car l)))) | |
1464 (if (< n minnode) (setq minnode n)) | |
1465 (if (< maxnode n) (setq maxnode n))) | |
1466 (setq l (cdr l)))) | |
1467 (setq newstart (1- minnode)) | |
1468 (setq newfinal (1+ maxnode)) | |
1469 (setq newtable (FA-link newfinal nil nil newtable)) | |
1470 (while table | |
1471 (let* ((Snode (car table)) | |
1472 (Snumber (car Snode)) | |
1473 (Salist (cdr Snode))) | |
1474 (while Salist | |
1475 (let* ((pair (car Salist)) | |
1476 (key (car pair)) | |
1477 (Tnumber (cdr pair))) | |
1478 (cond((null key) | |
1479 (setq newtable (FA-link newstart ':epsilon Snumber newtable))) | |
1480 (t | |
1481 (setq newtable (FA-link Tnumber key Snumber newtable)))) | |
1482 (setq Salist (cdr Salist))))) | |
1483 (setq table (cdr table))) | |
1484 (setq newtable (FA-link start ':epsilon newfinal newtable)) | |
1485 ;;;; FA $B$N(B final $B$X(B invFA $B$N(B start $B$+$i(B :epsilon link $B$rD%$k!%(B | |
1486 (let ((l newtable)) | |
1487 (while l | |
1488 (setcdr (car l) (reverse (cdr(car l)))) | |
1489 (setq l (cdr l)))) | |
1490 (setq newtable (sort newtable 'TREX-lessp-car)) | |
1491 (cons newstart newtable))) | |
1492 | |
1493 (defun FA-link (from key to table) | |
1494 (let ((place (assoc from table))) | |
1495 (cond ((null place ) | |
1496 (setq place (cons from nil)) | |
1497 (TREX-push place table))) | |
1498 (setcdr place (cons (cons key to) (cdr place))) | |
1499 table)) | |
1500 | |
1501 ;;; | |
1502 ;;; FA-dump | |
1503 ;;; | |
1504 | |
1505 (defun FA-dump (table) | |
1506 (let ((start (car table)) | |
1507 (l (cdr table))) | |
1508 (princ (format "\nstart = %d\n" start)) | |
1509 (while l | |
1510 (princ (format "%3d: " (car (car l)))) | |
1511 (let ((alist (cdr (car l)))) | |
1512 (cond ((numberp (car (car alist))) | |
1513 (princ (format "%c -> %s\n" (car (car alist)) (cdr (car alist))))) | |
1514 ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC))) | |
1515 (princ (format "(%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist))))) | |
1516 (t | |
1517 (princ (format "%s -> %s\n" (car (car alist)) (cdr (car alist)))))) | |
1518 (setq alist (cdr alist)) | |
1519 (while alist | |
1520 (cond ((numberp (car (car alist))) | |
1521 (princ (format " %c -> %s\n" (car (car alist)) (cdr (car alist))))) | |
1522 ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC))) | |
1523 (princ (format " (%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist))))) | |
1524 (t | |
1525 (princ (format " %s -> %s\n" (car (car alist)) (cdr (car alist)))))) | |
1526 (setq alist (cdr alist)))) | |
1527 (setq l (cdr l))))) | |
1528 | |
1529 ;;; | |
1530 ;;; EFFA: Epsilon Free Finate Automaton | |
1531 ;;; | |
1532 | |
1533 (defvar *FA-table* nil) | |
1534 (defvar *EFFA-table* nil) | |
1535 | |
1536 (defun EFFA-make (FA) | |
1537 (let* ((start (car FA)) | |
1538 (*FA-table* (cdr FA)) | |
1539 (newstart start) | |
1540 (*EFFA-table* nil)) | |
1541 (cons newstart (reverse (EFFA-make* start))))) | |
1542 | |
1543 (defun EFFA-make* (node) | |
1544 (let ((place (assoc node *EFFA-table*))) | |
1545 (cond((null place) | |
1546 (let ((place (cons node nil))) | |
1547 (TREX-push place *EFFA-table*) | |
1548 (setcdr place | |
1549 (reverse (EFFA-make-alist nil (cdr (assoc node *FA-table*)) | |
1550 (list node)))) | |
1551 (let ((alist (cdr place))) | |
1552 (while alist | |
1553 (cond((car (car alist)) | |
1554 (EFFA-make* (cdr (car alist))))) | |
1555 (setq alist (cdr alist)))))))) | |
1556 *EFFA-table*) | |
1557 | |
1558 (defun EFFA-make-alist (newalist alist set) | |
1559 (while alist | |
1560 (let ((node (cdr (car alist)))) | |
1561 (cond((eq (car (car alist)) ':epsilon) | |
1562 (cond((not (TREX-memequal node set)) | |
1563 (TREX-push node set) | |
1564 (setq newalist | |
1565 (EFFA-make-alist newalist (cdr (assoc node *FA-table*)) set))))) | |
1566 (t | |
1567 (TREX-push (car alist) newalist)))) | |
1568 (setq alist (cdr alist))) | |
1569 newalist) | |
1570 | |
1571 ;;; | |
1572 ;;; DFA: Deterministic Finate Automata | |
1573 ;;; | |
1574 | |
1575 (defvar *DFA-node-counter* nil) | |
1576 | |
1577 (defvar *DFA-node-definitions* nil | |
1578 "List of FD-nodes to node number") | |
1579 | |
1580 (defvar *DFA-table* nil | |
1581 "node number to alist") | |
1582 | |
1583 (defun DFA-make (EFFA) | |
1584 (let ((start (car EFFA)) | |
1585 (*EFFA-table* (cdr EFFA)) | |
1586 (*DFA-node-counter* 0) | |
1587 (*DFA-node-definitions* nil ) | |
1588 (*DFA-table* nil)) | |
1589 (DFA-make-1 (list start)) | |
1590 (cons (cdr (assoc (list start) *DFA-node-definitions*)) *DFA-table*))) | |
1591 | |
1592 (defun DFA-make-1 (states) | |
1593 (let ((place (assoc states *DFA-node-definitions*))) | |
1594 (cond((null place) | |
1595 (TREX-inc *DFA-node-counter*) | |
1596 (setq place (cons states *DFA-node-counter*)) | |
1597 (TREX-push place *DFA-node-definitions*) | |
1598 (let ((pair (cons *DFA-node-counter* nil))) | |
1599 (TREX-push pair *DFA-table*) | |
1600 (setcdr pair (DFA-make-pre-alist (DFA-collect-alist states))) | |
1601 (let ((alist (cdr pair))) | |
1602 (while alist | |
1603 (let ((top (car alist))) | |
1604 (if (car top) | |
1605 (setcdr top | |
1606 (DFA-make-1 (cdr top))))) | |
1607 (setq alist (cdr alist)))) | |
1608 ))) | |
1609 (cdr place))) | |
1610 | |
1611 (defun DFA-collect-alist (states) | |
1612 (let ((result nil)) | |
1613 (while states | |
1614 (setq result (append (cdr (assoc (car states) *EFFA-table*)) result)) | |
1615 (setq states (cdr states))) | |
1616 result)) | |
1617 | |
1618 (defun DFA-make-pre-alist (oldAlist) | |
1619 (let ((pre-alist nil)) | |
1620 (while oldAlist | |
1621 (let ((oldKey (car (car oldAlist)))) | |
1622 (cond((or (regexp-key-class0 oldKey) | |
1623 (regexp-key-class1 oldKey) | |
1624 (regexp-key-class2 oldKey) | |
1625 (regexp-key-class3 oldKey)) | |
1626 (let ((key oldKey) | |
1627 (newAlist nil)) | |
1628 (while (and oldAlist (equal key (car (car oldAlist)))) | |
1629 (TREX-push (cdr (car oldAlist)) newAlist) | |
1630 (setq oldAlist (cdr oldAlist))) | |
1631 (setq newAlist (nreverse newAlist)) | |
1632 (TREX-push (cons key newAlist) pre-alist))) | |
1633 ((regexp-key-class4 oldKey) | |
1634 (let((alist nil)) | |
1635 (while (and oldAlist (regexp-key-class4 (car (car oldAlist)))) | |
1636 (let ((place (assoc (car (car oldAlist)) alist))) | |
1637 (if place | |
1638 (setcdr place | |
1639 (cons (cdr (car oldAlist)) (cdr place))) | |
1640 (TREX-push (cons (car (car oldAlist)) (list(cdr (car oldAlist)))) alist))) | |
1641 (setq oldAlist (cdr oldAlist))) | |
1642 (setq alist (sort alist 'TREX-lessp-car)) | |
1643 (let ((list alist)) | |
1644 (while list | |
1645 (setcdr (car list) (reverse (cdr (car list)))) | |
1646 (setq list (cdr list))) | |
1647 (setq pre-alist (append alist pre-alist)) | |
1648 ))) | |
1649 ((null oldKey) | |
1650 (TREX-push (list nil) pre-alist) | |
1651 (setq oldAlist (cdr oldAlist))) | |
1652 (t | |
1653 (setq oldAlist (cdr oldAlist)))))) | |
1654 (nreverse pre-alist))) | |
1655 | |
1656 ;;; | |
1657 ;;; DFA-optimize | |
1658 ;;; $B$3$3$G$N:GE,2=$O>H9g=g=x$rJ]B8$9$k!%(B | |
1659 ;;; longer match $B$J$I$r$9$k>l9g$OJQ99$9$kI,MW$,$"$k!%(B | |
1660 | |
1661 (defvar *DFA-optimize-debug* nil) | |
1662 | |
1663 (defvar *DFA-optimize-groups* nil) | |
1664 (defvar *DFA-optimize-node* 1) | |
1665 | |
1666 (defun DFA-optimize (FA) | |
1667 (if *DFA-optimize-debug* (terpri)) | |
1668 (let* ((start (car FA)) | |
1669 (table (cdr FA)) | |
1670 (*DFA-optimize-node* 1) | |
1671 (*DFA-optimize-groups* | |
1672 (list (cons *DFA-optimize-node* (mapcar 'car table))))) | |
1673 (while | |
1674 (catch 'DFA-optimize-changed | |
1675 (let ((groups *DFA-optimize-groups*)) | |
1676 (while groups | |
1677 (if *DFA-optimize-debug* | |
1678 (princ (format "\nGroups to be checked: %s\n" groups))) | |
1679 (let* ((Sgroup (car groups)) | |
1680 (Sgroup-number (car Sgroup)) | |
1681 (oldgroup (cdr Sgroup)) | |
1682 (newgroup nil) | |
1683 (Smembers oldgroup)) | |
1684 (if *DFA-optimize-debug* | |
1685 (princ (format " Sgroup-number: %s = %s\n" Sgroup-number Smembers))) | |
1686 (while Smembers | |
1687 (let* ((Snumber (car Smembers)) | |
1688 (Salist (cdr (assoc Snumber table)))) | |
1689 (if *DFA-optimize-debug* | |
1690 (princ (format " Snumber: %s\n" Snumber))) | |
1691 (let ((Tmembers (cdr Smembers))) | |
1692 (while Tmembers | |
1693 (if (not (eq Snumber (car Tmembers))) | |
1694 (let* ((Tnumber (car Tmembers)) | |
1695 (Talist (cdr (assoc Tnumber table))) | |
1696 (Salist Salist)) | |
1697 (if *DFA-optimize-debug* | |
1698 (princ (format " Tnumber: %s\n" Tnumber))) | |
1699 (while (and Talist Salist | |
1700 (equal (car (car Talist)) | |
1701 (car (car Salist))) ;;; key | |
1702 (equal (DFA-optimize-group-number | |
1703 (cdr (car Talist))) | |
1704 (DFA-optimize-group-number | |
1705 (cdr (car Salist))) ;;; next group | |
1706 )) | |
1707 (if *DFA-optimize-debug* | |
1708 (progn | |
1709 (princ (format " Skey: %s -> %s(%s)\n" | |
1710 (car (car Salist)) | |
1711 (cdr (car Salist)) | |
1712 (DFA-optimize-group-number (cdr (car Salist))))) | |
1713 (princ (format " Tkey: %s -> %s(%s)\n" | |
1714 (car (car Talist)) | |
1715 (cdr (car Talist)) | |
1716 (DFA-optimize-group-number (cdr (car Talist))))))) | |
1717 (setq Talist (cdr Talist) | |
1718 Salist (cdr Salist))) | |
1719 (cond((or Talist Salist) | |
1720 (setq newgroup (cons Tnumber newgroup) | |
1721 oldgroup (TREX-delete Tnumber oldgroup)) | |
1722 (if *DFA-optimize-debug* | |
1723 (princ(format " oldGroup : %s\n newGroup : %s\n" oldgroup newgroup))))) | |
1724 )) | |
1725 (setq Tmembers (cdr Tmembers))))) | |
1726 (cond (newgroup | |
1727 (if *DFA-optimize-debug* | |
1728 (princ (format "Changed :%s --> " Sgroup))) | |
1729 (setcdr Sgroup oldgroup) | |
1730 (if *DFA-optimize-debug* | |
1731 (princ (format "%s" Sgroup))) | |
1732 (TREX-inc *DFA-optimize-node*) | |
1733 (if *DFA-optimize-debug* | |
1734 (princ (format "+%s\n" (cons *DFA-optimize-node* newgroup)))) | |
1735 (TREX-push (cons *DFA-optimize-node* newgroup) *DFA-optimize-groups*) | |
1736 (throw 'DFA-optimize-changed t))) | |
1737 (setq Smembers (cdr Smembers)))) | |
1738 (setq groups (cdr groups)))))) | |
1739 ;;; | |
1740 ;;; | |
1741 (if *DFA-optimize-debug* | |
1742 (princ (format "table: %s\n" table))) | |
1743 (if *DFA-optimize-debug* | |
1744 (princ (format "groups: %s\n" *DFA-optimize-groups*))) | |
1745 (let ((newtable nil) | |
1746 (newstart nil) | |
1747 (groups *DFA-optimize-groups*)) | |
1748 | |
1749 ;;; start node $B$rC5$9(B | |
1750 (let ((l *DFA-optimize-groups*)) | |
1751 (while l | |
1752 (cond((TREX-memequal start (cdr (car l))) | |
1753 (setq newstart (car (car l))) | |
1754 (setq l nil)) | |
1755 (t | |
1756 (setq l (cdr l)))))) | |
1757 | |
1758 ;;; $B?7$7$$(B transTable $B$r:n$k!%(B | |
1759 (while groups | |
1760 (let* ((group (car groups)) | |
1761 (group-number (car group)) | |
1762 (member-number (car (cdr group))) | |
1763 (member-alist (cdr (assoc member-number table)))) | |
1764 (TREX-push (cons group-number | |
1765 (let ((group-alist nil)) | |
1766 (while member-alist | |
1767 (let ((Mkey (car (car member-alist))) | |
1768 (Mnext (cdr (car member-alist)))) | |
1769 (TREX-push (cons Mkey (DFA-optimize-group-number Mnext)) | |
1770 group-alist)) | |
1771 (setq member-alist (cdr member-alist))) | |
1772 (nreverse group-alist))) | |
1773 newtable) | |
1774 (setq groups (cdr groups)))) | |
1775 (cons newstart newtable)))) | |
1776 | |
1777 (defun DFA-optimize-group-number (node) | |
1778 (let ((l *DFA-optimize-groups*) (result nil)) | |
1779 (while l | |
1780 (cond((TREX-memequal node (cdr (car l))) | |
1781 (setq result (car (car l)) | |
1782 l nil)) | |
1783 (t (setq l (cdr l))))) | |
1784 result)) | |
1785 | |
1786 (defun DFA-optimize-parallel (FA) | |
1787 (if *DFA-optimize-debug* (terpri)) | |
1788 (let* ((start (car FA)) | |
1789 (table (cdr FA)) | |
1790 (*DFA-optimize-node* 1) | |
1791 (*DFA-optimize-groups* | |
1792 (list (cons *DFA-optimize-node* (mapcar 'car table))))) | |
1793 (while | |
1794 (catch 'DFA-optimize-changed | |
1795 (let ((groups *DFA-optimize-groups*)) | |
1796 (while groups | |
1797 (if *DFA-optimize-debug* | |
1798 (princ (format "\nGroups to be checked: %s\n" groups))) | |
1799 (let* ((Sgroup (car groups)) | |
1800 (Sgroup-number (car Sgroup)) | |
1801 (oldgroup (cdr Sgroup)) | |
1802 (newgroup nil) | |
1803 (Smembers oldgroup)) | |
1804 (if *DFA-optimize-debug* | |
1805 (princ (format " Sgroup-number: %s = %s\n" Sgroup-number Smembers))) | |
1806 (while Smembers | |
1807 (let* ((Snumber (car Smembers)) | |
1808 (Salist (cdr (assoc Snumber table)))) | |
1809 (if *DFA-optimize-debug* | |
1810 (princ (format " Snumber: %s\n" Snumber))) | |
1811 (while Salist | |
1812 (let* ((Spair (car Salist)) | |
1813 (Skey (car Spair)) | |
1814 (Snext (cdr Spair)) | |
1815 (Snext-group (DFA-optimize-group-number Snext)) | |
1816 (Tmembers oldgroup)) | |
1817 (if *DFA-optimize-debug* | |
1818 (princ (format " Skey: %s -> %s(%s)\n" Skey Snext-group Snext))) | |
1819 (while Tmembers | |
1820 (if (not (eq Snumber (car Tmembers))) | |
1821 (let* ((Tnumber (car Tmembers)) | |
1822 ;;; $BMW:F8!F$(B | |
1823 (Tpair (assoc Skey (cdr (assoc Tnumber table)))) | |
1824 (Tnext (cdr Tpair)) | |
1825 (Tnext-group (DFA-optimize-group-number (cdr Tpair)))) | |
1826 (if *DFA-optimize-debug* | |
1827 (princ (format " Tnumber: %s : %s -> %s(%s)\n" Tnumber (car Tpair) | |
1828 (DFA-optimize-group-number (cdr Tpair))(cdr Tpair)))) | |
1829 (cond((and (equal Spair '(nil)) | |
1830 (equal Tpair '(nil)))) | |
1831 ((and Skey (equal Snext-group Tnext-group))) | |
1832 (t | |
1833 (TREX-push Tnumber newgroup) | |
1834 (setq oldgroup (TREX-delete Tnumber oldgroup)) | |
1835 (if *DFA-optimize-debug* | |
1836 (princ(format (format " oldGroup : %s\n newGroup : %s\n" oldgroup newgroup)))) | |
1837 )))) | |
1838 (setq Tmembers (cdr Tmembers))) | |
1839 (cond (newgroup | |
1840 (if *DFA-optimize-debug* | |
1841 (princ (format "Changed :%s --> " Sgroup))) | |
1842 (setcdr Sgroup oldgroup) | |
1843 (if *DFA-optimize-debug* | |
1844 (princ (format "%s" Sgroup))) | |
1845 (TREX-inc *DFA-optimize-node*) | |
1846 (if *DFA-optimize-debug* | |
1847 (princ (format "+%s\n" (cons *DFA-optimize-node* newgroup)))) | |
1848 (TREX-push (cons *DFA-optimize-node* newgroup) *DFA-optimize-groups*) | |
1849 (throw 'DFA-optimize-changed t)))) | |
1850 (setq Salist (cdr Salist)))) | |
1851 (setq Smembers (cdr Smembers)))) | |
1852 (setq groups (cdr groups)))))) | |
1853 ;;; | |
1854 ;;; | |
1855 (if *DFA-optimize-debug* | |
1856 (princ (format "table: %s\n" table))) | |
1857 (if *DFA-optimize-debug* | |
1858 (princ (format "groups: %s\n" *DFA-optimize-groups*))) | |
1859 (let ((newtable nil) | |
1860 (newstart nil) | |
1861 (groups *DFA-optimize-groups*)) | |
1862 | |
1863 ;;; start node $B$rC5$9(B | |
1864 (let ((l *DFA-optimize-groups*)) | |
1865 (while l | |
1866 (cond((TREX-memequal start (cdr (car l))) | |
1867 (setq newstart (car (car l))) | |
1868 (setq l nil)) | |
1869 (t | |
1870 (setq l (cdr l)))))) | |
1871 | |
1872 ;;; $B?7$7$$(B transTable $B$r:n$k!%(B | |
1873 (while groups | |
1874 (let* ((group (car groups)) | |
1875 (group-number (car group)) | |
1876 (member-number (car (cdr group))) | |
1877 (member-alist (cdr (assoc member-number table)))) | |
1878 (TREX-push (cons group-number | |
1879 (let ((group-alist nil)) | |
1880 (while member-alist | |
1881 (let ((Mkey (car (car member-alist))) | |
1882 (Mnext (cdr (car member-alist)))) | |
1883 (TREX-push (cons Mkey | |
1884 (if (consp Mnext) | |
1885 (cons (DFA-optimize-group-number (car Mnext)) | |
1886 (DFA-optimize-group-number (cdr Mnext))) | |
1887 (DFA-optimize-group-number Mnext))) | |
1888 group-alist)) | |
1889 (setq member-alist (cdr member-alist))) | |
1890 group-alist)) | |
1891 newtable) | |
1892 (setq groups (cdr groups)))) | |
1893 (cons newstart newtable)))) | |
1894 | |
1895 | |
1896 | |
1897 ;;; | |
1898 ;;; Non Empty Finite Automata | |
1899 ;;; | |
1900 | |
1901 (defun NEFA-make (EFFA) | |
1902 (let* ((start (car EFFA)) | |
1903 (table (cdr EFFA)) | |
1904 (Salist (cdr (assoc start table)))) | |
1905 (cond((equal Salist '((nil))) | |
1906 nil) | |
1907 ((and (assoc nil Salist) | |
1908 (progn | |
1909 (while (and Salist (not (equal start (cdr (car Salist))))) | |
1910 (setq Salist (cdr Salist))) | |
1911 Salist)) | |
1912 (let ((min 10000) | |
1913 (max -10000) | |
1914 (l table)) | |
1915 (while l | |
1916 (if (< (car (car l)) min) | |
1917 (setq min (car (car l)))) | |
1918 (if (< max (car (car l))) | |
1919 (setq max (car (car l)))) | |
1920 (setq l (cdr l))) | |
1921 (let* ((newstart (1- min)) | |
1922 (newtable (copy-alist table)) | |
1923 (oldSalist (cdr (assoc start table))) | |
1924 (newSalist (TREX-delete '(nil) (copy-alist oldSalist)))) | |
1925 (cons newstart | |
1926 (cons (cons newstart newSalist) newtable))))) | |
1927 (t | |
1928 EFFA)))) | |
1929 | |
1930 ;;; | |
1931 ;;; Simplify FA | |
1932 ;;; | |
1933 | |
1934 (defvar *FA-simplify-table* nil) | |
1935 | |
1936 (defun FA-simplify (FA) | |
1937 (let ((start (car FA)) | |
1938 (table (cdr FA)) | |
1939 (newtable nil) | |
1940 (*FA-simplify-table* nil)) | |
1941 (FA-simplify-mark start table) | |
1942 (while *FA-simplify-table* | |
1943 (TREX-push (assoc (car *FA-simplify-table*) table) newtable) | |
1944 (setq *FA-simplify-table* (cdr *FA-simplify-table*))) | |
1945 (cons start newtable))) | |
1946 | |
1947 (defun FA-simplify-mark (node table) | |
1948 (cond ((not (TREX-memequal node *FA-simplify-table*)) | |
1949 (TREX-push node *FA-simplify-table*) | |
1950 (let ((alist (cdr (assoc node table)))) | |
1951 (while alist | |
1952 (cond((car (car alist)) | |
1953 (FA-simplify-mark (cdr (car alist)) table))) | |
1954 (setq alist (cdr alist))))))) | |
1955 | |
1956 ;;; | |
1957 ;;; Shortest match DFA | |
1958 ;;; | |
1959 | |
1960 (defun DFA-shortest-match (DFA) | |
1961 (let ((start (car DFA)) | |
1962 (table (cdr DFA)) | |
1963 (newtable nil)) | |
1964 (while table | |
1965 (cond ((assoc nil (cdr (car table))) | |
1966 (TREX-push (cons (car (car table)) '((nil))) newtable)) | |
1967 (t | |
1968 (TREX-push (car table) newtable))) | |
1969 (setq table (cdr table))) | |
1970 (cons start newtable))) | |
1971 | |
1972 ;;; | |
1973 ;;; Fastmap computation | |
1974 ;;; | |
1975 | |
1976 (defvar *DFA-fastmap-chars* nil) | |
1977 (defvar *DFA-fastmap-syntax* nil) | |
1978 (defvar *DFA-fastmap-category* nil) | |
1979 (defvar *DFA-fastmap-init* 0 ) | |
1980 (defvar *DFA-fastmap-pos* 1 ) ;;; SYNTAXSPEC or CATEGORYSPEC | |
1981 (defvar *DFA-fastmap-neg* 2 ) ;;; NOTSYNTAXSPEC or NOTCATEGORYSPEC | |
1982 | |
1983 ;;;; $B$9$Y$F$N(B char $B$OB~0l$D$N(B syntaxspec $B$KB0$9$k(B | |
1984 ;;;; ==> syntaxspec(ch) and notsyntaxspec(ch) --> all char | |
1985 ;;;; ==> notsyntaxspec(ch1) and notsyntaxspec(ch2) --> all char | |
1986 ;;;; ==> notsyntaxspec(ch1) and syntaxspec(ch2) == notsyntaxspec(ch1) | |
1987 ;;;; $B$D$^$j(B notsyntaxspec $B$O9b!9#1$D$7$+$J$$!%(B | |
1988 | |
1989 ;;; Returns [ CODE FASTMAP SYNTAX-FASTMAP CATEGOY-FASTMAP ] | |
1990 | |
1991 (defun DFA-code-with-fastmap (DFA) | |
1992 (TREX-init *DFA-fastmap-chars* (make-vector 256 nil)) | |
1993 (TREX-init *DFA-fastmap-syntax* (make-vector 256 nil)) | |
1994 (TREX-init *DFA-fastmap-category* (make-vector 256 nil)) | |
1995 (let ((code (regexp-code-gen DFA)) | |
1996 (start (car DFA)) | |
1997 (*DFA-fastmap-table* (cdr DFA)) | |
1998 (*DFA-fastmap-mark* nil) | |
1999 (*DFA-fastmap-special* nil)) | |
2000 (let ((i 0)) | |
2001 (while (< i 256) | |
2002 (aset *DFA-fastmap-chars* i nil) | |
2003 (aset *DFA-fastmap-syntax* i nil) | |
2004 (aset *DFA-fastmap-category* i nil) | |
2005 (TREX-inc i))) | |
2006 (DFA-fastmap-collect start) | |
2007 (let ((fastmap (if *DFA-fastmap-special* | |
2008 nil ;;;(make-string 256 1) | |
2009 (make-string 256 0))) | |
2010 (fastmap-entries 0) | |
2011 (syntax (if *DFA-fastmap-special* | |
2012 nil | |
2013 (make-string 256 0))) | |
2014 (syntax-entries 0) | |
2015 (notsyntax-entries 0) | |
2016 (category (if *DFA-fastmap-special* | |
2017 nil | |
2018 (make-string 256 0))) | |
2019 (category-entries 0)) | |
2020 (let ((result (make-vector 4 nil))) | |
2021 (aset result 0 code) | |
2022 (if *DFA-fastmap-special* | |
2023 (progn | |
2024 (aset result 1 fastmap) | |
2025 (aset result 2 syntax) | |
2026 (aset result 3 category)) | |
2027 (progn | |
2028 (let ((i 0)) | |
2029 (while (< i 256) | |
2030 (if (aref *DFA-fastmap-chars* i) | |
2031 (progn | |
2032 (TREX-inc fastmap-entries) | |
2033 (aset fastmap i 1))) | |
2034 (aset syntax i | |
2035 (cond((null (aref *DFA-fastmap-syntax* i)) | |
2036 *DFA-fastmap-init*) | |
2037 ((eq (aref *DFA-fastmap-syntax* i) 'SYNTAXSPEC) | |
2038 (TREX-inc syntax-entries) | |
2039 *DFA-fastmap-pos*) | |
2040 ((eq (aref *DFA-fastmap-syntax* i) 'NOTSYNTAXSPEC) | |
2041 (TREX-inc notsyntax-entries) | |
2042 (TREX-inc syntax-entries) | |
2043 *DFA-fastmap-neg*))) | |
2044 (aset category i | |
2045 (cond((null (aref *DFA-fastmap-category* i)) | |
2046 *DFA-fastmap-init*) | |
2047 ((eq (aref *DFA-fastmap-category* i) 'CATEGORYSPEC) | |
2048 (TREX-inc category-entries) | |
2049 *DFA-fastmap-pos*) | |
2050 ((eq (aref *DFA-fastmap-category* i) 'NOTCATEGORYSPEC) | |
2051 (TREX-inc category-entries) | |
2052 *DFA-fastmap-neg*))) | |
2053 (TREX-inc i))) | |
2054 | |
2055 (cond((<= 2 notsyntax-entries) | |
2056 (setq fastmap (make-string 256 1) | |
2057 syntax nil | |
2058 category nil)) | |
2059 ((= 1 notsyntax-entries) | |
2060 (let ((ch 0)) | |
2061 (while (< ch 256) | |
2062 (if (= (aref syntax ch) *DFA-fastmap-neg*) | |
2063 (aset syntax ch *DFA-fastmap-init*) | |
2064 (aset syntax ch *DFA-fastmap-pos*)) | |
2065 (TREX-inc ch))))) | |
2066 (aset result 1 fastmap) | |
2067 (aset result 2 syntax) | |
2068 (aset result 3 category))) | |
2069 result)))) | |
2070 | |
2071 (defun DFA-fastmap-collect (node) | |
2072 (if (TREX-memequal node *DFA-fastmap-mark*) nil | |
2073 (let ((alist (cdr (assoc node *DFA-fastmap-table*)))) | |
2074 (TREX-push node *DFA-fastmap-mark*) | |
2075 (while alist | |
2076 (let ((key (car (car alist)))) | |
2077 (cond((numberp key) | |
2078 (aset *DFA-fastmap-chars* key t)) | |
2079 ((symbolp key);;; can be null | |
2080 (setq *DFA-fastmap-special* t)) | |
2081 (t | |
2082 (let ((op (car key))) | |
2083 (cond | |
2084 ((TREX-memequal op '(START_MEMORY STOP_MEMORY)) | |
2085 (DFA-fastmap-collect (cdr (car alist)))) | |
2086 ((TREX-memequal op '(SYNTAXSPEC NOTSYNTAXSPEC)) | |
2087 (let ((specch (syntax-code-spec (nth 1 key)))) | |
2088 (cond((null (aref *DFA-fastmap-syntax* (nth 1 key))) | |
2089 (aset *DFA-fastmap-syntax* specch op)) | |
2090 ((not (eq (aref *DFA-fastmap-syntax* specch) op)) | |
2091 (setq *DFA-fastmap-special* t))))) | |
2092 ((TREX-memequal op '(CATEGORYSPEC NOTCATEGORYSPEC)) | |
2093 (let ((specch (nth 1 key))) | |
2094 (cond((null (aref *DFA-fastmap-category* specch)) | |
2095 (aset *DFA-fastmap-category* specch op)) | |
2096 ((not (eq (aref *DFA-fastmap-category* specch) op)) | |
2097 (setq *DFA-fastmap-special* t))))) | |
2098 ((TREX-memequal op '(CHARSET CHARSET_NOT)) | |
2099 (let ((list (cdr key))) | |
2100 (while list | |
2101 (let ((from nil) (to nil)) | |
2102 (cond((stringp (car list)) | |
2103 (setq from (aref (car list) 0) | |
2104 to (aref (car list) 0))) | |
2105 (t ;;; :range | |
2106 (setq from (aref (nth 1 (car list)) 0) | |
2107 to (aref (nth 2 (car list)) 0)))) | |
2108 (while (<= from to) | |
2109 (cond((null (aref *DFA-fastmap-chars* from)) | |
2110 (aset *DFA-fastmap-chars* from | |
2111 (if (eq op 'CHARSET_NOT) 'CHARSET_NOT | |
2112 t)))) | |
2113 (TREX-inc from))) | |
2114 (setq list (cdr list)))) | |
2115 (if (eq op 'CHARSET_NOT) | |
2116 (let ((i 0)) | |
2117 (while (< i 256) | |
2118 (cond((null (aref *DFA-fastmap-chars* i)) | |
2119 (aset *DFA-fastmap-chars* i t)) | |
2120 ((eq (aref *DFA-fastmap-chars* i) 'CHARSET_NOT) | |
2121 (aset *DFA-fastmap-chars* i nil))) | |
2122 (TREX-inc i))))) | |
2123 (t | |
2124 (setq *DFA-fastmap-special* t))))))) | |
2125 (setq alist (cdr alist)))))) | |
2126 | |
2127 ;;; | |
2128 ;;; $B@55,I=8=%3!<%I$NL?NaI=(B | |
2129 ;;; | |
2130 | |
2131 (if (= regexp-version 19) | |
2132 (TREX-define-enum | |
2133 UNUSED ;;; 18 | |
2134 EXACTN ;;; 18 | |
2135 ANYCHAR ;;; 18 | |
2136 CHARSET ;;; 18 | |
2137 CHARSET_NOT ;;; 18 | |
2138 START_MEMORY ;;; 18* | |
2139 STOP_MEMORY ;;; 18* | |
2140 DUPLICATE ;;; 18 | |
2141 BEGLINE ;;; 18 | |
2142 ENDLINE ;;; 18 | |
2143 BEGBUF ;;; 18 | |
2144 ENDBUF ;;; 18 | |
2145 JUMP ;;; 18 | |
2146 JUMP_PAST_ALT ;;; 19 | |
2147 ON_FAILURE_JUMP ;;; 18 | |
2148 ON_FAILURE_KEEP_STRING_JUMP ;;; 19 | |
2149 ;;;; finalize_jump | |
2150 ;;;; maybe_finalize_jump | |
2151 POP_FAILURE_JUMP ;;; 19 | |
2152 MAYBE_POP_JUMP ;;; 19 | |
2153 DUMMY_FAILURE_JUMP ;;; 18 | |
2154 PUSH_DUMMY_FAILURE ;;; 19 | |
2155 SUCCEED_N ;;; 19 | |
2156 JUMP_N ;;; 19 | |
2157 SET_NUMBER_AT ;;; 19 | |
2158 WORDCHAR ;;; 18 | |
2159 NOTWORDCHAR ;;; 18 | |
2160 WORDBEG ;;; 18 | |
2161 WORDEND ;;; 18 | |
2162 WORDBOUND ;;; 18 | |
2163 NOTWORDBOUND ;;; 18 | |
2164 BEFORE_DOT ;;; 18 | |
2165 AT_DOT ;;; 18 | |
2166 AFTER_DOT ;;; 18 | |
2167 SYNTAXSPEC ;;; 18 | |
2168 NOTSYNTAXSPEC ;;; 18 | |
2169 ;;; TREX code | |
2170 EXACT1 | |
2171 EXACT2 | |
2172 EXACT3 | |
2173 CHARSET_M | |
2174 CHARSET_M_NOT | |
2175 CASEN | |
2176 SUCCESS_SHORT | |
2177 SUCCESS | |
2178 POP | |
2179 EXCEPT0 | |
2180 EXCEPT1 | |
2181 CATEGORYSPEC | |
2182 NOTCATEGORYSPEC | |
2183 RANGE | |
2184 RANGE_A | |
2185 ) | |
2186 ;; else regexp-version == 18. | |
2187 (TREX-define-enum | |
2188 UNUSED | |
2189 EXACTN | |
2190 BEGLINE | |
2191 ENDLINE | |
2192 JUMP | |
2193 ON_FAILURE_JUMP | |
2194 FINALIZE_JUMP | |
2195 MAYBE_FINALIZE_JUMP | |
2196 DUMMY_FAILURE_JUMP | |
2197 ANYCHAR | |
2198 CHARSET | |
2199 CHARSET_NOT | |
2200 START_MEMORY | |
2201 STOP_MEMORY | |
2202 DUPLICATE | |
2203 BEFORE_DOT ;;; not used | |
2204 AT_DOT ;;; not used | |
2205 AFTER_DOT ;;; not used | |
2206 BEGBUF | |
2207 ENDBUF | |
2208 WORDCHAR ;;; not used | |
2209 NOTWORDCHAR ;;; not used | |
2210 WORDBEG | |
2211 WORDEND | |
2212 WORDBOUND | |
2213 NOTWORDBOUND | |
2214 SYNTAXSPEC | |
2215 NOTSYNTAXSPEC | |
2216 ;;; | |
2217 ;;; extended instructions | |
2218 ;;; | |
2219 EXACT1 | |
2220 EXACT2 | |
2221 EXACT3 | |
2222 CHARSET_M | |
2223 CHARSET_M_NOT | |
2224 CASEN | |
2225 SUCCESS_SHORT ;;; == ON_FAILURE_SUCCESS | |
2226 SUCCESS | |
2227 POP | |
2228 EXCEPT0 ;;; ALLCHAR | |
2229 EXCEPT1 | |
2230 CATEGORYSPEC | |
2231 NOTCATEGORYSPEC | |
2232 )) | |
2233 | |
2234 (defvar ON_FAILURE_SUCCESS SUCCESS_SHORT) | |
2235 | |
2236 ;;; | |
2237 ;;; ANYCHAR = EXCEPT1 \n | |
2238 ;;; ALLCHAR = EXCEPT0 | |
2239 | |
2240 | |
2241 ;;; | |
2242 ;;; $B@55,I=8=>H9g4o$NL?NaBN7O(B | |
2243 ;;; | |
2244 ;;; UNUSED | |
2245 ;;; EXACTN n ch1 ch2 ... chn | |
2246 ;;; BEGLINE | |
2247 ;;; ENDLINE | |
2248 ;;; JUMP disp[2] | |
2249 ;;; +JUMP_PAST_ALT disp[2] | |
2250 ;;; ON_FAILURE_JUMP disp[2] | |
2251 ;;; +ON_FAILURE_KEEP_STRING_JUMP disp[2] | |
2252 ;;; -FINALIZE_JUMP disp[2] | |
2253 ;;; -MAYBE_FINALIZE_JUMP disp[2] | |
2254 ;;; +POP_FAILURE_JUMP disp[2] | |
2255 ;;; +MAYBE_POP_JUMP disp[2] | |
2256 ;;; DUMMY_FAILURE_JUMP disp[2] | |
2257 ;;; +PUSH_DUMMY_FAILURE | |
2258 ;;; +SUCCEED_N disp[2] n[2] | |
2259 ;;; +JUMP_N disp[2] n[2] | |
2260 ;;; +SET_NUMBER_AT disp[2] n[2] | |
2261 ;;; ANYCHAR | |
2262 ;;; CHARSET n b1 b2 ... bn | |
2263 ;;;**CHARSET 0xff l1 l2 cf1 ct1 cf2 ct2 ... cfn ctn | |
2264 ;;; CHARSET_NOT n b1 b2 ... bn | |
2265 ;;;**CHARSET_NOT 0xff l1 l2 cf1 ct1 cf2 ct2 ... cfn ctn | |
2266 ;;; $B0J2<$O$($J$_;a$NDs0F$K$h$k?7$?$J%;%^%s%F%#%C%/%9(B | |
2267 ;; | |
2268 ;;; CHARSET n b1 b2 ... bn (n < 0x80) | |
2269 ;;; CHARSET n+0x80 b1 b2 ... bn | |
2270 ;;; |<-- n bytes -->| | |
2271 ;;; lh lo CHARF1 CHART1 .... CHARFm CHARTm | |
2272 ;;; |<- lh << 8 + lo bytes ->| | |
2273 ;; CHARSET n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn | |
2274 ;; |<- bitmap ->| |<- range table ->| | |
2275 ;; CHARSET_NOT n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn | |
2276 ;; CHARSETM m n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn | |
2277 ;; CHARSETM_NOT m n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn | |
2278 ;; | |
2279 ;; o cfx, ctx $B0J30$O$9$Y$F(B 1byte. cfx, ctx $B$O(B multi byte | |
2280 ;; character. | |
2281 ;; | |
2282 ;; o CHARSET(_NOT) $B$H(B CHARSETM(_NOT) $B$H$N0c$$$O(B, CHARSETM(_NOT) | |
2283 ;; $B$N>l9g$K$O(B bitmap $B$N@hF,$N(B m bytes $B$,>J$+$l$F$$$kE@(B. | |
2284 ;; | |
2285 ;; o b1 ... bn ($B$D$^$j(B bitmap$B$ND9$5(B)$B$O(B, (n & 0x7f) bytes. n $B$N(B | |
2286 ;; $BJ,(B 1byte $B$O4^$^$J$$(B. | |
2287 ;; | |
2288 ;; o lh $B0J2<$O(B n & 0x80 $B$,(B 0 $B$J$iB8:_$7$J$$(B. | |
2289 ;; | |
2290 ;; o lh $B$+$i(B ctn $B$^$G$ND9$5(B($B$D$^$j(B range table $B$ND9$5(B) $B$O(B ((lh | |
2291 ;; << 8) + lo) byte. lh $B$H(B lo $B$N(B 2byte $B$r4^$`(B. ($B>e$N(B n $B$N>l(B | |
2292 ;; $B9g$H0c$$$^$9$,(B, $BE}0l$7$?$[$&$,$$$$$+$J(B?). | |
2293 ;; | |
2294 ;; o cfx $B$O(B multi byte character $B$G(B, cfx $B$H(B ctx $B$N(B leading char | |
2295 ;; $B$OF1$8$G$J$$$H$$$1$J$$(B. $B$^$?(B, cfx $B$N(B leading char $B$O(B 0 $B$G(B | |
2296 ;; $B$"$C$F$O$$$1$J$$(B(range table $B$K(B leading char $B$,(B 0 (ASCII$B$H(B | |
2297 ;; $B$+(B) $B$NJ8;z$,$"$C$F$b(B, $B8=:_$O(B fastmap $B$KH?1G$5$l$J$$$+$i(B). | |
2298 ;; | |
2299 ;;; START_MEMORY regno | |
2300 ;;; STOP_MEMORY regno | |
2301 ;;; o emacs 19 $B$N(B regex.c $B$G$O(B, | |
2302 ;;; START_MEMORY regno groupno | |
2303 ;;; STOP_MEMORY regno groupno | |
2304 ;;; groupno $B$O<+J,$h$j2<$N%l%Y%k$N%0%k!<%W$N?t(B | |
2305 ;;; | |
2306 ;;; DUPLICATE regno | |
2307 ;;; BEFORE_DOT ;;; not used | |
2308 ;;; AT_DOT ;;; not used | |
2309 ;;; AFTER_DOT ;;; not used | |
2310 ;;; BEGBUF | |
2311 ;;; ENDBUF | |
2312 ;;; WORDCHAR ;;; not used | |
2313 ;;; NOTWORDCHAR ;;; not used | |
2314 ;;; WORDBEG | |
2315 ;;; WORDEND | |
2316 ;;; WORDBOUND | |
2317 ;;; NOTWORDBOUND | |
2318 ;;; SYNTAXSPEC ch | |
2319 ;;; NOTSYNTAXSPEC ch | |
2320 | |
2321 ;;; | |
2322 ;;; $B3HD%L?Na!J(BTREX$B$G;HMQ$9$k$b$N!K(B | |
2323 ;;; | |
2324 ;;; EXACT1 ch | |
2325 ;;; EXACT2 ch1 ch2 | |
2326 ;;; EXACT3 ch1 ch2 ch3 | |
2327 ;;; CHARSETM m n b1 b2 .. bn | |
2328 ;;; charset $B$N(B bitmaps $B$N$&$A@hF,$N(B m bytes $B$r>J$$$?$b$N(B | |
2329 ;;; CHARSETM_NOT m n b1 b2 .. bn | |
2330 ;;; charset_not $B$N(B bitmaps $B$N$&$A@hF,$N(B m bytes $B$r>J$$$?$b$N(B | |
2331 ;;; CASEN n disp[1] disp[2] ... disp[n] l u ind[l] ... ind[u] | |
2332 ;;; $B:G=i$K(B n $B8D$N(B jump relative address(2bytes) $B$,B3$-!$(B | |
2333 ;;; $B<!$K(Bcharacter code l $B$+$i(B m $B$^$G$NJ,$N(Bindex(1byte)$B$,B3$/!%(B | |
2334 ;;; ON_FAILURE_SUCCESS | |
2335 ;;; alternative stack $B$r6u$K$7!$(Bpend $B$r(B push $B$9$k!%(B | |
2336 ;;; SUCCESS | |
2337 ;;; pend $B$X%8%c%s%W$9$k!%(B | |
2338 ;;; POP | |
2339 ;;; alternative stack $B$r(B pop $B$9$k!%(B | |
2340 | |
2341 ;;; RANGE ch1 ch2 | |
2342 ;;; RANGE_A == RANGE 0xA0 0xFF | |
2343 | |
2344 | |
2345 ;;; [^$B&A(B]$B&B(B\|$B&C(B $B$N0UL#!'(B | |
2346 ;;; on_failure_jump L1 | |
2347 ;;; on_failure_jump L2 | |
2348 ;;; $B&A(B | |
2349 ;;; pop | |
2350 ;;; fail | |
2351 ;;; L1: ALLCHAR | |
2352 ;;; $B&B(B | |
2353 ;;; L2: pop | |
2354 ;;; $B&C(B | |
2355 | |
2356 ;;; | |
2357 ;;; regexp-code-* | |
2358 ;;; | |
2359 | |
2360 (defvar *regexp-code-buffer* (get-buffer-create " *regexp-code-buffer*")) | |
2361 | |
2362 (defun regexp-code-gen (FA) | |
2363 (let ((start (car FA)) | |
2364 (table (cdr FA)) | |
2365 (*table* (cdr FA)) | |
2366 (*labels* nil) | |
2367 (*final* nil) | |
2368 (*counter* 0)) | |
2369 (let ((list table)) | |
2370 (while (and list (null *final*)) | |
2371 (if (equal '((nil)) (cdr (car list))) | |
2372 (setq *final* (car (car list)))) | |
2373 (setq list (cdr list)))) | |
2374 (cond((null *final*) | |
2375 (setq *final* (1+ (length table))) | |
2376 (setq *counter* (1+ *final*))) | |
2377 (t | |
2378 (setq *counter* (1+ (length table))))) | |
2379 (save-excursion | |
2380 (set-buffer *regexp-code-buffer*) | |
2381 (let ((kanji-flag nil) | |
2382 (mc-flag nil)) | |
2383 (erase-buffer) | |
2384 (regexp-code-gen* start) | |
2385 (buffer-substring (point-min) (point-max))) | |
2386 ))) | |
2387 | |
2388 (defun regexp-code-gen* (node) | |
2389 (cond((= node *final*) | |
2390 (if (null (assoc node *labels*)) | |
2391 (TREX-push (cons node (point)) *labels*)) | |
2392 (insert SUCCESS)) | |
2393 ((null (assoc node *labels*)) | |
2394 (TREX-push (cons node (point)) *labels*) | |
2395 (let ((alist (cdr (assoc node *table*)))) | |
2396 (cond((equal '((nil)) alist) | |
2397 (insert SUCCESS)) | |
2398 (t (regexp-code-gen-alist alist))))) | |
2399 (t | |
2400 (let ((disp (- (cdr (assoc node *labels*)) (+ (point) 3)))) | |
2401 (insert JUMP | |
2402 (logand disp 255) | |
2403 (/ (logand disp (* 255 256)) 256)))))) | |
2404 | |
2405 (defvar *regexp-charset-table* nil) | |
2406 (defvar *regexp-case-table* nil) | |
2407 | |
2408 (defun regexp-code-gen-alist (alist) | |
2409 (TREX-init *regexp-charset-table* (make-vector 256 nil)) | |
2410 (TREX-init *regexp-case-table* (make-vector 256 nil)) | |
2411 (if (eq (car (car alist)) nil) | |
2412 nil | |
2413 (let ((nextalist alist) | |
2414 (numberkey nil) | |
2415 (point nil) | |
2416 (min 256) (max -1) (nexts nil) (nodealist nil)) | |
2417 (cond((numberp (car (car alist))) | |
2418 (setq numberkey t) | |
2419 (let ((i 0)) | |
2420 (while (< i 256) | |
2421 (aset *regexp-case-table* i nil) | |
2422 (TREX-inc i))) | |
2423 | |
2424 (while (and nextalist | |
2425 (numberp (car (car nextalist)))) | |
2426 (let ((ch (car (car nextalist))) | |
2427 (next (cdr (car nextalist)))) | |
2428 (let ((place (assoc next nodealist))) | |
2429 (if place | |
2430 (setcdr place | |
2431 (cons ch (cdr place))) | |
2432 (TREX-push (cons ch (list next)) nodealist))) | |
2433 (aset *regexp-case-table* ch next) | |
2434 (if (< ch min) (setq min ch)) | |
2435 (if (< max ch) (setq max ch)) | |
2436 (if (not (TREX-memequal next nexts)) | |
2437 (TREX-push next nexts))) | |
2438 (setq nextalist (cdr nextalist)))) | |
2439 (t (setq nextalist (cdr alist)))) | |
2440 | |
2441 (if nextalist | |
2442 (cond((eq (car (car nextalist)) nil) | |
2443 (insert ON_FAILURE_SUCCESS )) ;;; SUCCESS_SHORT | |
2444 (t | |
2445 (insert ON_FAILURE_JUMP 0 0) | |
2446 (setq point (point))))) | |
2447 | |
2448 (cond(numberkey | |
2449 (cond((= min max) | |
2450 ;;; exact1 | |
2451 (regexp-code-gen-exact (list min) (car nexts))) | |
2452 | |
2453 ((= (length nexts) 1) | |
2454 ;;; charset or charset_not | |
2455 (if (= (length alist) 256) | |
2456 (insert EXCEPT0) ;92.10.26 by T.Saneto | |
2457 (let ((not_min 256) | |
2458 (not_max -1) | |
2459 (ch 0) | |
2460 (mode (car nexts))) | |
2461 (while (< ch 256) | |
2462 (cond((null (aref *regexp-case-table* ch)) | |
2463 (if (< ch not_min) (setq not_min ch)) | |
2464 (if (< not_max ch) (setq not_max ch)))) | |
2465 (TREX-inc ch)) | |
2466 (if (<= (- not_max not_min) (- max min)) | |
2467 (setq min not_min | |
2468 max not_max | |
2469 mode nil)) | |
2470 (let ((minb (/ min 8)) | |
2471 (maxb (1+ (/ max 8)))) | |
2472 (insert (if mode CHARSET_M CHARSET_M_NOT) minb (- maxb minb)) | |
2473 (let ((b minb)) | |
2474 (while (< b maxb) | |
2475 (let ((i 7) (bits 0)) | |
2476 (while (<= 0 i) | |
2477 (if (eq (aref *regexp-case-table* (+ (* 8 b) i)) | |
2478 mode) | |
2479 ;;;; bits table$B$N=g=x$O<!$NDL$j(B | |
2480 (TREX-inc bits (aref [1 2 4 8 16 32 64 128] i))) | |
2481 (TREX-dec i)) | |
2482 (insert bits)) | |
2483 (TREX-inc b)))))) | |
2484 (regexp-code-gen* (car nexts))) | |
2485 (t | |
2486 ;;; case | |
2487 (let ((point nil)) | |
2488 (insert CASEN) | |
2489 (insert (length nexts)) | |
2490 (setq point (point)) | |
2491 (let ((list nexts)) | |
2492 (while list | |
2493 (insert 0 0) | |
2494 (setq list (cdr list)))) | |
2495 (insert min max) | |
2496 (let ((ch min)) | |
2497 (while (<= ch max) | |
2498 (if (aref *regexp-case-table* ch) | |
2499 (insert (1+ (TREX-find (aref *regexp-case-table* ch) nexts))) | |
2500 (insert 0)) | |
2501 (TREX-inc ch))) | |
2502 (let ((list nexts)) | |
2503 (while list | |
2504 (if (null (assoc (car list) *labels*)) | |
2505 (regexp-code-gen* (car list))) | |
2506 (setq list (cdr list)))) | |
2507 (save-excursion | |
2508 (goto-char point) | |
2509 (let ((list nexts)) | |
2510 (while list | |
2511 (delete-char 2) | |
2512 (let ((disp (- (cdr (assoc (car list) *labels*)) (+ (point) 2)))) | |
2513 (insert (logand disp 255) | |
2514 (/ (logand disp (* 255 256)) 256))) | |
2515 (setq list (cdr list))))) | |
2516 )))) | |
2517 ((eq (car (car alist)) ':epsilon) | |
2518 (regexp-code-gen* (cdr (car alist)))) | |
2519 (t | |
2520 (let ((key (car (car alist))) | |
2521 (next (cdr (car alist)))) | |
2522 (cond ((symbolp key) | |
2523 (insert (eval key))) | |
2524 ((TREX-memequal (car key) '(CHARSET CHARSET_NOT)) | |
2525 (let ((charset (cdr key)) | |
2526 (min 128) (max -1) | |
2527 (mcbytes 0) | |
2528 (mcchars nil)) | |
2529 (let ((i 0)) | |
2530 (while (< i 256) | |
2531 (aset *regexp-charset-table* i nil) | |
2532 (TREX-inc i))) | |
2533 (while charset | |
2534 (cond((stringp (car charset)) | |
2535 (cond((eq (length (car charset)) 1) | |
2536 (aset *regexp-charset-table* (aref (car charset) 0) t) | |
2537 (if (< (aref (car charset) 0) min) | |
2538 (setq min (aref (car charset) 0))) | |
2539 (if (< max (aref (car charset) 0)) | |
2540 (setq max (aref (car charset) 0))) | |
2541 ) | |
2542 (t | |
2543 (TREX-inc mcbytes (* 2 (length (car charset)))) | |
2544 (if (null mcchars) (setq mcchars charset)) | |
2545 ))) | |
2546 ((consp (car charset)) ;;; range | |
2547 (cond ((eq (length (nth 1 (car charset))) 1) | |
2548 (let ((from (aref (nth 1 (car charset)) 0)) | |
2549 (to (aref (nth 2 (car charset)) 0))) | |
2550 (if (< from min) (setq min from)) | |
2551 (if (< max to) (setq max to)) | |
2552 (while (<= from to) | |
2553 (aset *regexp-charset-table* from t) | |
2554 (TREX-inc from))) | |
2555 ) | |
2556 (t | |
2557 (TREX-inc mcbytes | |
2558 (+ (length (nth 1 (car charset))) (length (nth 2 (car charset))))) | |
2559 (if (null mcchars) (setq mcchars charset)))))) | |
2560 (setq charset (cdr charset))) | |
2561 (cond ((< max min) | |
2562 (insert (if (eq (car key) 'CHARSET) CHARSET CHARSET_NOT) | |
2563 (if (< 0 mcbytes) 128 0))) | |
2564 (t | |
2565 (let ((minb (/ min 8)) | |
2566 (maxb (1+ (/ max 8)))) | |
2567 (insert (if (eq (car key) 'CHARSET) CHARSET_M CHARSET_M_NOT) | |
2568 minb (+ (if (< 0 mcbytes) 128 0) (- maxb minb))) | |
2569 (let ((b minb)) | |
2570 (while (< b maxb) | |
2571 (let ((i 7) (bits 0)) | |
2572 (while (<= 0 i) | |
2573 (if (aref *regexp-charset-table* (+ (* 8 b) i)) | |
2574 ;;;; bits table$B$N=g=x$O<!$NDL$j(B | |
2575 (TREX-inc bits (aref [1 2 4 8 16 32 64 128] i))) | |
2576 (TREX-dec i)) | |
2577 (insert bits)) | |
2578 (TREX-inc b)))))) | |
2579 | |
2580 (cond( (< 0 mcbytes) | |
2581 (TREX-inc mcbytes 2) | |
2582 (insert (/ mcbytes 256) (mod mcbytes 256)) | |
2583 (while mcchars | |
2584 (cond((stringp (car mcchars)) | |
2585 (insert (car mcchars) (car mcchars))) | |
2586 ((consp (car mcchars)) | |
2587 (insert (nth 1 (car mcchars)) (nth 2 (car mcchars))))) | |
2588 (setq mcchars (cdr mcchars))))) | |
2589 )) | |
2590 ((= (length key) 1) | |
2591 (insert (eval (car key)))) | |
2592 ((= (length key) 2) | |
2593 (insert (eval (car key)) (nth 1 key))) | |
2594 ((= (length key) 3) | |
2595 (insert (eval (car key)) (nth 1 key) (nth 2 key))) | |
2596 (t | |
2597 (regexp-error))) | |
2598 (regexp-code-gen* next)))) | |
2599 (if point | |
2600 (let ((disp (- (point) point))) | |
2601 (save-excursion | |
2602 (goto-char point) | |
2603 (delete-char -2) | |
2604 (insert (logand disp 255) | |
2605 (/ (logand disp (* 255 256)) 256))) | |
2606 (regexp-code-gen-alist nextalist)))))) | |
2607 | |
2608 (defun regexp-code-gen-exact (chars node) | |
2609 (let ((alist (cdr (assoc node *table*)))) | |
2610 (cond((and (null (assoc node *labels*)) | |
2611 (= (length alist) 1) | |
2612 (numberp (car (car alist)))) | |
2613 (regexp-code-gen-exact (cons (car (car alist)) chars) | |
2614 (cdr (car alist)))) | |
2615 (t | |
2616 (regexp-code-gen-exact* (reverse chars)) | |
2617 (regexp-code-gen* node))))) | |
2618 | |
2619 (defun regexp-code-gen-exact* (chars) | |
2620 (cond((= (length chars) 1) | |
2621 (insert EXACT1 (car chars))) | |
2622 ((= (length chars) 2) | |
2623 (insert EXACT2 (car chars) (nth 1 chars))) | |
2624 ((= (length chars) 3) | |
2625 (insert EXACT3 (car chars) (nth 1 chars) (nth 2 chars))) | |
2626 (t | |
2627 (insert EXACTN (length chars)) | |
2628 (let ((list chars)) | |
2629 (while list | |
2630 (insert (car list)) | |
2631 (setq list (cdr list))))))) | |
2632 | |
2633 ;;; | |
2634 ;;; regexp-code-dump | |
2635 ;;; $B@55,I=8=$N%3!<%I$rI=<($9$k!%(B | |
2636 ;;; | |
2637 | |
2638 (defvar *regexp-code-dump* nil) | |
2639 (defvar *regexp-code-index* nil) | |
2640 | |
2641 (defun regexp-code-dump (*regexp-code-dump*) | |
2642 (terpri) | |
2643 (let ((*regexp-code-index* 0) | |
2644 (max (length *regexp-code-dump*))) | |
2645 (while (< *regexp-code-index* max) | |
2646 (princ (format "%4d:" *regexp-code-index*)) | |
2647 (let((op (aref *regexp-code-dump* *regexp-code-index*))) | |
2648 (cond((= op UNUSED) (regexp-code-dump-0 "unused")) | |
2649 ((= op EXACTN) | |
2650 (princ (format "exactn(%d) " (aref *regexp-code-dump* (1+ *regexp-code-index*)))) | |
2651 (let ((j (+ *regexp-code-index* 2)) | |
2652 (max (+ *regexp-code-index* 2 (aref *regexp-code-dump* (1+ *regexp-code-index*))))) | |
2653 (while (< j max) | |
2654 (princ (format "%c" (aref *regexp-code-dump* j))) | |
2655 (TREX-inc j)) | |
2656 (setq *regexp-code-index* j)) | |
2657 (terpri) | |
2658 ) | |
2659 ((= op BEGLINE) (regexp-code-dump-0 "begline")) | |
2660 ((= op ENDLINE) (regexp-code-dump-0 "endline")) | |
2661 ((= op JUMP) (regexp-code-dump-jump "jump")) | |
2662 ((and (= regexp-version 19) | |
2663 (= op JUMP_PAST_ALT)) | |
2664 (regexp-code-dump-jump "jump_past_alt")) | |
2665 ((= op ON_FAILURE_JUMP ) (regexp-code-dump-jump "on_failure_jump")) | |
2666 ((and (= regexp-version 19) | |
2667 (= op ON_FAILURE_KEEP_STRING_JUMP)) | |
2668 (regexp-code-dump-jump "on_failure_keep_string_jump")) | |
2669 ((and (= regexp-version 18) | |
2670 (= op FINALIZE_JUMP)) | |
2671 (regexp-code-dump-jump "finalize_jump")) | |
2672 ((and (= regexp-version 18) | |
2673 (= op MAYBE_FINALIZE_JUMP)) | |
2674 (regexp-code-dump-jump "maybe_finalize_jump")) | |
2675 ((and (= regexp-version 19) | |
2676 (= op POP_FAILURE_JUMP)) | |
2677 (regexp-code-dump-jump "pop_failure_jump")) | |
2678 ((and (= regexp-version 19) | |
2679 (= op MAYBE_POP_JUMP)) | |
2680 (regexp-code-dump-jump "maybe_pop_jump")) | |
2681 ((= op DUMMY_FAILURE_JUMP) (regexp-code-dump-jump "dummy_failure_jump")) | |
2682 ((and (= regexp-version 19) | |
2683 (= op PUSH_DUMMY_FAILURE)) | |
2684 (regexp-code-dump-0 "push_dummy_failure")) | |
2685 ((and (= regexp-version 19) | |
2686 (= op SUCCEED_N)) | |
2687 (regexp-code-dump-jump-2 "succeed_n")) | |
2688 ((and (= regexp-version 19) | |
2689 (= op JUMP_N)) | |
2690 (regexp-code-dump-jump-2 "jump_n")) | |
2691 ((and (= regexp-version 19) | |
2692 (= op SET_NUMBER_AT)) | |
2693 (regexp-code-dump-jump-2 "SET_NUMBER_AT")) | |
2694 ((= op ANYCHAR) (regexp-code-dump-0 "anychar")) | |
2695 ((= op CHARSET) (regexp-code-dump-charset "charset")) | |
2696 ((= op CHARSET_NOT) (regexp-code-dump-charset "charset_not")) | |
2697 ((= op START_MEMORY) | |
2698 (if (= regexp-version 19) | |
2699 (regexp-code-dump-2 "start_memory") | |
2700 (regexp-code-dump-1 "start_memory"))) | |
2701 ((= op STOP_MEMORY) | |
2702 (if (= regexp-version 19) | |
2703 (regexp-code-dump-2 "stop_memory") | |
2704 (regexp-code-dump-1 "stop_memory"))) | |
2705 ((= op DUPLICATE) (regexp-code-dump-1 "duplicate")) | |
2706 ((= op BEFORE_DOT) (regexp-code-dump-0 "before_dot")) | |
2707 ((= op AT_DOT) (regexp-code-dump-0 "at_dot")) | |
2708 ((= op AFTER_DOT) (regexp-code-dump-0 "after_dot")) | |
2709 ((= op BEGBUF) (regexp-code-dump-0 "begbuf")) | |
2710 ((= op ENDBUF) (regexp-code-dump-0 "endbuf")) | |
2711 ((= op WORDCHAR) (regexp-code-dump-0 "wordchar")) | |
2712 ((= op NOTWORDCHAR) (regexp-code-dump-0 "notwordchar")) | |
2713 ((= op WORDBEG) (regexp-code-dump-0 "wordbeg")) | |
2714 ((= op WORDEND) (regexp-code-dump-0 "wordend")) | |
2715 ((= op WORDBOUND) (regexp-code-dump-0 "wordbound")) | |
2716 ((= op NOTWORDBOUND) (regexp-code-dump-0 "notwordbound")) | |
2717 ((= op SYNTAXSPEC) (regexp-code-dump-syntax "syntaxspec")) | |
2718 ((= op NOTSYNTAXSPEC) (regexp-code-dump-syntax "notsyntaxspec")) | |
2719 ((= op EXACT1) (regexp-code-dump-1ch "EXACT1")) | |
2720 ((= op EXACT2) | |
2721 (princ (format "EXACT2 %c%c\n" (aref *regexp-code-dump* (1+ *regexp-code-index*)) | |
2722 (aref *regexp-code-dump* (+ *regexp-code-index* 2)))) | |
2723 (TREX-inc *regexp-code-index* 3)) | |
2724 ((= op EXACT3) | |
2725 (princ (format "EXACT3 %c%c%c\n" | |
2726 (aref *regexp-code-dump* (1+ *regexp-code-index*)) | |
2727 (aref *regexp-code-dump* (+ *regexp-code-index* 2)) | |
2728 (aref *regexp-code-dump* (+ *regexp-code-index* 3)))) | |
2729 (TREX-inc *regexp-code-index* 4)) | |
2730 ((= op CHARSET_M) (regexp-code-dump-charset-m "CHARSET_M")) | |
2731 ((= op CHARSET_M_NOT) (regexp-code-dump-charset-m "CHARSET_M_NOT")) | |
2732 ((= op CASEN) | |
2733 (princ (format "CASEN %d\n" (aref *regexp-code-dump* (1+ *regexp-code-index*)))) | |
2734 (let ((j (+ *regexp-code-index* 2)) | |
2735 (max (+ *regexp-code-index* 2 (* 2 (aref *regexp-code-dump* (1+ *regexp-code-index*)))))) | |
2736 (while (< j max) | |
2737 (princ (format "[%d]::%d\n" (1+ (/ (- j (+ *regexp-code-index* 2)) 2)) | |
2738 (regexp-get-absolute-address | |
2739 (+ j 2) (aref *regexp-code-dump* j) | |
2740 (aref *regexp-code-dump* (1+ j))))) | |
2741 (TREX-inc j 2)) | |
2742 (let ((ch (aref *regexp-code-dump* j)) (chmax (aref *regexp-code-dump* (1+ j)))) | |
2743 (princ (format "%c::%c\n" ch chmax)) | |
2744 (TREX-inc j 2) | |
2745 (while (<= ch chmax) | |
2746 (princ (format "%c=>[%d]\n" ch (aref *regexp-code-dump* j))) | |
2747 (TREX-inc j) | |
2748 (TREX-inc ch))) | |
2749 (setq *regexp-code-index* j))) | |
2750 ((= op ON_FAILURE_SUCCESS) (regexp-code-dump-0 "ON_FAILURE_SUCCESS")) | |
2751 ((= op SUCCESS) (regexp-code-dump-0 "SUCCESS")) | |
2752 ((= op POP) (regexp-code-dump-0 "POP")) | |
2753 ((= op EXCEPT0) (regexp-code-dump-0 "EXCEPT0")) | |
2754 ((= op EXCEPT1) (regexp-code-dump-1ch "EXCEPT1")) | |
2755 ((= op CATEGORYSPEC) (regexp-code-dump-1ch "CATEGORYSPEC")) | |
2756 ((= op NOTCATEGORYSPEC) (regexp-code-dump-1ch "NOTCATEGORYSPEC")) | |
2757 (t (princ (format "unknown op=%d\n" op)) | |
2758 (TREX-inc *regexp-code-index*))))) | |
2759 (princ (format "%4d:\n" *regexp-code-index*))) | |
2760 nil | |
2761 ) | |
2762 | |
2763 (defun regexp-code-dump-0 (op) | |
2764 (princ op) (terpri) | |
2765 (TREX-inc *regexp-code-index*)) | |
2766 | |
2767 (defun regexp-code-dump-1 (op) | |
2768 (princ (format "%s %d\n" op (aref *regexp-code-dump* (1+ *regexp-code-index*)))) | |
2769 (TREX-inc *regexp-code-index* 2)) | |
2770 | |
2771 (defun regexp-code-dump-2 (op) | |
2772 (princ (format "%s %d %d\n" | |
2773 op | |
2774 (aref *regexp-code-dump* (1+ *regexp-code-index*)) | |
2775 (aref *regexp-code-dump* (+ *regexp-code-index* 2)) | |
2776 )) | |
2777 (TREX-inc *regexp-code-index* 3)) | |
2778 | |
2779 (defun regexp-code-dump-syntax (op) | |
2780 (princ (format "%s %c\n" op (syntax-code-spec (aref *regexp-code-dump* (1+ *regexp-code-index*))))) | |
2781 (TREX-inc *regexp-code-index* 2)) | |
2782 | |
2783 (defun regexp-code-dump-1ch (op) | |
2784 (princ (format "%s %c\n" op (aref *regexp-code-dump* (1+ *regexp-code-index*)))) | |
2785 (TREX-inc *regexp-code-index* 2)) | |
2786 | |
2787 (defun regexp-get-absolute-address (point b1 b2) | |
2788 (cond ((< b2 128) | |
2789 (+ point (+ (* 256 b2) b1))) | |
2790 (t | |
2791 (+ point (logior (logxor -1 (+ (* 255 256) 255)) (* 256 b2) b1))))) | |
2792 | |
2793 (defun regexp-code-dump-jump (op) | |
2794 (let* ((b1 (aref *regexp-code-dump* (1+ *regexp-code-index*))) | |
2795 (b2 (aref *regexp-code-dump* (+ *regexp-code-index* 2))) | |
2796 (p (regexp-get-absolute-address (+ *regexp-code-index* 3) b1 b2))) | |
2797 (princ (format "%s %d\n" op p))) | |
2798 (TREX-inc *regexp-code-index* 3)) | |
2799 | |
2800 (defun regexp-code-dump-jump-2 (op) | |
2801 (let* ((b1 (aref *regexp-code-dump* (1+ *regexp-code-index*))) | |
2802 (b2 (aref *regexp-code-dump* (+ *regexp-code-index* 2))) | |
2803 (p (regexp-get-absolute-address (+ *regexp-code-index* 3) b1 b2))) | |
2804 (princ (format "%s %d %d\n" op p | |
2805 (+ | |
2806 (* 256 (aref *regexp-code-dump* (+ *regexp-code-index* 3))) | |
2807 (aref *regexp-code-dump* (+ *regexp-code-index* 4)))))) | |
2808 (TREX-inc *regexp-code-index* 5)) | |
2809 | |
2810 (defun regexp-code-dump-charset (op) | |
2811 (let ((n (aref *regexp-code-dump* (1+ *regexp-code-index*)))) | |
2812 (princ (format "%s %d " op n)) | |
2813 (let ((j (+ *regexp-code-index* 2)) | |
2814 (max (+ *regexp-code-index* 2 (if (<= 128 n) (- n 128) n)))) | |
2815 (while (< j max) | |
2816 (princ (format "0x%2x " (aref *regexp-code-dump* j))) | |
2817 (TREX-inc j)) | |
2818 (cond((<= 128 n) | |
2819 (let* ((len (+ (* 256 (aref *regexp-code-dump* j)) | |
2820 (aref *regexp-code-dump* (1+ j)))) | |
2821 (last (+ j len))) | |
2822 (princ (format "\n range list[%d-2 bytes]" len)) | |
2823 (TREX-inc j 2) | |
2824 (while (< j last) | |
2825 (let ((ch (sref *regexp-code-dump* j))) | |
2826 (princ (format " %c" ch)) | |
2827 (TREX-inc j (char-octets ch)) | |
2828 (setq ch (sref *regexp-code-dump* j)) | |
2829 (princ (format "-%c" ch)) | |
2830 (TREX-inc j (char-octets ch)))) | |
2831 ))) | |
2832 (setq *regexp-code-index* j) | |
2833 (terpri)) | |
2834 )) | |
2835 | |
2836 (defun regexp-code-dump-charset-m (op) | |
2837 (let ((m (aref *regexp-code-dump* (1+ *regexp-code-index*))) | |
2838 (n (aref *regexp-code-dump* (+ *regexp-code-index* 2)))) | |
2839 (princ (format "%s %d %d " op m n)) | |
2840 (let ((j (+ *regexp-code-index* 3)) | |
2841 (max (+ *regexp-code-index* 3 (if (<= 128 n) (- n 128) n)))) | |
2842 (while (< j max) | |
2843 (princ (format "0x%02x " (aref *regexp-code-dump* j))) | |
2844 (TREX-inc j)) | |
2845 (cond((<= 128 n) | |
2846 (let* ((len (+ (* 256 (aref *regexp-code-dump* j)) | |
2847 (aref *regexp-code-dump* (1+ j)))) | |
2848 (last (+ j len))) | |
2849 (princ (format "\n range list[%d-2 bytes]" len)) | |
2850 (TREX-inc j 2) | |
2851 (while (< j last) | |
2852 (let ((ch (sref *regexp-code-dump* j))) | |
2853 (princ (format " %c" ch)) | |
2854 (TREX-inc j (char-octets ch)) | |
2855 (setq ch (sref *regexp-code-dump* j)) | |
2856 (princ (format "-%c" ch)) | |
2857 (TREX-inc j (char-octets ch)))) | |
2858 ))) | |
2859 (setq *regexp-code-index* j) | |
2860 (terpri) | |
2861 ))) | |
2862 | |
2863 ;;; | |
2864 ;;; Compile functions | |
2865 ;;; | |
2866 | |
2867 (defun TREX-simple-test1 () | |
2868 (regexp-word-compile | |
2869 "\\cA+\\cH*\\|\\cK+\\cH*\\|\\cC+\\cH*\\|\\cH+\\|\\sw+")) | |
2870 | |
2871 (defun TREX-test1 (pattern) | |
2872 (let* ((regexp (regexp-parse pattern)) | |
2873 (fFA (EFFA-make (FA-make regexp))) | |
2874 (bFA (EFFA-make (FA-inverse fFA))) | |
2875 (l (cdr fFA)) | |
2876 (result nil)) | |
2877 (TREX-push (cons (DFA-optimize (DFA-make fFA)) | |
2878 (DFA-optimize (DFA-make bFA))) | |
2879 result) | |
2880 (while l | |
2881 (let* ((forward (NEFA-make (EFFA-make (cons (car (car l)) (cdr fFA))))) | |
2882 (backward (NEFA-make (EFFA-make (cons (car (car l)) (cdr bFA)))))) | |
2883 (cond((and forward backward) | |
2884 (TREX-push (cons (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make forward)))) | |
2885 (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make backward))))) | |
2886 result)))) | |
2887 (setq l (cdr l))) | |
2888 (setq result (reverse result)) | |
2889 (let ((count 0)) | |
2890 (while result | |
2891 (princ (format "\nForward[%2d]:" count)) (FA-dump (car (car result))) | |
2892 (princ (format "\nBackward[%2d]:" count)) (FA-dump (cdr (car result))) | |
2893 (TREX-inc count) | |
2894 (setq result (cdr result)))))) | |
2895 | |
2896 (defun TREX-test2 (pattern) | |
2897 (let* ((regexp (regexp-parse pattern)) | |
2898 (fFA (EFFA-make (FA-make regexp))) | |
2899 (l (cdr fFA)) | |
2900 (result nil)) | |
2901 (regexp-code-dump (setq result (regexp-code-gen (DFA-optimize (DFA-make fFA))))) | |
2902 result)) | |
2903 | |
2904 ;;;###autoload | |
2905 (defun regexp-compile (pattern) | |
2906 (regexp-compile-internal pattern nil)) | |
2907 | |
2908 ;;;###autoload | |
2909 (defun regexp-word-compile (pattern) | |
2910 (regexp-compile-internal pattern t)) | |
2911 | |
2912 ;;; | |
2913 ;;; Returns a list of pair of forward-code and backward-code | |
2914 ;;; | |
2915 | |
2916 | |
2917 (defun regexp-compile-internal (pattern &optional word) | |
2918 (let* ((*regexp-word-definition* word) | |
2919 (*regexp-parse-translate* | |
2920 (if case-fold-search | |
2921 ;;; DOWNCASE or CANONICAL? | |
2922 (nth 2 (current-case-table)) | |
2923 nil)) | |
2924 (regexp (regexp-parse pattern)) | |
2925 (fFA (EFFA-make (FA-make (regexp-reform-duplication regexp)))) | |
2926 (bFA (EFFA-make (FA-make (regexp-reform-duplication (regexp-inverse regexp))))) | |
2927 (result nil)) | |
2928 (let ((ofFA (DFA-optimize (DFA-make fFA))) | |
2929 (obFA (DFA-optimize (DFA-make bFA)))) | |
2930 (TREX-push (cons (DFA-code-with-fastmap ofFA) | |
2931 (let* ((START_MEMORY STOP_MEMORY) | |
2932 (STOP_MEMORY START_MEMORY)) | |
2933 (DFA-code-with-fastmap obFA))) | |
2934 result)) | |
2935 (if word | |
2936 (let ((l (cdr fFA)) | |
2937 (bFA (EFFA-make (FA-inverse fFA)))) | |
2938 (while l | |
2939 (let* ((forward (NEFA-make (EFFA-make (cons (car (car l)) (cdr fFA))))) | |
2940 (backward (NEFA-make (EFFA-make (cons (car (car l)) (cdr bFA)))))) | |
2941 (cond((and forward backward) | |
2942 (let ((fFA (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make forward))))) | |
2943 (bFA (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make backward)))))) | |
2944 (TREX-push (cons (DFA-code-with-fastmap fFA) | |
2945 (DFA-code-with-fastmap bFA)) | |
2946 result))))) | |
2947 (setq l (cdr l))) | |
2948 (setq result (nreverse result)))) | |
2949 result)) | |
2950 | |
2951 (defun regexp-compiled-pattern-dump (pattern) | |
2952 ;;; PATTERN is a vector of [ code fastmap fastmap-syntax fastmap-categoy] | |
2953 (regexp-code-dump (aref pattern 0)) | |
2954 (print-fastmap (aref pattern 1) " fastmap[char]") | |
2955 (print-fastmap (aref pattern 2) " fastmap[synt]") | |
2956 (print-fastmap (aref pattern 3) " fastmap[cate]") | |
2957 ) | |
2958 | |
2959 (defun regexp-compile-dump (code) | |
2960 (let ((Fcode (aref (car (car code)) 0)) | |
2961 (Bcode (aref (cdr (car code)) 0)) | |
2962 (words (cdr code))) | |
2963 (princ (format "\nRegular Expression Compiler Dump:\n")) | |
2964 (princ (format "Forward Search:")) | |
2965 (regexp-compiled-pattern-dump (car (car code))) | |
2966 (princ (format "Backward Search:")) | |
2967 (if Bcode (regexp-compiled-pattern-dump (cdr (car code))) | |
2968 (princ (format "\n Use the interpreter\n"))) | |
2969 (if words | |
2970 (let ((i 1)) | |
2971 (princ (format "In word conditions:\n")) | |
2972 (while words | |
2973 (princ (format "Forward[%d]" i)) | |
2974 (regexp-compiled-pattern-dump (car (car words))) | |
2975 (princ (format "Backward[%d]" i)) | |
2976 (regexp-compiled-pattern-dump (cdr (car words))) | |
2977 (TREX-inc i) | |
2978 (setq words (cdr words))))))) | |
2979 | |
2980 (defun regexp-compile-and-dump (regexp) | |
2981 (regexp-compile-dump (regexp-compile regexp))) | |
2982 | |
2983 | |
2984 ;;;###autoload | |
2985 (defmacro define-word-regexp (name regexp) | |
2986 (` (defconst (, name) '(, (regexp-word-compile regexp))))) | |
2987 | |
2988 (put 'define-word-regexp 'byte-hunk-handler ;93.7.16 by S.Tomura | |
2989 'macroexpand) | |
2990 | |
2991 ;;; | |
2992 ;;; Coding system | |
2993 ;;; | |
2994 | |
2995 (defmacro define-coding-systems (&rest rest) | |
2996 (` (define-coding-systems* '(, rest)))) | |
2997 | |
2998 (defun define-coding-systems* (names) | |
2999 (let ((systems | |
3000 (` (:or (,@ (mapcar (function (lambda (name) (` (:seq (, (regexp-get-definition name)) | |
3001 (, name))))) | |
3002 names)))))) | |
3003 systems)) | |
3004 | |
3005 (defun oct (str) (aref str 0)) | |
3006 | |
3007 (defvar *TREX-range-from* nil) | |
3008 (defvar *TREX-range-to* nil) | |
3009 | |
3010 (defun TREX-range-make-jisjoint (regexp) | |
3011 (TREX-init *TREX-range-from* (make-vector 256 nil)) | |
3012 (TREX-init *TREX-range-to* (make-vector 256 nil)) | |
3013 (let ((i 0)) | |
3014 (while (< i 256) | |
3015 (aset *TREX-range-from* i nil) | |
3016 (aset *TREX-range-to* i nil) | |
3017 (TREX-inc i))) | |
3018 (aset *TREX-range-from* 0 t) | |
3019 (aset *TREX-range-to* 255 t) | |
3020 (TREX-range-mark regexp) | |
3021 (TREX-range-replace regexp)) | |
3022 | |
3023 (defun TREX-range-mark (regexp) | |
3024 (cond | |
3025 ((consp regexp) | |
3026 (let ((op (car regexp))) | |
3027 (cond((eq op ':mark) | |
3028 (TREX-range-mark (nth 3 regexp))) | |
3029 ((eq op ':or) | |
3030 (mapcar 'TREX-range-mark (cdr regexp))) | |
3031 ((eq op ':seq) | |
3032 (mapcar 'TREX-range-mark (cdr regexp))) | |
3033 ((eq op ':optional) | |
3034 (TREX-range-mark (nth 1 regexp))) | |
3035 ((eq op ':star) | |
3036 (TREX-range-mark (nth 1 regexp))) | |
3037 ((eq op ':plus) | |
3038 (TREX-range-mark (nth 1 regexp))) | |
3039 ((eq op ':range) | |
3040 (TREX-range-mark2 (nth 1 regexp) (nth 2 regexp)))))) | |
3041 ((stringp regexp) | |
3042 (TREX-range-mark2 regexp regexp)) | |
3043 ((numberp regexp) | |
3044 (TREX-range-mark2 regexp regexp)))) | |
3045 | |
3046 (defun TREX-range-mark2 (from to) | |
3047 (if (stringp from) (setq from (aref from 0))) | |
3048 (if (stringp to) (setq to (aref to 0))) | |
3049 (if (< 0 from) (aset *TREX-range-to* (1- from) t)) | |
3050 (if (< to 255) (aset *TREX-range-from* (1+ to) t)) | |
3051 (aset *TREX-range-from* from t) | |
3052 (aset *TREX-range-to* to t)) | |
3053 | |
3054 (defun TREX-range-replace (regexp) | |
3055 (cond | |
3056 ((consp regexp) | |
3057 (let ((op (car regexp))) | |
3058 (cond((eq op ':mark) | |
3059 (` (:mark (, (nth 1 regexp)) | |
3060 (, (nth 2 regexp)) | |
3061 (, (TREX-range-replace (nth 3 regexp)))))) | |
3062 ((eq op ':or) | |
3063 (` (:or (,@ (mapcar 'TREX-range-replace (cdr regexp)))))) | |
3064 ((eq op ':seq) | |
3065 (` (:seq (,@ (mapcar 'TREX-range-replace (cdr regexp)))))) | |
3066 ((eq op ':optional) | |
3067 (` (:optional (,(TREX-range-replace (nth 1 regexp)))))) | |
3068 ((eq op ':star) | |
3069 (` (:star (,(TREX-range-replace (nth 1 regexp)))))) | |
3070 ((eq op ':plus) | |
3071 (` (:plus (,(TREX-range-replace (nth 1 regexp)))))) | |
3072 ((eq op ':range) | |
3073 (let ((from (nth 1 regexp)) | |
3074 (to (nth 2 regexp)) | |
3075 i j | |
3076 (result nil)) | |
3077 (if (stringp from) (setq from (aref from 0))) | |
3078 (if (stringp to ) (setq to (aref to 0))) | |
3079 (setq i from | |
3080 j from) | |
3081 (while (<= i to) | |
3082 (while (not (aref *TREX-range-to* j)) | |
3083 (TREX-inc j)) | |
3084 (if (not (= i j)) (TREX-push (` (:range (, i) (, j))) result) | |
3085 (TREX-push i result)) | |
3086 (TREX-inc j) | |
3087 (setq i j)) | |
3088 (if (= (length result) 1) (car result) | |
3089 (` (:or (,@ (nreverse result)))))))))) | |
3090 ((stringp regexp) | |
3091 (if (= (length regexp) 1) | |
3092 (aref regexp 0) | |
3093 regexp)) | |
3094 ((numberp regexp) | |
3095 regexp) | |
3096 (t regexp))) | |
3097 | |
3098 (defun FA-sort (FA) | |
3099 (let ((start (car FA)) | |
3100 (alist (cdr FA))) | |
3101 (setq alist (sort alist 'TREX-lessp-car)) | |
3102 (while alist | |
3103 (setcdr (car alist) (sort (cdr (car alist)) 'TREX-lessp-car)) | |
3104 (setcdr (car alist ) (TREX-sort (cdr (car alist)) 'TREX-key-lessp 'cdr)) | |
3105 (setq alist (cdr alist))) | |
3106 FA)) | |
3107 | |
3108 ;;; | |
3109 ;;; CHARSET functions: | |
3110 ;;; | |
3111 ;;; CHARSET ::= RANGE | | |
3112 ;;; (:or RANGE+) | | |
3113 ;;; (:nor RANGE+) | |
3114 ;;; RANGE+ ::= CHAR | | |
3115 ;;; (:range CHAR CHAR) | |
3116 ;;; | |
3117 | |
3118 (defun CHARSET-rangep (charset) | |
3119 (or (numberp charset) | |
3120 (and (consp charset) (eq (car charset) ':range)))) | |
3121 | |
3122 (defun CHARSET-orp (charset) | |
3123 (and (consp charset) (eq (car charset) ':or))) | |
3124 | |
3125 (defun CHARSET-range-from (range) | |
3126 (if (numberp range) range | |
3127 (nth 1 range))) | |
3128 | |
3129 (defun CHARSET-range-to (range) | |
3130 (if (numberp range) range | |
3131 (nth 2 range))) | |
3132 | |
3133 (defun CHARSET-range-make (from to) | |
3134 (if (= from to) from | |
3135 (list ':range from to))) | |
3136 | |
3137 (defun CHARSET-membership (range charset) | |
3138 (let ((from (CHARSET-range-from range)) | |
3139 (to (CHARSET-range-to range)) | |
3140 (flag nil)) | |
3141 (while (and charset flag1) | |
3142 (if (< from (CHARSET-range-from (car charset))) | |
3143 (setq charset (cdr charset)) | |
3144 (setq flag t))) | |
3145 (and flag1 (<= to (CHARSET-range-to (car charset)))))) | |
3146 | |
3147 (defun CHARSET-not (charset) | |
3148 (cond((CHARSET-rangep charset) | |
3149 (list ':nor charset)) | |
3150 ((CHARSET-orp charset) | |
3151 (cons ':nor (cdr charset))) | |
3152 (t | |
3153 (cons ':or (cdr charset))))) | |
3154 | |
3155 (defun CHARSET-union (charset1 charset2) | |
3156 (cond((CHARSET-rangep charset1) | |
3157 (cond ((CHARSET-rangep charset2) | |
3158 (CHARSET-union-range-range charset1 charset2)) | |
3159 ((CHARSET-orp charset2) | |
3160 (CHARSET-union-range-or charset1 charset2)) | |
3161 (t | |
3162 (CHARSET-union-range-nor charset1 charset2)))) | |
3163 ((CHARSET-orp charset1) | |
3164 (cond ((CHARSET-rangep charset2) | |
3165 (CHARSET-union-range-or charset2 charset1)) | |
3166 ((CHARSET-orp charset2) | |
3167 (CHARSET-union-or-or charset1 charset2)) | |
3168 (t | |
3169 (CHARSET-union-or-nor charset1 charset2)))) | |
3170 (t ;;; (CHARSET-norp charset1) | |
3171 (cond((CHARSET-rangep charset2) | |
3172 (CHARSET-union-range-nor charset2 charset1)) | |
3173 ((CHARSET-orp charset2) | |
3174 (CHARSET-union-or-nor charset2 charset1)) | |
3175 (t | |
3176 (CHARSET-union-nor-nor charset1 charset2)))))) | |
3177 | |
3178 (defun CHARSET-union-range-range (range1 range2) | |
3179 (let ((from1 (CHARSET-range-from range1)) | |
3180 (to1 (CHARSET-range-to range1)) | |
3181 (from2 (CHARSET-range-from range2)) | |
3182 (to2 (CHARSET-range-to range2))) | |
3183 (cond((< to1 from2) | |
3184 (list ':or range1 range2)) | |
3185 (t ;;; (<= from2 (1+ to1)) | |
3186 (cond((<= to1 to2) ;;; (<= from2 to1 to2) | |
3187 (CHARSET-range-make (min from1 from2) to2)) | |
3188 ((<= from1 to2) ;;; (<= from1 to2 to1) | |
3189 (CHARSET-range-make (min from1 from2) to1)) | |
3190 (t ;;; (<= to2 from1 to1) | |
3191 (list ':or range2 range1))))))) | |
3192 | |
3193 (defun CHARSET-union-range-or (range or) | |
3194 (cons ':or (CHARSET-union-range-or* range (cdr or)))) | |
3195 | |
3196 (defun CHARSET-union-range-or* (range or-body) | |
3197 (let ((from (CHARSET-range-from range)) | |
3198 (to (CHARSET-range-to range)) | |
3199 (part1 nil)) | |
3200 (let ((flag nil)) | |
3201 (while (and or-body (null flag)) | |
3202 (let ((next (car or-body))) | |
3203 (if (< (CHARSET-range-from next) from) | |
3204 ;;; from[i] < from | |
3205 (if (< (CHARSET-range-to next) from) | |
3206 ;;; to[i] < from | |
3207 (setq part1 (cons next part1) | |
3208 or-body (cdr or-body)) | |
3209 ;;; from[i] < from <= to[i] | |
3210 (setq from (CHARSET-range-from next) | |
3211 flag t)) | |
3212 ;;; from <= from[1] | |
3213 ;;; to[i-1] < from <= from[i] | |
3214 (setq flag t))))) | |
3215 ;;; part1 < from <= from[i] | |
3216 (if (and part1 (<= (1+ (CHARSET-range-to (car part1))) from)) | |
3217 (setq from (CHARSET-range-from (car part1)) | |
3218 part1 (cdr part1))) | |
3219 ;;; part1 << from <= from[i] | |
3220 (let ((flag nil)) | |
3221 (while (and or-body (null flag)) | |
3222 (let ((next (car or-body))) | |
3223 (if (< (CHARSET-range-from next) to) | |
3224 ;;; from[j] < from | |
3225 (if (< (CHARSET-range-to next) to) | |
3226 ;;; to[j] < to | |
3227 (setq or-body (cdr or-body)) | |
3228 ;;; from[j] < to <= to[j] | |
3229 (setq to (CHARSET-range-to next) | |
3230 flag t)) | |
3231 ;;; to <= from[1] | |
3232 ;;; to[j-1] < to <= from[j] | |
3233 (setq flag t))))) | |
3234 ;;; part2 < to <= from[j] | |
3235 (if (and or-body (<= (CHARSET-range-from (car or-body)) (1+ to))) | |
3236 (setq to (CHARSET-range-to (car or-body)) | |
3237 or-body (cdr or-body))) | |
3238 ;;; part2 <= to << from[j] | |
3239 (nconc (reverse part1) | |
3240 (cons (CHARSET-range-make from to) | |
3241 or-body)))) | |
3242 | |
3243 | |
3244 (defun CHARSET-union-range-nor (range nor) | |
3245 (let ((from (CHARSET-range-from range)) | |
3246 (to (CHARSET-range-to range)) | |
3247 (nor-body (cdr nor))) | |
3248 | |
3249 )) | |
3250 | |
3251 (defun CHARSET-union-or-or (or1 or2) | |
3252 (cons ':or (CHARSET-union-or*-or* (cdr or1) (cdr or2)))) | |
3253 | |
3254 (defun CHARSET-union-or*-or* (or1-body or2-body) | |
3255 (let ((result-body or2-body)) | |
3256 (while or1-body | |
3257 (setq result-body | |
3258 (CHARSET-union-range-or* (car or1-body) result-body)) | |
3259 (setq or1-body (cdr or1-body))) | |
3260 result-body)) | |
3261 | |
3262 (defun CHARSET-union-or-nor (or nor) | |
3263 ) | |
3264 | |
3265 (defun CHARSET-union-nor-nor (nor1 nor2) | |
3266 (cons ':nor (CHARSET-intersection-or*-or* (cdr nor1) (cdr nor2)))) | |
3267 | |
3268 (defun CHARSET-intersection (charset1 charset2) | |
3269 (cond((CHARSET-rangep charset1) | |
3270 (cond ((CHARSET-rangep charset2) | |
3271 (CHARSET-intersection-range-range charset1 charset2)) | |
3272 ((CHARSET-orp charset2) | |
3273 (CHARSET-intersection-range-or charset1 charset2)) | |
3274 (t | |
3275 (CHARSET-intersection-range-nor charset1 charset2)))) | |
3276 ((CHARSET-orp charset1) | |
3277 (cond ((CHARSET-rangep charset2) | |
3278 (CHARSET-intersection-range-or charset2 charset1)) | |
3279 ((CHARSET-orp charset2) | |
3280 (CHARSET-intersection-or-or charset1 charset2)) | |
3281 (t | |
3282 (CHARSET-intersection-or-nor charset1 charset2)))) | |
3283 (t ;;; (CHARSET-norp charset1) | |
3284 (cond((CHARSET-rangep charset2) | |
3285 (CHARSET-intersection-range-nor charset2 charset1)) | |
3286 ((CHARSET-orp charset2) | |
3287 (CHARSET-intersection-or-nor charset2 charset1)) | |
3288 (t | |
3289 (CHARSET-intersection-nor-nor charset1 charset2)))))) | |
3290 | |
3291 (defun CHARSET-intersection-range-or (range or) | |
3292 (CHARSET-intersection-range-or* range (cdr or))) | |
3293 | |
3294 (defun CHARSET-intersection-range-or* (range or-body) | |
3295 (let ((from (CHARSET-range-from range)) | |
3296 (to (CHARSET-range-to range)) | |
3297 (part2 nil)) | |
3298 (let ((flag nil)) | |
3299 (while (and or-body (null flag)) | |
3300 (let ((next (car or-body))) | |
3301 (if (< (CHARSET-range-from next) from) | |
3302 ;;; from[i] < from | |
3303 (if (< (CHARSET-range-to next) from) | |
3304 ;;; to[i] < from | |
3305 (setq or-body (cdr or-body)) | |
3306 ;;; from[i] < from <= to[i] | |
3307 (setq flag t)) | |
3308 ;;; from <= from[1] | |
3309 ;;; to[i-1] < from <= from[i] | |
3310 (setq flag t))))) | |
3311 ;;; from[i] < from <= to[i] | |
3312 ;;; from <= from[1] | |
3313 ;;; to[i-1] < from <= from[i] | |
3314 (let ((flag nil)) | |
3315 (while (and or-body (null flag)) | |
3316 (let ((next (car or-body))) | |
3317 (if (<= (CHARSET-range-from next) to) | |
3318 ;;; from[j] <= to | |
3319 (if (<= (CHARSET-range-to next) to) | |
3320 ;;; to[j] <= to | |
3321 (setq part2 (cons next part2) | |
3322 or-body (cdr or-body)) | |
3323 ;;; from[j] <= to < to[j] | |
3324 (setq part2 (cons next part2) | |
3325 or-body (cdr or-body) | |
3326 flag t) | |
3327 ;;; to < from[1] | |
3328 ;;; to[j-1] <= to < from[j] | |
3329 (setq flag t))))) | |
3330 ;;; from[j] <= to < to[j] | |
3331 ;;; to < from[1] | |
3332 ;;; to[j-1] <= to < from[j] | |
3333 (cond ((null part2) nil) | |
3334 ((= (length part2) 1) | |
3335 (list (CHARSET-range-make (max from (CHARSET-range-from (car part2))) | |
3336 (min to (CHARSET-range-to (car part2)))))) | |
3337 (t | |
3338 (setcar part2 (CHARSET-range-make (CHARSET-range-from (car part2)) | |
3339 (min to (CHARSET-range-to (car part2))))) | |
3340 (setq part2 (nreverse part2)) | |
3341 (setcar part2 (CHARSET-range-make (max from (CHARSET-range-from (car part2))) | |
3342 (CHARSET-range-to (car part2)))) | |
3343 part2))))) | |
3344 | |
3345 (defun CHARSET-intersection-range-nor (range nor) | |
3346 (CHARSET-intersection-range-nor* range (cdr nor))) | |
3347 | |
3348 (defun CHARSET-intersecion-range-nor* (range nor-body) | |
3349 (let ((from (CHARSET-range-from range)) | |
3350 (to (CHARSET-range-to range))) | |
3351 )) | |
3352 | |
3353 ;;; (and (or a b) c) == (or (and a c) (and b c)) | |
3354 | |
3355 (defun CHARSET-intersection-or-or (or1 or2) | |
3356 (let ((result nil) | |
3357 (or1-body (cdr or1)) | |
3358 (or2-body (cdr or2))) | |
3359 (while or1-body | |
3360 (setq result (CHARSET-union-or*-or* | |
3361 (CHARSET-intersection-range-or* (car or1-body) or2-body) | |
3362 result)) | |
3363 (setq or1-body (cdr or1-body))) | |
3364 (if (= (length result) 1) (car result) | |
3365 (cons ':or result)))) | |
3366 | |
3367 (defun CHARSET-intersection-or-nor (or nor) | |
3368 ) | |
3369 | |
3370 ;;; (and (not or1) (not or2)) == (not (or or1 or2)) | |
3371 | |
3372 (defun CHARSET-intersection-nor-nor (nor1 nor2) | |
3373 (cons ':nor (CHARSET-union-or*-or* (cdr nor1) (cdr nor2)))) | |
3374 | |
3375 (defun FA-compaction (FA) | |
3376 (let ((start (car FA)) | |
3377 (alist (cdr FA))) | |
3378 (setq alist (TREX-sort alist 'TREX-key-lessp 'car)) | |
3379 (while alist | |
3380 (let ((table (cdr (car alist))) | |
3381 (newtable nil) | |
3382 (keys nil) (next nil)) | |
3383 (setq table (TREX-sort table '< 'car)) | |
3384 (while table | |
3385 (setq next (cdr (car table))) | |
3386 (TREX-push (car (car table)) keys) | |
3387 (setq table (cdr table)) | |
3388 (while (and table (eq next (cdr (car table)))) | |
3389 (TREX-push (car (car table)) keys) | |
3390 (setq table (cdr table))) | |
3391 (setq keys (reverse (sort keys 'TREX-key-lessp))) | |
3392 (let ((newkeys nil)) | |
3393 (setq newkeys (car keys) | |
3394 keys (cdr keys)) | |
3395 (while keys | |
3396 (cond((numberp (car keys)) | |
3397 (cond((numberp (car newkeys)) | |
3398 (if (= (1+ (car keys)) (car newkeys)) | |
3399 (setcar newkeys (list ':range (car keys) (car newkeys))) | |
3400 (TREX-push (car keys) newkeys))) | |
3401 ((and (consp (car newkeys)) (eq (car (car newkeys)) ':range))))))))))))) | |
3402 | |
3403 | |
3404 | |
3405 (defun FA-dump2 (table) | |
3406 (let ((start (car table)) | |
3407 (l (cdr table))) | |
3408 (princ (format "\nstart = %d\n" start)) | |
3409 (while l | |
3410 (princ (format "%3d: " (car (car l)))) | |
3411 (let ((alist (cdr (car l)))) | |
3412 (cond ((numberp (car (car alist))) | |
3413 (princ (format "\\%03o(%c) -> %s\n" (car (car alist))(car (car alist)) (cdr (car alist))))) | |
3414 ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC))) | |
3415 (princ (format "(%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist))))) | |
3416 ((and (consp (car (car alist))) (eq (car (car (car alist))) ':range)) | |
3417 (princ (format "(:range \\%03o \\%03o) -> %s\n" | |
3418 (nth 1 (car (car alist))) (nth 2 (car (car alist))) | |
3419 (cdr (car alist))))) | |
3420 (t | |
3421 (princ (format "%s -> %s\n" (car (car alist)) (cdr (car alist)))))) | |
3422 (setq alist (cdr alist)) | |
3423 (while alist | |
3424 (cond ((numberp (car (car alist))) | |
3425 (princ (format " \\%03o(%c) -> %s\n" (car (car alist))(car (car alist)) (cdr (car alist))))) | |
3426 ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC))) | |
3427 (princ (format " (%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist))))) | |
3428 ((and (consp (car (car alist))) (eq (car (car (car alist))) ':range)) | |
3429 (princ (format " (:range \\%03o \\%03o) -> %s\n" | |
3430 (nth 1 (car (car alist))) (nth 2 (car (car alist))) | |
3431 (cdr (car alist))))) | |
3432 (t | |
3433 (princ (format " %s -> %s\n" (car (car alist)) (cdr (car alist)))))) | |
3434 (setq alist (cdr alist)))) | |
3435 (setq l (cdr l))))) | |
3436 | |
3437 ;;;function re-compile REGEXP | |
3438 ;;;Compile REGEXP by GNU Emacs original regexp compiler, | |
3439 ;;;and return information of the compiled code by a vector of length 11: | |
3440 ;;; [ COMPILED-PATTERN (string) | |
3441 ;;; RE-NSUB REGS-ALLOCATED CAN-BE-NULL NEWLINE-ANCHOR (integers) | |
3442 ;;; NO-SUB NOT-BOL NOT-EOL SYNTAX (integers) | |
3443 ;;; FASTMAP TRANSLATE (string) ]. | |
3444 ;;; | |
3445 | |
3446 (defun print-compiled-pattern (compiled-code) | |
3447 (let ((compiled-pattern (aref compiled-code 0)) | |
3448 (re-nsub (aref compiled-code 1)) | |
3449 (regs-allocated (aref compiled-code 2)) | |
3450 (can-be-null (aref compiled-code 3)) | |
3451 (newline-anchor (aref compiled-code 4)) | |
3452 (no-sub (aref compiled-code 5)) | |
3453 (not-bol (aref compiled-code 6)) | |
3454 (not-eol (aref compiled-code 7)) | |
3455 (syntax (aref compiled-code 8)) | |
3456 (fastmap (aref compiled-code 9)) | |
3457 (translate (aref compiled-code 10))) | |
3458 (regexp-code-dump compiled-pattern) | |
3459 ;;; fastmap | |
3460 (if fastmap (print-fastmap fastmap "fastmap")) | |
3461 (princ (format "re_nsub: %d\n" re-nsub)) | |
3462 (princ (format "regs-alloc: %d\n" regs-allocated)) | |
3463 (princ (format "can-be-null: %d\n" can-be-null)) | |
3464 (princ (format "newline-anchor: %d\n" newline-anchor)) | |
3465 (princ (format "no-sub: %d\n" no-sub)) | |
3466 (princ (format "not-bol: %d\n" not-bol)) | |
3467 (princ (format "not-eol: %d\n" not-eol)) | |
3468 (princ (format "syntax: %d\n" syntax)) | |
3469 (if translate (print-translate translate)) | |
3470 ;;; translate | |
3471 nil | |
3472 )) | |
3473 | |
3474 (defun print-fastmap (fastmap name) | |
3475 (if fastmap | |
3476 (progn | |
3477 (princ (format "%s:[" name)) | |
3478 (let ((max (length fastmap)) | |
3479 (i 0)) | |
3480 (while (< i max) | |
3481 (if (not (= (aref fastmap i) 0)) | |
3482 (princ (format "%c" i))) | |
3483 (setq i (1+ i)))) | |
3484 (princ "]\n")))) | |
3485 | |
3486 (defun print-translate (trans) | |
3487 (if trans | |
3488 (progn | |
3489 (princ "translate:\n") | |
3490 (let ((max (length trans)) | |
3491 (i 0)) | |
3492 (while (< i max) | |
3493 (if (not (= (aref trans i) i)) | |
3494 (princ (format " %c --> %c" i (aref trans i)))) | |
3495 (setq i (1+ i)))) | |
3496 (princ "\n")))) | |
3497 | |
3498 (defun re-compile-and-dump (regexp) | |
3499 (print-compiled-pattern (re-compile regexp))) | |
3500 | |
3501 | |
3502 | |
3503 | |
3504 | |
3505 |