70
|
1 ;; TREX: Tools for Regluar EXpressions
|
|
2 ;;
|
|
3 ;; Regular Expression Compiler
|
|
4 ;;
|
|
5 ;; Coded by S.Tomura <tomura@etl.go.jp>
|
|
6
|
|
7 ;; Copyright (C) 1992 Free Software Foundation, Inc.
|
|
8
|
|
9 ;; This file is part of XEmacs.
|
|
10 ;; This file contains Japanese characters
|
|
11
|
|
12 ;; XEmacs is free software; you can redistribute it and/or modify it
|
|
13 ;; under the terms of the GNU General Public License as published by
|
|
14 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
15 ;; any later version.
|
|
16
|
|
17 ;; XEmacs is distributed in the hope that it will be useful, but
|
|
18 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
20 ;; General Public License for more details.
|
|
21
|
|
22 ;; You should have received a copy of the GNU General Public License
|
|
23 ;; along with XEmacs; see the file COPYING. If not, write to the
|
|
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
25 ;; Boston, MA 02111-1307, USA.
|
|
26
|
|
27 (defvar TREX-version "0.41")
|
|
28 ;;; Last modified date: Thu Jun 15 13:07:39 1995
|
|
29
|
|
30 ;;; 95.6.15 modified by S.Tomura <tomura@etl.go.jp>
|
|
31 ;;;
|
|
32 ;;; $BFbB"$N(Bre_compile_pattern $B$HF1MM$K(B case-fold-search $B$K$h$C$F!"(B
|
|
33 ;;; translate $B$9$k$h$&$KJQ99$7$?!#(B
|
|
34 ;;;
|
|
35 ;;; 95.6.14 modified by S.Tomura <tomura@etl.go.jp>
|
|
36 ;;; print-translate $B$rDI2C!#(B<0.38>
|
|
37 ;;; print-fastmap $B$rDI2C!#(B
|
|
38 ;;;
|
|
39 ;;; start_memory, end_memory $B$NBh(B2$B0z?t$r@8@.$9$k$?$a$K!"(B:mark $B$NFbIt9=(B
|
|
40 ;;; $BB$$rJQ99$7$?!#(B
|
|
41 ;;;
|
|
42 ;;; re-compile-and-dump, regexp-compile-and-dump $B$rDI2C!#(B
|
|
43 ;;;
|
|
44 ;;; 95.6.13
|
|
45 ;;; regexp19.c $B$KBP1~$7$F(B start_memory, end_memory $B$N(B dump $B%k!<%A%s$r=$@5(B
|
|
46 ;;;
|
|
47 ;;; $B$9$Y$-$3$H!'(B
|
|
48 ;;;
|
|
49 ;;; (1) \(\)*
|
|
50 ;;; (2) $B;^$N=gHV(B
|
|
51 ;;; (3) $B0UL#$N$J$$%0%k!<%W;2>H$N8!=P(B "\(a\\)\\2"$B$J$I(B
|
|
52
|
|
53 (defmacro TREX-inc (symbol &optional delta)
|
|
54 (list 'setq symbol (if delta (list '+ symbol delta)
|
|
55 (list '1+ symbol))))
|
|
56
|
|
57 (defmacro TREX-dec (symbol &optional delta)
|
|
58 (list 'setq symbol (if delta (list '- symbol delta)
|
|
59 (list '1- symbol))))
|
|
60
|
|
61 (defmacro num (sym)
|
|
62 (list 'num* (list 'quote sym)))
|
|
63
|
|
64 (defun num* (sym)
|
|
65 (TREX-read-hexa (substring (symbol-name sym) 2)))
|
|
66
|
|
67 (defun TREX-read-hexa (str)
|
|
68 (let ((result 0) (i 0) (max (length str)))
|
|
69 (while (< i max)
|
|
70 (let ((ch (aref str i)))
|
|
71 (cond((and (<= ?0 ch) (<= ch ?9))
|
|
72 (setq result (+ (* result 16) (- ch ?0))))
|
|
73 ((and (<= ?a ch) (<= ch ?f))
|
|
74 (setq result (+ (* result 16) (+ (- ch ?a) 10))))
|
|
75 ((and (<= ?A ch) (<= ch ?F))
|
|
76 (setq result (+ (* result 16) (+ (- ch ?A) 10)))))
|
|
77 (TREX-inc i)))
|
|
78 result))
|
|
79
|
|
80 ;;; 1 bytes : 0x00 <= C11 <= 0x7F
|
|
81 ;;; n bytes : 0x80 == LCCMP
|
|
82 ;;; 2 bytes 0xA0 <= LC <= 0xAF
|
|
83 ;;; 3 bytes 0xB0 <= LC <= 0xBB
|
|
84 ;;; 4 bytes 0xBC <= LC <= 0xBE
|
|
85 ;;; 2 bytes : 0x81 <= LC <= 0x8F
|
|
86 ;;; 3 bytes : 0x90 <= LC <= 0x9B
|
|
87 ;;; 4 bytes : 0x9C <= LC <= 0x9E
|
|
88
|
|
89
|
|
90 (defun TREX-char-octets (str index)
|
|
91 (let ((max (length str)))
|
|
92 (if (or (< index 0) (<= max index)) 0
|
|
93 (let ((ch (aref str index))
|
|
94 (bytes))
|
|
95 (setq bytes
|
|
96 (cond ((<= ch (num 0x7f)) 1)
|
|
97 ((= ch (num 0x80))
|
|
98 (let ((max (length str))
|
|
99 (i index))
|
|
100 (while (and (< i max)
|
|
101 (<= (num 0xa0) (aref str i))
|
|
102 (<= (aref str i) (num 0xbe)))
|
|
103 (setq ch (aref str i))
|
|
104 (cond ((<= ch (num 0xaf)) (TREX-inc i 2))
|
|
105 ((<= ch (num 0xbb)) (TREX-inc i 3))
|
|
106 ((<= ch (num 0xbe)) (TREX-inc i 4))))
|
|
107 (- i index)))
|
|
108 ((<= ch (num 0x8f)) 2)
|
|
109 ((<= ch (num 0x9b)) 3)
|
|
110 ((<= ch (num 0x9e)) 4)
|
|
111 (t 1)))
|
|
112 (if (<= (+ index bytes) max) bytes 1)))))
|
|
113
|
|
114 (defun TREX-comp-charp (str index)
|
|
115 (= (aref str index) (num 0x80)))
|
|
116
|
|
117 ;;; 0x00 <= C11 <= 0x7F : 1 bytes
|
|
118 ;;; Type 1-1 C11
|
|
119 ;;; 0x80 == LCCMP : n bytes
|
|
120 ;;; Type N LCCMP LCN1 C11 ... LCN2 C21 ... LCNn Cn1 ...
|
|
121 ;;; 0xA0 <= LCN* <= 0xBE
|
|
122 ;;; LCN* = LC + 0x20
|
|
123 ;;; LCN* = 0xA0 (ASCII)
|
|
124 ;;; 0x81 <= LC1 <= 0x8F : 2 bytes
|
|
125 ;;; Type 1-2 LC1 C11 :
|
|
126 ;;; 0xA0 <= C11 <= 0xFF
|
|
127 ;;; 0x90 <= LC2 <= 0x99 : 3 bytes
|
|
128 ;;; Type 2-3 LC2 C21 C22
|
|
129 ;;; 0xA0 <= C21 <= 0xFF
|
|
130 ;;; 0xA0 <= C22 <= 0xFF
|
|
131 ;;; 0x9A == LCPRV1 : 3 bytes
|
|
132 ;;; Type 1-3 LCPRV1 LC12 C11
|
|
133 ;;; 0xA0 <= LC12 <= 0xB7
|
|
134 ;;; 0xA0 <= C11 <= 0xFF
|
|
135 ;;; 0x9B == LCPRV1 : 3 bytes
|
|
136 ;;; Type 1-3 LCPRV1 LC12 C11
|
|
137 ;;; 0xB8 <= LC12 <= 0xBF
|
|
138 ;;; 0xA0 <= C11 <= 0xFF
|
|
139 ;;; 0x9C == LCPRV2 : 4 bytes
|
|
140 ;;; Type 2-4 LCPRV2 LC22 C21 C22
|
|
141 ;;; 0xC0 <= LC22 <= 0xC7
|
|
142 ;;; 0xA0 <= C21 <= 0xFF
|
|
143 ;;; 0xA0 <= C22 <= 0xFF
|
|
144 ;;; 0x9D == LCPRV2 : 4 bytes
|
|
145 ;;; Type 2-4 LCPRV2 LC22 C21 C22
|
|
146 ;;; 0xC8 <= LC22 <= 0xDF
|
|
147 ;;; 0xA0 <= C21 <= 0xFF
|
|
148 ;;; 0xA0 <= C22 <= 0xFF
|
|
149 ;;; 0x9E == LCPRV3 : 4 bytes
|
|
150 ;;; Type 3-4 LCPRV3 C31 C32 C33
|
|
151 ;;; 0xA0 <= C31 <= 0xBF
|
|
152 ;;; 0xA0 <= C32 <= 0xFF
|
|
153 ;;; 0xA0 <= C33 <= 0xFF
|
|
154 ;;; char = [0x00-0x7f]\|
|
|
155 ;;; 0x80
|
|
156 ;;; \(0xa0[0xa0-0xff]\|
|
|
157 ;;; [0xa1-0xaf][0xa0-0xff]\|
|
|
158 ;;; [0xb0-0xb9][0xa0-0xff][0xa0-0xff]\|
|
|
159 ;;; 0xba[0xa0-0xb7][0xa0-0xff]\|
|
|
160 ;;; 0xbb[0xb8-0xbf][0xa0-0xff]\|
|
|
161 ;;; 0xbc[0xc0-0xc7][0xa0-0xff][0xa0-0xff]\|
|
|
162 ;;; 0xbd[0xc8-0xdf][0xa0-0xff][0xa0-0xff]\|
|
|
163 ;;; 0xbe[0xa0-0xbf][0xa0-0xff][0xa0-0xff]
|
|
164 ;;; \)*\|
|
|
165 ;;; [0x81-0x8f][0xa0-0xff]\|
|
|
166 ;;; [0x90-0x99][0xa0-0xff][0xa0-0xff]\|
|
|
167 ;;; 0x9a[0xa0-0xb7][0xa0-0xff]\|
|
|
168 ;;; 0x9b[0xb8-0xbf][0xa0-0xff]\|
|
|
169 ;;; 0x9c[0xc0-0xc7][0xa0-0xff][0xa0-0xff]\|
|
|
170 ;;; 0x9d[0xc8-0xdf][0xa0-0xff][0xa0-0xff]\|
|
|
171 ;;; 0x9e[0xa0-0xbf][0xa0-0xff][0xa0-0xff]
|
|
172
|
|
173 (defun regexp-make-or (&rest body)
|
|
174 (cons ':or body))
|
|
175
|
|
176 (defun regexp-make-seq (&rest body)
|
|
177 (cons ':seq body))
|
|
178
|
|
179 (defun regexp-make-star (regexp)
|
|
180 (list ':star regexp))
|
|
181
|
|
182 (defun regexp-make-range (from to)
|
|
183 (list 'CHARSET (list ':range from to)))
|
|
184
|
|
185
|
|
186 (defvar regexp-allchar-regexp
|
|
187 (regexp-make-or
|
|
188 (regexp-make-range 0 (num 0x7f))
|
|
189 (regexp-make-seq
|
|
190 (num 0x80)
|
|
191 (regexp-make-star
|
|
192 (regexp-make-or
|
|
193 (regexp-make-seq
|
|
194 (num 0xa0)
|
|
195 (regexp-make-range (num 0xa0) (num 0xff)))
|
|
196 (regexp-make-seq
|
|
197 (regexp-make-range (num 0xa1) (num 0xaf))
|
|
198 (regexp-make-range (num 0xa0) (num 0xff)))
|
|
199 (regexp-make-seq
|
|
200 (regexp-make-range (num 0xb0) (num 0xb9))
|
|
201 (regexp-make-range (num 0xa0) (num 0xff))
|
|
202 (regexp-make-range (num 0xa0) (num 0xff)))
|
|
203 (regexp-make-seq
|
|
204 (num 0xba)
|
|
205 (regexp-make-range (num 0xa0) (num 0xb7))
|
|
206 (regexp-make-range (num 0xa0) (num 0xff)))
|
|
207 (regexp-make-seq
|
|
208 (num 0xbb)
|
|
209 (regexp-make-range (num 0xb8) (num 0xbf))
|
|
210 (regexp-make-range (num 0xa0) (num 0xff)))
|
|
211 (regexp-make-seq
|
|
212 (num 0xbc)
|
|
213 (regexp-make-range (num 0xc0) (num 0xc7))
|
|
214 (regexp-make-range (num 0xa0) (num 0xff))
|
|
215 (regexp-make-range (num 0xa0) (num 0xff)))
|
|
216 (regexp-make-seq
|
|
217 (num 0xbd)
|
|
218 (regexp-make-range (num 0xc8) (num 0xdf))
|
|
219 (regexp-make-range (num 0xa0) (num 0xff))
|
|
220 (regexp-make-range (num 0xa0) (num 0xff)))
|
|
221 (regexp-make-seq
|
|
222 (num 0xbe)
|
|
223 (regexp-make-range (num 0xa0) (num 0xbf))
|
|
224 (regexp-make-range (num 0xa0) (num 0xff))
|
|
225 (regexp-make-range (num 0xa0) (num 0xff))))))
|
|
226 (regexp-make-seq
|
|
227 (regexp-make-range (num 0x81) (num 0x8f))
|
|
228 (regexp-make-range (num 0xa0) (num 0xff)))
|
|
229 (regexp-make-seq
|
|
230 (regexp-make-range (num 0x90) (num 0x99))
|
|
231 (regexp-make-range (num 0xa0) (num 0xff))
|
|
232 (regexp-make-range (num 0xa0) (num 0xff)))
|
|
233 (regexp-make-seq
|
|
234 (num 0x9a)
|
|
235 (regexp-make-range (num 0xa0) (num 0xb7))
|
|
236 (regexp-make-range (num 0xa0) (num 0xff)))
|
|
237 (regexp-make-seq
|
|
238 (num 0x9b)
|
|
239 (regexp-make-range (num 0xb8) (num 0xbf))
|
|
240 (regexp-make-range (num 0xa0) (num 0xff)))
|
|
241 (regexp-make-seq
|
|
242 (num 0x9c)
|
|
243 (regexp-make-range (num 0xc0) (num 0xc7))
|
|
244 (regexp-make-range (num 0xa0) (num 0xff))
|
|
245 (regexp-make-range (num 0xa0) (num 0xff)))
|
|
246 (regexp-make-seq
|
|
247 (num 0x9d)
|
|
248 (regexp-make-range (num 0xc8) (num 0xdf))
|
|
249 (regexp-make-range (num 0xa0) (num 0xff))
|
|
250 (regexp-make-range (num 0xa0) (num 0xff)))
|
|
251 (regexp-make-seq
|
|
252 (num 0x9e)
|
|
253 (regexp-make-range (num 0xa0) (num 0xbf))
|
|
254 (regexp-make-range (num 0xa0) (num 0xff))
|
|
255 (regexp-make-range (num 0xa0) (num 0xff)))))
|
|
256
|
|
257 ;;;;
|
|
258 ;;;;
|
|
259 ;;;;
|
|
260
|
|
261 (defun TREX-string-reverse (str)
|
|
262 (if (<= (length str) 1) str
|
|
263 (let ((result (make-string (length str) 0))
|
|
264 (i 0)
|
|
265 (j (1- (length str))))
|
|
266 (while (<= 0 j)
|
|
267 (aset result i (aref str j))
|
|
268 (TREX-inc i)
|
|
269 (TREX-dec j))
|
|
270 result)))
|
|
271
|
|
272 (defun TREX-string-forward-anychar (str start)
|
|
273 (and (stringp str) (numberp start)
|
|
274 (let ((max (length str)))
|
|
275 (and (<= 0 start)
|
|
276 (< start max)
|
|
277 (+ start (TREX-char-octets str start))))))
|
|
278
|
|
279 (defmacro TREX-init (symbol value)
|
|
280 (` (if (null (, symbol))
|
|
281 (setq (, symbol) (, value)))))
|
|
282
|
|
283 (defmacro TREX-push (val symbol)
|
|
284 (list 'setq symbol (list 'cons val symbol)))
|
|
285
|
|
286 (defun TREX-member (elm list pred)
|
|
287 (while (and list (not (funcall pred elm (car list))))
|
|
288 (setq list (cdr list)))
|
|
289 list)
|
|
290
|
|
291 (defun TREX-memequal (elm list)
|
|
292 (while (and list (not (equal elm (car list))))
|
|
293 (setq list (cdr list)))
|
|
294 list)
|
|
295
|
|
296 (defun TREX-find (elm list)
|
|
297 (let ((pos 0))
|
|
298 (while (and list (not (equal elm (car list))))
|
|
299 (setq list (cdr list))
|
|
300 (TREX-inc pos))
|
|
301 (if list pos
|
|
302 nil)))
|
|
303
|
|
304 (defun TREX-find-if (pred list)
|
|
305 (let ((pos 0))
|
|
306 (while (and list (not (funcall pred (car list))))
|
|
307 (TREX-inc pos)
|
|
308 (setq list (cdr list)))
|
|
309 (if list pos
|
|
310 nil)))
|
|
311
|
|
312 (defun TREX-firstn (list n)
|
|
313 (if (or (<= n 0) (null list)) nil
|
|
314 (cons (car list) (TREX-firstn (cdr list) (1- n)))))
|
|
315
|
|
316 (defun TREX-delete-duplicate (list)
|
|
317 (let ((result nil))
|
|
318 (while list
|
|
319 (let ((elm (car list)))
|
|
320 (if (not (TREX-memequal elm result))
|
|
321 (TREX-push elm result)))
|
|
322 (setq list (cdr list)))
|
|
323 (nreverse result)))
|
|
324
|
|
325 (defun TREX-delete (elm list)
|
|
326 (let ((result nil))
|
|
327 (while list
|
|
328 (if (not (equal elm (car list)))
|
|
329 (TREX-push (car list) result))
|
|
330 (setq list (cdr list)))
|
|
331 (nreverse result)))
|
|
332
|
|
333 (defun TREX-string-to-list (str)
|
|
334 (let ((result nil)
|
|
335 (i 0)
|
|
336 (max (length str)))
|
|
337 (while (< i max)
|
|
338 (TREX-push (aref str i) result)
|
|
339 (TREX-inc i))
|
|
340 (nreverse result)))
|
|
341
|
|
342 (defun TREX-sort (list lessp &optional key)
|
|
343 (if (null key)
|
|
344 (sort list lessp)
|
|
345 (sort list (function (lambda (x y) (funcall lessp (funcall key x) (funcall key y)))))))
|
|
346
|
|
347 (defun TREX-key-lessp (x y)
|
|
348 (cond((symbolp x)
|
|
349 (cond ((symbolp y)
|
|
350 (string-lessp x y))
|
|
351 (t;; (not (symbolp))
|
|
352 t)))
|
|
353 ((numberp x)
|
|
354 (cond ((numberp y)
|
|
355 (< x y))
|
|
356 ((and (consp y) (eq (car y) ':range))
|
|
357 (< x (nth 1 y)))
|
|
358 (t nil)))
|
|
359 ((and (consp x) (eq (car x) ':range))
|
|
360 (cond ((and (consp y) (eq (car y) ':range))
|
|
361 (< (nth 2 x) (nth 1 y)))
|
|
362 ((numberp y)
|
|
363 (< (nth 2 x) y))
|
|
364 (t nil)))
|
|
365 (t nil)))
|
|
366
|
|
367 (defun TREX-lessp-car (x y)
|
|
368 (let ((x (car x))
|
|
369 (y (car y)))
|
|
370 (TREX-key-lessp x y)))
|
|
371
|
|
372 (defmacro TREX-define-enum (&rest list)
|
|
373 (list 'TREX-define-enum* (list 'quote list)))
|
|
374
|
|
375 (defun TREX-define-enum* (list)
|
|
376 (let ((i 0))
|
|
377 (while list
|
|
378 (set (car list) i)
|
|
379 (TREX-inc i)
|
|
380 (setq list (cdr list)))))
|
|
381
|
|
382 ;;;
|
|
383 ;;; regexp-parse
|
|
384 ;;;
|
|
385
|
|
386 ;;;
|
|
387 ;;; $B@55,I=8=(B(regular expression)
|
|
388 ;;;
|
|
389 ;;; . single character except a newline
|
|
390 ;;; REG* more than zero
|
|
391 ;;; REG+ at least once
|
|
392 ;;; REG? once or not at all
|
|
393 ;;; [...] character set
|
|
394 ;;; [^...] character not set
|
|
395 ;;; ^ beginning of line
|
|
396 ;;; $ end of line
|
|
397 ;;; \ quote
|
|
398 ;;; \| alternative
|
|
399 ;;; \( ... \) group and mark
|
|
400 ;;; \DIGIT
|
|
401 ;;; \` beginning of buffer
|
|
402 ;;; \' end of buffer
|
|
403 ;;; \b beginning of word or end of word
|
|
404 ;;; \B not \b
|
|
405 ;;; \< beginning of word
|
|
406 ;;; \> end of word
|
|
407 ;;;
|
|
408 ;;; \w word-constituent character
|
|
409 ;;; \W not \w
|
|
410 ;;; \sCODE syntax CODE character
|
|
411 ;;; \SCODE not \sCODE
|
|
412
|
|
413 ;;;
|
|
414 ;;; REG0 ::= REG1 |
|
|
415 ;;; REG1 "\\|" REG0
|
|
416 ;;;
|
|
417 ;;; REG1 ::= REG2 |
|
|
418 ;;; REG2 REG1
|
|
419 ;;;
|
|
420 ;;; REG2 ::= REG3 |
|
|
421 ;;; REG2 "*" |
|
|
422 ;;; REG2 "+" |
|
|
423 ;;; REG2 "?" |
|
|
424 ;;;
|
|
425 ;;; REG3 ::= "." |
|
|
426 ;;; "[" ... "]" |
|
|
427 ;;; "[" "^" ... "]" |
|
|
428 ;;; "^" |
|
|
429 ;;; "$" |
|
|
430 ;;; "\\" DIGIT |
|
|
431 ;;; "\\(" REG0 "\\)"
|
|
432
|
|
433 ;;; $B>H9g$O@55,I=8=$N:8$+$i1&$X9T$o$l$k!%(B
|
|
434
|
|
435 (defvar *regexp-parse-translate* nil
|
|
436 "$B@55,I=8=$rFI$_9~$_Cf$K;HMQ$9$k(B translate table.\n
|
|
437 case-fold-search $B$NCM$K$h$C$F(B downcasetable $B$r@_Dj$9$k!#(B")
|
|
438
|
|
439 (defun regexp-parse-translate-char-string (str)
|
|
440 (if (and *regexp-parse-translate*
|
|
441 (= (length str) 1))
|
|
442 ;;; $BK\Ev$O(B destructive $B$G$b(B OK
|
|
443 (char-to-string (aref *regexp-parse-translate* (aref str 0)))
|
|
444 str))
|
|
445
|
|
446 (defvar *regexp-word-definition* nil)
|
|
447
|
|
448 (defvar *regexp-parse-index* nil)
|
|
449 (defvar *regexp-parse-end* nil)
|
|
450 (defvar *regexp-parse-str* nil)
|
|
451 (defvar *regexp-parse-regno* 1)
|
|
452
|
|
453 (defun regexp-error (&optional reason)
|
|
454 (if (null reason) (setq reason "Bad regexp"))
|
|
455 (error "Regexp-parse::%s \"%s\" * \"%s\"" reason (substring *regexp-parse-str* 0 *regexp-parse-index*)
|
|
456 (substring *regexp-parse-str* *regexp-parse-index*)))
|
|
457
|
|
458 (defun word-parse (pattern)
|
|
459 (let ((*regexp-word-definition* t))
|
|
460 (regexp-parse pattern)))
|
|
461
|
|
462 (defun regexp-parse (pattern)
|
|
463 (let*((*regexp-parse-str* pattern)
|
|
464 (*regexp-parse-index* 0)
|
|
465 (*regexp-parse-end* (length pattern))
|
|
466 (*regexp-parse-regno* 1)
|
|
467 (result (regexp-parse-0)))
|
|
468 (if (<= *regexp-parse-end* *regexp-parse-index*)
|
|
469 result
|
|
470 (regexp-error))))
|
|
471
|
|
472 (defun regexp-parse-0 ()
|
|
473 (let* ((result (regexp-parse-1)))
|
|
474 (cond((<= *regexp-parse-end* *regexp-parse-index*)
|
|
475 result)
|
|
476 ((and (< (1+ *regexp-parse-index*) *regexp-parse-end*)
|
|
477 (= (aref *regexp-parse-str* *regexp-parse-index*) ?\\)
|
|
478 (= (aref *regexp-parse-str* (1+ *regexp-parse-index*)) ?|))
|
|
479 (TREX-inc *regexp-parse-index* 2)
|
|
480 (list ':or result (regexp-parse-0)))
|
|
481 (t result))))
|
|
482
|
|
483 (defun regexp-parse-1 ()
|
|
484 (let ((results nil)
|
|
485 (result2 nil))
|
|
486 (while (setq result2 (regexp-parse-2))
|
|
487 (TREX-push result2 results))
|
|
488 (if results
|
|
489 (if (cdr results)
|
|
490 (cons ':seq (nreverse results))
|
|
491 (car results))
|
|
492 nil)))
|
|
493
|
|
494 (defun regexp-parse-2 ()
|
|
495 (let ((result (regexp-parse-3)))
|
|
496 (while (and (< *regexp-parse-index* *regexp-parse-end*)
|
|
497 (TREX-memequal (aref *regexp-parse-str* *regexp-parse-index*)
|
|
498 '(?* ?+ ??)))
|
|
499 (let ((ch (aref *regexp-parse-str* *regexp-parse-index*)))
|
|
500 (TREX-inc *regexp-parse-index*)
|
|
501 (setq result
|
|
502 (cond((= ch ?*) (list ':star result))
|
|
503 ((= ch ?+) (list ':plus result))
|
|
504 ((= ch ??) (list ':optional result))))))
|
|
505 result))
|
|
506
|
|
507 (defun regexp-parse-3 ()
|
|
508 (if (<= *regexp-parse-end* *regexp-parse-index*)
|
|
509 nil
|
|
510 (let* ((start *regexp-parse-index*)
|
|
511 (i *regexp-parse-index*)
|
|
512 (end *regexp-parse-end*)
|
|
513 (ch (aref *regexp-parse-str* i)))
|
|
514 (TREX-inc *regexp-parse-index*)
|
|
515 (cond ((= ch ?.) '(ANYCHAR))
|
|
516 ((= ch ?^) '(BEGLINE))
|
|
517 ((= ch ?$) '(ENDLINE))
|
|
518 ((= ch ?\[)
|
|
519 (regexp-parse-charset))
|
|
520 ((= ch ?\])
|
|
521 (setq *regexp-parse-index* start)
|
|
522 nil)
|
|
523 ((= ch ?*)
|
|
524 (setq *regexp-parse-index* start)
|
|
525 nil)
|
|
526 ((= ch ?+)
|
|
527 (setq *regexp-parse-index* start)
|
|
528 nil)
|
|
529 ((= ch ??)
|
|
530 (setq *regexp-parse-index* start)
|
|
531 nil)
|
|
532 ((and (= ch ?\\) (< (1+ i) end))
|
|
533 (setq ch (aref *regexp-parse-str* (1+ i)))
|
|
534 (TREX-inc i)
|
|
535 (TREX-inc *regexp-parse-index*)
|
|
536 (cond ((= ch ?| )
|
|
537 (setq *regexp-parse-index* start)
|
|
538 nil)
|
|
539 ((= ch ?\( )
|
|
540 (if (< 9 *regexp-parse-regno*)
|
|
541 (regexp-error "Too many parenth"))
|
|
542 (let ((regexp-parse-regno *regexp-parse-regno*))
|
|
543 (TREX-inc *regexp-parse-regno*)
|
|
544 (let ((result (regexp-parse-0)))
|
|
545
|
|
546 (cond((and (< (1+ *regexp-parse-index*) *regexp-parse-end*)
|
|
547 (= (aref *regexp-parse-str* *regexp-parse-index*) ?\\ )
|
|
548 (= (aref *regexp-parse-str* (1+ *regexp-parse-index*)) ?\) ))
|
|
549 (TREX-inc *regexp-parse-index* 2)
|
|
550 (if *regexp-word-definition*
|
|
551 result
|
|
552 (list ':mark regexp-parse-regno
|
|
553 (- *regexp-parse-regno* regexp-parse-regno 1)
|
|
554 result)))
|
|
555 (t
|
|
556 (regexp-error))))))
|
|
557 ((= ch ?\) )
|
|
558 (setq *regexp-parse-index* start)
|
|
559 nil)
|
|
560 ((= ch ?` ) '(BEGBUF))
|
|
561 ((= ch ?' ) '(ENDBUF))
|
|
562 ((= ch ?b )
|
|
563 (if *regexp-word-definition* (regexp-error) '(WORDBOUND)))
|
|
564 ((= ch ?B )
|
|
565 (if *regexp-word-definition* (regexp-error) '(NOTWORDBOUND)))
|
|
566 ((= ch ?< )
|
|
567 (if *regexp-word-definition* (regexp-error) '(WORDBEG)))
|
|
568 ((= ch ?> )
|
|
569 (if *regexp-word-definition* (regexp-error) '(WORDEND)))
|
|
570 ((= ch ?w ) (list 'SYNTAXSPEC
|
|
571 (syntax-spec-code ?w))) ;;;WORDCHAR
|
|
572 ((= ch ?W ) (list 'NOTSYNTAXSPEC
|
|
573 (syntax-spec-code ?w))) ;;;NOTWORDCHAR
|
|
574 ;;; ((= ch ?=) 'AT_DOT)
|
|
575 ((and (<= ?1 ch)
|
|
576 (<= ch ?9))
|
|
577 (if *regexp-word-definition*
|
|
578 (regexp-error) (list 'DUPLICATE (- ch ?0))))
|
|
579 ((= ch ?0)
|
|
580 (regexp-error))
|
|
581 ((and (= ch ?s )
|
|
582 (< (1+ i) end))
|
|
583 (TREX-inc *regexp-parse-index*)
|
|
584 (list 'SYNTAXSPEC (syntax-spec-code (aref *regexp-parse-str* (1+ i)))))
|
|
585 ((and (= ch ?S )
|
|
586 (< (1+ i) end))
|
|
587 (TREX-inc *regexp-parse-index*)
|
|
588 (list 'NOTSYNTAXSPEC (syntax-spec-code (aref *regexp-parse-str* (1+ i)))))
|
|
589 ((and (= ch ?c )
|
|
590 (< (1+ i) end))
|
|
591 (TREX-inc *regexp-parse-index*)
|
|
592 (list 'CATEGORYSPEC (aref *regexp-parse-str* (1+ i))))
|
|
593 ((and (= ch ?C )
|
|
594 (< (1+ i) end))
|
|
595 (TREX-inc *regexp-parse-index*)
|
|
596 (list 'NOTCATEGORYSPEC (aref *regexp-parse-str* (1+ i))))
|
|
597 (t
|
|
598 (regexp-parse-translate-char-string
|
|
599 (substring *regexp-parse-str* (1+ i) (+ i 2))))))
|
|
600 (t
|
|
601 (let ((nextpos (TREX-string-forward-anychar *regexp-parse-str* i)))
|
|
602 (cond(nextpos
|
|
603 (setq *regexp-parse-index* nextpos)
|
|
604 (regexp-parse-translate-char-string
|
|
605 (substring *regexp-parse-str* i nextpos)))
|
|
606 (t (regexp-error)))))))))
|
|
607
|
|
608 (defun regexp-parse-charset ()
|
|
609 (if (< *regexp-parse-index* *regexp-parse-end*)
|
|
610 (cond((eq (aref *regexp-parse-str* *regexp-parse-index*) ?^)
|
|
611 (TREX-inc *regexp-parse-index*)
|
|
612 (regexp-parse-charset0 'CHARSET_NOT nil))
|
|
613 (t (regexp-parse-charset0 'CHARSET ;; ':or
|
|
614 nil)))
|
|
615 (regexp-error)))
|
|
616
|
|
617 (defun regexp-parse-charset0 (op list)
|
|
618 (if (< *regexp-parse-index* *regexp-parse-end*)
|
|
619 (cond ((eq (aref *regexp-parse-str* *regexp-parse-index*) ?\])
|
|
620 (TREX-inc *regexp-parse-index*)
|
|
621 (regexp-parse-charset1 op '("\]")))
|
|
622 (t
|
|
623 (regexp-parse-charset1 op nil)))
|
|
624 (regexp-error)))
|
|
625
|
|
626 (defun regexp-parse-charset1 (op list)
|
|
627 (if (< *regexp-parse-index* *regexp-parse-end*)
|
|
628 (let* ((pos0 *regexp-parse-index*)
|
|
629 (pos1 (TREX-string-forward-anychar *regexp-parse-str* pos0))
|
|
630 (pos2 (TREX-string-forward-anychar *regexp-parse-str* pos1))
|
|
631 (pos3 (TREX-string-forward-anychar *regexp-parse-str* pos2)))
|
|
632 (if pos0
|
|
633 ;;; ]
|
|
634 (cond((eq (aref *regexp-parse-str* pos0) ?\])
|
|
635 (setq *regexp-parse-index* pos1)
|
|
636 ;;; returns charset form
|
|
637 (cons op (sort (nreverse list) 'TREX-charset-lessp)))
|
|
638 ;;; [^]] - [^]]
|
|
639 ((and pos1 pos2 pos3
|
|
640 (eq (aref *regexp-parse-str* pos1) ?-)
|
|
641 (not (eq (aref *regexp-parse-str* pos2) ?\])))
|
|
642 (let ((from (substring *regexp-parse-str* pos0 pos1))
|
|
643 (to (substring *regexp-parse-str* pos2 pos3)))
|
|
644 (if (and (= (length from) (length to))
|
|
645 (not (TREX-comp-charp from 0))
|
|
646 (not (TREX-comp-charp to 0))
|
|
647 (or (= (length from) 1)
|
|
648 (= (aref from 0) (aref to 0)))
|
|
649 (or (string-equal from to) ;;; by Enami 93.08.08
|
|
650 (string-lessp from to)))
|
|
651 (if (string-equal from to)
|
|
652 (TREX-push from list)
|
|
653 (TREX-push (list ':range from to) list))
|
|
654 (regexp-error)))
|
|
655 (setq *regexp-parse-index* pos3)
|
|
656 (regexp-parse-charset1 op list))
|
|
657 ;;; [^]] - ] ;;; by Enami 93.08.08
|
|
658 ((and pos1 pos2
|
|
659 (eq (aref *regexp-parse-str* pos1) ?-)
|
|
660 (eq (aref *regexp-parse-str* pos2) ?\]))
|
|
661 (TREX-push (substring *regexp-parse-str* pos0 pos1) list)
|
|
662 (TREX-push (substring *regexp-parse-str* pos1 pos2) list)
|
|
663 (setq *regexp-parse-index* pos2)
|
|
664 (regexp-parse-charset1 op list))
|
|
665 (t
|
|
666 (TREX-push (substring *regexp-parse-str* pos0 pos1) list)
|
|
667 (setq *regexp-parse-index* pos1)
|
|
668 (regexp-parse-charset1 op list)))
|
|
669 (regexp-error)))
|
|
670 (regexp-error)))
|
|
671
|
|
672 (defun TREX-charset-lessp (ch1 ch2)
|
|
673 (cond((and (stringp ch1) (stringp ch2))
|
|
674 (string-lessp ch1 ch2))
|
|
675 ((and (consp ch1) (consp ch2))
|
|
676 (string-lessp (nth 2 ch1) (nth 1 ch2)))
|
|
677 ((consp ch1)
|
|
678 (string-lessp (nth 2 ch1) ch2))
|
|
679 ((consp ch2)
|
|
680 (string-lessp ch1 (nth 1 ch2)))))
|
|
681
|
|
682 ;;;
|
|
683 ;;; define-regexp
|
|
684 ;;;
|
|
685
|
|
686 (defmacro define-regexp (name &rest forms)
|
|
687 (` (define-regexp* '(, name) '(, forms))))
|
|
688
|
|
689 (defun define-regexp* (name forms)
|
|
690 (put name ':regexp-has-definition t)
|
|
691 (put name ':regexp-definition
|
|
692 (if (= (length forms) 1)
|
|
693 (nth 0 forms)
|
|
694 (` (:seq (,@ forms))))))
|
|
695
|
|
696 (defun regexp-get-definition (name)
|
|
697 (get name ':regexp-definition))
|
|
698
|
|
699 (defun regexp-define-specials (names)
|
|
700 (mapcar (function (lambda (name)
|
|
701 (put name ':regexp-special t)))
|
|
702 names))
|
|
703
|
|
704 (defun regexp-has-definition (name)
|
|
705 (get name ':regexp-has-definition))
|
|
706
|
|
707 (defun regexp-specialp (name)
|
|
708 (get name ':regexp-special))
|
|
709
|
|
710 (defun regexp-expand-definition (regexp &optional callers)
|
|
711 (cond
|
|
712 ((consp regexp)
|
|
713 (let ((op (car regexp)))
|
|
714 (cond((eq op ':mark)
|
|
715 (` (:mark (, (nth 1 regexp))
|
|
716 (, (nth 2 regexp))
|
|
717 (, (regexp-expand-definition (nth 3 regexp))))))
|
|
718 ((eq op ':or)
|
|
719 (` (:or (,@ (mapcar 'regexp-expand-definition (cdr regexp))))))
|
|
720 ((eq op ':seq)
|
|
721 (` (:seq (,@ (mapcar 'regexp-expand-definition (cdr regexp))))))
|
|
722 ((eq op ':optional)
|
|
723 (` (:optional (, (regexp-expand-definition (nth 1 regexp))))))
|
|
724 ((eq op ':star)
|
|
725 (` (:star (, (regexp-expand-definition (nth 1 regexp))))))
|
|
726 ((eq op ':plus)
|
|
727 (` (:plus (, (regexp-expand-definition (nth 1 regexp))))))
|
|
728 ;;;;****
|
|
729 ((eq op ':range)
|
|
730 regexp)
|
|
731 ((regexp-specialp op)
|
|
732 regexp)
|
|
733 ((memq op callers)
|
|
734 (error "regexp defs(%s)" op))
|
|
735 ((regexp-has-definition op)
|
|
736 (regexp-expand-definition (regexp-get-definition op)
|
|
737 (cons op callers)))
|
|
738 (t
|
|
739 (error "undefined regexp(%s)" op)))))
|
|
740 ((stringp regexp)
|
|
741 regexp)
|
|
742 ((null regexp)
|
|
743 regexp)
|
|
744 (t
|
|
745 regexp)))
|
|
746
|
|
747 ;;;
|
|
748 ;;; regexp-*-lessp
|
|
749 ;;; $B@55,7A<0$NA4=g=x$rDj5A$9$k!%(B
|
|
750 ;;;
|
|
751
|
|
752 ;;; nil < number < string < symbol < cons
|
|
753
|
|
754 (defun regexp-lessp (exp1 exp2)
|
|
755 (cond((equal exp1 exp2)
|
|
756 nil)
|
|
757 ((null exp1) t)
|
|
758 ((numberp exp1)
|
|
759 (cond((null exp2) nil)
|
|
760 ((numberp exp2)
|
|
761 (< exp1 exp2))
|
|
762 (t t)))
|
|
763 ((stringp exp1)
|
|
764 (cond((or (null exp2)
|
|
765 (numberp exp2))
|
|
766 nil)
|
|
767 ((stringp exp2)
|
|
768 (string< exp1 exp2))
|
|
769 (t t)))
|
|
770 ((symbolp exp1)
|
|
771 (cond((or (null exp2)
|
|
772 (numberp exp2)
|
|
773 (stringp exp2))
|
|
774 nil)
|
|
775 ((symbolp exp2)
|
|
776 (string< exp1 exp2))
|
|
777 (t t)))
|
|
778 ((consp exp1)
|
|
779 (cond ((not (consp exp2))
|
|
780 nil)
|
|
781 ((< (length exp1) (length exp2))
|
|
782 t)
|
|
783 ((= (length exp1) (length exp2))
|
|
784 (regexp-lessp-list exp1 exp2))
|
|
785 (t nil)))))
|
|
786
|
|
787 (defun regexp-lessp-list (exp1 exp2)
|
|
788 (cond((null exp1) nil)
|
|
789 ((regexp-lessp (car exp1) (car exp2))
|
|
790 t)
|
|
791 ((equal (car exp1) (car exp2))
|
|
792 (regexp-lessp-list (cdr exp1) (cdr exp2)))
|
|
793 (t nil)))
|
|
794
|
|
795 ;;;
|
|
796 ;;; item = list of seq-body(== list of regexp)
|
|
797 ;;; nil < cons
|
|
798 ;;;
|
|
799
|
|
800 (defun regexp-item-lessp (item1 item2)
|
|
801 (cond((equal item1 item2)
|
|
802 nil)
|
|
803 ((null item2) t)
|
|
804 ((consp item1)
|
|
805 (cond((consp item2)
|
|
806 (cond ((regexp-key-lessp (car item1) (car item2))
|
|
807 t)
|
|
808 ((equal (car item1) (car item2))
|
|
809 (regexp-item-lessp (cdr item1) (cdr item2)))
|
|
810 (t nil)))
|
|
811 (t nil)))))
|
|
812
|
|
813
|
|
814 (defun regexp-key-lessp-list (sym1 sym2 list)
|
|
815 (< (TREX-find sym1 list) (TREX-find sym2 list)))
|
|
816
|
|
817 (defun regexp-key-lessp (key1 key2)
|
|
818 (cond ((regexp-key-class0 key1)
|
|
819 (cond((regexp-key-class0 key2)
|
|
820 (regexp-key-lessp-list (car key1) (car key2) *regexp-key-class0*))
|
|
821 (t t)))
|
|
822 ((regexp-key-class1 key1)
|
|
823 (cond((regexp-key-class1 key2)
|
|
824 (regexp-key-lessp-list key1 key2 *regexp-key-class1*))
|
|
825 ((or (regexp-key-class2 key2)
|
|
826 (regexp-key-class3 key2)
|
|
827 (regexp-key-class4 key2)
|
|
828 (null key2))
|
|
829 t)))
|
|
830 ((regexp-key-class2 key1)
|
|
831 (cond((regexp-key-class2 key2)
|
|
832 (regexp-key-lessp-list key1 key2 *regexp-key-class2*))
|
|
833 ((or (regexp-key-class3 key2)
|
|
834 (regexp-key-class4 key2)
|
|
835 (null key2))
|
|
836 t)))
|
|
837 ((regexp-key-class3 key1)
|
|
838 (cond((regexp-key-class3 key2)
|
|
839 (regexp-key-lessp-list (car key1) (car key2) *regexp-key-class3*))
|
|
840 ((or (regexp-key-class4 key2)
|
|
841 (null key2))
|
|
842 t)))
|
|
843 ((regexp-key-class4 key1)
|
|
844 (or (null key2)
|
|
845 (and (regexp-key-class4 key2) (< key1 key2))))
|
|
846 (t nil)))
|
|
847
|
|
848 (defun regexp-alist-lessp (pair1 pair2)
|
|
849 (regexp-key-lessp (car pair1) (car pair2)))
|
|
850
|
|
851 ;;;
|
|
852 ;;;
|
|
853 ;;;
|
|
854
|
|
855 (defvar *regexp-key-class0* '(START_MEMORY STOP_MEMORY))
|
|
856
|
|
857 (defvar *regexp-key-class1* '(BEGLINE ENDLINE
|
|
858 ;;; BEFORE_DOT AT_DOT AFTER_DOT
|
|
859 BEGBUF ENDBUF
|
|
860 WORDBEG WORDEND
|
|
861 WORDBOUND NOTWORDBOUND))
|
|
862
|
|
863 (defvar *regexp-key-class2* '(ANYCHAR
|
|
864 CHARSET
|
|
865 CHARSET_NOT
|
|
866 ;;;WORDCHAR NOTWORDCHAR
|
|
867 ))
|
|
868
|
|
869 (defvar *regexp-key-class3* '(DUPLICATE
|
|
870 SYNTAXSPEC NOTSYNTAXSPEC
|
|
871 CATEGORYSPEC NOTCATEGORYSPEC
|
|
872 ))
|
|
873
|
|
874 (regexp-define-specials *regexp-key-class0*)
|
|
875 (regexp-define-specials *regexp-key-class1*)
|
|
876 (regexp-define-specials *regexp-key-class2*)
|
|
877 (regexp-define-specials *regexp-key-class3*)
|
|
878
|
|
879 (defun regexp-key-class0 (key)
|
|
880 (and (consp key) (TREX-memequal (car key) *regexp-key-class0*)))
|
|
881
|
|
882 (defun regexp-key-class1 (key)
|
|
883 (and (consp key)
|
|
884 (TREX-memequal (car key) *regexp-key-class1*)))
|
|
885
|
|
886 (defun regexp-key-class2 (key)
|
|
887 (and (consp key) (TREX-memequal (car key) *regexp-key-class2*)))
|
|
888
|
|
889 (defun regexp-key-class3 (key)
|
|
890 (and (consp key)
|
|
891 (TREX-memequal (car key) *regexp-key-class3*)))
|
|
892
|
|
893 (defun regexp-key-class4 (key)
|
|
894 (or (and (consp key) (eq (car key) ':range))
|
|
895 (numberp key) (symbolp key)))
|
|
896
|
|
897 (defun regexp-item-key-class0 (item)
|
|
898 (regexp-key-class0 (car item)))
|
|
899
|
|
900 (defun regexp-item-key-class1 (item)
|
|
901 (regexp-key-class1 (car item)))
|
|
902
|
|
903 (defun regexp-item-key-class2 (item)
|
|
904 (regexp-key-class2 (car item)))
|
|
905
|
|
906 (defun regexp-item-key-class3 (item)
|
|
907 (regexp-key-class3 (car item)))
|
|
908
|
|
909 (defun regexp-item-key-class4 (item)
|
|
910 (regexp-key-class4 (car item)))
|
|
911
|
|
912 ;;;
|
|
913 ;;; regexp-sort
|
|
914 ;;; $B@55,I=8=$NI8=`7A<0$r5a$a$k$?$a$K@0Ns$r9T$&!%(B
|
|
915 ;;;
|
|
916
|
|
917 (defvar *regexp-sort-flag* t)
|
|
918 (defvar *regexp-debug* nil)
|
|
919
|
|
920 (defun regexp-sort (list pred)
|
|
921 (if *regexp-sort-flag*
|
|
922 (progn
|
|
923 (if *regexp-debug* (princ (format "(regexp-sort %s %s)\n" list pred)))
|
|
924 (let ((result (sort list pred)))
|
|
925 (if *regexp-debug* (princ (format "<== %s\n" result)))
|
|
926 result))
|
|
927 list))
|
|
928
|
|
929 ;;;
|
|
930 ;;; regexp-inverse
|
|
931 ;;;
|
|
932
|
|
933 (defun regexp-inverse (regexp)
|
|
934 (if (consp regexp)
|
|
935 (let ((op (car regexp)))
|
|
936 (cond((eq op ':mark)
|
|
937 (list ':mark (nth 1 regexp) (nth 2 regexp)
|
|
938 (regexp-inverse (nth 3 regexp))))
|
|
939 ((eq op 'DUPLICATE)
|
|
940 regexp)
|
|
941 ((eq op ':or)
|
|
942 (cons ':or (mapcar 'regexp-inverse (cdr regexp))))
|
|
943 ((eq op ':seq)
|
|
944 (cons ':seq (nreverse (mapcar 'regexp-inverse (cdr regexp)))))
|
|
945 ((eq op ':optional)
|
|
946 (list ':optional (regexp-inverse (nth 1 regexp))))
|
|
947 ((eq op ':star)
|
|
948 (list ':star (regexp-inverse (nth 1 regexp))))
|
|
949 ((eq op ':plus)
|
|
950 (list ':plus (regexp-inverse (nth 1 regexp))))
|
|
951 (t regexp)))
|
|
952 (if (stringp regexp)
|
|
953 (TREX-string-reverse regexp)
|
|
954 regexp)))
|
|
955
|
|
956 ;;;
|
|
957 ;;; regexp-remove-infinite-loop
|
|
958 ;;;
|
|
959
|
|
960 (defun regexp-remove-infinite-loop (regexp)
|
|
961 (cond((consp regexp)
|
|
962 (let ((op (car regexp)))
|
|
963 (cond((eq op ':mark)
|
|
964 )
|
|
965 ((eq op 'DUPLICATE)
|
|
966 regexp)
|
|
967 ((eq op ':or)
|
|
968 )
|
|
969 ((eq op ':seq)
|
|
970 )
|
|
971 ((eq op ':optional)
|
|
972 )
|
|
973 ((eq op ':star)
|
|
974 )
|
|
975 ((eq op ':plus)
|
|
976 )
|
|
977 (t regexp))))
|
|
978 ((stringp regexp)
|
|
979 )
|
|
980 ((null regexp)
|
|
981 )
|
|
982 (t
|
|
983 regexp)))
|
|
984
|
|
985
|
|
986 ;;;
|
|
987 ;;; regexp-reform
|
|
988 ;;;
|
|
989
|
|
990 (defvar *regexp-register-definitions* nil)
|
|
991 (defvar *regexp-registers* nil)
|
|
992
|
|
993 (defun regexp-reform-duplication (regexp)
|
|
994 (let* ((*regexp-register-definitions* nil)
|
|
995 (newregexp (regexp-reform-duplication-1 regexp)))
|
|
996 (let ((*regexp-registers* nil))
|
|
997 (regexp-reform-duplication-2 newregexp))))
|
|
998
|
|
999 (defun regexp-reform-duplication-1 (regexp)
|
|
1000 (if (not (consp regexp)) regexp
|
|
1001 (let ((mop (car regexp)))
|
|
1002 (cond((eq mop ':or)
|
|
1003 (cons ':or (mapcar 'regexp-reform-duplication-1
|
|
1004 (cdr regexp))))
|
|
1005 ((eq mop ':seq)
|
|
1006 (cons ':seq (mapcar 'regexp-reform-duplication-1
|
|
1007 (cdr regexp))))
|
|
1008 ((TREX-memequal mop '(:star :plus :optional))
|
|
1009 (list mop (regexp-reform-duplication-1 (nth 1 regexp))))
|
|
1010 ((eq mop ':mark)
|
|
1011 (TREX-push (cdr regexp)
|
|
1012 *regexp-register-definitions*)
|
|
1013 (list 'DUPLICATE (nth 1 regexp)))
|
|
1014 (t regexp)))))
|
|
1015
|
|
1016 (defun regexp-reform-duplication-2 (regexp)
|
|
1017 (if (not (consp regexp)) regexp
|
|
1018 (let ((mop (car regexp)))
|
|
1019 (cond((eq mop ':or)
|
|
1020 (let ((registers *regexp-registers*)
|
|
1021 (newregisters nil)
|
|
1022 (result nil)
|
|
1023 (or-body (cdr regexp)))
|
|
1024 (while or-body
|
|
1025 (setq *regexp-registers* registers)
|
|
1026 (TREX-push (regexp-reform-duplication-2 (car or-body)) result)
|
|
1027 (setq newregisters (TREX-delete-duplicate (append *regexp-registers* newregisters)))
|
|
1028 (setq or-body (cdr or-body)))
|
|
1029 (setq *regexp-registers* newregisters)
|
|
1030 (cons ':or (nreverse result))))
|
|
1031 ((eq mop ':seq)
|
|
1032 (cons ':seq (mapcar 'regexp-reform-duplication-2
|
|
1033 (cdr regexp))))
|
|
1034 ((TREX-memequal mop '(:star :plus :optional))
|
|
1035 (list mop (regexp-reform-duplication-2 (nth 1 regexp))))
|
|
1036 ((eq mop 'DUPLICATE)
|
|
1037 (let ((regno (nth 1 regexp)))
|
|
1038 (if (TREX-memequal regno *regexp-registers*)
|
|
1039 regexp
|
|
1040 (let ((def (assoc regno *regexp-register-definitions*)))
|
|
1041 (TREX-push regno *regexp-registers*)
|
|
1042 ;;; $BBg>fIW!)(B
|
|
1043 (if def
|
|
1044 (cons ':mark def)
|
|
1045 regexp)))))
|
|
1046 (t regexp)))))
|
|
1047
|
|
1048 ;;;
|
|
1049 ;;; regexp-expand
|
|
1050 ;;;
|
|
1051
|
|
1052 ;;;
|
|
1053 ;;; <ISLAND> ::= ( <ITEM> ...)
|
|
1054 ;;; <ITEM> ::= ( <SEQ-BODY> ... )
|
|
1055 ;;;
|
|
1056
|
|
1057 (defun regexp-expand-regexp (regexp)
|
|
1058 ;;; returns island
|
|
1059 (if (consp regexp)
|
|
1060 (let ((mop (car regexp)))
|
|
1061 (cond
|
|
1062 ;;;((eq mop 'CHARSET)
|
|
1063 ;;; (regexp-expand-charset t (cdr regexp)))
|
|
1064 ;;;((eq mop 'CHARSET_NOT)
|
|
1065 ;;; (regexp-expand-charset nil (cdr regexp)))
|
|
1066 ((eq mop ':or)
|
|
1067 (regexp-expand-or (cdr regexp)))
|
|
1068 ((eq mop ':seq)
|
|
1069 (regexp-expand-seq (cdr regexp)))
|
|
1070 ((eq mop ':star)
|
|
1071 (let ((arg (nth 1 regexp)))
|
|
1072 (if arg
|
|
1073 (append (regexp-expand-seq (list arg regexp)) (list nil))
|
|
1074 (list nil))))
|
|
1075 ((eq mop ':plus)
|
|
1076 (let ((arg (nth 1 regexp)))
|
|
1077 (if arg
|
|
1078 (regexp-expand-seq (list arg (list ':star arg)))
|
|
1079 (list nil))))
|
|
1080 ((eq mop ':optional)
|
|
1081 (append (regexp-expand-regexp (nth 1 regexp)) (list nil)))
|
|
1082 ((eq mop ':mark)
|
|
1083 (let ((regno (nth 1 regexp))
|
|
1084 (groups (nth 2 regexp))
|
|
1085 (arg (nth 3 regexp)))
|
|
1086 (if arg
|
|
1087 (list (list (list 'START_MEMORY regno groups)
|
|
1088 arg
|
|
1089 (list 'STOP_MEMORY regno groups)))
|
|
1090 (list (list (list 'START_MEMORY regno groups)
|
|
1091 (list 'STOP_MEMORY regno groups))))))
|
|
1092 (t (list (list regexp)))))
|
|
1093 (cond((null regexp) (list nil))
|
|
1094 ((symbolp regexp) (list (list regexp)))
|
|
1095 ((numberp regexp) (list (list regexp)))
|
|
1096 ((stringp regexp)
|
|
1097 (let ((result nil))
|
|
1098 (let ((i 0) (max (length regexp)))
|
|
1099 (while (< i max)
|
|
1100 (TREX-push (aref regexp i) result)
|
|
1101 (TREX-inc i))
|
|
1102 (list (nreverse result)))))
|
|
1103 (t (list (list regexp))))))
|
|
1104
|
|
1105 ;;;
|
|
1106 ;;; (CHARSET "abc" ... ) == (:or (:seq "a" "b" "c") .... )
|
|
1107 ;;;
|
|
1108 ;;; (:range "abc" "ade") == (:seq "a" (:range "bc" "de"))
|
|
1109 ;;; (:range "bc" "de" ) == (:or (:seq "b" (:range "c" 0xFF))
|
|
1110 ;;; (:seq (:range "b"+1 "d"-1) (:range 0xA0 0xFF))
|
|
1111 ;;; (:seq "d" (:range 0xA0 "e")))
|
|
1112 ;;;
|
|
1113
|
|
1114 ;;; charset::
|
|
1115
|
|
1116 (defun charset-member-elt (ch elt)
|
|
1117 (if (consp elt)
|
|
1118 (if (eq (nth 0 elt) ':range)
|
|
1119 (and (<= ch (nth 1 elt))
|
|
1120 (<= (nth 2 elt) ch))
|
|
1121 nil)
|
|
1122 (equal ch elt)))
|
|
1123
|
|
1124 (defun charset-member-P (ch or-form)
|
|
1125 (let ((result) (l (cdr or-form)))
|
|
1126 (while (and l (null result))
|
|
1127 (if (charset-membership-elt ch (car l))
|
|
1128 (setq result t))
|
|
1129 (setq l (cdr l)))
|
|
1130 result))
|
|
1131
|
|
1132 (defun charset-member-N (ch nor-form)
|
|
1133 (not (charset-member+ ch nor-form)))
|
|
1134
|
|
1135 (defun charset-norp (form)
|
|
1136 (and (consp form) (eq (car form) 'CHARSET_NOT)))
|
|
1137
|
|
1138 (defun charset-and (form1 form2)
|
|
1139 (if (charset-norp form1)
|
|
1140 (if (charset-norp form2)
|
|
1141 (cons ':or (charset-or-PP (cdr form1) (cdr form2)))
|
|
1142 (charset-and-PN form2 form1))
|
|
1143 (if (charset-norp form2)
|
|
1144 (charset-and-pn form1 form2)
|
|
1145 (charset-and-PP form1 form2))))
|
|
1146
|
|
1147 (defun charset-or-PP (or-body1 or-body2)
|
|
1148 (append or-body1 or-body2))
|
|
1149
|
|
1150
|
|
1151
|
|
1152
|
|
1153 (defun regexp-charset-to-regexp (charsets)
|
|
1154 (cons ':or (mapcar 'regexp-charset-to-regexp* charsets)))
|
|
1155
|
|
1156 (defun regexp-charset-to-regexp* (elm)
|
|
1157 (cond((consp elm) (regexp-charset-range-to-regexp (nth 1 elm) (nth 2 elm)))
|
|
1158 ((stringp elm) (cons ':seq (TREX-string-to-list elm)))
|
|
1159 (t elm)))
|
|
1160
|
|
1161 (defun regexp-charset-range-to-regexp (str1 str2)
|
|
1162 (let ((result (regexp-charset-range-to-regexp* (TREX-string-to-list str1)
|
|
1163 (TREX-string-to-list str2))))
|
|
1164 (if (= (length result) 1) (car result) (cons ':seq result))))
|
|
1165
|
|
1166
|
|
1167 (defun regexp-charset-range-to-regexp* (nums1 nums2)
|
|
1168 (let ((len (length (cdr nums1)))
|
|
1169 (ch1 (car nums1))
|
|
1170 (ch2 (car nums2)))
|
|
1171 (if (= len 0)
|
|
1172 (if (= ch1 ch2) (list ch1)
|
|
1173 (list (regexp-charset-range-1 ch1 ch2)))
|
|
1174 (if (= ch1 ch2)
|
|
1175 (cons ch1 (regexp-charset-range-to-regexp* (cdr nums1) (cdr nums2)))
|
|
1176 (let ((part1 (cons ch1 (regexp-charset-range-to-regexp* (cdr nums1) (make-list (length (cdr nums1)) 255))))
|
|
1177 (part2 (if (<= (1+ ch1) (1- ch2))
|
|
1178 (cons (regexp-charset-range-1 (1+ ch1) (1- ch2))
|
|
1179 (regexp-charset-range-to-regexp* (make-list len 160) (make-list len 255)))
|
|
1180 nil))
|
|
1181 (part3 (cons ch2 (regexp-charset-range-to-regexp* (make-list len 160) (cdr nums2)))))
|
|
1182 (if part2
|
|
1183 (list (list ':or (cons ':seq part1) (cons ':seq part2) (cons ':seq part3)))
|
|
1184 (list (list ':or (cons ':seq part1) (cons ':seq part3)))))))))
|
|
1185
|
|
1186 (defun regexp-charset-range-1 (from to)
|
|
1187 (let ((result nil))
|
|
1188 (while (<= from to)
|
|
1189 (TREX-push to result)
|
|
1190 (TREX-dec to))
|
|
1191 (cons ':or result)))
|
|
1192
|
|
1193 (defun regexp-charset-range-1* (from to)
|
|
1194 (if (not (<= from to)) nil
|
|
1195 (cons from (regexp-charset-range-1* (1+ from) to))))
|
|
1196
|
|
1197 (defvar *regexp-charset-vector* nil)
|
|
1198
|
|
1199 (defun regexp-expand-charset (mode charsets)
|
|
1200 (TREX-init *regexp-charset-vector* (make-vector 256 nil))
|
|
1201 (let ((i 0))
|
|
1202 (while (< i 256)
|
|
1203 (aset *regexp-charset-vector* i nil)
|
|
1204 (TREX-inc i)))
|
|
1205 (while charsets
|
|
1206 (cond((numberp (car charsets))
|
|
1207 (aset *regexp-charset-vector* (car charsets) t))
|
|
1208 ((stringp (car charsets))
|
|
1209 (if (= (length (car charsets)) 1)
|
|
1210 (aset *regexp-charset-vector* (aref (car charsets) 0) t)
|
|
1211 (let ((list (TREX-string-to-list (car charsets))))
|
|
1212 (aset *regexp-charset-vector* (car list)
|
|
1213 (regexp-expand-charset-set-mark (cdr list)
|
|
1214 (aref *regexp-charset-vector* (car list)))))))
|
|
1215 ((and (consp (car charsets))
|
|
1216 (eq (car (car charsets)) ':range))
|
|
1217 (let ((from (aref (nth 1 (car charsets)) 0))
|
|
1218 (to (aref (nth 2 (car charsets)) 0)))
|
|
1219 (if (<= from to)
|
|
1220 (if (< to 128)
|
|
1221 (let ((char from))
|
|
1222 (while (<= char to)
|
|
1223 (aset *regexp-charset-vector* char t)
|
|
1224 (TREX-inc char)))
|
|
1225 (let ((from-list (TREX-string-to-list (nth 1 (car charsets))))
|
|
1226 (to-list (TREX-string-to-list (nth 2 (car charsets)))))
|
|
1227 ;;; $B$I$&$9$s$N!*(B
|
|
1228 ))))))
|
|
1229 (setq charsets (cdr charsets)))
|
|
1230 (let ((result nil)
|
|
1231 (i 0))
|
|
1232 (while (< i 256)
|
|
1233 (if (eq (aref *regexp-charset-vector* i) mode)
|
|
1234 (TREX-push (list i) result))
|
|
1235 (TREX-inc i))
|
|
1236 (nreverse result)))
|
|
1237
|
|
1238
|
|
1239 (defun regexp-expand-charset-set-mark (chars alist)
|
|
1240 (if (null chars) t
|
|
1241 (let ((place (assoc (car chars) alist)))
|
|
1242 (cond((null place)
|
|
1243 (cons
|
|
1244 (cons (car chars)
|
|
1245 (regexp-expand-charset-set-mark (cdr chars) nil))
|
|
1246 alist))
|
|
1247 (t
|
|
1248 (setcdr place
|
|
1249 (regexp-expand-charset-set-mark (cdr chars) (cdr place)))
|
|
1250 alist)))))
|
|
1251
|
|
1252 (defun regexp-expand-or (regexps)
|
|
1253 (if regexps
|
|
1254 (append (regexp-expand-regexp (car regexps))
|
|
1255 (regexp-expand-or (cdr regexps)))
|
|
1256 nil))
|
|
1257
|
|
1258 (defun regexp-expand-seq (regexps)
|
|
1259 (if (null regexps)
|
|
1260 (list nil)
|
|
1261 (let ((result (regexp-expand-regexp (car regexps))))
|
|
1262 (if (TREX-memequal nil result)
|
|
1263 (let ((newresult (regexp-expand-seq (cdr regexps))))
|
|
1264 (setq result (TREX-delete nil result))
|
|
1265 (while result
|
|
1266 (TREX-push (append (car result) (cdr regexps)) newresult)
|
|
1267 (setq result (cdr result)))
|
|
1268 newresult)
|
|
1269 (let ((newresult nil))
|
|
1270 (while result
|
|
1271 (TREX-push (append (car result) (cdr regexps)) newresult)
|
|
1272 (setq result (cdr result)))
|
|
1273 newresult)))))
|
|
1274
|
|
1275 (defun regexp-expand-items (items)
|
|
1276 (if items
|
|
1277 (append (regexp-expand-seq (car items))
|
|
1278 (regexp-expand-items (cdr items)))
|
|
1279 nil))
|
|
1280
|
|
1281 ;;;
|
|
1282 ;;; regexp-
|
|
1283 ;;;
|
|
1284
|
|
1285 (defun regexp-make-island (items)
|
|
1286 (let ((result (TREX-delete-duplicate (regexp-expand-items items))))
|
|
1287 (let ((l result))
|
|
1288 (while l
|
|
1289 (cond((null (car l))
|
|
1290 (setcdr l nil)
|
|
1291 (setq l nil))
|
|
1292 (t (setq l (cdr l))))))
|
|
1293 result))
|
|
1294
|
|
1295 (defun regexp-make-island-parallel (items)
|
|
1296 (regexp-sort (TREX-delete-duplicate (regexp-expand-items items))
|
|
1297 'regexp-item-lessp))
|
|
1298
|
|
1299
|
|
1300 ;;; Finate state Automaton:
|
|
1301 ;;;
|
|
1302 ;;; FA : Non-deterministic FA
|
|
1303 ;;; EFFA : Epsilon Free FA
|
|
1304 ;;; DFA : Deterministic FA
|
|
1305 ;;;
|
|
1306 ;;;
|
|
1307 ;;; DFA-optimize <- DFA <- EFFA <- NDFA <- regexp
|
|
1308
|
|
1309
|
|
1310 ;;;
|
|
1311 ;;; Table structure
|
|
1312 ;;; <FA> ::= ( <START> . <TransTables> )
|
|
1313 ;;; <TransTables> ::= ( <Node> . <TransTable> ) ...
|
|
1314 ;;; <TransTable> ::= ( <Key> . <Next> ) ...
|
|
1315 ;;; <Key> ::= <Char> | <Condition> | :epsilon
|
|
1316 ;;;
|
|
1317
|
|
1318 (defvar *regexp-node-to-transtable* nil)
|
|
1319 (defvar *regexp-island-to-node* nil)
|
|
1320 (defvar *regexp-counter* 0)
|
|
1321
|
|
1322 (defun FA-make (regexp)
|
|
1323 (setq *regexp-island-to-node* nil)
|
|
1324 (let ((*regexp-node-to-transtable* nil)
|
|
1325 ;;; (*regexp-island-to-node* nil)
|
|
1326 (*regexp-counter* 0))
|
|
1327 (let ((island (regexp-make-island (regexp-expand-regexp regexp))))
|
|
1328 (cons (FA-make-closure island) (nreverse *regexp-node-to-transtable*)))))
|
|
1329
|
|
1330 (defun FA-make-closure (island)
|
|
1331 (if *regexp-debug* (princ (format "FA-make-closure %s\n" island)))
|
|
1332 (if (null island) nil
|
|
1333 (let ((place (assoc island *regexp-island-to-node*))
|
|
1334 (pos nil))
|
|
1335 (cond(place (cdr place))
|
|
1336 ;;; START_MEMORY and STOP_MEMORY $B!JL5>r7o!$:GM%@h$GA+0\$9$k$b$N!K(B
|
|
1337 ((setq pos (TREX-find-if 'regexp-item-key-class0 island))
|
|
1338 (let ((pre (TREX-firstn island pos))
|
|
1339 (item (nth pos island))
|
|
1340 (post (nthcdr (1+ pos) island)))
|
|
1341 (let* ((number (TREX-inc *regexp-counter*))
|
|
1342 (pair (cons (car item) nil))
|
|
1343 (alist (list pair))
|
|
1344 (place (cons number alist)))
|
|
1345 (TREX-push (cons island number) *regexp-island-to-node*)
|
|
1346 (TREX-push place *regexp-node-to-transtable*)
|
|
1347 (setcdr pair
|
|
1348 (FA-make-closure
|
|
1349 (regexp-make-island (append pre (list (cdr item)) post))))
|
|
1350 number)))
|
|
1351 ;;; BEGLINE, ENDLINE, WORDBEG, ....$B!JD9$5#0$N$b$N!K(B
|
|
1352 ;;; $BA+0\$O(B
|
|
1353 ;;; KEY --> TRUE+FALSE
|
|
1354 ;;; :epsilon --> FALSE $B$H$J$k!%(B
|
|
1355 ((setq pos (TREX-find-if 'regexp-item-key-class1 island))
|
|
1356 (let((key (car (nth pos island)))
|
|
1357 (items island)
|
|
1358 (result-true nil)
|
|
1359 (result-false nil))
|
|
1360 (while items
|
|
1361 (let ((item (car items)))
|
|
1362 (if (equal key (car item))
|
|
1363 (TREX-push (cdr item) result-true)
|
|
1364 (progn
|
|
1365 (TREX-push item result-true)
|
|
1366 (TREX-push item result-false))))
|
|
1367 (setq items (cdr items)))
|
|
1368 (setq result-true (nreverse result-true)
|
|
1369 result-false (nreverse result-false))
|
|
1370 (if (null result-false)
|
|
1371 (let* ((number (TREX-inc *regexp-counter*))
|
|
1372 (pair-true (cons key nil))
|
|
1373 (alist (list pair-true))
|
|
1374 (place (cons number alist)))
|
|
1375 (TREX-push (cons island number) *regexp-island-to-node*)
|
|
1376 (TREX-push place *regexp-node-to-transtable*)
|
|
1377 (setcdr pair-true (FA-make-closure (regexp-make-island result-true)))
|
|
1378 number)
|
|
1379 (let* ((number (TREX-inc *regexp-counter*))
|
|
1380 (pair-true (cons key nil))
|
|
1381 (pair-false (cons ':epsilon nil))
|
|
1382 (alist (list pair-true pair-false))
|
|
1383 (place (cons number alist)))
|
|
1384 (TREX-push (cons island number) *regexp-island-to-node*)
|
|
1385 (TREX-push place *regexp-node-to-transtable*)
|
|
1386 (setcdr pair-true (FA-make-closure (regexp-make-island result-true)))
|
|
1387 (setcdr pair-false (FA-make-closure (regexp-make-island result-false)))
|
|
1388 number))))
|
|
1389 (t
|
|
1390 (FA-make-closure* island (FA-make-pre-alist island)))))))
|
|
1391
|
|
1392 ;;;
|
|
1393 ;;; $B$3$3$G07$&$N$O(B class2,3,4 $B$N$_(B
|
|
1394 ;;;
|
|
1395 (defun FA-make-closure* (island pre-alist)
|
|
1396 (if *regexp-debug* (princ (format "\nregexp-make-clousre* %s" pre-alist)))
|
|
1397 (let* ((number (TREX-inc *regexp-counter*))
|
|
1398 (place (cons number pre-alist)))
|
|
1399 (TREX-push (cons island number) *regexp-island-to-node*)
|
|
1400 (TREX-push place *regexp-node-to-transtable*)
|
|
1401 (while pre-alist
|
|
1402 (let ((pair (car pre-alist)))
|
|
1403 (setcdr pair
|
|
1404 (FA-make-closure (regexp-make-island (cdr pair)))))
|
|
1405 (setq pre-alist (cdr pre-alist)))
|
|
1406 number))
|
|
1407
|
|
1408 ;;;
|
|
1409 ;;; PRE-ALIST ::= ( (key . items) ... )
|
|
1410 ;;;
|
|
1411
|
|
1412 (defun FA-make-pre-alist (items)
|
|
1413 (let ((pre-alist nil))
|
|
1414 (while items
|
|
1415 (let ((item (car items)))
|
|
1416 (cond((or (regexp-key-class2 (car item))
|
|
1417 (regexp-key-class3 (car item)))
|
|
1418 (let ((key (car item))
|
|
1419 (newitems nil))
|
|
1420 (while (and items (equal key (car (car items))))
|
|
1421 (TREX-push (cdr (car items)) newitems)
|
|
1422 (setq items (cdr items)))
|
|
1423 (setq newitems (nreverse newitems))
|
|
1424 (TREX-push (cons key newitems) pre-alist)))
|
|
1425 ((null item)
|
|
1426 (TREX-push (list nil) pre-alist)
|
|
1427 (setq items (cdr items)))
|
|
1428 ((regexp-key-class4 (car item))
|
|
1429 (let((alist nil))
|
|
1430 (while (and items (regexp-key-class4 (car (car items))))
|
|
1431 (let* ((newitem (car items))
|
|
1432 (place (assoc (car newitem) alist)))
|
|
1433 (if place
|
|
1434 (setcdr place
|
|
1435 (cons (cdr newitem) (cdr place)))
|
|
1436 (TREX-push (cons (car newitem) (list (cdr newitem))) alist)))
|
|
1437 (setq items (cdr items)))
|
|
1438 (setq alist (sort alist 'TREX-lessp-car))
|
|
1439 (let ((list alist))
|
|
1440 (while list
|
|
1441 (setcdr (car list) (nreverse (cdr (car list))))
|
|
1442 (setq list (cdr list)))
|
|
1443 (setq pre-alist (append alist pre-alist))
|
|
1444 )))
|
|
1445 (t (error "undefined items(%s)" item)))))
|
|
1446 (nreverse pre-alist)))
|
|
1447
|
|
1448 ;;;
|
|
1449 ;;; FA-inverse
|
|
1450 ;;;
|
|
1451
|
|
1452 (defun FA-inverse (FA)
|
|
1453 (let ((invFA nil)
|
|
1454 (start (car FA))
|
|
1455 (table (cdr FA))
|
|
1456 (minnode 10000)
|
|
1457 (maxnode 0)
|
|
1458 (newtable nil)
|
|
1459 (newstart nil)
|
|
1460 (newfinal nil))
|
|
1461 (let ((l table))
|
|
1462 (while l
|
|
1463 (let ((n (car (car l))))
|
|
1464 (if (< n minnode) (setq minnode n))
|
|
1465 (if (< maxnode n) (setq maxnode n)))
|
|
1466 (setq l (cdr l))))
|
|
1467 (setq newstart (1- minnode))
|
|
1468 (setq newfinal (1+ maxnode))
|
|
1469 (setq newtable (FA-link newfinal nil nil newtable))
|
|
1470 (while table
|
|
1471 (let* ((Snode (car table))
|
|
1472 (Snumber (car Snode))
|
|
1473 (Salist (cdr Snode)))
|
|
1474 (while Salist
|
|
1475 (let* ((pair (car Salist))
|
|
1476 (key (car pair))
|
|
1477 (Tnumber (cdr pair)))
|
|
1478 (cond((null key)
|
|
1479 (setq newtable (FA-link newstart ':epsilon Snumber newtable)))
|
|
1480 (t
|
|
1481 (setq newtable (FA-link Tnumber key Snumber newtable))))
|
|
1482 (setq Salist (cdr Salist)))))
|
|
1483 (setq table (cdr table)))
|
|
1484 (setq newtable (FA-link start ':epsilon newfinal newtable))
|
|
1485 ;;;; FA $B$N(B final $B$X(B invFA $B$N(B start $B$+$i(B :epsilon link $B$rD%$k!%(B
|
|
1486 (let ((l newtable))
|
|
1487 (while l
|
|
1488 (setcdr (car l) (reverse (cdr(car l))))
|
|
1489 (setq l (cdr l))))
|
|
1490 (setq newtable (sort newtable 'TREX-lessp-car))
|
|
1491 (cons newstart newtable)))
|
|
1492
|
|
1493 (defun FA-link (from key to table)
|
|
1494 (let ((place (assoc from table)))
|
|
1495 (cond ((null place )
|
|
1496 (setq place (cons from nil))
|
|
1497 (TREX-push place table)))
|
|
1498 (setcdr place (cons (cons key to) (cdr place)))
|
|
1499 table))
|
|
1500
|
|
1501 ;;;
|
|
1502 ;;; FA-dump
|
|
1503 ;;;
|
|
1504
|
|
1505 (defun FA-dump (table)
|
|
1506 (let ((start (car table))
|
|
1507 (l (cdr table)))
|
|
1508 (princ (format "\nstart = %d\n" start))
|
|
1509 (while l
|
|
1510 (princ (format "%3d: " (car (car l))))
|
|
1511 (let ((alist (cdr (car l))))
|
|
1512 (cond ((numberp (car (car alist)))
|
|
1513 (princ (format "%c -> %s\n" (car (car alist)) (cdr (car alist)))))
|
|
1514 ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC)))
|
|
1515 (princ (format "(%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist)))))
|
|
1516 (t
|
|
1517 (princ (format "%s -> %s\n" (car (car alist)) (cdr (car alist))))))
|
|
1518 (setq alist (cdr alist))
|
|
1519 (while alist
|
|
1520 (cond ((numberp (car (car alist)))
|
|
1521 (princ (format " %c -> %s\n" (car (car alist)) (cdr (car alist)))))
|
|
1522 ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC)))
|
|
1523 (princ (format " (%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist)))))
|
|
1524 (t
|
|
1525 (princ (format " %s -> %s\n" (car (car alist)) (cdr (car alist))))))
|
|
1526 (setq alist (cdr alist))))
|
|
1527 (setq l (cdr l)))))
|
|
1528
|
|
1529 ;;;
|
|
1530 ;;; EFFA: Epsilon Free Finate Automaton
|
|
1531 ;;;
|
|
1532
|
|
1533 (defvar *FA-table* nil)
|
|
1534 (defvar *EFFA-table* nil)
|
|
1535
|
|
1536 (defun EFFA-make (FA)
|
|
1537 (let* ((start (car FA))
|
|
1538 (*FA-table* (cdr FA))
|
|
1539 (newstart start)
|
|
1540 (*EFFA-table* nil))
|
|
1541 (cons newstart (reverse (EFFA-make* start)))))
|
|
1542
|
|
1543 (defun EFFA-make* (node)
|
|
1544 (let ((place (assoc node *EFFA-table*)))
|
|
1545 (cond((null place)
|
|
1546 (let ((place (cons node nil)))
|
|
1547 (TREX-push place *EFFA-table*)
|
|
1548 (setcdr place
|
|
1549 (reverse (EFFA-make-alist nil (cdr (assoc node *FA-table*))
|
|
1550 (list node))))
|
|
1551 (let ((alist (cdr place)))
|
|
1552 (while alist
|
|
1553 (cond((car (car alist))
|
|
1554 (EFFA-make* (cdr (car alist)))))
|
|
1555 (setq alist (cdr alist))))))))
|
|
1556 *EFFA-table*)
|
|
1557
|
|
1558 (defun EFFA-make-alist (newalist alist set)
|
|
1559 (while alist
|
|
1560 (let ((node (cdr (car alist))))
|
|
1561 (cond((eq (car (car alist)) ':epsilon)
|
|
1562 (cond((not (TREX-memequal node set))
|
|
1563 (TREX-push node set)
|
|
1564 (setq newalist
|
|
1565 (EFFA-make-alist newalist (cdr (assoc node *FA-table*)) set)))))
|
|
1566 (t
|
|
1567 (TREX-push (car alist) newalist))))
|
|
1568 (setq alist (cdr alist)))
|
|
1569 newalist)
|
|
1570
|
|
1571 ;;;
|
|
1572 ;;; DFA: Deterministic Finate Automata
|
|
1573 ;;;
|
|
1574
|
|
1575 (defvar *DFA-node-counter* nil)
|
|
1576
|
|
1577 (defvar *DFA-node-definitions* nil
|
|
1578 "List of FD-nodes to node number")
|
|
1579
|
|
1580 (defvar *DFA-table* nil
|
|
1581 "node number to alist")
|
|
1582
|
|
1583 (defun DFA-make (EFFA)
|
|
1584 (let ((start (car EFFA))
|
|
1585 (*EFFA-table* (cdr EFFA))
|
|
1586 (*DFA-node-counter* 0)
|
|
1587 (*DFA-node-definitions* nil )
|
|
1588 (*DFA-table* nil))
|
|
1589 (DFA-make-1 (list start))
|
|
1590 (cons (cdr (assoc (list start) *DFA-node-definitions*)) *DFA-table*)))
|
|
1591
|
|
1592 (defun DFA-make-1 (states)
|
|
1593 (let ((place (assoc states *DFA-node-definitions*)))
|
|
1594 (cond((null place)
|
|
1595 (TREX-inc *DFA-node-counter*)
|
|
1596 (setq place (cons states *DFA-node-counter*))
|
|
1597 (TREX-push place *DFA-node-definitions*)
|
|
1598 (let ((pair (cons *DFA-node-counter* nil)))
|
|
1599 (TREX-push pair *DFA-table*)
|
|
1600 (setcdr pair (DFA-make-pre-alist (DFA-collect-alist states)))
|
|
1601 (let ((alist (cdr pair)))
|
|
1602 (while alist
|
|
1603 (let ((top (car alist)))
|
|
1604 (if (car top)
|
|
1605 (setcdr top
|
|
1606 (DFA-make-1 (cdr top)))))
|
|
1607 (setq alist (cdr alist))))
|
|
1608 )))
|
|
1609 (cdr place)))
|
|
1610
|
|
1611 (defun DFA-collect-alist (states)
|
|
1612 (let ((result nil))
|
|
1613 (while states
|
|
1614 (setq result (append (cdr (assoc (car states) *EFFA-table*)) result))
|
|
1615 (setq states (cdr states)))
|
|
1616 result))
|
|
1617
|
|
1618 (defun DFA-make-pre-alist (oldAlist)
|
|
1619 (let ((pre-alist nil))
|
|
1620 (while oldAlist
|
|
1621 (let ((oldKey (car (car oldAlist))))
|
|
1622 (cond((or (regexp-key-class0 oldKey)
|
|
1623 (regexp-key-class1 oldKey)
|
|
1624 (regexp-key-class2 oldKey)
|
|
1625 (regexp-key-class3 oldKey))
|
|
1626 (let ((key oldKey)
|
|
1627 (newAlist nil))
|
|
1628 (while (and oldAlist (equal key (car (car oldAlist))))
|
|
1629 (TREX-push (cdr (car oldAlist)) newAlist)
|
|
1630 (setq oldAlist (cdr oldAlist)))
|
|
1631 (setq newAlist (nreverse newAlist))
|
|
1632 (TREX-push (cons key newAlist) pre-alist)))
|
|
1633 ((regexp-key-class4 oldKey)
|
|
1634 (let((alist nil))
|
|
1635 (while (and oldAlist (regexp-key-class4 (car (car oldAlist))))
|
|
1636 (let ((place (assoc (car (car oldAlist)) alist)))
|
|
1637 (if place
|
|
1638 (setcdr place
|
|
1639 (cons (cdr (car oldAlist)) (cdr place)))
|
|
1640 (TREX-push (cons (car (car oldAlist)) (list(cdr (car oldAlist)))) alist)))
|
|
1641 (setq oldAlist (cdr oldAlist)))
|
|
1642 (setq alist (sort alist 'TREX-lessp-car))
|
|
1643 (let ((list alist))
|
|
1644 (while list
|
|
1645 (setcdr (car list) (reverse (cdr (car list))))
|
|
1646 (setq list (cdr list)))
|
|
1647 (setq pre-alist (append alist pre-alist))
|
|
1648 )))
|
|
1649 ((null oldKey)
|
|
1650 (TREX-push (list nil) pre-alist)
|
|
1651 (setq oldAlist (cdr oldAlist)))
|
|
1652 (t
|
|
1653 (setq oldAlist (cdr oldAlist))))))
|
|
1654 (nreverse pre-alist)))
|
|
1655
|
|
1656 ;;;
|
|
1657 ;;; DFA-optimize
|
|
1658 ;;; $B$3$3$G$N:GE,2=$O>H9g=g=x$rJ]B8$9$k!%(B
|
|
1659 ;;; longer match $B$J$I$r$9$k>l9g$OJQ99$9$kI,MW$,$"$k!%(B
|
|
1660
|
|
1661 (defvar *DFA-optimize-debug* nil)
|
|
1662
|
|
1663 (defvar *DFA-optimize-groups* nil)
|
|
1664 (defvar *DFA-optimize-node* 1)
|
|
1665
|
|
1666 (defun DFA-optimize (FA)
|
|
1667 (if *DFA-optimize-debug* (terpri))
|
|
1668 (let* ((start (car FA))
|
|
1669 (table (cdr FA))
|
|
1670 (*DFA-optimize-node* 1)
|
|
1671 (*DFA-optimize-groups*
|
|
1672 (list (cons *DFA-optimize-node* (mapcar 'car table)))))
|
|
1673 (while
|
|
1674 (catch 'DFA-optimize-changed
|
|
1675 (let ((groups *DFA-optimize-groups*))
|
|
1676 (while groups
|
|
1677 (if *DFA-optimize-debug*
|
|
1678 (princ (format "\nGroups to be checked: %s\n" groups)))
|
|
1679 (let* ((Sgroup (car groups))
|
|
1680 (Sgroup-number (car Sgroup))
|
|
1681 (oldgroup (cdr Sgroup))
|
|
1682 (newgroup nil)
|
|
1683 (Smembers oldgroup))
|
|
1684 (if *DFA-optimize-debug*
|
|
1685 (princ (format " Sgroup-number: %s = %s\n" Sgroup-number Smembers)))
|
|
1686 (while Smembers
|
|
1687 (let* ((Snumber (car Smembers))
|
|
1688 (Salist (cdr (assoc Snumber table))))
|
|
1689 (if *DFA-optimize-debug*
|
|
1690 (princ (format " Snumber: %s\n" Snumber)))
|
|
1691 (let ((Tmembers (cdr Smembers)))
|
|
1692 (while Tmembers
|
|
1693 (if (not (eq Snumber (car Tmembers)))
|
|
1694 (let* ((Tnumber (car Tmembers))
|
|
1695 (Talist (cdr (assoc Tnumber table)))
|
|
1696 (Salist Salist))
|
|
1697 (if *DFA-optimize-debug*
|
|
1698 (princ (format " Tnumber: %s\n" Tnumber)))
|
|
1699 (while (and Talist Salist
|
|
1700 (equal (car (car Talist))
|
|
1701 (car (car Salist))) ;;; key
|
|
1702 (equal (DFA-optimize-group-number
|
|
1703 (cdr (car Talist)))
|
|
1704 (DFA-optimize-group-number
|
|
1705 (cdr (car Salist))) ;;; next group
|
|
1706 ))
|
|
1707 (if *DFA-optimize-debug*
|
|
1708 (progn
|
|
1709 (princ (format " Skey: %s -> %s(%s)\n"
|
|
1710 (car (car Salist))
|
|
1711 (cdr (car Salist))
|
|
1712 (DFA-optimize-group-number (cdr (car Salist)))))
|
|
1713 (princ (format " Tkey: %s -> %s(%s)\n"
|
|
1714 (car (car Talist))
|
|
1715 (cdr (car Talist))
|
|
1716 (DFA-optimize-group-number (cdr (car Talist)))))))
|
|
1717 (setq Talist (cdr Talist)
|
|
1718 Salist (cdr Salist)))
|
|
1719 (cond((or Talist Salist)
|
|
1720 (setq newgroup (cons Tnumber newgroup)
|
|
1721 oldgroup (TREX-delete Tnumber oldgroup))
|
|
1722 (if *DFA-optimize-debug*
|
|
1723 (princ(format " oldGroup : %s\n newGroup : %s\n" oldgroup newgroup)))))
|
|
1724 ))
|
|
1725 (setq Tmembers (cdr Tmembers)))))
|
|
1726 (cond (newgroup
|
|
1727 (if *DFA-optimize-debug*
|
|
1728 (princ (format "Changed :%s --> " Sgroup)))
|
|
1729 (setcdr Sgroup oldgroup)
|
|
1730 (if *DFA-optimize-debug*
|
|
1731 (princ (format "%s" Sgroup)))
|
|
1732 (TREX-inc *DFA-optimize-node*)
|
|
1733 (if *DFA-optimize-debug*
|
|
1734 (princ (format "+%s\n" (cons *DFA-optimize-node* newgroup))))
|
|
1735 (TREX-push (cons *DFA-optimize-node* newgroup) *DFA-optimize-groups*)
|
|
1736 (throw 'DFA-optimize-changed t)))
|
|
1737 (setq Smembers (cdr Smembers))))
|
|
1738 (setq groups (cdr groups))))))
|
|
1739 ;;;
|
|
1740 ;;;
|
|
1741 (if *DFA-optimize-debug*
|
|
1742 (princ (format "table: %s\n" table)))
|
|
1743 (if *DFA-optimize-debug*
|
|
1744 (princ (format "groups: %s\n" *DFA-optimize-groups*)))
|
|
1745 (let ((newtable nil)
|
|
1746 (newstart nil)
|
|
1747 (groups *DFA-optimize-groups*))
|
|
1748
|
|
1749 ;;; start node $B$rC5$9(B
|
|
1750 (let ((l *DFA-optimize-groups*))
|
|
1751 (while l
|
|
1752 (cond((TREX-memequal start (cdr (car l)))
|
|
1753 (setq newstart (car (car l)))
|
|
1754 (setq l nil))
|
|
1755 (t
|
|
1756 (setq l (cdr l))))))
|
|
1757
|
|
1758 ;;; $B?7$7$$(B transTable $B$r:n$k!%(B
|
|
1759 (while groups
|
|
1760 (let* ((group (car groups))
|
|
1761 (group-number (car group))
|
|
1762 (member-number (car (cdr group)))
|
|
1763 (member-alist (cdr (assoc member-number table))))
|
|
1764 (TREX-push (cons group-number
|
|
1765 (let ((group-alist nil))
|
|
1766 (while member-alist
|
|
1767 (let ((Mkey (car (car member-alist)))
|
|
1768 (Mnext (cdr (car member-alist))))
|
|
1769 (TREX-push (cons Mkey (DFA-optimize-group-number Mnext))
|
|
1770 group-alist))
|
|
1771 (setq member-alist (cdr member-alist)))
|
|
1772 (nreverse group-alist)))
|
|
1773 newtable)
|
|
1774 (setq groups (cdr groups))))
|
|
1775 (cons newstart newtable))))
|
|
1776
|
|
1777 (defun DFA-optimize-group-number (node)
|
|
1778 (let ((l *DFA-optimize-groups*) (result nil))
|
|
1779 (while l
|
|
1780 (cond((TREX-memequal node (cdr (car l)))
|
|
1781 (setq result (car (car l))
|
|
1782 l nil))
|
|
1783 (t (setq l (cdr l)))))
|
|
1784 result))
|
|
1785
|
|
1786 (defun DFA-optimize-parallel (FA)
|
|
1787 (if *DFA-optimize-debug* (terpri))
|
|
1788 (let* ((start (car FA))
|
|
1789 (table (cdr FA))
|
|
1790 (*DFA-optimize-node* 1)
|
|
1791 (*DFA-optimize-groups*
|
|
1792 (list (cons *DFA-optimize-node* (mapcar 'car table)))))
|
|
1793 (while
|
|
1794 (catch 'DFA-optimize-changed
|
|
1795 (let ((groups *DFA-optimize-groups*))
|
|
1796 (while groups
|
|
1797 (if *DFA-optimize-debug*
|
|
1798 (princ (format "\nGroups to be checked: %s\n" groups)))
|
|
1799 (let* ((Sgroup (car groups))
|
|
1800 (Sgroup-number (car Sgroup))
|
|
1801 (oldgroup (cdr Sgroup))
|
|
1802 (newgroup nil)
|
|
1803 (Smembers oldgroup))
|
|
1804 (if *DFA-optimize-debug*
|
|
1805 (princ (format " Sgroup-number: %s = %s\n" Sgroup-number Smembers)))
|
|
1806 (while Smembers
|
|
1807 (let* ((Snumber (car Smembers))
|
|
1808 (Salist (cdr (assoc Snumber table))))
|
|
1809 (if *DFA-optimize-debug*
|
|
1810 (princ (format " Snumber: %s\n" Snumber)))
|
|
1811 (while Salist
|
|
1812 (let* ((Spair (car Salist))
|
|
1813 (Skey (car Spair))
|
|
1814 (Snext (cdr Spair))
|
|
1815 (Snext-group (DFA-optimize-group-number Snext))
|
|
1816 (Tmembers oldgroup))
|
|
1817 (if *DFA-optimize-debug*
|
|
1818 (princ (format " Skey: %s -> %s(%s)\n" Skey Snext-group Snext)))
|
|
1819 (while Tmembers
|
|
1820 (if (not (eq Snumber (car Tmembers)))
|
|
1821 (let* ((Tnumber (car Tmembers))
|
|
1822 ;;; $BMW:F8!F$(B
|
|
1823 (Tpair (assoc Skey (cdr (assoc Tnumber table))))
|
|
1824 (Tnext (cdr Tpair))
|
|
1825 (Tnext-group (DFA-optimize-group-number (cdr Tpair))))
|
|
1826 (if *DFA-optimize-debug*
|
|
1827 (princ (format " Tnumber: %s : %s -> %s(%s)\n" Tnumber (car Tpair)
|
|
1828 (DFA-optimize-group-number (cdr Tpair))(cdr Tpair))))
|
|
1829 (cond((and (equal Spair '(nil))
|
|
1830 (equal Tpair '(nil))))
|
|
1831 ((and Skey (equal Snext-group Tnext-group)))
|
|
1832 (t
|
|
1833 (TREX-push Tnumber newgroup)
|
|
1834 (setq oldgroup (TREX-delete Tnumber oldgroup))
|
|
1835 (if *DFA-optimize-debug*
|
|
1836 (princ(format (format " oldGroup : %s\n newGroup : %s\n" oldgroup newgroup))))
|
|
1837 ))))
|
|
1838 (setq Tmembers (cdr Tmembers)))
|
|
1839 (cond (newgroup
|
|
1840 (if *DFA-optimize-debug*
|
|
1841 (princ (format "Changed :%s --> " Sgroup)))
|
|
1842 (setcdr Sgroup oldgroup)
|
|
1843 (if *DFA-optimize-debug*
|
|
1844 (princ (format "%s" Sgroup)))
|
|
1845 (TREX-inc *DFA-optimize-node*)
|
|
1846 (if *DFA-optimize-debug*
|
|
1847 (princ (format "+%s\n" (cons *DFA-optimize-node* newgroup))))
|
|
1848 (TREX-push (cons *DFA-optimize-node* newgroup) *DFA-optimize-groups*)
|
|
1849 (throw 'DFA-optimize-changed t))))
|
|
1850 (setq Salist (cdr Salist))))
|
|
1851 (setq Smembers (cdr Smembers))))
|
|
1852 (setq groups (cdr groups))))))
|
|
1853 ;;;
|
|
1854 ;;;
|
|
1855 (if *DFA-optimize-debug*
|
|
1856 (princ (format "table: %s\n" table)))
|
|
1857 (if *DFA-optimize-debug*
|
|
1858 (princ (format "groups: %s\n" *DFA-optimize-groups*)))
|
|
1859 (let ((newtable nil)
|
|
1860 (newstart nil)
|
|
1861 (groups *DFA-optimize-groups*))
|
|
1862
|
|
1863 ;;; start node $B$rC5$9(B
|
|
1864 (let ((l *DFA-optimize-groups*))
|
|
1865 (while l
|
|
1866 (cond((TREX-memequal start (cdr (car l)))
|
|
1867 (setq newstart (car (car l)))
|
|
1868 (setq l nil))
|
|
1869 (t
|
|
1870 (setq l (cdr l))))))
|
|
1871
|
|
1872 ;;; $B?7$7$$(B transTable $B$r:n$k!%(B
|
|
1873 (while groups
|
|
1874 (let* ((group (car groups))
|
|
1875 (group-number (car group))
|
|
1876 (member-number (car (cdr group)))
|
|
1877 (member-alist (cdr (assoc member-number table))))
|
|
1878 (TREX-push (cons group-number
|
|
1879 (let ((group-alist nil))
|
|
1880 (while member-alist
|
|
1881 (let ((Mkey (car (car member-alist)))
|
|
1882 (Mnext (cdr (car member-alist))))
|
|
1883 (TREX-push (cons Mkey
|
|
1884 (if (consp Mnext)
|
|
1885 (cons (DFA-optimize-group-number (car Mnext))
|
|
1886 (DFA-optimize-group-number (cdr Mnext)))
|
|
1887 (DFA-optimize-group-number Mnext)))
|
|
1888 group-alist))
|
|
1889 (setq member-alist (cdr member-alist)))
|
|
1890 group-alist))
|
|
1891 newtable)
|
|
1892 (setq groups (cdr groups))))
|
|
1893 (cons newstart newtable))))
|
|
1894
|
|
1895
|
|
1896
|
|
1897 ;;;
|
|
1898 ;;; Non Empty Finite Automata
|
|
1899 ;;;
|
|
1900
|
|
1901 (defun NEFA-make (EFFA)
|
|
1902 (let* ((start (car EFFA))
|
|
1903 (table (cdr EFFA))
|
|
1904 (Salist (cdr (assoc start table))))
|
|
1905 (cond((equal Salist '((nil)))
|
|
1906 nil)
|
|
1907 ((and (assoc nil Salist)
|
|
1908 (progn
|
|
1909 (while (and Salist (not (equal start (cdr (car Salist)))))
|
|
1910 (setq Salist (cdr Salist)))
|
|
1911 Salist))
|
|
1912 (let ((min 10000)
|
|
1913 (max -10000)
|
|
1914 (l table))
|
|
1915 (while l
|
|
1916 (if (< (car (car l)) min)
|
|
1917 (setq min (car (car l))))
|
|
1918 (if (< max (car (car l)))
|
|
1919 (setq max (car (car l))))
|
|
1920 (setq l (cdr l)))
|
|
1921 (let* ((newstart (1- min))
|
|
1922 (newtable (copy-alist table))
|
|
1923 (oldSalist (cdr (assoc start table)))
|
|
1924 (newSalist (TREX-delete '(nil) (copy-alist oldSalist))))
|
|
1925 (cons newstart
|
|
1926 (cons (cons newstart newSalist) newtable)))))
|
|
1927 (t
|
|
1928 EFFA))))
|
|
1929
|
|
1930 ;;;
|
|
1931 ;;; Simplify FA
|
|
1932 ;;;
|
|
1933
|
|
1934 (defvar *FA-simplify-table* nil)
|
|
1935
|
|
1936 (defun FA-simplify (FA)
|
|
1937 (let ((start (car FA))
|
|
1938 (table (cdr FA))
|
|
1939 (newtable nil)
|
|
1940 (*FA-simplify-table* nil))
|
|
1941 (FA-simplify-mark start table)
|
|
1942 (while *FA-simplify-table*
|
|
1943 (TREX-push (assoc (car *FA-simplify-table*) table) newtable)
|
|
1944 (setq *FA-simplify-table* (cdr *FA-simplify-table*)))
|
|
1945 (cons start newtable)))
|
|
1946
|
|
1947 (defun FA-simplify-mark (node table)
|
|
1948 (cond ((not (TREX-memequal node *FA-simplify-table*))
|
|
1949 (TREX-push node *FA-simplify-table*)
|
|
1950 (let ((alist (cdr (assoc node table))))
|
|
1951 (while alist
|
|
1952 (cond((car (car alist))
|
|
1953 (FA-simplify-mark (cdr (car alist)) table)))
|
|
1954 (setq alist (cdr alist)))))))
|
|
1955
|
|
1956 ;;;
|
|
1957 ;;; Shortest match DFA
|
|
1958 ;;;
|
|
1959
|
|
1960 (defun DFA-shortest-match (DFA)
|
|
1961 (let ((start (car DFA))
|
|
1962 (table (cdr DFA))
|
|
1963 (newtable nil))
|
|
1964 (while table
|
|
1965 (cond ((assoc nil (cdr (car table)))
|
|
1966 (TREX-push (cons (car (car table)) '((nil))) newtable))
|
|
1967 (t
|
|
1968 (TREX-push (car table) newtable)))
|
|
1969 (setq table (cdr table)))
|
|
1970 (cons start newtable)))
|
|
1971
|
|
1972 ;;;
|
|
1973 ;;; Fastmap computation
|
|
1974 ;;;
|
|
1975
|
|
1976 (defvar *DFA-fastmap-chars* nil)
|
|
1977 (defvar *DFA-fastmap-syntax* nil)
|
|
1978 (defvar *DFA-fastmap-category* nil)
|
|
1979 (defvar *DFA-fastmap-init* 0 )
|
|
1980 (defvar *DFA-fastmap-pos* 1 ) ;;; SYNTAXSPEC or CATEGORYSPEC
|
|
1981 (defvar *DFA-fastmap-neg* 2 ) ;;; NOTSYNTAXSPEC or NOTCATEGORYSPEC
|
|
1982
|
|
1983 ;;;; $B$9$Y$F$N(B char $B$OB~0l$D$N(B syntaxspec $B$KB0$9$k(B
|
|
1984 ;;;; ==> syntaxspec(ch) and notsyntaxspec(ch) --> all char
|
|
1985 ;;;; ==> notsyntaxspec(ch1) and notsyntaxspec(ch2) --> all char
|
|
1986 ;;;; ==> notsyntaxspec(ch1) and syntaxspec(ch2) == notsyntaxspec(ch1)
|
|
1987 ;;;; $B$D$^$j(B notsyntaxspec $B$O9b!9#1$D$7$+$J$$!%(B
|
|
1988
|
|
1989 ;;; Returns [ CODE FASTMAP SYNTAX-FASTMAP CATEGOY-FASTMAP ]
|
|
1990
|
|
1991 (defun DFA-code-with-fastmap (DFA)
|
|
1992 (TREX-init *DFA-fastmap-chars* (make-vector 256 nil))
|
|
1993 (TREX-init *DFA-fastmap-syntax* (make-vector 256 nil))
|
|
1994 (TREX-init *DFA-fastmap-category* (make-vector 256 nil))
|
|
1995 (let ((code (regexp-code-gen DFA))
|
|
1996 (start (car DFA))
|
|
1997 (*DFA-fastmap-table* (cdr DFA))
|
|
1998 (*DFA-fastmap-mark* nil)
|
|
1999 (*DFA-fastmap-special* nil))
|
|
2000 (let ((i 0))
|
|
2001 (while (< i 256)
|
|
2002 (aset *DFA-fastmap-chars* i nil)
|
|
2003 (aset *DFA-fastmap-syntax* i nil)
|
|
2004 (aset *DFA-fastmap-category* i nil)
|
|
2005 (TREX-inc i)))
|
|
2006 (DFA-fastmap-collect start)
|
|
2007 (let ((fastmap (if *DFA-fastmap-special*
|
|
2008 nil ;;;(make-string 256 1)
|
|
2009 (make-string 256 0)))
|
|
2010 (fastmap-entries 0)
|
|
2011 (syntax (if *DFA-fastmap-special*
|
|
2012 nil
|
|
2013 (make-string 256 0)))
|
|
2014 (syntax-entries 0)
|
|
2015 (notsyntax-entries 0)
|
|
2016 (category (if *DFA-fastmap-special*
|
|
2017 nil
|
|
2018 (make-string 256 0)))
|
|
2019 (category-entries 0))
|
|
2020 (let ((result (make-vector 4 nil)))
|
|
2021 (aset result 0 code)
|
|
2022 (if *DFA-fastmap-special*
|
|
2023 (progn
|
|
2024 (aset result 1 fastmap)
|
|
2025 (aset result 2 syntax)
|
|
2026 (aset result 3 category))
|
|
2027 (progn
|
|
2028 (let ((i 0))
|
|
2029 (while (< i 256)
|
|
2030 (if (aref *DFA-fastmap-chars* i)
|
|
2031 (progn
|
|
2032 (TREX-inc fastmap-entries)
|
|
2033 (aset fastmap i 1)))
|
|
2034 (aset syntax i
|
|
2035 (cond((null (aref *DFA-fastmap-syntax* i))
|
|
2036 *DFA-fastmap-init*)
|
|
2037 ((eq (aref *DFA-fastmap-syntax* i) 'SYNTAXSPEC)
|
|
2038 (TREX-inc syntax-entries)
|
|
2039 *DFA-fastmap-pos*)
|
|
2040 ((eq (aref *DFA-fastmap-syntax* i) 'NOTSYNTAXSPEC)
|
|
2041 (TREX-inc notsyntax-entries)
|
|
2042 (TREX-inc syntax-entries)
|
|
2043 *DFA-fastmap-neg*)))
|
|
2044 (aset category i
|
|
2045 (cond((null (aref *DFA-fastmap-category* i))
|
|
2046 *DFA-fastmap-init*)
|
|
2047 ((eq (aref *DFA-fastmap-category* i) 'CATEGORYSPEC)
|
|
2048 (TREX-inc category-entries)
|
|
2049 *DFA-fastmap-pos*)
|
|
2050 ((eq (aref *DFA-fastmap-category* i) 'NOTCATEGORYSPEC)
|
|
2051 (TREX-inc category-entries)
|
|
2052 *DFA-fastmap-neg*)))
|
|
2053 (TREX-inc i)))
|
|
2054
|
|
2055 (cond((<= 2 notsyntax-entries)
|
|
2056 (setq fastmap (make-string 256 1)
|
|
2057 syntax nil
|
|
2058 category nil))
|
|
2059 ((= 1 notsyntax-entries)
|
|
2060 (let ((ch 0))
|
|
2061 (while (< ch 256)
|
|
2062 (if (= (aref syntax ch) *DFA-fastmap-neg*)
|
|
2063 (aset syntax ch *DFA-fastmap-init*)
|
|
2064 (aset syntax ch *DFA-fastmap-pos*))
|
|
2065 (TREX-inc ch)))))
|
|
2066 (aset result 1 fastmap)
|
|
2067 (aset result 2 syntax)
|
|
2068 (aset result 3 category)))
|
|
2069 result))))
|
|
2070
|
|
2071 (defun DFA-fastmap-collect (node)
|
|
2072 (if (TREX-memequal node *DFA-fastmap-mark*) nil
|
|
2073 (let ((alist (cdr (assoc node *DFA-fastmap-table*))))
|
|
2074 (TREX-push node *DFA-fastmap-mark*)
|
|
2075 (while alist
|
|
2076 (let ((key (car (car alist))))
|
|
2077 (cond((numberp key)
|
|
2078 (aset *DFA-fastmap-chars* key t))
|
|
2079 ((symbolp key);;; can be null
|
|
2080 (setq *DFA-fastmap-special* t))
|
|
2081 (t
|
|
2082 (let ((op (car key)))
|
|
2083 (cond
|
|
2084 ((TREX-memequal op '(START_MEMORY STOP_MEMORY))
|
|
2085 (DFA-fastmap-collect (cdr (car alist))))
|
|
2086 ((TREX-memequal op '(SYNTAXSPEC NOTSYNTAXSPEC))
|
|
2087 (let ((specch (syntax-code-spec (nth 1 key))))
|
|
2088 (cond((null (aref *DFA-fastmap-syntax* (nth 1 key)))
|
|
2089 (aset *DFA-fastmap-syntax* specch op))
|
|
2090 ((not (eq (aref *DFA-fastmap-syntax* specch) op))
|
|
2091 (setq *DFA-fastmap-special* t)))))
|
|
2092 ((TREX-memequal op '(CATEGORYSPEC NOTCATEGORYSPEC))
|
|
2093 (let ((specch (nth 1 key)))
|
|
2094 (cond((null (aref *DFA-fastmap-category* specch))
|
|
2095 (aset *DFA-fastmap-category* specch op))
|
|
2096 ((not (eq (aref *DFA-fastmap-category* specch) op))
|
|
2097 (setq *DFA-fastmap-special* t)))))
|
|
2098 ((TREX-memequal op '(CHARSET CHARSET_NOT))
|
|
2099 (let ((list (cdr key)))
|
|
2100 (while list
|
|
2101 (let ((from nil) (to nil))
|
|
2102 (cond((stringp (car list))
|
|
2103 (setq from (aref (car list) 0)
|
|
2104 to (aref (car list) 0)))
|
|
2105 (t ;;; :range
|
|
2106 (setq from (aref (nth 1 (car list)) 0)
|
|
2107 to (aref (nth 2 (car list)) 0))))
|
|
2108 (while (<= from to)
|
|
2109 (cond((null (aref *DFA-fastmap-chars* from))
|
|
2110 (aset *DFA-fastmap-chars* from
|
|
2111 (if (eq op 'CHARSET_NOT) 'CHARSET_NOT
|
|
2112 t))))
|
|
2113 (TREX-inc from)))
|
|
2114 (setq list (cdr list))))
|
|
2115 (if (eq op 'CHARSET_NOT)
|
|
2116 (let ((i 0))
|
|
2117 (while (< i 256)
|
|
2118 (cond((null (aref *DFA-fastmap-chars* i))
|
|
2119 (aset *DFA-fastmap-chars* i t))
|
|
2120 ((eq (aref *DFA-fastmap-chars* i) 'CHARSET_NOT)
|
|
2121 (aset *DFA-fastmap-chars* i nil)))
|
|
2122 (TREX-inc i)))))
|
|
2123 (t
|
|
2124 (setq *DFA-fastmap-special* t)))))))
|
|
2125 (setq alist (cdr alist))))))
|
|
2126
|
|
2127 ;;;
|
|
2128 ;;; $B@55,I=8=%3!<%I$NL?NaI=(B
|
|
2129 ;;;
|
|
2130
|
|
2131 (if (= regexp-version 19)
|
|
2132 (TREX-define-enum
|
|
2133 UNUSED ;;; 18
|
|
2134 EXACTN ;;; 18
|
|
2135 ANYCHAR ;;; 18
|
|
2136 CHARSET ;;; 18
|
|
2137 CHARSET_NOT ;;; 18
|
|
2138 START_MEMORY ;;; 18*
|
|
2139 STOP_MEMORY ;;; 18*
|
|
2140 DUPLICATE ;;; 18
|
|
2141 BEGLINE ;;; 18
|
|
2142 ENDLINE ;;; 18
|
|
2143 BEGBUF ;;; 18
|
|
2144 ENDBUF ;;; 18
|
|
2145 JUMP ;;; 18
|
|
2146 JUMP_PAST_ALT ;;; 19
|
|
2147 ON_FAILURE_JUMP ;;; 18
|
|
2148 ON_FAILURE_KEEP_STRING_JUMP ;;; 19
|
|
2149 ;;;; finalize_jump
|
|
2150 ;;;; maybe_finalize_jump
|
|
2151 POP_FAILURE_JUMP ;;; 19
|
|
2152 MAYBE_POP_JUMP ;;; 19
|
|
2153 DUMMY_FAILURE_JUMP ;;; 18
|
|
2154 PUSH_DUMMY_FAILURE ;;; 19
|
|
2155 SUCCEED_N ;;; 19
|
|
2156 JUMP_N ;;; 19
|
|
2157 SET_NUMBER_AT ;;; 19
|
|
2158 WORDCHAR ;;; 18
|
|
2159 NOTWORDCHAR ;;; 18
|
|
2160 WORDBEG ;;; 18
|
|
2161 WORDEND ;;; 18
|
|
2162 WORDBOUND ;;; 18
|
|
2163 NOTWORDBOUND ;;; 18
|
|
2164 BEFORE_DOT ;;; 18
|
|
2165 AT_DOT ;;; 18
|
|
2166 AFTER_DOT ;;; 18
|
|
2167 SYNTAXSPEC ;;; 18
|
|
2168 NOTSYNTAXSPEC ;;; 18
|
|
2169 ;;; TREX code
|
|
2170 EXACT1
|
|
2171 EXACT2
|
|
2172 EXACT3
|
|
2173 CHARSET_M
|
|
2174 CHARSET_M_NOT
|
|
2175 CASEN
|
|
2176 SUCCESS_SHORT
|
|
2177 SUCCESS
|
|
2178 POP
|
|
2179 EXCEPT0
|
|
2180 EXCEPT1
|
|
2181 CATEGORYSPEC
|
|
2182 NOTCATEGORYSPEC
|
|
2183 RANGE
|
|
2184 RANGE_A
|
|
2185 )
|
|
2186 ;; else regexp-version == 18.
|
|
2187 (TREX-define-enum
|
|
2188 UNUSED
|
|
2189 EXACTN
|
|
2190 BEGLINE
|
|
2191 ENDLINE
|
|
2192 JUMP
|
|
2193 ON_FAILURE_JUMP
|
|
2194 FINALIZE_JUMP
|
|
2195 MAYBE_FINALIZE_JUMP
|
|
2196 DUMMY_FAILURE_JUMP
|
|
2197 ANYCHAR
|
|
2198 CHARSET
|
|
2199 CHARSET_NOT
|
|
2200 START_MEMORY
|
|
2201 STOP_MEMORY
|
|
2202 DUPLICATE
|
|
2203 BEFORE_DOT ;;; not used
|
|
2204 AT_DOT ;;; not used
|
|
2205 AFTER_DOT ;;; not used
|
|
2206 BEGBUF
|
|
2207 ENDBUF
|
|
2208 WORDCHAR ;;; not used
|
|
2209 NOTWORDCHAR ;;; not used
|
|
2210 WORDBEG
|
|
2211 WORDEND
|
|
2212 WORDBOUND
|
|
2213 NOTWORDBOUND
|
|
2214 SYNTAXSPEC
|
|
2215 NOTSYNTAXSPEC
|
|
2216 ;;;
|
|
2217 ;;; extended instructions
|
|
2218 ;;;
|
|
2219 EXACT1
|
|
2220 EXACT2
|
|
2221 EXACT3
|
|
2222 CHARSET_M
|
|
2223 CHARSET_M_NOT
|
|
2224 CASEN
|
|
2225 SUCCESS_SHORT ;;; == ON_FAILURE_SUCCESS
|
|
2226 SUCCESS
|
|
2227 POP
|
|
2228 EXCEPT0 ;;; ALLCHAR
|
|
2229 EXCEPT1
|
|
2230 CATEGORYSPEC
|
|
2231 NOTCATEGORYSPEC
|
|
2232 ))
|
|
2233
|
|
2234 (defvar ON_FAILURE_SUCCESS SUCCESS_SHORT)
|
|
2235
|
|
2236 ;;;
|
|
2237 ;;; ANYCHAR = EXCEPT1 \n
|
|
2238 ;;; ALLCHAR = EXCEPT0
|
|
2239
|
|
2240
|
|
2241 ;;;
|
|
2242 ;;; $B@55,I=8=>H9g4o$NL?NaBN7O(B
|
|
2243 ;;;
|
|
2244 ;;; UNUSED
|
|
2245 ;;; EXACTN n ch1 ch2 ... chn
|
|
2246 ;;; BEGLINE
|
|
2247 ;;; ENDLINE
|
|
2248 ;;; JUMP disp[2]
|
|
2249 ;;; +JUMP_PAST_ALT disp[2]
|
|
2250 ;;; ON_FAILURE_JUMP disp[2]
|
|
2251 ;;; +ON_FAILURE_KEEP_STRING_JUMP disp[2]
|
|
2252 ;;; -FINALIZE_JUMP disp[2]
|
|
2253 ;;; -MAYBE_FINALIZE_JUMP disp[2]
|
|
2254 ;;; +POP_FAILURE_JUMP disp[2]
|
|
2255 ;;; +MAYBE_POP_JUMP disp[2]
|
|
2256 ;;; DUMMY_FAILURE_JUMP disp[2]
|
|
2257 ;;; +PUSH_DUMMY_FAILURE
|
|
2258 ;;; +SUCCEED_N disp[2] n[2]
|
|
2259 ;;; +JUMP_N disp[2] n[2]
|
|
2260 ;;; +SET_NUMBER_AT disp[2] n[2]
|
|
2261 ;;; ANYCHAR
|
|
2262 ;;; CHARSET n b1 b2 ... bn
|
|
2263 ;;;**CHARSET 0xff l1 l2 cf1 ct1 cf2 ct2 ... cfn ctn
|
|
2264 ;;; CHARSET_NOT n b1 b2 ... bn
|
|
2265 ;;;**CHARSET_NOT 0xff l1 l2 cf1 ct1 cf2 ct2 ... cfn ctn
|
|
2266 ;;; $B0J2<$O$($J$_;a$NDs0F$K$h$k?7$?$J%;%^%s%F%#%C%/%9(B
|
|
2267 ;;
|
|
2268 ;;; CHARSET n b1 b2 ... bn (n < 0x80)
|
|
2269 ;;; CHARSET n+0x80 b1 b2 ... bn
|
|
2270 ;;; |<-- n bytes -->|
|
|
2271 ;;; lh lo CHARF1 CHART1 .... CHARFm CHARTm
|
|
2272 ;;; |<- lh << 8 + lo bytes ->|
|
|
2273 ;; CHARSET n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn
|
|
2274 ;; |<- bitmap ->| |<- range table ->|
|
|
2275 ;; CHARSET_NOT n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn
|
|
2276 ;; CHARSETM m n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn
|
|
2277 ;; CHARSETM_NOT m n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn
|
|
2278 ;;
|
|
2279 ;; o cfx, ctx $B0J30$O$9$Y$F(B 1byte. cfx, ctx $B$O(B multi byte
|
|
2280 ;; character.
|
|
2281 ;;
|
|
2282 ;; o CHARSET(_NOT) $B$H(B CHARSETM(_NOT) $B$H$N0c$$$O(B, CHARSETM(_NOT)
|
|
2283 ;; $B$N>l9g$K$O(B bitmap $B$N@hF,$N(B m bytes $B$,>J$+$l$F$$$kE@(B.
|
|
2284 ;;
|
|
2285 ;; o b1 ... bn ($B$D$^$j(B bitmap$B$ND9$5(B)$B$O(B, (n & 0x7f) bytes. n $B$N(B
|
|
2286 ;; $BJ,(B 1byte $B$O4^$^$J$$(B.
|
|
2287 ;;
|
|
2288 ;; o lh $B0J2<$O(B n & 0x80 $B$,(B 0 $B$J$iB8:_$7$J$$(B.
|
|
2289 ;;
|
|
2290 ;; o lh $B$+$i(B ctn $B$^$G$ND9$5(B($B$D$^$j(B range table $B$ND9$5(B) $B$O(B ((lh
|
|
2291 ;; << 8) + lo) byte. lh $B$H(B lo $B$N(B 2byte $B$r4^$`(B. ($B>e$N(B n $B$N>l(B
|
|
2292 ;; $B9g$H0c$$$^$9$,(B, $BE}0l$7$?$[$&$,$$$$$+$J(B?).
|
|
2293 ;;
|
|
2294 ;; o cfx $B$O(B multi byte character $B$G(B, cfx $B$H(B ctx $B$N(B leading char
|
|
2295 ;; $B$OF1$8$G$J$$$H$$$1$J$$(B. $B$^$?(B, cfx $B$N(B leading char $B$O(B 0 $B$G(B
|
|
2296 ;; $B$"$C$F$O$$$1$J$$(B(range table $B$K(B leading char $B$,(B 0 (ASCII$B$H(B
|
|
2297 ;; $B$+(B) $B$NJ8;z$,$"$C$F$b(B, $B8=:_$O(B fastmap $B$KH?1G$5$l$J$$$+$i(B).
|
|
2298 ;;
|
|
2299 ;;; START_MEMORY regno
|
|
2300 ;;; STOP_MEMORY regno
|
|
2301 ;;; o emacs 19 $B$N(B regex.c $B$G$O(B,
|
|
2302 ;;; START_MEMORY regno groupno
|
|
2303 ;;; STOP_MEMORY regno groupno
|
|
2304 ;;; groupno $B$O<+J,$h$j2<$N%l%Y%k$N%0%k!<%W$N?t(B
|
|
2305 ;;;
|
|
2306 ;;; DUPLICATE regno
|
|
2307 ;;; BEFORE_DOT ;;; not used
|
|
2308 ;;; AT_DOT ;;; not used
|
|
2309 ;;; AFTER_DOT ;;; not used
|
|
2310 ;;; BEGBUF
|
|
2311 ;;; ENDBUF
|
|
2312 ;;; WORDCHAR ;;; not used
|
|
2313 ;;; NOTWORDCHAR ;;; not used
|
|
2314 ;;; WORDBEG
|
|
2315 ;;; WORDEND
|
|
2316 ;;; WORDBOUND
|
|
2317 ;;; NOTWORDBOUND
|
|
2318 ;;; SYNTAXSPEC ch
|
|
2319 ;;; NOTSYNTAXSPEC ch
|
|
2320
|
|
2321 ;;;
|
|
2322 ;;; $B3HD%L?Na!J(BTREX$B$G;HMQ$9$k$b$N!K(B
|
|
2323 ;;;
|
|
2324 ;;; EXACT1 ch
|
|
2325 ;;; EXACT2 ch1 ch2
|
|
2326 ;;; EXACT3 ch1 ch2 ch3
|
|
2327 ;;; CHARSETM m n b1 b2 .. bn
|
|
2328 ;;; charset $B$N(B bitmaps $B$N$&$A@hF,$N(B m bytes $B$r>J$$$?$b$N(B
|
|
2329 ;;; CHARSETM_NOT m n b1 b2 .. bn
|
|
2330 ;;; charset_not $B$N(B bitmaps $B$N$&$A@hF,$N(B m bytes $B$r>J$$$?$b$N(B
|
|
2331 ;;; CASEN n disp[1] disp[2] ... disp[n] l u ind[l] ... ind[u]
|
|
2332 ;;; $B:G=i$K(B n $B8D$N(B jump relative address(2bytes) $B$,B3$-!$(B
|
|
2333 ;;; $B<!$K(Bcharacter code l $B$+$i(B m $B$^$G$NJ,$N(Bindex(1byte)$B$,B3$/!%(B
|
|
2334 ;;; ON_FAILURE_SUCCESS
|
|
2335 ;;; alternative stack $B$r6u$K$7!$(Bpend $B$r(B push $B$9$k!%(B
|
|
2336 ;;; SUCCESS
|
|
2337 ;;; pend $B$X%8%c%s%W$9$k!%(B
|
|
2338 ;;; POP
|
|
2339 ;;; alternative stack $B$r(B pop $B$9$k!%(B
|
|
2340
|
|
2341 ;;; RANGE ch1 ch2
|
|
2342 ;;; RANGE_A == RANGE 0xA0 0xFF
|
|
2343
|
|
2344
|
|
2345 ;;; [^$B&A(B]$B&B(B\|$B&C(B $B$N0UL#!'(B
|
|
2346 ;;; on_failure_jump L1
|
|
2347 ;;; on_failure_jump L2
|
|
2348 ;;; $B&A(B
|
|
2349 ;;; pop
|
|
2350 ;;; fail
|
|
2351 ;;; L1: ALLCHAR
|
|
2352 ;;; $B&B(B
|
|
2353 ;;; L2: pop
|
|
2354 ;;; $B&C(B
|
|
2355
|
|
2356 ;;;
|
|
2357 ;;; regexp-code-*
|
|
2358 ;;;
|
|
2359
|
|
2360 (defvar *regexp-code-buffer* (get-buffer-create " *regexp-code-buffer*"))
|
|
2361
|
|
2362 (defun regexp-code-gen (FA)
|
|
2363 (let ((start (car FA))
|
|
2364 (table (cdr FA))
|
|
2365 (*table* (cdr FA))
|
|
2366 (*labels* nil)
|
|
2367 (*final* nil)
|
|
2368 (*counter* 0))
|
|
2369 (let ((list table))
|
|
2370 (while (and list (null *final*))
|
|
2371 (if (equal '((nil)) (cdr (car list)))
|
|
2372 (setq *final* (car (car list))))
|
|
2373 (setq list (cdr list))))
|
|
2374 (cond((null *final*)
|
|
2375 (setq *final* (1+ (length table)))
|
|
2376 (setq *counter* (1+ *final*)))
|
|
2377 (t
|
|
2378 (setq *counter* (1+ (length table)))))
|
|
2379 (save-excursion
|
|
2380 (set-buffer *regexp-code-buffer*)
|
|
2381 (let ((kanji-flag nil)
|
|
2382 (mc-flag nil))
|
|
2383 (erase-buffer)
|
|
2384 (regexp-code-gen* start)
|
|
2385 (buffer-substring (point-min) (point-max)))
|
|
2386 )))
|
|
2387
|
|
2388 (defun regexp-code-gen* (node)
|
|
2389 (cond((= node *final*)
|
|
2390 (if (null (assoc node *labels*))
|
|
2391 (TREX-push (cons node (point)) *labels*))
|
|
2392 (insert SUCCESS))
|
|
2393 ((null (assoc node *labels*))
|
|
2394 (TREX-push (cons node (point)) *labels*)
|
|
2395 (let ((alist (cdr (assoc node *table*))))
|
|
2396 (cond((equal '((nil)) alist)
|
|
2397 (insert SUCCESS))
|
|
2398 (t (regexp-code-gen-alist alist)))))
|
|
2399 (t
|
|
2400 (let ((disp (- (cdr (assoc node *labels*)) (+ (point) 3))))
|
|
2401 (insert JUMP
|
|
2402 (logand disp 255)
|
|
2403 (/ (logand disp (* 255 256)) 256))))))
|
|
2404
|
|
2405 (defvar *regexp-charset-table* nil)
|
|
2406 (defvar *regexp-case-table* nil)
|
|
2407
|
|
2408 (defun regexp-code-gen-alist (alist)
|
|
2409 (TREX-init *regexp-charset-table* (make-vector 256 nil))
|
|
2410 (TREX-init *regexp-case-table* (make-vector 256 nil))
|
|
2411 (if (eq (car (car alist)) nil)
|
|
2412 nil
|
|
2413 (let ((nextalist alist)
|
|
2414 (numberkey nil)
|
|
2415 (point nil)
|
|
2416 (min 256) (max -1) (nexts nil) (nodealist nil))
|
|
2417 (cond((numberp (car (car alist)))
|
|
2418 (setq numberkey t)
|
|
2419 (let ((i 0))
|
|
2420 (while (< i 256)
|
|
2421 (aset *regexp-case-table* i nil)
|
|
2422 (TREX-inc i)))
|
|
2423
|
|
2424 (while (and nextalist
|
|
2425 (numberp (car (car nextalist))))
|
|
2426 (let ((ch (car (car nextalist)))
|
|
2427 (next (cdr (car nextalist))))
|
|
2428 (let ((place (assoc next nodealist)))
|
|
2429 (if place
|
|
2430 (setcdr place
|
|
2431 (cons ch (cdr place)))
|
|
2432 (TREX-push (cons ch (list next)) nodealist)))
|
|
2433 (aset *regexp-case-table* ch next)
|
|
2434 (if (< ch min) (setq min ch))
|
|
2435 (if (< max ch) (setq max ch))
|
|
2436 (if (not (TREX-memequal next nexts))
|
|
2437 (TREX-push next nexts)))
|
|
2438 (setq nextalist (cdr nextalist))))
|
|
2439 (t (setq nextalist (cdr alist))))
|
|
2440
|
|
2441 (if nextalist
|
|
2442 (cond((eq (car (car nextalist)) nil)
|
|
2443 (insert ON_FAILURE_SUCCESS )) ;;; SUCCESS_SHORT
|
|
2444 (t
|
|
2445 (insert ON_FAILURE_JUMP 0 0)
|
|
2446 (setq point (point)))))
|
|
2447
|
|
2448 (cond(numberkey
|
|
2449 (cond((= min max)
|
|
2450 ;;; exact1
|
|
2451 (regexp-code-gen-exact (list min) (car nexts)))
|
|
2452
|
|
2453 ((= (length nexts) 1)
|
|
2454 ;;; charset or charset_not
|
|
2455 (if (= (length alist) 256)
|
|
2456 (insert EXCEPT0) ;92.10.26 by T.Saneto
|
|
2457 (let ((not_min 256)
|
|
2458 (not_max -1)
|
|
2459 (ch 0)
|
|
2460 (mode (car nexts)))
|
|
2461 (while (< ch 256)
|
|
2462 (cond((null (aref *regexp-case-table* ch))
|
|
2463 (if (< ch not_min) (setq not_min ch))
|
|
2464 (if (< not_max ch) (setq not_max ch))))
|
|
2465 (TREX-inc ch))
|
|
2466 (if (<= (- not_max not_min) (- max min))
|
|
2467 (setq min not_min
|
|
2468 max not_max
|
|
2469 mode nil))
|
|
2470 (let ((minb (/ min 8))
|
|
2471 (maxb (1+ (/ max 8))))
|
|
2472 (insert (if mode CHARSET_M CHARSET_M_NOT) minb (- maxb minb))
|
|
2473 (let ((b minb))
|
|
2474 (while (< b maxb)
|
|
2475 (let ((i 7) (bits 0))
|
|
2476 (while (<= 0 i)
|
|
2477 (if (eq (aref *regexp-case-table* (+ (* 8 b) i))
|
|
2478 mode)
|
|
2479 ;;;; bits table$B$N=g=x$O<!$NDL$j(B
|
|
2480 (TREX-inc bits (aref [1 2 4 8 16 32 64 128] i)))
|
|
2481 (TREX-dec i))
|
|
2482 (insert bits))
|
|
2483 (TREX-inc b))))))
|
|
2484 (regexp-code-gen* (car nexts)))
|
|
2485 (t
|
|
2486 ;;; case
|
|
2487 (let ((point nil))
|
|
2488 (insert CASEN)
|
|
2489 (insert (length nexts))
|
|
2490 (setq point (point))
|
|
2491 (let ((list nexts))
|
|
2492 (while list
|
|
2493 (insert 0 0)
|
|
2494 (setq list (cdr list))))
|
|
2495 (insert min max)
|
|
2496 (let ((ch min))
|
|
2497 (while (<= ch max)
|
|
2498 (if (aref *regexp-case-table* ch)
|
|
2499 (insert (1+ (TREX-find (aref *regexp-case-table* ch) nexts)))
|
|
2500 (insert 0))
|
|
2501 (TREX-inc ch)))
|
|
2502 (let ((list nexts))
|
|
2503 (while list
|
|
2504 (if (null (assoc (car list) *labels*))
|
|
2505 (regexp-code-gen* (car list)))
|
|
2506 (setq list (cdr list))))
|
|
2507 (save-excursion
|
|
2508 (goto-char point)
|
|
2509 (let ((list nexts))
|
|
2510 (while list
|
|
2511 (delete-char 2)
|
|
2512 (let ((disp (- (cdr (assoc (car list) *labels*)) (+ (point) 2))))
|
|
2513 (insert (logand disp 255)
|
|
2514 (/ (logand disp (* 255 256)) 256)))
|
|
2515 (setq list (cdr list)))))
|
|
2516 ))))
|
|
2517 ((eq (car (car alist)) ':epsilon)
|
|
2518 (regexp-code-gen* (cdr (car alist))))
|
|
2519 (t
|
|
2520 (let ((key (car (car alist)))
|
|
2521 (next (cdr (car alist))))
|
|
2522 (cond ((symbolp key)
|
|
2523 (insert (eval key)))
|
|
2524 ((TREX-memequal (car key) '(CHARSET CHARSET_NOT))
|
|
2525 (let ((charset (cdr key))
|
|
2526 (min 128) (max -1)
|
|
2527 (mcbytes 0)
|
|
2528 (mcchars nil))
|
|
2529 (let ((i 0))
|
|
2530 (while (< i 256)
|
|
2531 (aset *regexp-charset-table* i nil)
|
|
2532 (TREX-inc i)))
|
|
2533 (while charset
|
|
2534 (cond((stringp (car charset))
|
|
2535 (cond((eq (length (car charset)) 1)
|
|
2536 (aset *regexp-charset-table* (aref (car charset) 0) t)
|
|
2537 (if (< (aref (car charset) 0) min)
|
|
2538 (setq min (aref (car charset) 0)))
|
|
2539 (if (< max (aref (car charset) 0))
|
|
2540 (setq max (aref (car charset) 0)))
|
|
2541 )
|
|
2542 (t
|
|
2543 (TREX-inc mcbytes (* 2 (length (car charset))))
|
|
2544 (if (null mcchars) (setq mcchars charset))
|
|
2545 )))
|
|
2546 ((consp (car charset)) ;;; range
|
|
2547 (cond ((eq (length (nth 1 (car charset))) 1)
|
|
2548 (let ((from (aref (nth 1 (car charset)) 0))
|
|
2549 (to (aref (nth 2 (car charset)) 0)))
|
|
2550 (if (< from min) (setq min from))
|
|
2551 (if (< max to) (setq max to))
|
|
2552 (while (<= from to)
|
|
2553 (aset *regexp-charset-table* from t)
|
|
2554 (TREX-inc from)))
|
|
2555 )
|
|
2556 (t
|
|
2557 (TREX-inc mcbytes
|
|
2558 (+ (length (nth 1 (car charset))) (length (nth 2 (car charset)))))
|
|
2559 (if (null mcchars) (setq mcchars charset))))))
|
|
2560 (setq charset (cdr charset)))
|
|
2561 (cond ((< max min)
|
|
2562 (insert (if (eq (car key) 'CHARSET) CHARSET CHARSET_NOT)
|
|
2563 (if (< 0 mcbytes) 128 0)))
|
|
2564 (t
|
|
2565 (let ((minb (/ min 8))
|
|
2566 (maxb (1+ (/ max 8))))
|
|
2567 (insert (if (eq (car key) 'CHARSET) CHARSET_M CHARSET_M_NOT)
|
|
2568 minb (+ (if (< 0 mcbytes) 128 0) (- maxb minb)))
|
|
2569 (let ((b minb))
|
|
2570 (while (< b maxb)
|
|
2571 (let ((i 7) (bits 0))
|
|
2572 (while (<= 0 i)
|
|
2573 (if (aref *regexp-charset-table* (+ (* 8 b) i))
|
|
2574 ;;;; bits table$B$N=g=x$O<!$NDL$j(B
|
|
2575 (TREX-inc bits (aref [1 2 4 8 16 32 64 128] i)))
|
|
2576 (TREX-dec i))
|
|
2577 (insert bits))
|
|
2578 (TREX-inc b))))))
|
|
2579
|
|
2580 (cond( (< 0 mcbytes)
|
|
2581 (TREX-inc mcbytes 2)
|
|
2582 (insert (/ mcbytes 256) (mod mcbytes 256))
|
|
2583 (while mcchars
|
|
2584 (cond((stringp (car mcchars))
|
|
2585 (insert (car mcchars) (car mcchars)))
|
|
2586 ((consp (car mcchars))
|
|
2587 (insert (nth 1 (car mcchars)) (nth 2 (car mcchars)))))
|
|
2588 (setq mcchars (cdr mcchars)))))
|
|
2589 ))
|
|
2590 ((= (length key) 1)
|
|
2591 (insert (eval (car key))))
|
|
2592 ((= (length key) 2)
|
|
2593 (insert (eval (car key)) (nth 1 key)))
|
|
2594 ((= (length key) 3)
|
|
2595 (insert (eval (car key)) (nth 1 key) (nth 2 key)))
|
|
2596 (t
|
|
2597 (regexp-error)))
|
|
2598 (regexp-code-gen* next))))
|
|
2599 (if point
|
|
2600 (let ((disp (- (point) point)))
|
|
2601 (save-excursion
|
|
2602 (goto-char point)
|
|
2603 (delete-char -2)
|
|
2604 (insert (logand disp 255)
|
|
2605 (/ (logand disp (* 255 256)) 256)))
|
|
2606 (regexp-code-gen-alist nextalist))))))
|
|
2607
|
|
2608 (defun regexp-code-gen-exact (chars node)
|
|
2609 (let ((alist (cdr (assoc node *table*))))
|
|
2610 (cond((and (null (assoc node *labels*))
|
|
2611 (= (length alist) 1)
|
|
2612 (numberp (car (car alist))))
|
|
2613 (regexp-code-gen-exact (cons (car (car alist)) chars)
|
|
2614 (cdr (car alist))))
|
|
2615 (t
|
|
2616 (regexp-code-gen-exact* (reverse chars))
|
|
2617 (regexp-code-gen* node)))))
|
|
2618
|
|
2619 (defun regexp-code-gen-exact* (chars)
|
|
2620 (cond((= (length chars) 1)
|
|
2621 (insert EXACT1 (car chars)))
|
|
2622 ((= (length chars) 2)
|
|
2623 (insert EXACT2 (car chars) (nth 1 chars)))
|
|
2624 ((= (length chars) 3)
|
|
2625 (insert EXACT3 (car chars) (nth 1 chars) (nth 2 chars)))
|
|
2626 (t
|
|
2627 (insert EXACTN (length chars))
|
|
2628 (let ((list chars))
|
|
2629 (while list
|
|
2630 (insert (car list))
|
|
2631 (setq list (cdr list)))))))
|
|
2632
|
|
2633 ;;;
|
|
2634 ;;; regexp-code-dump
|
|
2635 ;;; $B@55,I=8=$N%3!<%I$rI=<($9$k!%(B
|
|
2636 ;;;
|
|
2637
|
|
2638 (defvar *regexp-code-dump* nil)
|
|
2639 (defvar *regexp-code-index* nil)
|
|
2640
|
|
2641 (defun regexp-code-dump (*regexp-code-dump*)
|
|
2642 (terpri)
|
|
2643 (let ((*regexp-code-index* 0)
|
|
2644 (max (length *regexp-code-dump*)))
|
|
2645 (while (< *regexp-code-index* max)
|
|
2646 (princ (format "%4d:" *regexp-code-index*))
|
|
2647 (let((op (aref *regexp-code-dump* *regexp-code-index*)))
|
|
2648 (cond((= op UNUSED) (regexp-code-dump-0 "unused"))
|
|
2649 ((= op EXACTN)
|
|
2650 (princ (format "exactn(%d) " (aref *regexp-code-dump* (1+ *regexp-code-index*))))
|
|
2651 (let ((j (+ *regexp-code-index* 2))
|
|
2652 (max (+ *regexp-code-index* 2 (aref *regexp-code-dump* (1+ *regexp-code-index*)))))
|
|
2653 (while (< j max)
|
|
2654 (princ (format "%c" (aref *regexp-code-dump* j)))
|
|
2655 (TREX-inc j))
|
|
2656 (setq *regexp-code-index* j))
|
|
2657 (terpri)
|
|
2658 )
|
|
2659 ((= op BEGLINE) (regexp-code-dump-0 "begline"))
|
|
2660 ((= op ENDLINE) (regexp-code-dump-0 "endline"))
|
|
2661 ((= op JUMP) (regexp-code-dump-jump "jump"))
|
|
2662 ((and (= regexp-version 19)
|
|
2663 (= op JUMP_PAST_ALT))
|
|
2664 (regexp-code-dump-jump "jump_past_alt"))
|
|
2665 ((= op ON_FAILURE_JUMP ) (regexp-code-dump-jump "on_failure_jump"))
|
|
2666 ((and (= regexp-version 19)
|
|
2667 (= op ON_FAILURE_KEEP_STRING_JUMP))
|
|
2668 (regexp-code-dump-jump "on_failure_keep_string_jump"))
|
|
2669 ((and (= regexp-version 18)
|
|
2670 (= op FINALIZE_JUMP))
|
|
2671 (regexp-code-dump-jump "finalize_jump"))
|
|
2672 ((and (= regexp-version 18)
|
|
2673 (= op MAYBE_FINALIZE_JUMP))
|
|
2674 (regexp-code-dump-jump "maybe_finalize_jump"))
|
|
2675 ((and (= regexp-version 19)
|
|
2676 (= op POP_FAILURE_JUMP))
|
|
2677 (regexp-code-dump-jump "pop_failure_jump"))
|
|
2678 ((and (= regexp-version 19)
|
|
2679 (= op MAYBE_POP_JUMP))
|
|
2680 (regexp-code-dump-jump "maybe_pop_jump"))
|
|
2681 ((= op DUMMY_FAILURE_JUMP) (regexp-code-dump-jump "dummy_failure_jump"))
|
|
2682 ((and (= regexp-version 19)
|
|
2683 (= op PUSH_DUMMY_FAILURE))
|
|
2684 (regexp-code-dump-0 "push_dummy_failure"))
|
|
2685 ((and (= regexp-version 19)
|
|
2686 (= op SUCCEED_N))
|
|
2687 (regexp-code-dump-jump-2 "succeed_n"))
|
|
2688 ((and (= regexp-version 19)
|
|
2689 (= op JUMP_N))
|
|
2690 (regexp-code-dump-jump-2 "jump_n"))
|
|
2691 ((and (= regexp-version 19)
|
|
2692 (= op SET_NUMBER_AT))
|
|
2693 (regexp-code-dump-jump-2 "SET_NUMBER_AT"))
|
|
2694 ((= op ANYCHAR) (regexp-code-dump-0 "anychar"))
|
|
2695 ((= op CHARSET) (regexp-code-dump-charset "charset"))
|
|
2696 ((= op CHARSET_NOT) (regexp-code-dump-charset "charset_not"))
|
|
2697 ((= op START_MEMORY)
|
|
2698 (if (= regexp-version 19)
|
|
2699 (regexp-code-dump-2 "start_memory")
|
|
2700 (regexp-code-dump-1 "start_memory")))
|
|
2701 ((= op STOP_MEMORY)
|
|
2702 (if (= regexp-version 19)
|
|
2703 (regexp-code-dump-2 "stop_memory")
|
|
2704 (regexp-code-dump-1 "stop_memory")))
|
|
2705 ((= op DUPLICATE) (regexp-code-dump-1 "duplicate"))
|
|
2706 ((= op BEFORE_DOT) (regexp-code-dump-0 "before_dot"))
|
|
2707 ((= op AT_DOT) (regexp-code-dump-0 "at_dot"))
|
|
2708 ((= op AFTER_DOT) (regexp-code-dump-0 "after_dot"))
|
|
2709 ((= op BEGBUF) (regexp-code-dump-0 "begbuf"))
|
|
2710 ((= op ENDBUF) (regexp-code-dump-0 "endbuf"))
|
|
2711 ((= op WORDCHAR) (regexp-code-dump-0 "wordchar"))
|
|
2712 ((= op NOTWORDCHAR) (regexp-code-dump-0 "notwordchar"))
|
|
2713 ((= op WORDBEG) (regexp-code-dump-0 "wordbeg"))
|
|
2714 ((= op WORDEND) (regexp-code-dump-0 "wordend"))
|
|
2715 ((= op WORDBOUND) (regexp-code-dump-0 "wordbound"))
|
|
2716 ((= op NOTWORDBOUND) (regexp-code-dump-0 "notwordbound"))
|
|
2717 ((= op SYNTAXSPEC) (regexp-code-dump-syntax "syntaxspec"))
|
|
2718 ((= op NOTSYNTAXSPEC) (regexp-code-dump-syntax "notsyntaxspec"))
|
|
2719 ((= op EXACT1) (regexp-code-dump-1ch "EXACT1"))
|
|
2720 ((= op EXACT2)
|
|
2721 (princ (format "EXACT2 %c%c\n" (aref *regexp-code-dump* (1+ *regexp-code-index*))
|
|
2722 (aref *regexp-code-dump* (+ *regexp-code-index* 2))))
|
|
2723 (TREX-inc *regexp-code-index* 3))
|
|
2724 ((= op EXACT3)
|
|
2725 (princ (format "EXACT3 %c%c%c\n"
|
|
2726 (aref *regexp-code-dump* (1+ *regexp-code-index*))
|
|
2727 (aref *regexp-code-dump* (+ *regexp-code-index* 2))
|
|
2728 (aref *regexp-code-dump* (+ *regexp-code-index* 3))))
|
|
2729 (TREX-inc *regexp-code-index* 4))
|
|
2730 ((= op CHARSET_M) (regexp-code-dump-charset-m "CHARSET_M"))
|
|
2731 ((= op CHARSET_M_NOT) (regexp-code-dump-charset-m "CHARSET_M_NOT"))
|
|
2732 ((= op CASEN)
|
|
2733 (princ (format "CASEN %d\n" (aref *regexp-code-dump* (1+ *regexp-code-index*))))
|
|
2734 (let ((j (+ *regexp-code-index* 2))
|
|
2735 (max (+ *regexp-code-index* 2 (* 2 (aref *regexp-code-dump* (1+ *regexp-code-index*))))))
|
|
2736 (while (< j max)
|
|
2737 (princ (format "[%d]::%d\n" (1+ (/ (- j (+ *regexp-code-index* 2)) 2))
|
|
2738 (regexp-get-absolute-address
|
|
2739 (+ j 2) (aref *regexp-code-dump* j)
|
|
2740 (aref *regexp-code-dump* (1+ j)))))
|
|
2741 (TREX-inc j 2))
|
|
2742 (let ((ch (aref *regexp-code-dump* j)) (chmax (aref *regexp-code-dump* (1+ j))))
|
|
2743 (princ (format "%c::%c\n" ch chmax))
|
|
2744 (TREX-inc j 2)
|
|
2745 (while (<= ch chmax)
|
|
2746 (princ (format "%c=>[%d]\n" ch (aref *regexp-code-dump* j)))
|
|
2747 (TREX-inc j)
|
|
2748 (TREX-inc ch)))
|
|
2749 (setq *regexp-code-index* j)))
|
|
2750 ((= op ON_FAILURE_SUCCESS) (regexp-code-dump-0 "ON_FAILURE_SUCCESS"))
|
|
2751 ((= op SUCCESS) (regexp-code-dump-0 "SUCCESS"))
|
|
2752 ((= op POP) (regexp-code-dump-0 "POP"))
|
|
2753 ((= op EXCEPT0) (regexp-code-dump-0 "EXCEPT0"))
|
|
2754 ((= op EXCEPT1) (regexp-code-dump-1ch "EXCEPT1"))
|
|
2755 ((= op CATEGORYSPEC) (regexp-code-dump-1ch "CATEGORYSPEC"))
|
|
2756 ((= op NOTCATEGORYSPEC) (regexp-code-dump-1ch "NOTCATEGORYSPEC"))
|
|
2757 (t (princ (format "unknown op=%d\n" op))
|
|
2758 (TREX-inc *regexp-code-index*)))))
|
|
2759 (princ (format "%4d:\n" *regexp-code-index*)))
|
|
2760 nil
|
|
2761 )
|
|
2762
|
|
2763 (defun regexp-code-dump-0 (op)
|
|
2764 (princ op) (terpri)
|
|
2765 (TREX-inc *regexp-code-index*))
|
|
2766
|
|
2767 (defun regexp-code-dump-1 (op)
|
|
2768 (princ (format "%s %d\n" op (aref *regexp-code-dump* (1+ *regexp-code-index*))))
|
|
2769 (TREX-inc *regexp-code-index* 2))
|
|
2770
|
|
2771 (defun regexp-code-dump-2 (op)
|
|
2772 (princ (format "%s %d %d\n"
|
|
2773 op
|
|
2774 (aref *regexp-code-dump* (1+ *regexp-code-index*))
|
|
2775 (aref *regexp-code-dump* (+ *regexp-code-index* 2))
|
|
2776 ))
|
|
2777 (TREX-inc *regexp-code-index* 3))
|
|
2778
|
|
2779 (defun regexp-code-dump-syntax (op)
|
|
2780 (princ (format "%s %c\n" op (syntax-code-spec (aref *regexp-code-dump* (1+ *regexp-code-index*)))))
|
|
2781 (TREX-inc *regexp-code-index* 2))
|
|
2782
|
|
2783 (defun regexp-code-dump-1ch (op)
|
|
2784 (princ (format "%s %c\n" op (aref *regexp-code-dump* (1+ *regexp-code-index*))))
|
|
2785 (TREX-inc *regexp-code-index* 2))
|
|
2786
|
|
2787 (defun regexp-get-absolute-address (point b1 b2)
|
|
2788 (cond ((< b2 128)
|
|
2789 (+ point (+ (* 256 b2) b1)))
|
|
2790 (t
|
|
2791 (+ point (logior (logxor -1 (+ (* 255 256) 255)) (* 256 b2) b1)))))
|
|
2792
|
|
2793 (defun regexp-code-dump-jump (op)
|
|
2794 (let* ((b1 (aref *regexp-code-dump* (1+ *regexp-code-index*)))
|
|
2795 (b2 (aref *regexp-code-dump* (+ *regexp-code-index* 2)))
|
|
2796 (p (regexp-get-absolute-address (+ *regexp-code-index* 3) b1 b2)))
|
|
2797 (princ (format "%s %d\n" op p)))
|
|
2798 (TREX-inc *regexp-code-index* 3))
|
|
2799
|
|
2800 (defun regexp-code-dump-jump-2 (op)
|
|
2801 (let* ((b1 (aref *regexp-code-dump* (1+ *regexp-code-index*)))
|
|
2802 (b2 (aref *regexp-code-dump* (+ *regexp-code-index* 2)))
|
|
2803 (p (regexp-get-absolute-address (+ *regexp-code-index* 3) b1 b2)))
|
|
2804 (princ (format "%s %d %d\n" op p
|
|
2805 (+
|
|
2806 (* 256 (aref *regexp-code-dump* (+ *regexp-code-index* 3)))
|
|
2807 (aref *regexp-code-dump* (+ *regexp-code-index* 4))))))
|
|
2808 (TREX-inc *regexp-code-index* 5))
|
|
2809
|
|
2810 (defun regexp-code-dump-charset (op)
|
|
2811 (let ((n (aref *regexp-code-dump* (1+ *regexp-code-index*))))
|
|
2812 (princ (format "%s %d " op n))
|
|
2813 (let ((j (+ *regexp-code-index* 2))
|
|
2814 (max (+ *regexp-code-index* 2 (if (<= 128 n) (- n 128) n))))
|
|
2815 (while (< j max)
|
|
2816 (princ (format "0x%2x " (aref *regexp-code-dump* j)))
|
|
2817 (TREX-inc j))
|
|
2818 (cond((<= 128 n)
|
|
2819 (let* ((len (+ (* 256 (aref *regexp-code-dump* j))
|
|
2820 (aref *regexp-code-dump* (1+ j))))
|
|
2821 (last (+ j len)))
|
|
2822 (princ (format "\n range list[%d-2 bytes]" len))
|
|
2823 (TREX-inc j 2)
|
|
2824 (while (< j last)
|
|
2825 (let ((ch (sref *regexp-code-dump* j)))
|
|
2826 (princ (format " %c" ch))
|
|
2827 (TREX-inc j (char-octets ch))
|
|
2828 (setq ch (sref *regexp-code-dump* j))
|
|
2829 (princ (format "-%c" ch))
|
|
2830 (TREX-inc j (char-octets ch))))
|
|
2831 )))
|
|
2832 (setq *regexp-code-index* j)
|
|
2833 (terpri))
|
|
2834 ))
|
|
2835
|
|
2836 (defun regexp-code-dump-charset-m (op)
|
|
2837 (let ((m (aref *regexp-code-dump* (1+ *regexp-code-index*)))
|
|
2838 (n (aref *regexp-code-dump* (+ *regexp-code-index* 2))))
|
|
2839 (princ (format "%s %d %d " op m n))
|
|
2840 (let ((j (+ *regexp-code-index* 3))
|
|
2841 (max (+ *regexp-code-index* 3 (if (<= 128 n) (- n 128) n))))
|
|
2842 (while (< j max)
|
|
2843 (princ (format "0x%02x " (aref *regexp-code-dump* j)))
|
|
2844 (TREX-inc j))
|
|
2845 (cond((<= 128 n)
|
|
2846 (let* ((len (+ (* 256 (aref *regexp-code-dump* j))
|
|
2847 (aref *regexp-code-dump* (1+ j))))
|
|
2848 (last (+ j len)))
|
|
2849 (princ (format "\n range list[%d-2 bytes]" len))
|
|
2850 (TREX-inc j 2)
|
|
2851 (while (< j last)
|
|
2852 (let ((ch (sref *regexp-code-dump* j)))
|
|
2853 (princ (format " %c" ch))
|
|
2854 (TREX-inc j (char-octets ch))
|
|
2855 (setq ch (sref *regexp-code-dump* j))
|
|
2856 (princ (format "-%c" ch))
|
|
2857 (TREX-inc j (char-octets ch))))
|
|
2858 )))
|
|
2859 (setq *regexp-code-index* j)
|
|
2860 (terpri)
|
|
2861 )))
|
|
2862
|
|
2863 ;;;
|
|
2864 ;;; Compile functions
|
|
2865 ;;;
|
|
2866
|
|
2867 (defun TREX-simple-test1 ()
|
|
2868 (regexp-word-compile
|
|
2869 "\\cA+\\cH*\\|\\cK+\\cH*\\|\\cC+\\cH*\\|\\cH+\\|\\sw+"))
|
|
2870
|
|
2871 (defun TREX-test1 (pattern)
|
|
2872 (let* ((regexp (regexp-parse pattern))
|
|
2873 (fFA (EFFA-make (FA-make regexp)))
|
|
2874 (bFA (EFFA-make (FA-inverse fFA)))
|
|
2875 (l (cdr fFA))
|
|
2876 (result nil))
|
|
2877 (TREX-push (cons (DFA-optimize (DFA-make fFA))
|
|
2878 (DFA-optimize (DFA-make bFA)))
|
|
2879 result)
|
|
2880 (while l
|
|
2881 (let* ((forward (NEFA-make (EFFA-make (cons (car (car l)) (cdr fFA)))))
|
|
2882 (backward (NEFA-make (EFFA-make (cons (car (car l)) (cdr bFA))))))
|
|
2883 (cond((and forward backward)
|
|
2884 (TREX-push (cons (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make forward))))
|
|
2885 (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make backward)))))
|
|
2886 result))))
|
|
2887 (setq l (cdr l)))
|
|
2888 (setq result (reverse result))
|
|
2889 (let ((count 0))
|
|
2890 (while result
|
|
2891 (princ (format "\nForward[%2d]:" count)) (FA-dump (car (car result)))
|
|
2892 (princ (format "\nBackward[%2d]:" count)) (FA-dump (cdr (car result)))
|
|
2893 (TREX-inc count)
|
|
2894 (setq result (cdr result))))))
|
|
2895
|
|
2896 (defun TREX-test2 (pattern)
|
|
2897 (let* ((regexp (regexp-parse pattern))
|
|
2898 (fFA (EFFA-make (FA-make regexp)))
|
|
2899 (l (cdr fFA))
|
|
2900 (result nil))
|
|
2901 (regexp-code-dump (setq result (regexp-code-gen (DFA-optimize (DFA-make fFA)))))
|
|
2902 result))
|
|
2903
|
|
2904 ;;;###autoload
|
|
2905 (defun regexp-compile (pattern)
|
|
2906 (regexp-compile-internal pattern nil))
|
|
2907
|
|
2908 ;;;###autoload
|
|
2909 (defun regexp-word-compile (pattern)
|
|
2910 (regexp-compile-internal pattern t))
|
|
2911
|
|
2912 ;;;
|
|
2913 ;;; Returns a list of pair of forward-code and backward-code
|
|
2914 ;;;
|
|
2915
|
|
2916
|
|
2917 (defun regexp-compile-internal (pattern &optional word)
|
|
2918 (let* ((*regexp-word-definition* word)
|
|
2919 (*regexp-parse-translate*
|
|
2920 (if case-fold-search
|
|
2921 ;;; DOWNCASE or CANONICAL?
|
|
2922 (nth 2 (current-case-table))
|
|
2923 nil))
|
|
2924 (regexp (regexp-parse pattern))
|
|
2925 (fFA (EFFA-make (FA-make (regexp-reform-duplication regexp))))
|
|
2926 (bFA (EFFA-make (FA-make (regexp-reform-duplication (regexp-inverse regexp)))))
|
|
2927 (result nil))
|
|
2928 (let ((ofFA (DFA-optimize (DFA-make fFA)))
|
|
2929 (obFA (DFA-optimize (DFA-make bFA))))
|
|
2930 (TREX-push (cons (DFA-code-with-fastmap ofFA)
|
|
2931 (let* ((START_MEMORY STOP_MEMORY)
|
|
2932 (STOP_MEMORY START_MEMORY))
|
|
2933 (DFA-code-with-fastmap obFA)))
|
|
2934 result))
|
|
2935 (if word
|
|
2936 (let ((l (cdr fFA))
|
|
2937 (bFA (EFFA-make (FA-inverse fFA))))
|
|
2938 (while l
|
|
2939 (let* ((forward (NEFA-make (EFFA-make (cons (car (car l)) (cdr fFA)))))
|
|
2940 (backward (NEFA-make (EFFA-make (cons (car (car l)) (cdr bFA))))))
|
|
2941 (cond((and forward backward)
|
|
2942 (let ((fFA (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make forward)))))
|
|
2943 (bFA (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make backward))))))
|
|
2944 (TREX-push (cons (DFA-code-with-fastmap fFA)
|
|
2945 (DFA-code-with-fastmap bFA))
|
|
2946 result)))))
|
|
2947 (setq l (cdr l)))
|
|
2948 (setq result (nreverse result))))
|
|
2949 result))
|
|
2950
|
|
2951 (defun regexp-compiled-pattern-dump (pattern)
|
|
2952 ;;; PATTERN is a vector of [ code fastmap fastmap-syntax fastmap-categoy]
|
|
2953 (regexp-code-dump (aref pattern 0))
|
|
2954 (print-fastmap (aref pattern 1) " fastmap[char]")
|
|
2955 (print-fastmap (aref pattern 2) " fastmap[synt]")
|
|
2956 (print-fastmap (aref pattern 3) " fastmap[cate]")
|
|
2957 )
|
|
2958
|
|
2959 (defun regexp-compile-dump (code)
|
|
2960 (let ((Fcode (aref (car (car code)) 0))
|
|
2961 (Bcode (aref (cdr (car code)) 0))
|
|
2962 (words (cdr code)))
|
|
2963 (princ (format "\nRegular Expression Compiler Dump:\n"))
|
|
2964 (princ (format "Forward Search:"))
|
|
2965 (regexp-compiled-pattern-dump (car (car code)))
|
|
2966 (princ (format "Backward Search:"))
|
|
2967 (if Bcode (regexp-compiled-pattern-dump (cdr (car code)))
|
|
2968 (princ (format "\n Use the interpreter\n")))
|
|
2969 (if words
|
|
2970 (let ((i 1))
|
|
2971 (princ (format "In word conditions:\n"))
|
|
2972 (while words
|
|
2973 (princ (format "Forward[%d]" i))
|
|
2974 (regexp-compiled-pattern-dump (car (car words)))
|
|
2975 (princ (format "Backward[%d]" i))
|
|
2976 (regexp-compiled-pattern-dump (cdr (car words)))
|
|
2977 (TREX-inc i)
|
|
2978 (setq words (cdr words)))))))
|
|
2979
|
|
2980 (defun regexp-compile-and-dump (regexp)
|
|
2981 (regexp-compile-dump (regexp-compile regexp)))
|
|
2982
|
|
2983
|
|
2984 ;;;###autoload
|
|
2985 (defmacro define-word-regexp (name regexp)
|
|
2986 (` (defconst (, name) '(, (regexp-word-compile regexp)))))
|
|
2987
|
|
2988 (put 'define-word-regexp 'byte-hunk-handler ;93.7.16 by S.Tomura
|
|
2989 'macroexpand)
|
|
2990
|
|
2991 ;;;
|
|
2992 ;;; Coding system
|
|
2993 ;;;
|
|
2994
|
|
2995 (defmacro define-coding-systems (&rest rest)
|
|
2996 (` (define-coding-systems* '(, rest))))
|
|
2997
|
|
2998 (defun define-coding-systems* (names)
|
|
2999 (let ((systems
|
|
3000 (` (:or (,@ (mapcar (function (lambda (name) (` (:seq (, (regexp-get-definition name))
|
|
3001 (, name)))))
|
|
3002 names))))))
|
|
3003 systems))
|
|
3004
|
|
3005 (defun oct (str) (aref str 0))
|
|
3006
|
|
3007 (defvar *TREX-range-from* nil)
|
|
3008 (defvar *TREX-range-to* nil)
|
|
3009
|
|
3010 (defun TREX-range-make-jisjoint (regexp)
|
|
3011 (TREX-init *TREX-range-from* (make-vector 256 nil))
|
|
3012 (TREX-init *TREX-range-to* (make-vector 256 nil))
|
|
3013 (let ((i 0))
|
|
3014 (while (< i 256)
|
|
3015 (aset *TREX-range-from* i nil)
|
|
3016 (aset *TREX-range-to* i nil)
|
|
3017 (TREX-inc i)))
|
|
3018 (aset *TREX-range-from* 0 t)
|
|
3019 (aset *TREX-range-to* 255 t)
|
|
3020 (TREX-range-mark regexp)
|
|
3021 (TREX-range-replace regexp))
|
|
3022
|
|
3023 (defun TREX-range-mark (regexp)
|
|
3024 (cond
|
|
3025 ((consp regexp)
|
|
3026 (let ((op (car regexp)))
|
|
3027 (cond((eq op ':mark)
|
|
3028 (TREX-range-mark (nth 3 regexp)))
|
|
3029 ((eq op ':or)
|
|
3030 (mapcar 'TREX-range-mark (cdr regexp)))
|
|
3031 ((eq op ':seq)
|
|
3032 (mapcar 'TREX-range-mark (cdr regexp)))
|
|
3033 ((eq op ':optional)
|
|
3034 (TREX-range-mark (nth 1 regexp)))
|
|
3035 ((eq op ':star)
|
|
3036 (TREX-range-mark (nth 1 regexp)))
|
|
3037 ((eq op ':plus)
|
|
3038 (TREX-range-mark (nth 1 regexp)))
|
|
3039 ((eq op ':range)
|
|
3040 (TREX-range-mark2 (nth 1 regexp) (nth 2 regexp))))))
|
|
3041 ((stringp regexp)
|
|
3042 (TREX-range-mark2 regexp regexp))
|
|
3043 ((numberp regexp)
|
|
3044 (TREX-range-mark2 regexp regexp))))
|
|
3045
|
|
3046 (defun TREX-range-mark2 (from to)
|
|
3047 (if (stringp from) (setq from (aref from 0)))
|
|
3048 (if (stringp to) (setq to (aref to 0)))
|
|
3049 (if (< 0 from) (aset *TREX-range-to* (1- from) t))
|
|
3050 (if (< to 255) (aset *TREX-range-from* (1+ to) t))
|
|
3051 (aset *TREX-range-from* from t)
|
|
3052 (aset *TREX-range-to* to t))
|
|
3053
|
|
3054 (defun TREX-range-replace (regexp)
|
|
3055 (cond
|
|
3056 ((consp regexp)
|
|
3057 (let ((op (car regexp)))
|
|
3058 (cond((eq op ':mark)
|
|
3059 (` (:mark (, (nth 1 regexp))
|
|
3060 (, (nth 2 regexp))
|
|
3061 (, (TREX-range-replace (nth 3 regexp))))))
|
|
3062 ((eq op ':or)
|
|
3063 (` (:or (,@ (mapcar 'TREX-range-replace (cdr regexp))))))
|
|
3064 ((eq op ':seq)
|
|
3065 (` (:seq (,@ (mapcar 'TREX-range-replace (cdr regexp))))))
|
|
3066 ((eq op ':optional)
|
|
3067 (` (:optional (,(TREX-range-replace (nth 1 regexp))))))
|
|
3068 ((eq op ':star)
|
|
3069 (` (:star (,(TREX-range-replace (nth 1 regexp))))))
|
|
3070 ((eq op ':plus)
|
|
3071 (` (:plus (,(TREX-range-replace (nth 1 regexp))))))
|
|
3072 ((eq op ':range)
|
|
3073 (let ((from (nth 1 regexp))
|
|
3074 (to (nth 2 regexp))
|
|
3075 i j
|
|
3076 (result nil))
|
|
3077 (if (stringp from) (setq from (aref from 0)))
|
|
3078 (if (stringp to ) (setq to (aref to 0)))
|
|
3079 (setq i from
|
|
3080 j from)
|
|
3081 (while (<= i to)
|
|
3082 (while (not (aref *TREX-range-to* j))
|
|
3083 (TREX-inc j))
|
|
3084 (if (not (= i j)) (TREX-push (` (:range (, i) (, j))) result)
|
|
3085 (TREX-push i result))
|
|
3086 (TREX-inc j)
|
|
3087 (setq i j))
|
|
3088 (if (= (length result) 1) (car result)
|
|
3089 (` (:or (,@ (nreverse result))))))))))
|
|
3090 ((stringp regexp)
|
|
3091 (if (= (length regexp) 1)
|
|
3092 (aref regexp 0)
|
|
3093 regexp))
|
|
3094 ((numberp regexp)
|
|
3095 regexp)
|
|
3096 (t regexp)))
|
|
3097
|
|
3098 (defun FA-sort (FA)
|
|
3099 (let ((start (car FA))
|
|
3100 (alist (cdr FA)))
|
|
3101 (setq alist (sort alist 'TREX-lessp-car))
|
|
3102 (while alist
|
|
3103 (setcdr (car alist) (sort (cdr (car alist)) 'TREX-lessp-car))
|
|
3104 (setcdr (car alist ) (TREX-sort (cdr (car alist)) 'TREX-key-lessp 'cdr))
|
|
3105 (setq alist (cdr alist)))
|
|
3106 FA))
|
|
3107
|
|
3108 ;;;
|
|
3109 ;;; CHARSET functions:
|
|
3110 ;;;
|
|
3111 ;;; CHARSET ::= RANGE |
|
|
3112 ;;; (:or RANGE+) |
|
|
3113 ;;; (:nor RANGE+)
|
|
3114 ;;; RANGE+ ::= CHAR |
|
|
3115 ;;; (:range CHAR CHAR)
|
|
3116 ;;;
|
|
3117
|
|
3118 (defun CHARSET-rangep (charset)
|
|
3119 (or (numberp charset)
|
|
3120 (and (consp charset) (eq (car charset) ':range))))
|
|
3121
|
|
3122 (defun CHARSET-orp (charset)
|
|
3123 (and (consp charset) (eq (car charset) ':or)))
|
|
3124
|
|
3125 (defun CHARSET-range-from (range)
|
|
3126 (if (numberp range) range
|
|
3127 (nth 1 range)))
|
|
3128
|
|
3129 (defun CHARSET-range-to (range)
|
|
3130 (if (numberp range) range
|
|
3131 (nth 2 range)))
|
|
3132
|
|
3133 (defun CHARSET-range-make (from to)
|
|
3134 (if (= from to) from
|
|
3135 (list ':range from to)))
|
|
3136
|
|
3137 (defun CHARSET-membership (range charset)
|
|
3138 (let ((from (CHARSET-range-from range))
|
|
3139 (to (CHARSET-range-to range))
|
|
3140 (flag nil))
|
|
3141 (while (and charset flag1)
|
|
3142 (if (< from (CHARSET-range-from (car charset)))
|
|
3143 (setq charset (cdr charset))
|
|
3144 (setq flag t)))
|
|
3145 (and flag1 (<= to (CHARSET-range-to (car charset))))))
|
|
3146
|
|
3147 (defun CHARSET-not (charset)
|
|
3148 (cond((CHARSET-rangep charset)
|
|
3149 (list ':nor charset))
|
|
3150 ((CHARSET-orp charset)
|
|
3151 (cons ':nor (cdr charset)))
|
|
3152 (t
|
|
3153 (cons ':or (cdr charset)))))
|
|
3154
|
|
3155 (defun CHARSET-union (charset1 charset2)
|
|
3156 (cond((CHARSET-rangep charset1)
|
|
3157 (cond ((CHARSET-rangep charset2)
|
|
3158 (CHARSET-union-range-range charset1 charset2))
|
|
3159 ((CHARSET-orp charset2)
|
|
3160 (CHARSET-union-range-or charset1 charset2))
|
|
3161 (t
|
|
3162 (CHARSET-union-range-nor charset1 charset2))))
|
|
3163 ((CHARSET-orp charset1)
|
|
3164 (cond ((CHARSET-rangep charset2)
|
|
3165 (CHARSET-union-range-or charset2 charset1))
|
|
3166 ((CHARSET-orp charset2)
|
|
3167 (CHARSET-union-or-or charset1 charset2))
|
|
3168 (t
|
|
3169 (CHARSET-union-or-nor charset1 charset2))))
|
|
3170 (t ;;; (CHARSET-norp charset1)
|
|
3171 (cond((CHARSET-rangep charset2)
|
|
3172 (CHARSET-union-range-nor charset2 charset1))
|
|
3173 ((CHARSET-orp charset2)
|
|
3174 (CHARSET-union-or-nor charset2 charset1))
|
|
3175 (t
|
|
3176 (CHARSET-union-nor-nor charset1 charset2))))))
|
|
3177
|
|
3178 (defun CHARSET-union-range-range (range1 range2)
|
|
3179 (let ((from1 (CHARSET-range-from range1))
|
|
3180 (to1 (CHARSET-range-to range1))
|
|
3181 (from2 (CHARSET-range-from range2))
|
|
3182 (to2 (CHARSET-range-to range2)))
|
|
3183 (cond((< to1 from2)
|
|
3184 (list ':or range1 range2))
|
|
3185 (t ;;; (<= from2 (1+ to1))
|
|
3186 (cond((<= to1 to2) ;;; (<= from2 to1 to2)
|
|
3187 (CHARSET-range-make (min from1 from2) to2))
|
|
3188 ((<= from1 to2) ;;; (<= from1 to2 to1)
|
|
3189 (CHARSET-range-make (min from1 from2) to1))
|
|
3190 (t ;;; (<= to2 from1 to1)
|
|
3191 (list ':or range2 range1)))))))
|
|
3192
|
|
3193 (defun CHARSET-union-range-or (range or)
|
|
3194 (cons ':or (CHARSET-union-range-or* range (cdr or))))
|
|
3195
|
|
3196 (defun CHARSET-union-range-or* (range or-body)
|
|
3197 (let ((from (CHARSET-range-from range))
|
|
3198 (to (CHARSET-range-to range))
|
|
3199 (part1 nil))
|
|
3200 (let ((flag nil))
|
|
3201 (while (and or-body (null flag))
|
|
3202 (let ((next (car or-body)))
|
|
3203 (if (< (CHARSET-range-from next) from)
|
|
3204 ;;; from[i] < from
|
|
3205 (if (< (CHARSET-range-to next) from)
|
|
3206 ;;; to[i] < from
|
|
3207 (setq part1 (cons next part1)
|
|
3208 or-body (cdr or-body))
|
|
3209 ;;; from[i] < from <= to[i]
|
|
3210 (setq from (CHARSET-range-from next)
|
|
3211 flag t))
|
|
3212 ;;; from <= from[1]
|
|
3213 ;;; to[i-1] < from <= from[i]
|
|
3214 (setq flag t)))))
|
|
3215 ;;; part1 < from <= from[i]
|
|
3216 (if (and part1 (<= (1+ (CHARSET-range-to (car part1))) from))
|
|
3217 (setq from (CHARSET-range-from (car part1))
|
|
3218 part1 (cdr part1)))
|
|
3219 ;;; part1 << from <= from[i]
|
|
3220 (let ((flag nil))
|
|
3221 (while (and or-body (null flag))
|
|
3222 (let ((next (car or-body)))
|
|
3223 (if (< (CHARSET-range-from next) to)
|
|
3224 ;;; from[j] < from
|
|
3225 (if (< (CHARSET-range-to next) to)
|
|
3226 ;;; to[j] < to
|
|
3227 (setq or-body (cdr or-body))
|
|
3228 ;;; from[j] < to <= to[j]
|
|
3229 (setq to (CHARSET-range-to next)
|
|
3230 flag t))
|
|
3231 ;;; to <= from[1]
|
|
3232 ;;; to[j-1] < to <= from[j]
|
|
3233 (setq flag t)))))
|
|
3234 ;;; part2 < to <= from[j]
|
|
3235 (if (and or-body (<= (CHARSET-range-from (car or-body)) (1+ to)))
|
|
3236 (setq to (CHARSET-range-to (car or-body))
|
|
3237 or-body (cdr or-body)))
|
|
3238 ;;; part2 <= to << from[j]
|
|
3239 (nconc (reverse part1)
|
|
3240 (cons (CHARSET-range-make from to)
|
|
3241 or-body))))
|
|
3242
|
|
3243
|
|
3244 (defun CHARSET-union-range-nor (range nor)
|
|
3245 (let ((from (CHARSET-range-from range))
|
|
3246 (to (CHARSET-range-to range))
|
|
3247 (nor-body (cdr nor)))
|
|
3248
|
|
3249 ))
|
|
3250
|
|
3251 (defun CHARSET-union-or-or (or1 or2)
|
|
3252 (cons ':or (CHARSET-union-or*-or* (cdr or1) (cdr or2))))
|
|
3253
|
|
3254 (defun CHARSET-union-or*-or* (or1-body or2-body)
|
|
3255 (let ((result-body or2-body))
|
|
3256 (while or1-body
|
|
3257 (setq result-body
|
|
3258 (CHARSET-union-range-or* (car or1-body) result-body))
|
|
3259 (setq or1-body (cdr or1-body)))
|
|
3260 result-body))
|
|
3261
|
|
3262 (defun CHARSET-union-or-nor (or nor)
|
|
3263 )
|
|
3264
|
|
3265 (defun CHARSET-union-nor-nor (nor1 nor2)
|
|
3266 (cons ':nor (CHARSET-intersection-or*-or* (cdr nor1) (cdr nor2))))
|
|
3267
|
|
3268 (defun CHARSET-intersection (charset1 charset2)
|
|
3269 (cond((CHARSET-rangep charset1)
|
|
3270 (cond ((CHARSET-rangep charset2)
|
|
3271 (CHARSET-intersection-range-range charset1 charset2))
|
|
3272 ((CHARSET-orp charset2)
|
|
3273 (CHARSET-intersection-range-or charset1 charset2))
|
|
3274 (t
|
|
3275 (CHARSET-intersection-range-nor charset1 charset2))))
|
|
3276 ((CHARSET-orp charset1)
|
|
3277 (cond ((CHARSET-rangep charset2)
|
|
3278 (CHARSET-intersection-range-or charset2 charset1))
|
|
3279 ((CHARSET-orp charset2)
|
|
3280 (CHARSET-intersection-or-or charset1 charset2))
|
|
3281 (t
|
|
3282 (CHARSET-intersection-or-nor charset1 charset2))))
|
|
3283 (t ;;; (CHARSET-norp charset1)
|
|
3284 (cond((CHARSET-rangep charset2)
|
|
3285 (CHARSET-intersection-range-nor charset2 charset1))
|
|
3286 ((CHARSET-orp charset2)
|
|
3287 (CHARSET-intersection-or-nor charset2 charset1))
|
|
3288 (t
|
|
3289 (CHARSET-intersection-nor-nor charset1 charset2))))))
|
|
3290
|
|
3291 (defun CHARSET-intersection-range-or (range or)
|
|
3292 (CHARSET-intersection-range-or* range (cdr or)))
|
|
3293
|
|
3294 (defun CHARSET-intersection-range-or* (range or-body)
|
|
3295 (let ((from (CHARSET-range-from range))
|
|
3296 (to (CHARSET-range-to range))
|
|
3297 (part2 nil))
|
|
3298 (let ((flag nil))
|
|
3299 (while (and or-body (null flag))
|
|
3300 (let ((next (car or-body)))
|
|
3301 (if (< (CHARSET-range-from next) from)
|
|
3302 ;;; from[i] < from
|
|
3303 (if (< (CHARSET-range-to next) from)
|
|
3304 ;;; to[i] < from
|
|
3305 (setq or-body (cdr or-body))
|
|
3306 ;;; from[i] < from <= to[i]
|
|
3307 (setq flag t))
|
|
3308 ;;; from <= from[1]
|
|
3309 ;;; to[i-1] < from <= from[i]
|
|
3310 (setq flag t)))))
|
|
3311 ;;; from[i] < from <= to[i]
|
|
3312 ;;; from <= from[1]
|
|
3313 ;;; to[i-1] < from <= from[i]
|
|
3314 (let ((flag nil))
|
|
3315 (while (and or-body (null flag))
|
|
3316 (let ((next (car or-body)))
|
|
3317 (if (<= (CHARSET-range-from next) to)
|
|
3318 ;;; from[j] <= to
|
|
3319 (if (<= (CHARSET-range-to next) to)
|
|
3320 ;;; to[j] <= to
|
|
3321 (setq part2 (cons next part2)
|
|
3322 or-body (cdr or-body))
|
|
3323 ;;; from[j] <= to < to[j]
|
|
3324 (setq part2 (cons next part2)
|
|
3325 or-body (cdr or-body)
|
|
3326 flag t)
|
|
3327 ;;; to < from[1]
|
|
3328 ;;; to[j-1] <= to < from[j]
|
|
3329 (setq flag t)))))
|
|
3330 ;;; from[j] <= to < to[j]
|
|
3331 ;;; to < from[1]
|
|
3332 ;;; to[j-1] <= to < from[j]
|
|
3333 (cond ((null part2) nil)
|
|
3334 ((= (length part2) 1)
|
|
3335 (list (CHARSET-range-make (max from (CHARSET-range-from (car part2)))
|
|
3336 (min to (CHARSET-range-to (car part2))))))
|
|
3337 (t
|
|
3338 (setcar part2 (CHARSET-range-make (CHARSET-range-from (car part2))
|
|
3339 (min to (CHARSET-range-to (car part2)))))
|
|
3340 (setq part2 (nreverse part2))
|
|
3341 (setcar part2 (CHARSET-range-make (max from (CHARSET-range-from (car part2)))
|
|
3342 (CHARSET-range-to (car part2))))
|
|
3343 part2)))))
|
|
3344
|
|
3345 (defun CHARSET-intersection-range-nor (range nor)
|
|
3346 (CHARSET-intersection-range-nor* range (cdr nor)))
|
|
3347
|
|
3348 (defun CHARSET-intersecion-range-nor* (range nor-body)
|
|
3349 (let ((from (CHARSET-range-from range))
|
|
3350 (to (CHARSET-range-to range)))
|
|
3351 ))
|
|
3352
|
|
3353 ;;; (and (or a b) c) == (or (and a c) (and b c))
|
|
3354
|
|
3355 (defun CHARSET-intersection-or-or (or1 or2)
|
|
3356 (let ((result nil)
|
|
3357 (or1-body (cdr or1))
|
|
3358 (or2-body (cdr or2)))
|
|
3359 (while or1-body
|
|
3360 (setq result (CHARSET-union-or*-or*
|
|
3361 (CHARSET-intersection-range-or* (car or1-body) or2-body)
|
|
3362 result))
|
|
3363 (setq or1-body (cdr or1-body)))
|
|
3364 (if (= (length result) 1) (car result)
|
|
3365 (cons ':or result))))
|
|
3366
|
|
3367 (defun CHARSET-intersection-or-nor (or nor)
|
|
3368 )
|
|
3369
|
|
3370 ;;; (and (not or1) (not or2)) == (not (or or1 or2))
|
|
3371
|
|
3372 (defun CHARSET-intersection-nor-nor (nor1 nor2)
|
|
3373 (cons ':nor (CHARSET-union-or*-or* (cdr nor1) (cdr nor2))))
|
|
3374
|
|
3375 (defun FA-compaction (FA)
|
|
3376 (let ((start (car FA))
|
|
3377 (alist (cdr FA)))
|
|
3378 (setq alist (TREX-sort alist 'TREX-key-lessp 'car))
|
|
3379 (while alist
|
|
3380 (let ((table (cdr (car alist)))
|
|
3381 (newtable nil)
|
|
3382 (keys nil) (next nil))
|
|
3383 (setq table (TREX-sort table '< 'car))
|
|
3384 (while table
|
|
3385 (setq next (cdr (car table)))
|
|
3386 (TREX-push (car (car table)) keys)
|
|
3387 (setq table (cdr table))
|
|
3388 (while (and table (eq next (cdr (car table))))
|
|
3389 (TREX-push (car (car table)) keys)
|
|
3390 (setq table (cdr table)))
|
|
3391 (setq keys (reverse (sort keys 'TREX-key-lessp)))
|
|
3392 (let ((newkeys nil))
|
|
3393 (setq newkeys (car keys)
|
|
3394 keys (cdr keys))
|
|
3395 (while keys
|
|
3396 (cond((numberp (car keys))
|
|
3397 (cond((numberp (car newkeys))
|
|
3398 (if (= (1+ (car keys)) (car newkeys))
|
|
3399 (setcar newkeys (list ':range (car keys) (car newkeys)))
|
|
3400 (TREX-push (car keys) newkeys)))
|
|
3401 ((and (consp (car newkeys)) (eq (car (car newkeys)) ':range)))))))))))))
|
|
3402
|
|
3403
|
|
3404
|
|
3405 (defun FA-dump2 (table)
|
|
3406 (let ((start (car table))
|
|
3407 (l (cdr table)))
|
|
3408 (princ (format "\nstart = %d\n" start))
|
|
3409 (while l
|
|
3410 (princ (format "%3d: " (car (car l))))
|
|
3411 (let ((alist (cdr (car l))))
|
|
3412 (cond ((numberp (car (car alist)))
|
|
3413 (princ (format "\\%03o(%c) -> %s\n" (car (car alist))(car (car alist)) (cdr (car alist)))))
|
|
3414 ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC)))
|
|
3415 (princ (format "(%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist)))))
|
|
3416 ((and (consp (car (car alist))) (eq (car (car (car alist))) ':range))
|
|
3417 (princ (format "(:range \\%03o \\%03o) -> %s\n"
|
|
3418 (nth 1 (car (car alist))) (nth 2 (car (car alist)))
|
|
3419 (cdr (car alist)))))
|
|
3420 (t
|
|
3421 (princ (format "%s -> %s\n" (car (car alist)) (cdr (car alist))))))
|
|
3422 (setq alist (cdr alist))
|
|
3423 (while alist
|
|
3424 (cond ((numberp (car (car alist)))
|
|
3425 (princ (format " \\%03o(%c) -> %s\n" (car (car alist))(car (car alist)) (cdr (car alist)))))
|
|
3426 ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC)))
|
|
3427 (princ (format " (%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist)))))
|
|
3428 ((and (consp (car (car alist))) (eq (car (car (car alist))) ':range))
|
|
3429 (princ (format " (:range \\%03o \\%03o) -> %s\n"
|
|
3430 (nth 1 (car (car alist))) (nth 2 (car (car alist)))
|
|
3431 (cdr (car alist)))))
|
|
3432 (t
|
|
3433 (princ (format " %s -> %s\n" (car (car alist)) (cdr (car alist))))))
|
|
3434 (setq alist (cdr alist))))
|
|
3435 (setq l (cdr l)))))
|
|
3436
|
|
3437 ;;;function re-compile REGEXP
|
|
3438 ;;;Compile REGEXP by GNU Emacs original regexp compiler,
|
|
3439 ;;;and return information of the compiled code by a vector of length 11:
|
|
3440 ;;; [ COMPILED-PATTERN (string)
|
|
3441 ;;; RE-NSUB REGS-ALLOCATED CAN-BE-NULL NEWLINE-ANCHOR (integers)
|
|
3442 ;;; NO-SUB NOT-BOL NOT-EOL SYNTAX (integers)
|
|
3443 ;;; FASTMAP TRANSLATE (string) ].
|
|
3444 ;;;
|
|
3445
|
|
3446 (defun print-compiled-pattern (compiled-code)
|
|
3447 (let ((compiled-pattern (aref compiled-code 0))
|
|
3448 (re-nsub (aref compiled-code 1))
|
|
3449 (regs-allocated (aref compiled-code 2))
|
|
3450 (can-be-null (aref compiled-code 3))
|
|
3451 (newline-anchor (aref compiled-code 4))
|
|
3452 (no-sub (aref compiled-code 5))
|
|
3453 (not-bol (aref compiled-code 6))
|
|
3454 (not-eol (aref compiled-code 7))
|
|
3455 (syntax (aref compiled-code 8))
|
|
3456 (fastmap (aref compiled-code 9))
|
|
3457 (translate (aref compiled-code 10)))
|
|
3458 (regexp-code-dump compiled-pattern)
|
|
3459 ;;; fastmap
|
|
3460 (if fastmap (print-fastmap fastmap "fastmap"))
|
|
3461 (princ (format "re_nsub: %d\n" re-nsub))
|
|
3462 (princ (format "regs-alloc: %d\n" regs-allocated))
|
|
3463 (princ (format "can-be-null: %d\n" can-be-null))
|
|
3464 (princ (format "newline-anchor: %d\n" newline-anchor))
|
|
3465 (princ (format "no-sub: %d\n" no-sub))
|
|
3466 (princ (format "not-bol: %d\n" not-bol))
|
|
3467 (princ (format "not-eol: %d\n" not-eol))
|
|
3468 (princ (format "syntax: %d\n" syntax))
|
|
3469 (if translate (print-translate translate))
|
|
3470 ;;; translate
|
|
3471 nil
|
|
3472 ))
|
|
3473
|
|
3474 (defun print-fastmap (fastmap name)
|
|
3475 (if fastmap
|
|
3476 (progn
|
|
3477 (princ (format "%s:[" name))
|
|
3478 (let ((max (length fastmap))
|
|
3479 (i 0))
|
|
3480 (while (< i max)
|
|
3481 (if (not (= (aref fastmap i) 0))
|
|
3482 (princ (format "%c" i)))
|
|
3483 (setq i (1+ i))))
|
|
3484 (princ "]\n"))))
|
|
3485
|
|
3486 (defun print-translate (trans)
|
|
3487 (if trans
|
|
3488 (progn
|
|
3489 (princ "translate:\n")
|
|
3490 (let ((max (length trans))
|
|
3491 (i 0))
|
|
3492 (while (< i max)
|
|
3493 (if (not (= (aref trans i) i))
|
|
3494 (princ (format " %c --> %c" i (aref trans i))))
|
|
3495 (setq i (1+ i))))
|
|
3496 (princ "\n"))))
|
|
3497
|
|
3498 (defun re-compile-and-dump (regexp)
|
|
3499 (print-compiled-pattern (re-compile regexp)))
|
|
3500
|
|
3501
|
|
3502
|
|
3503
|
|
3504
|
|
3505
|