Mercurial > hg > xemacs-beta
diff lisp/mule/mule-trex.el @ 70:131b0175ea99 r20-0b30
Import from CVS: tag r20-0b30
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:02:59 +0200 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lisp/mule/mule-trex.el Mon Aug 13 09:02:59 2007 +0200 @@ -0,0 +1,3505 @@ +;; TREX: Tools for Regluar EXpressions +;; +;; Regular Expression Compiler +;; +;; Coded by S.Tomura <tomura@etl.go.jp> + +;; Copyright (C) 1992 Free Software Foundation, Inc. + +;; This file is part of XEmacs. +;; This file contains Japanese characters + +;; XEmacs is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; XEmacs is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with XEmacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +(defvar TREX-version "0.41") +;;; Last modified date: Thu Jun 15 13:07:39 1995 + +;;; 95.6.15 modified by S.Tomura <tomura@etl.go.jp> +;;; +;;; $BFbB"$N(Bre_compile_pattern $B$HF1MM$K(B case-fold-search $B$K$h$C$F!"(B +;;; translate $B$9$k$h$&$KJQ99$7$?!#(B +;;; +;;; 95.6.14 modified by S.Tomura <tomura@etl.go.jp> +;;; print-translate $B$rDI2C!#(B<0.38> +;;; print-fastmap $B$rDI2C!#(B +;;; +;;; start_memory, end_memory $B$NBh(B2$B0z?t$r@8@.$9$k$?$a$K!"(B:mark $B$NFbIt9=(B +;;; $BB$$rJQ99$7$?!#(B +;;; +;;; re-compile-and-dump, regexp-compile-and-dump $B$rDI2C!#(B +;;; +;;; 95.6.13 +;;; regexp19.c $B$KBP1~$7$F(B start_memory, end_memory $B$N(B dump $B%k!<%A%s$r=$@5(B +;;; +;;; $B$9$Y$-$3$H!'(B +;;; +;;; (1) \(\)* +;;; (2) $B;^$N=gHV(B +;;; (3) $B0UL#$N$J$$%0%k!<%W;2>H$N8!=P(B "\(a\\)\\2"$B$J$I(B + +(defmacro TREX-inc (symbol &optional delta) + (list 'setq symbol (if delta (list '+ symbol delta) + (list '1+ symbol)))) + +(defmacro TREX-dec (symbol &optional delta) + (list 'setq symbol (if delta (list '- symbol delta) + (list '1- symbol)))) + +(defmacro num (sym) + (list 'num* (list 'quote sym))) + +(defun num* (sym) + (TREX-read-hexa (substring (symbol-name sym) 2))) + +(defun TREX-read-hexa (str) + (let ((result 0) (i 0) (max (length str))) + (while (< i max) + (let ((ch (aref str i))) + (cond((and (<= ?0 ch) (<= ch ?9)) + (setq result (+ (* result 16) (- ch ?0)))) + ((and (<= ?a ch) (<= ch ?f)) + (setq result (+ (* result 16) (+ (- ch ?a) 10)))) + ((and (<= ?A ch) (<= ch ?F)) + (setq result (+ (* result 16) (+ (- ch ?A) 10))))) + (TREX-inc i))) + result)) + +;;; 1 bytes : 0x00 <= C11 <= 0x7F +;;; n bytes : 0x80 == LCCMP +;;; 2 bytes 0xA0 <= LC <= 0xAF +;;; 3 bytes 0xB0 <= LC <= 0xBB +;;; 4 bytes 0xBC <= LC <= 0xBE +;;; 2 bytes : 0x81 <= LC <= 0x8F +;;; 3 bytes : 0x90 <= LC <= 0x9B +;;; 4 bytes : 0x9C <= LC <= 0x9E + + +(defun TREX-char-octets (str index) + (let ((max (length str))) + (if (or (< index 0) (<= max index)) 0 + (let ((ch (aref str index)) + (bytes)) + (setq bytes + (cond ((<= ch (num 0x7f)) 1) + ((= ch (num 0x80)) + (let ((max (length str)) + (i index)) + (while (and (< i max) + (<= (num 0xa0) (aref str i)) + (<= (aref str i) (num 0xbe))) + (setq ch (aref str i)) + (cond ((<= ch (num 0xaf)) (TREX-inc i 2)) + ((<= ch (num 0xbb)) (TREX-inc i 3)) + ((<= ch (num 0xbe)) (TREX-inc i 4)))) + (- i index))) + ((<= ch (num 0x8f)) 2) + ((<= ch (num 0x9b)) 3) + ((<= ch (num 0x9e)) 4) + (t 1))) + (if (<= (+ index bytes) max) bytes 1))))) + +(defun TREX-comp-charp (str index) + (= (aref str index) (num 0x80))) + +;;; 0x00 <= C11 <= 0x7F : 1 bytes +;;; Type 1-1 C11 +;;; 0x80 == LCCMP : n bytes +;;; Type N LCCMP LCN1 C11 ... LCN2 C21 ... LCNn Cn1 ... +;;; 0xA0 <= LCN* <= 0xBE +;;; LCN* = LC + 0x20 +;;; LCN* = 0xA0 (ASCII) +;;; 0x81 <= LC1 <= 0x8F : 2 bytes +;;; Type 1-2 LC1 C11 : +;;; 0xA0 <= C11 <= 0xFF +;;; 0x90 <= LC2 <= 0x99 : 3 bytes +;;; Type 2-3 LC2 C21 C22 +;;; 0xA0 <= C21 <= 0xFF +;;; 0xA0 <= C22 <= 0xFF +;;; 0x9A == LCPRV1 : 3 bytes +;;; Type 1-3 LCPRV1 LC12 C11 +;;; 0xA0 <= LC12 <= 0xB7 +;;; 0xA0 <= C11 <= 0xFF +;;; 0x9B == LCPRV1 : 3 bytes +;;; Type 1-3 LCPRV1 LC12 C11 +;;; 0xB8 <= LC12 <= 0xBF +;;; 0xA0 <= C11 <= 0xFF +;;; 0x9C == LCPRV2 : 4 bytes +;;; Type 2-4 LCPRV2 LC22 C21 C22 +;;; 0xC0 <= LC22 <= 0xC7 +;;; 0xA0 <= C21 <= 0xFF +;;; 0xA0 <= C22 <= 0xFF +;;; 0x9D == LCPRV2 : 4 bytes +;;; Type 2-4 LCPRV2 LC22 C21 C22 +;;; 0xC8 <= LC22 <= 0xDF +;;; 0xA0 <= C21 <= 0xFF +;;; 0xA0 <= C22 <= 0xFF +;;; 0x9E == LCPRV3 : 4 bytes +;;; Type 3-4 LCPRV3 C31 C32 C33 +;;; 0xA0 <= C31 <= 0xBF +;;; 0xA0 <= C32 <= 0xFF +;;; 0xA0 <= C33 <= 0xFF +;;; char = [0x00-0x7f]\| +;;; 0x80 +;;; \(0xa0[0xa0-0xff]\| +;;; [0xa1-0xaf][0xa0-0xff]\| +;;; [0xb0-0xb9][0xa0-0xff][0xa0-0xff]\| +;;; 0xba[0xa0-0xb7][0xa0-0xff]\| +;;; 0xbb[0xb8-0xbf][0xa0-0xff]\| +;;; 0xbc[0xc0-0xc7][0xa0-0xff][0xa0-0xff]\| +;;; 0xbd[0xc8-0xdf][0xa0-0xff][0xa0-0xff]\| +;;; 0xbe[0xa0-0xbf][0xa0-0xff][0xa0-0xff] +;;; \)*\| +;;; [0x81-0x8f][0xa0-0xff]\| +;;; [0x90-0x99][0xa0-0xff][0xa0-0xff]\| +;;; 0x9a[0xa0-0xb7][0xa0-0xff]\| +;;; 0x9b[0xb8-0xbf][0xa0-0xff]\| +;;; 0x9c[0xc0-0xc7][0xa0-0xff][0xa0-0xff]\| +;;; 0x9d[0xc8-0xdf][0xa0-0xff][0xa0-0xff]\| +;;; 0x9e[0xa0-0xbf][0xa0-0xff][0xa0-0xff] + +(defun regexp-make-or (&rest body) + (cons ':or body)) + +(defun regexp-make-seq (&rest body) + (cons ':seq body)) + +(defun regexp-make-star (regexp) + (list ':star regexp)) + +(defun regexp-make-range (from to) + (list 'CHARSET (list ':range from to))) + + +(defvar regexp-allchar-regexp + (regexp-make-or + (regexp-make-range 0 (num 0x7f)) + (regexp-make-seq + (num 0x80) + (regexp-make-star + (regexp-make-or + (regexp-make-seq + (num 0xa0) + (regexp-make-range (num 0xa0) (num 0xff))) + (regexp-make-seq + (regexp-make-range (num 0xa1) (num 0xaf)) + (regexp-make-range (num 0xa0) (num 0xff))) + (regexp-make-seq + (regexp-make-range (num 0xb0) (num 0xb9)) + (regexp-make-range (num 0xa0) (num 0xff)) + (regexp-make-range (num 0xa0) (num 0xff))) + (regexp-make-seq + (num 0xba) + (regexp-make-range (num 0xa0) (num 0xb7)) + (regexp-make-range (num 0xa0) (num 0xff))) + (regexp-make-seq + (num 0xbb) + (regexp-make-range (num 0xb8) (num 0xbf)) + (regexp-make-range (num 0xa0) (num 0xff))) + (regexp-make-seq + (num 0xbc) + (regexp-make-range (num 0xc0) (num 0xc7)) + (regexp-make-range (num 0xa0) (num 0xff)) + (regexp-make-range (num 0xa0) (num 0xff))) + (regexp-make-seq + (num 0xbd) + (regexp-make-range (num 0xc8) (num 0xdf)) + (regexp-make-range (num 0xa0) (num 0xff)) + (regexp-make-range (num 0xa0) (num 0xff))) + (regexp-make-seq + (num 0xbe) + (regexp-make-range (num 0xa0) (num 0xbf)) + (regexp-make-range (num 0xa0) (num 0xff)) + (regexp-make-range (num 0xa0) (num 0xff)))))) + (regexp-make-seq + (regexp-make-range (num 0x81) (num 0x8f)) + (regexp-make-range (num 0xa0) (num 0xff))) + (regexp-make-seq + (regexp-make-range (num 0x90) (num 0x99)) + (regexp-make-range (num 0xa0) (num 0xff)) + (regexp-make-range (num 0xa0) (num 0xff))) + (regexp-make-seq + (num 0x9a) + (regexp-make-range (num 0xa0) (num 0xb7)) + (regexp-make-range (num 0xa0) (num 0xff))) + (regexp-make-seq + (num 0x9b) + (regexp-make-range (num 0xb8) (num 0xbf)) + (regexp-make-range (num 0xa0) (num 0xff))) + (regexp-make-seq + (num 0x9c) + (regexp-make-range (num 0xc0) (num 0xc7)) + (regexp-make-range (num 0xa0) (num 0xff)) + (regexp-make-range (num 0xa0) (num 0xff))) + (regexp-make-seq + (num 0x9d) + (regexp-make-range (num 0xc8) (num 0xdf)) + (regexp-make-range (num 0xa0) (num 0xff)) + (regexp-make-range (num 0xa0) (num 0xff))) + (regexp-make-seq + (num 0x9e) + (regexp-make-range (num 0xa0) (num 0xbf)) + (regexp-make-range (num 0xa0) (num 0xff)) + (regexp-make-range (num 0xa0) (num 0xff))))) + +;;;; +;;;; +;;;; + +(defun TREX-string-reverse (str) + (if (<= (length str) 1) str + (let ((result (make-string (length str) 0)) + (i 0) + (j (1- (length str)))) + (while (<= 0 j) + (aset result i (aref str j)) + (TREX-inc i) + (TREX-dec j)) + result))) + +(defun TREX-string-forward-anychar (str start) + (and (stringp str) (numberp start) + (let ((max (length str))) + (and (<= 0 start) + (< start max) + (+ start (TREX-char-octets str start)))))) + +(defmacro TREX-init (symbol value) + (` (if (null (, symbol)) + (setq (, symbol) (, value))))) + +(defmacro TREX-push (val symbol) + (list 'setq symbol (list 'cons val symbol))) + +(defun TREX-member (elm list pred) + (while (and list (not (funcall pred elm (car list)))) + (setq list (cdr list))) + list) + +(defun TREX-memequal (elm list) + (while (and list (not (equal elm (car list)))) + (setq list (cdr list))) + list) + +(defun TREX-find (elm list) + (let ((pos 0)) + (while (and list (not (equal elm (car list)))) + (setq list (cdr list)) + (TREX-inc pos)) + (if list pos + nil))) + +(defun TREX-find-if (pred list) + (let ((pos 0)) + (while (and list (not (funcall pred (car list)))) + (TREX-inc pos) + (setq list (cdr list))) + (if list pos + nil))) + +(defun TREX-firstn (list n) + (if (or (<= n 0) (null list)) nil + (cons (car list) (TREX-firstn (cdr list) (1- n))))) + +(defun TREX-delete-duplicate (list) + (let ((result nil)) + (while list + (let ((elm (car list))) + (if (not (TREX-memequal elm result)) + (TREX-push elm result))) + (setq list (cdr list))) + (nreverse result))) + +(defun TREX-delete (elm list) + (let ((result nil)) + (while list + (if (not (equal elm (car list))) + (TREX-push (car list) result)) + (setq list (cdr list))) + (nreverse result))) + +(defun TREX-string-to-list (str) + (let ((result nil) + (i 0) + (max (length str))) + (while (< i max) + (TREX-push (aref str i) result) + (TREX-inc i)) + (nreverse result))) + +(defun TREX-sort (list lessp &optional key) + (if (null key) + (sort list lessp) + (sort list (function (lambda (x y) (funcall lessp (funcall key x) (funcall key y))))))) + +(defun TREX-key-lessp (x y) + (cond((symbolp x) + (cond ((symbolp y) + (string-lessp x y)) + (t;; (not (symbolp)) + t))) + ((numberp x) + (cond ((numberp y) + (< x y)) + ((and (consp y) (eq (car y) ':range)) + (< x (nth 1 y))) + (t nil))) + ((and (consp x) (eq (car x) ':range)) + (cond ((and (consp y) (eq (car y) ':range)) + (< (nth 2 x) (nth 1 y))) + ((numberp y) + (< (nth 2 x) y)) + (t nil))) + (t nil))) + +(defun TREX-lessp-car (x y) + (let ((x (car x)) + (y (car y))) + (TREX-key-lessp x y))) + +(defmacro TREX-define-enum (&rest list) + (list 'TREX-define-enum* (list 'quote list))) + +(defun TREX-define-enum* (list) + (let ((i 0)) + (while list + (set (car list) i) + (TREX-inc i) + (setq list (cdr list))))) + +;;; +;;; regexp-parse +;;; + +;;; +;;; $B@55,I=8=(B(regular expression) +;;; +;;; . single character except a newline +;;; REG* more than zero +;;; REG+ at least once +;;; REG? once or not at all +;;; [...] character set +;;; [^...] character not set +;;; ^ beginning of line +;;; $ end of line +;;; \ quote +;;; \| alternative +;;; \( ... \) group and mark +;;; \DIGIT +;;; \` beginning of buffer +;;; \' end of buffer +;;; \b beginning of word or end of word +;;; \B not \b +;;; \< beginning of word +;;; \> end of word +;;; +;;; \w word-constituent character +;;; \W not \w +;;; \sCODE syntax CODE character +;;; \SCODE not \sCODE + +;;; +;;; REG0 ::= REG1 | +;;; REG1 "\\|" REG0 +;;; +;;; REG1 ::= REG2 | +;;; REG2 REG1 +;;; +;;; REG2 ::= REG3 | +;;; REG2 "*" | +;;; REG2 "+" | +;;; REG2 "?" | +;;; +;;; REG3 ::= "." | +;;; "[" ... "]" | +;;; "[" "^" ... "]" | +;;; "^" | +;;; "$" | +;;; "\\" DIGIT | +;;; "\\(" REG0 "\\)" + +;;; $B>H9g$O@55,I=8=$N:8$+$i1&$X9T$o$l$k!%(B + +(defvar *regexp-parse-translate* nil + "$B@55,I=8=$rFI$_9~$_Cf$K;HMQ$9$k(B translate table.\n +case-fold-search $B$NCM$K$h$C$F(B downcasetable $B$r@_Dj$9$k!#(B") + +(defun regexp-parse-translate-char-string (str) + (if (and *regexp-parse-translate* + (= (length str) 1)) + ;;; $BK\Ev$O(B destructive $B$G$b(B OK + (char-to-string (aref *regexp-parse-translate* (aref str 0))) + str)) + +(defvar *regexp-word-definition* nil) + +(defvar *regexp-parse-index* nil) +(defvar *regexp-parse-end* nil) +(defvar *regexp-parse-str* nil) +(defvar *regexp-parse-regno* 1) + +(defun regexp-error (&optional reason) + (if (null reason) (setq reason "Bad regexp")) + (error "Regexp-parse::%s \"%s\" * \"%s\"" reason (substring *regexp-parse-str* 0 *regexp-parse-index*) + (substring *regexp-parse-str* *regexp-parse-index*))) + +(defun word-parse (pattern) + (let ((*regexp-word-definition* t)) + (regexp-parse pattern))) + +(defun regexp-parse (pattern) + (let*((*regexp-parse-str* pattern) + (*regexp-parse-index* 0) + (*regexp-parse-end* (length pattern)) + (*regexp-parse-regno* 1) + (result (regexp-parse-0))) + (if (<= *regexp-parse-end* *regexp-parse-index*) + result + (regexp-error)))) + +(defun regexp-parse-0 () + (let* ((result (regexp-parse-1))) + (cond((<= *regexp-parse-end* *regexp-parse-index*) + result) + ((and (< (1+ *regexp-parse-index*) *regexp-parse-end*) + (= (aref *regexp-parse-str* *regexp-parse-index*) ?\\) + (= (aref *regexp-parse-str* (1+ *regexp-parse-index*)) ?|)) + (TREX-inc *regexp-parse-index* 2) + (list ':or result (regexp-parse-0))) + (t result)))) + +(defun regexp-parse-1 () + (let ((results nil) + (result2 nil)) + (while (setq result2 (regexp-parse-2)) + (TREX-push result2 results)) + (if results + (if (cdr results) + (cons ':seq (nreverse results)) + (car results)) + nil))) + +(defun regexp-parse-2 () + (let ((result (regexp-parse-3))) + (while (and (< *regexp-parse-index* *regexp-parse-end*) + (TREX-memequal (aref *regexp-parse-str* *regexp-parse-index*) + '(?* ?+ ??))) + (let ((ch (aref *regexp-parse-str* *regexp-parse-index*))) + (TREX-inc *regexp-parse-index*) + (setq result + (cond((= ch ?*) (list ':star result)) + ((= ch ?+) (list ':plus result)) + ((= ch ??) (list ':optional result)))))) + result)) + +(defun regexp-parse-3 () + (if (<= *regexp-parse-end* *regexp-parse-index*) + nil + (let* ((start *regexp-parse-index*) + (i *regexp-parse-index*) + (end *regexp-parse-end*) + (ch (aref *regexp-parse-str* i))) + (TREX-inc *regexp-parse-index*) + (cond ((= ch ?.) '(ANYCHAR)) + ((= ch ?^) '(BEGLINE)) + ((= ch ?$) '(ENDLINE)) + ((= ch ?\[) + (regexp-parse-charset)) + ((= ch ?\]) + (setq *regexp-parse-index* start) + nil) + ((= ch ?*) + (setq *regexp-parse-index* start) + nil) + ((= ch ?+) + (setq *regexp-parse-index* start) + nil) + ((= ch ??) + (setq *regexp-parse-index* start) + nil) + ((and (= ch ?\\) (< (1+ i) end)) + (setq ch (aref *regexp-parse-str* (1+ i))) + (TREX-inc i) + (TREX-inc *regexp-parse-index*) + (cond ((= ch ?| ) + (setq *regexp-parse-index* start) + nil) + ((= ch ?\( ) + (if (< 9 *regexp-parse-regno*) + (regexp-error "Too many parenth")) + (let ((regexp-parse-regno *regexp-parse-regno*)) + (TREX-inc *regexp-parse-regno*) + (let ((result (regexp-parse-0))) + + (cond((and (< (1+ *regexp-parse-index*) *regexp-parse-end*) + (= (aref *regexp-parse-str* *regexp-parse-index*) ?\\ ) + (= (aref *regexp-parse-str* (1+ *regexp-parse-index*)) ?\) )) + (TREX-inc *regexp-parse-index* 2) + (if *regexp-word-definition* + result + (list ':mark regexp-parse-regno + (- *regexp-parse-regno* regexp-parse-regno 1) + result))) + (t + (regexp-error)))))) + ((= ch ?\) ) + (setq *regexp-parse-index* start) + nil) + ((= ch ?` ) '(BEGBUF)) + ((= ch ?' ) '(ENDBUF)) + ((= ch ?b ) + (if *regexp-word-definition* (regexp-error) '(WORDBOUND))) + ((= ch ?B ) + (if *regexp-word-definition* (regexp-error) '(NOTWORDBOUND))) + ((= ch ?< ) + (if *regexp-word-definition* (regexp-error) '(WORDBEG))) + ((= ch ?> ) + (if *regexp-word-definition* (regexp-error) '(WORDEND))) + ((= ch ?w ) (list 'SYNTAXSPEC + (syntax-spec-code ?w))) ;;;WORDCHAR + ((= ch ?W ) (list 'NOTSYNTAXSPEC + (syntax-spec-code ?w))) ;;;NOTWORDCHAR + ;;; ((= ch ?=) 'AT_DOT) + ((and (<= ?1 ch) + (<= ch ?9)) + (if *regexp-word-definition* + (regexp-error) (list 'DUPLICATE (- ch ?0)))) + ((= ch ?0) + (regexp-error)) + ((and (= ch ?s ) + (< (1+ i) end)) + (TREX-inc *regexp-parse-index*) + (list 'SYNTAXSPEC (syntax-spec-code (aref *regexp-parse-str* (1+ i))))) + ((and (= ch ?S ) + (< (1+ i) end)) + (TREX-inc *regexp-parse-index*) + (list 'NOTSYNTAXSPEC (syntax-spec-code (aref *regexp-parse-str* (1+ i))))) + ((and (= ch ?c ) + (< (1+ i) end)) + (TREX-inc *regexp-parse-index*) + (list 'CATEGORYSPEC (aref *regexp-parse-str* (1+ i)))) + ((and (= ch ?C ) + (< (1+ i) end)) + (TREX-inc *regexp-parse-index*) + (list 'NOTCATEGORYSPEC (aref *regexp-parse-str* (1+ i)))) + (t + (regexp-parse-translate-char-string + (substring *regexp-parse-str* (1+ i) (+ i 2)))))) + (t + (let ((nextpos (TREX-string-forward-anychar *regexp-parse-str* i))) + (cond(nextpos + (setq *regexp-parse-index* nextpos) + (regexp-parse-translate-char-string + (substring *regexp-parse-str* i nextpos))) + (t (regexp-error))))))))) + +(defun regexp-parse-charset () + (if (< *regexp-parse-index* *regexp-parse-end*) + (cond((eq (aref *regexp-parse-str* *regexp-parse-index*) ?^) + (TREX-inc *regexp-parse-index*) + (regexp-parse-charset0 'CHARSET_NOT nil)) + (t (regexp-parse-charset0 'CHARSET ;; ':or + nil))) + (regexp-error))) + +(defun regexp-parse-charset0 (op list) + (if (< *regexp-parse-index* *regexp-parse-end*) + (cond ((eq (aref *regexp-parse-str* *regexp-parse-index*) ?\]) + (TREX-inc *regexp-parse-index*) + (regexp-parse-charset1 op '("\]"))) + (t + (regexp-parse-charset1 op nil))) + (regexp-error))) + +(defun regexp-parse-charset1 (op list) + (if (< *regexp-parse-index* *regexp-parse-end*) + (let* ((pos0 *regexp-parse-index*) + (pos1 (TREX-string-forward-anychar *regexp-parse-str* pos0)) + (pos2 (TREX-string-forward-anychar *regexp-parse-str* pos1)) + (pos3 (TREX-string-forward-anychar *regexp-parse-str* pos2))) + (if pos0 + ;;; ] + (cond((eq (aref *regexp-parse-str* pos0) ?\]) + (setq *regexp-parse-index* pos1) + ;;; returns charset form + (cons op (sort (nreverse list) 'TREX-charset-lessp))) + ;;; [^]] - [^]] + ((and pos1 pos2 pos3 + (eq (aref *regexp-parse-str* pos1) ?-) + (not (eq (aref *regexp-parse-str* pos2) ?\]))) + (let ((from (substring *regexp-parse-str* pos0 pos1)) + (to (substring *regexp-parse-str* pos2 pos3))) + (if (and (= (length from) (length to)) + (not (TREX-comp-charp from 0)) + (not (TREX-comp-charp to 0)) + (or (= (length from) 1) + (= (aref from 0) (aref to 0))) + (or (string-equal from to) ;;; by Enami 93.08.08 + (string-lessp from to))) + (if (string-equal from to) + (TREX-push from list) + (TREX-push (list ':range from to) list)) + (regexp-error))) + (setq *regexp-parse-index* pos3) + (regexp-parse-charset1 op list)) + ;;; [^]] - ] ;;; by Enami 93.08.08 + ((and pos1 pos2 + (eq (aref *regexp-parse-str* pos1) ?-) + (eq (aref *regexp-parse-str* pos2) ?\])) + (TREX-push (substring *regexp-parse-str* pos0 pos1) list) + (TREX-push (substring *regexp-parse-str* pos1 pos2) list) + (setq *regexp-parse-index* pos2) + (regexp-parse-charset1 op list)) + (t + (TREX-push (substring *regexp-parse-str* pos0 pos1) list) + (setq *regexp-parse-index* pos1) + (regexp-parse-charset1 op list))) + (regexp-error))) + (regexp-error))) + +(defun TREX-charset-lessp (ch1 ch2) + (cond((and (stringp ch1) (stringp ch2)) + (string-lessp ch1 ch2)) + ((and (consp ch1) (consp ch2)) + (string-lessp (nth 2 ch1) (nth 1 ch2))) + ((consp ch1) + (string-lessp (nth 2 ch1) ch2)) + ((consp ch2) + (string-lessp ch1 (nth 1 ch2))))) + +;;; +;;; define-regexp +;;; + +(defmacro define-regexp (name &rest forms) + (` (define-regexp* '(, name) '(, forms)))) + +(defun define-regexp* (name forms) + (put name ':regexp-has-definition t) + (put name ':regexp-definition + (if (= (length forms) 1) + (nth 0 forms) + (` (:seq (,@ forms)))))) + +(defun regexp-get-definition (name) + (get name ':regexp-definition)) + +(defun regexp-define-specials (names) + (mapcar (function (lambda (name) + (put name ':regexp-special t))) + names)) + +(defun regexp-has-definition (name) + (get name ':regexp-has-definition)) + +(defun regexp-specialp (name) + (get name ':regexp-special)) + +(defun regexp-expand-definition (regexp &optional callers) + (cond + ((consp regexp) + (let ((op (car regexp))) + (cond((eq op ':mark) + (` (:mark (, (nth 1 regexp)) + (, (nth 2 regexp)) + (, (regexp-expand-definition (nth 3 regexp)))))) + ((eq op ':or) + (` (:or (,@ (mapcar 'regexp-expand-definition (cdr regexp)))))) + ((eq op ':seq) + (` (:seq (,@ (mapcar 'regexp-expand-definition (cdr regexp)))))) + ((eq op ':optional) + (` (:optional (, (regexp-expand-definition (nth 1 regexp)))))) + ((eq op ':star) + (` (:star (, (regexp-expand-definition (nth 1 regexp)))))) + ((eq op ':plus) + (` (:plus (, (regexp-expand-definition (nth 1 regexp)))))) + ;;;;**** + ((eq op ':range) + regexp) + ((regexp-specialp op) + regexp) + ((memq op callers) + (error "regexp defs(%s)" op)) + ((regexp-has-definition op) + (regexp-expand-definition (regexp-get-definition op) + (cons op callers))) + (t + (error "undefined regexp(%s)" op))))) + ((stringp regexp) + regexp) + ((null regexp) + regexp) + (t + regexp))) + +;;; +;;; regexp-*-lessp +;;; $B@55,7A<0$NA4=g=x$rDj5A$9$k!%(B +;;; + +;;; nil < number < string < symbol < cons + +(defun regexp-lessp (exp1 exp2) + (cond((equal exp1 exp2) + nil) + ((null exp1) t) + ((numberp exp1) + (cond((null exp2) nil) + ((numberp exp2) + (< exp1 exp2)) + (t t))) + ((stringp exp1) + (cond((or (null exp2) + (numberp exp2)) + nil) + ((stringp exp2) + (string< exp1 exp2)) + (t t))) + ((symbolp exp1) + (cond((or (null exp2) + (numberp exp2) + (stringp exp2)) + nil) + ((symbolp exp2) + (string< exp1 exp2)) + (t t))) + ((consp exp1) + (cond ((not (consp exp2)) + nil) + ((< (length exp1) (length exp2)) + t) + ((= (length exp1) (length exp2)) + (regexp-lessp-list exp1 exp2)) + (t nil))))) + +(defun regexp-lessp-list (exp1 exp2) + (cond((null exp1) nil) + ((regexp-lessp (car exp1) (car exp2)) + t) + ((equal (car exp1) (car exp2)) + (regexp-lessp-list (cdr exp1) (cdr exp2))) + (t nil))) + +;;; +;;; item = list of seq-body(== list of regexp) +;;; nil < cons +;;; + +(defun regexp-item-lessp (item1 item2) + (cond((equal item1 item2) + nil) + ((null item2) t) + ((consp item1) + (cond((consp item2) + (cond ((regexp-key-lessp (car item1) (car item2)) + t) + ((equal (car item1) (car item2)) + (regexp-item-lessp (cdr item1) (cdr item2))) + (t nil))) + (t nil))))) + + +(defun regexp-key-lessp-list (sym1 sym2 list) + (< (TREX-find sym1 list) (TREX-find sym2 list))) + +(defun regexp-key-lessp (key1 key2) + (cond ((regexp-key-class0 key1) + (cond((regexp-key-class0 key2) + (regexp-key-lessp-list (car key1) (car key2) *regexp-key-class0*)) + (t t))) + ((regexp-key-class1 key1) + (cond((regexp-key-class1 key2) + (regexp-key-lessp-list key1 key2 *regexp-key-class1*)) + ((or (regexp-key-class2 key2) + (regexp-key-class3 key2) + (regexp-key-class4 key2) + (null key2)) + t))) + ((regexp-key-class2 key1) + (cond((regexp-key-class2 key2) + (regexp-key-lessp-list key1 key2 *regexp-key-class2*)) + ((or (regexp-key-class3 key2) + (regexp-key-class4 key2) + (null key2)) + t))) + ((regexp-key-class3 key1) + (cond((regexp-key-class3 key2) + (regexp-key-lessp-list (car key1) (car key2) *regexp-key-class3*)) + ((or (regexp-key-class4 key2) + (null key2)) + t))) + ((regexp-key-class4 key1) + (or (null key2) + (and (regexp-key-class4 key2) (< key1 key2)))) + (t nil))) + +(defun regexp-alist-lessp (pair1 pair2) + (regexp-key-lessp (car pair1) (car pair2))) + +;;; +;;; +;;; + +(defvar *regexp-key-class0* '(START_MEMORY STOP_MEMORY)) + +(defvar *regexp-key-class1* '(BEGLINE ENDLINE + ;;; BEFORE_DOT AT_DOT AFTER_DOT + BEGBUF ENDBUF + WORDBEG WORDEND + WORDBOUND NOTWORDBOUND)) + +(defvar *regexp-key-class2* '(ANYCHAR + CHARSET + CHARSET_NOT + ;;;WORDCHAR NOTWORDCHAR + )) + +(defvar *regexp-key-class3* '(DUPLICATE + SYNTAXSPEC NOTSYNTAXSPEC + CATEGORYSPEC NOTCATEGORYSPEC +)) + +(regexp-define-specials *regexp-key-class0*) +(regexp-define-specials *regexp-key-class1*) +(regexp-define-specials *regexp-key-class2*) +(regexp-define-specials *regexp-key-class3*) + +(defun regexp-key-class0 (key) + (and (consp key) (TREX-memequal (car key) *regexp-key-class0*))) + +(defun regexp-key-class1 (key) + (and (consp key) + (TREX-memequal (car key) *regexp-key-class1*))) + +(defun regexp-key-class2 (key) + (and (consp key) (TREX-memequal (car key) *regexp-key-class2*))) + +(defun regexp-key-class3 (key) + (and (consp key) + (TREX-memequal (car key) *regexp-key-class3*))) + +(defun regexp-key-class4 (key) + (or (and (consp key) (eq (car key) ':range)) + (numberp key) (symbolp key))) + +(defun regexp-item-key-class0 (item) + (regexp-key-class0 (car item))) + +(defun regexp-item-key-class1 (item) + (regexp-key-class1 (car item))) + +(defun regexp-item-key-class2 (item) + (regexp-key-class2 (car item))) + +(defun regexp-item-key-class3 (item) + (regexp-key-class3 (car item))) + +(defun regexp-item-key-class4 (item) + (regexp-key-class4 (car item))) + +;;; +;;; regexp-sort +;;; $B@55,I=8=$NI8=`7A<0$r5a$a$k$?$a$K@0Ns$r9T$&!%(B +;;; + +(defvar *regexp-sort-flag* t) +(defvar *regexp-debug* nil) + +(defun regexp-sort (list pred) + (if *regexp-sort-flag* + (progn + (if *regexp-debug* (princ (format "(regexp-sort %s %s)\n" list pred))) + (let ((result (sort list pred))) + (if *regexp-debug* (princ (format "<== %s\n" result))) + result)) + list)) + +;;; +;;; regexp-inverse +;;; + +(defun regexp-inverse (regexp) + (if (consp regexp) + (let ((op (car regexp))) + (cond((eq op ':mark) + (list ':mark (nth 1 regexp) (nth 2 regexp) + (regexp-inverse (nth 3 regexp)))) + ((eq op 'DUPLICATE) + regexp) + ((eq op ':or) + (cons ':or (mapcar 'regexp-inverse (cdr regexp)))) + ((eq op ':seq) + (cons ':seq (nreverse (mapcar 'regexp-inverse (cdr regexp))))) + ((eq op ':optional) + (list ':optional (regexp-inverse (nth 1 regexp)))) + ((eq op ':star) + (list ':star (regexp-inverse (nth 1 regexp)))) + ((eq op ':plus) + (list ':plus (regexp-inverse (nth 1 regexp)))) + (t regexp))) + (if (stringp regexp) + (TREX-string-reverse regexp) + regexp))) + +;;; +;;; regexp-remove-infinite-loop +;;; + +(defun regexp-remove-infinite-loop (regexp) + (cond((consp regexp) + (let ((op (car regexp))) + (cond((eq op ':mark) + ) + ((eq op 'DUPLICATE) + regexp) + ((eq op ':or) + ) + ((eq op ':seq) + ) + ((eq op ':optional) + ) + ((eq op ':star) + ) + ((eq op ':plus) + ) + (t regexp)))) + ((stringp regexp) + ) + ((null regexp) + ) + (t + regexp))) + + +;;; +;;; regexp-reform +;;; + +(defvar *regexp-register-definitions* nil) +(defvar *regexp-registers* nil) + +(defun regexp-reform-duplication (regexp) + (let* ((*regexp-register-definitions* nil) + (newregexp (regexp-reform-duplication-1 regexp))) + (let ((*regexp-registers* nil)) + (regexp-reform-duplication-2 newregexp)))) + +(defun regexp-reform-duplication-1 (regexp) + (if (not (consp regexp)) regexp + (let ((mop (car regexp))) + (cond((eq mop ':or) + (cons ':or (mapcar 'regexp-reform-duplication-1 + (cdr regexp)))) + ((eq mop ':seq) + (cons ':seq (mapcar 'regexp-reform-duplication-1 + (cdr regexp)))) + ((TREX-memequal mop '(:star :plus :optional)) + (list mop (regexp-reform-duplication-1 (nth 1 regexp)))) + ((eq mop ':mark) + (TREX-push (cdr regexp) + *regexp-register-definitions*) + (list 'DUPLICATE (nth 1 regexp))) + (t regexp))))) + +(defun regexp-reform-duplication-2 (regexp) + (if (not (consp regexp)) regexp + (let ((mop (car regexp))) + (cond((eq mop ':or) + (let ((registers *regexp-registers*) + (newregisters nil) + (result nil) + (or-body (cdr regexp))) + (while or-body + (setq *regexp-registers* registers) + (TREX-push (regexp-reform-duplication-2 (car or-body)) result) + (setq newregisters (TREX-delete-duplicate (append *regexp-registers* newregisters))) + (setq or-body (cdr or-body))) + (setq *regexp-registers* newregisters) + (cons ':or (nreverse result)))) + ((eq mop ':seq) + (cons ':seq (mapcar 'regexp-reform-duplication-2 + (cdr regexp)))) + ((TREX-memequal mop '(:star :plus :optional)) + (list mop (regexp-reform-duplication-2 (nth 1 regexp)))) + ((eq mop 'DUPLICATE) + (let ((regno (nth 1 regexp))) + (if (TREX-memequal regno *regexp-registers*) + regexp + (let ((def (assoc regno *regexp-register-definitions*))) + (TREX-push regno *regexp-registers*) + ;;; $BBg>fIW!)(B + (if def + (cons ':mark def) + regexp))))) + (t regexp))))) + +;;; +;;; regexp-expand +;;; + +;;; +;;; <ISLAND> ::= ( <ITEM> ...) +;;; <ITEM> ::= ( <SEQ-BODY> ... ) +;;; + +(defun regexp-expand-regexp (regexp) + ;;; returns island + (if (consp regexp) + (let ((mop (car regexp))) + (cond + ;;;((eq mop 'CHARSET) + ;;; (regexp-expand-charset t (cdr regexp))) + ;;;((eq mop 'CHARSET_NOT) + ;;; (regexp-expand-charset nil (cdr regexp))) + ((eq mop ':or) + (regexp-expand-or (cdr regexp))) + ((eq mop ':seq) + (regexp-expand-seq (cdr regexp))) + ((eq mop ':star) + (let ((arg (nth 1 regexp))) + (if arg + (append (regexp-expand-seq (list arg regexp)) (list nil)) + (list nil)))) + ((eq mop ':plus) + (let ((arg (nth 1 regexp))) + (if arg + (regexp-expand-seq (list arg (list ':star arg))) + (list nil)))) + ((eq mop ':optional) + (append (regexp-expand-regexp (nth 1 regexp)) (list nil))) + ((eq mop ':mark) + (let ((regno (nth 1 regexp)) + (groups (nth 2 regexp)) + (arg (nth 3 regexp))) + (if arg + (list (list (list 'START_MEMORY regno groups) + arg + (list 'STOP_MEMORY regno groups))) + (list (list (list 'START_MEMORY regno groups) + (list 'STOP_MEMORY regno groups)))))) + (t (list (list regexp))))) + (cond((null regexp) (list nil)) + ((symbolp regexp) (list (list regexp))) + ((numberp regexp) (list (list regexp))) + ((stringp regexp) + (let ((result nil)) + (let ((i 0) (max (length regexp))) + (while (< i max) + (TREX-push (aref regexp i) result) + (TREX-inc i)) + (list (nreverse result))))) + (t (list (list regexp)))))) + +;;; +;;; (CHARSET "abc" ... ) == (:or (:seq "a" "b" "c") .... ) +;;; +;;; (:range "abc" "ade") == (:seq "a" (:range "bc" "de")) +;;; (:range "bc" "de" ) == (:or (:seq "b" (:range "c" 0xFF)) +;;; (:seq (:range "b"+1 "d"-1) (:range 0xA0 0xFF)) +;;; (:seq "d" (:range 0xA0 "e"))) +;;; + +;;; charset:: + +(defun charset-member-elt (ch elt) + (if (consp elt) + (if (eq (nth 0 elt) ':range) + (and (<= ch (nth 1 elt)) + (<= (nth 2 elt) ch)) + nil) + (equal ch elt))) + +(defun charset-member-P (ch or-form) + (let ((result) (l (cdr or-form))) + (while (and l (null result)) + (if (charset-membership-elt ch (car l)) + (setq result t)) + (setq l (cdr l))) + result)) + +(defun charset-member-N (ch nor-form) + (not (charset-member+ ch nor-form))) + +(defun charset-norp (form) + (and (consp form) (eq (car form) 'CHARSET_NOT))) + +(defun charset-and (form1 form2) + (if (charset-norp form1) + (if (charset-norp form2) + (cons ':or (charset-or-PP (cdr form1) (cdr form2))) + (charset-and-PN form2 form1)) + (if (charset-norp form2) + (charset-and-pn form1 form2) + (charset-and-PP form1 form2)))) + +(defun charset-or-PP (or-body1 or-body2) + (append or-body1 or-body2)) + + + + +(defun regexp-charset-to-regexp (charsets) + (cons ':or (mapcar 'regexp-charset-to-regexp* charsets))) + +(defun regexp-charset-to-regexp* (elm) + (cond((consp elm) (regexp-charset-range-to-regexp (nth 1 elm) (nth 2 elm))) + ((stringp elm) (cons ':seq (TREX-string-to-list elm))) + (t elm))) + +(defun regexp-charset-range-to-regexp (str1 str2) + (let ((result (regexp-charset-range-to-regexp* (TREX-string-to-list str1) + (TREX-string-to-list str2)))) + (if (= (length result) 1) (car result) (cons ':seq result)))) + + +(defun regexp-charset-range-to-regexp* (nums1 nums2) + (let ((len (length (cdr nums1))) + (ch1 (car nums1)) + (ch2 (car nums2))) + (if (= len 0) + (if (= ch1 ch2) (list ch1) + (list (regexp-charset-range-1 ch1 ch2))) + (if (= ch1 ch2) + (cons ch1 (regexp-charset-range-to-regexp* (cdr nums1) (cdr nums2))) + (let ((part1 (cons ch1 (regexp-charset-range-to-regexp* (cdr nums1) (make-list (length (cdr nums1)) 255)))) + (part2 (if (<= (1+ ch1) (1- ch2)) + (cons (regexp-charset-range-1 (1+ ch1) (1- ch2)) + (regexp-charset-range-to-regexp* (make-list len 160) (make-list len 255))) + nil)) + (part3 (cons ch2 (regexp-charset-range-to-regexp* (make-list len 160) (cdr nums2))))) + (if part2 + (list (list ':or (cons ':seq part1) (cons ':seq part2) (cons ':seq part3))) + (list (list ':or (cons ':seq part1) (cons ':seq part3))))))))) + +(defun regexp-charset-range-1 (from to) + (let ((result nil)) + (while (<= from to) + (TREX-push to result) + (TREX-dec to)) + (cons ':or result))) + +(defun regexp-charset-range-1* (from to) + (if (not (<= from to)) nil + (cons from (regexp-charset-range-1* (1+ from) to)))) + +(defvar *regexp-charset-vector* nil) + +(defun regexp-expand-charset (mode charsets) + (TREX-init *regexp-charset-vector* (make-vector 256 nil)) + (let ((i 0)) + (while (< i 256) + (aset *regexp-charset-vector* i nil) + (TREX-inc i))) + (while charsets + (cond((numberp (car charsets)) + (aset *regexp-charset-vector* (car charsets) t)) + ((stringp (car charsets)) + (if (= (length (car charsets)) 1) + (aset *regexp-charset-vector* (aref (car charsets) 0) t) + (let ((list (TREX-string-to-list (car charsets)))) + (aset *regexp-charset-vector* (car list) + (regexp-expand-charset-set-mark (cdr list) + (aref *regexp-charset-vector* (car list))))))) + ((and (consp (car charsets)) + (eq (car (car charsets)) ':range)) + (let ((from (aref (nth 1 (car charsets)) 0)) + (to (aref (nth 2 (car charsets)) 0))) + (if (<= from to) + (if (< to 128) + (let ((char from)) + (while (<= char to) + (aset *regexp-charset-vector* char t) + (TREX-inc char))) + (let ((from-list (TREX-string-to-list (nth 1 (car charsets)))) + (to-list (TREX-string-to-list (nth 2 (car charsets))))) + ;;; $B$I$&$9$s$N!*(B + )))))) + (setq charsets (cdr charsets))) + (let ((result nil) + (i 0)) + (while (< i 256) + (if (eq (aref *regexp-charset-vector* i) mode) + (TREX-push (list i) result)) + (TREX-inc i)) + (nreverse result))) + + +(defun regexp-expand-charset-set-mark (chars alist) + (if (null chars) t + (let ((place (assoc (car chars) alist))) + (cond((null place) + (cons + (cons (car chars) + (regexp-expand-charset-set-mark (cdr chars) nil)) + alist)) + (t + (setcdr place + (regexp-expand-charset-set-mark (cdr chars) (cdr place))) + alist))))) + +(defun regexp-expand-or (regexps) + (if regexps + (append (regexp-expand-regexp (car regexps)) + (regexp-expand-or (cdr regexps))) + nil)) + +(defun regexp-expand-seq (regexps) + (if (null regexps) + (list nil) + (let ((result (regexp-expand-regexp (car regexps)))) + (if (TREX-memequal nil result) + (let ((newresult (regexp-expand-seq (cdr regexps)))) + (setq result (TREX-delete nil result)) + (while result + (TREX-push (append (car result) (cdr regexps)) newresult) + (setq result (cdr result))) + newresult) + (let ((newresult nil)) + (while result + (TREX-push (append (car result) (cdr regexps)) newresult) + (setq result (cdr result))) + newresult))))) + +(defun regexp-expand-items (items) + (if items + (append (regexp-expand-seq (car items)) + (regexp-expand-items (cdr items))) + nil)) + +;;; +;;; regexp- +;;; + +(defun regexp-make-island (items) + (let ((result (TREX-delete-duplicate (regexp-expand-items items)))) + (let ((l result)) + (while l + (cond((null (car l)) + (setcdr l nil) + (setq l nil)) + (t (setq l (cdr l)))))) + result)) + +(defun regexp-make-island-parallel (items) + (regexp-sort (TREX-delete-duplicate (regexp-expand-items items)) + 'regexp-item-lessp)) + + +;;; Finate state Automaton: +;;; +;;; FA : Non-deterministic FA +;;; EFFA : Epsilon Free FA +;;; DFA : Deterministic FA +;;; +;;; +;;; DFA-optimize <- DFA <- EFFA <- NDFA <- regexp + + +;;; +;;; Table structure +;;; <FA> ::= ( <START> . <TransTables> ) +;;; <TransTables> ::= ( <Node> . <TransTable> ) ... +;;; <TransTable> ::= ( <Key> . <Next> ) ... +;;; <Key> ::= <Char> | <Condition> | :epsilon +;;; + +(defvar *regexp-node-to-transtable* nil) +(defvar *regexp-island-to-node* nil) +(defvar *regexp-counter* 0) + +(defun FA-make (regexp) + (setq *regexp-island-to-node* nil) + (let ((*regexp-node-to-transtable* nil) +;;; (*regexp-island-to-node* nil) + (*regexp-counter* 0)) + (let ((island (regexp-make-island (regexp-expand-regexp regexp)))) + (cons (FA-make-closure island) (nreverse *regexp-node-to-transtable*))))) + +(defun FA-make-closure (island) + (if *regexp-debug* (princ (format "FA-make-closure %s\n" island))) + (if (null island) nil + (let ((place (assoc island *regexp-island-to-node*)) + (pos nil)) + (cond(place (cdr place)) + ;;; START_MEMORY and STOP_MEMORY $B!JL5>r7o!$:GM%@h$GA+0\$9$k$b$N!K(B + ((setq pos (TREX-find-if 'regexp-item-key-class0 island)) + (let ((pre (TREX-firstn island pos)) + (item (nth pos island)) + (post (nthcdr (1+ pos) island))) + (let* ((number (TREX-inc *regexp-counter*)) + (pair (cons (car item) nil)) + (alist (list pair)) + (place (cons number alist))) + (TREX-push (cons island number) *regexp-island-to-node*) + (TREX-push place *regexp-node-to-transtable*) + (setcdr pair + (FA-make-closure + (regexp-make-island (append pre (list (cdr item)) post)))) + number))) + ;;; BEGLINE, ENDLINE, WORDBEG, ....$B!JD9$5#0$N$b$N!K(B + ;;; $BA+0\$O(B + ;;; KEY --> TRUE+FALSE + ;;; :epsilon --> FALSE $B$H$J$k!%(B + ((setq pos (TREX-find-if 'regexp-item-key-class1 island)) + (let((key (car (nth pos island))) + (items island) + (result-true nil) + (result-false nil)) + (while items + (let ((item (car items))) + (if (equal key (car item)) + (TREX-push (cdr item) result-true) + (progn + (TREX-push item result-true) + (TREX-push item result-false)))) + (setq items (cdr items))) + (setq result-true (nreverse result-true) + result-false (nreverse result-false)) + (if (null result-false) + (let* ((number (TREX-inc *regexp-counter*)) + (pair-true (cons key nil)) + (alist (list pair-true)) + (place (cons number alist))) + (TREX-push (cons island number) *regexp-island-to-node*) + (TREX-push place *regexp-node-to-transtable*) + (setcdr pair-true (FA-make-closure (regexp-make-island result-true))) + number) + (let* ((number (TREX-inc *regexp-counter*)) + (pair-true (cons key nil)) + (pair-false (cons ':epsilon nil)) + (alist (list pair-true pair-false)) + (place (cons number alist))) + (TREX-push (cons island number) *regexp-island-to-node*) + (TREX-push place *regexp-node-to-transtable*) + (setcdr pair-true (FA-make-closure (regexp-make-island result-true))) + (setcdr pair-false (FA-make-closure (regexp-make-island result-false))) + number)))) + (t + (FA-make-closure* island (FA-make-pre-alist island))))))) + +;;; +;;; $B$3$3$G07$&$N$O(B class2,3,4 $B$N$_(B +;;; +(defun FA-make-closure* (island pre-alist) + (if *regexp-debug* (princ (format "\nregexp-make-clousre* %s" pre-alist))) + (let* ((number (TREX-inc *regexp-counter*)) + (place (cons number pre-alist))) + (TREX-push (cons island number) *regexp-island-to-node*) + (TREX-push place *regexp-node-to-transtable*) + (while pre-alist + (let ((pair (car pre-alist))) + (setcdr pair + (FA-make-closure (regexp-make-island (cdr pair))))) + (setq pre-alist (cdr pre-alist))) + number)) + +;;; +;;; PRE-ALIST ::= ( (key . items) ... ) +;;; + +(defun FA-make-pre-alist (items) + (let ((pre-alist nil)) + (while items + (let ((item (car items))) + (cond((or (regexp-key-class2 (car item)) + (regexp-key-class3 (car item))) + (let ((key (car item)) + (newitems nil)) + (while (and items (equal key (car (car items)))) + (TREX-push (cdr (car items)) newitems) + (setq items (cdr items))) + (setq newitems (nreverse newitems)) + (TREX-push (cons key newitems) pre-alist))) + ((null item) + (TREX-push (list nil) pre-alist) + (setq items (cdr items))) + ((regexp-key-class4 (car item)) + (let((alist nil)) + (while (and items (regexp-key-class4 (car (car items)))) + (let* ((newitem (car items)) + (place (assoc (car newitem) alist))) + (if place + (setcdr place + (cons (cdr newitem) (cdr place))) + (TREX-push (cons (car newitem) (list (cdr newitem))) alist))) + (setq items (cdr items))) + (setq alist (sort alist 'TREX-lessp-car)) + (let ((list alist)) + (while list + (setcdr (car list) (nreverse (cdr (car list)))) + (setq list (cdr list))) + (setq pre-alist (append alist pre-alist)) + ))) + (t (error "undefined items(%s)" item))))) + (nreverse pre-alist))) + +;;; +;;; FA-inverse +;;; + +(defun FA-inverse (FA) + (let ((invFA nil) + (start (car FA)) + (table (cdr FA)) + (minnode 10000) + (maxnode 0) + (newtable nil) + (newstart nil) + (newfinal nil)) + (let ((l table)) + (while l + (let ((n (car (car l)))) + (if (< n minnode) (setq minnode n)) + (if (< maxnode n) (setq maxnode n))) + (setq l (cdr l)))) + (setq newstart (1- minnode)) + (setq newfinal (1+ maxnode)) + (setq newtable (FA-link newfinal nil nil newtable)) + (while table + (let* ((Snode (car table)) + (Snumber (car Snode)) + (Salist (cdr Snode))) + (while Salist + (let* ((pair (car Salist)) + (key (car pair)) + (Tnumber (cdr pair))) + (cond((null key) + (setq newtable (FA-link newstart ':epsilon Snumber newtable))) + (t + (setq newtable (FA-link Tnumber key Snumber newtable)))) + (setq Salist (cdr Salist))))) + (setq table (cdr table))) + (setq newtable (FA-link start ':epsilon newfinal newtable)) + ;;;; FA $B$N(B final $B$X(B invFA $B$N(B start $B$+$i(B :epsilon link $B$rD%$k!%(B + (let ((l newtable)) + (while l + (setcdr (car l) (reverse (cdr(car l)))) + (setq l (cdr l)))) + (setq newtable (sort newtable 'TREX-lessp-car)) + (cons newstart newtable))) + +(defun FA-link (from key to table) + (let ((place (assoc from table))) + (cond ((null place ) + (setq place (cons from nil)) + (TREX-push place table))) + (setcdr place (cons (cons key to) (cdr place))) + table)) + +;;; +;;; FA-dump +;;; + +(defun FA-dump (table) + (let ((start (car table)) + (l (cdr table))) + (princ (format "\nstart = %d\n" start)) + (while l + (princ (format "%3d: " (car (car l)))) + (let ((alist (cdr (car l)))) + (cond ((numberp (car (car alist))) + (princ (format "%c -> %s\n" (car (car alist)) (cdr (car alist))))) + ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC))) + (princ (format "(%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist))))) + (t + (princ (format "%s -> %s\n" (car (car alist)) (cdr (car alist)))))) + (setq alist (cdr alist)) + (while alist + (cond ((numberp (car (car alist))) + (princ (format " %c -> %s\n" (car (car alist)) (cdr (car alist))))) + ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC))) + (princ (format " (%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist))))) + (t + (princ (format " %s -> %s\n" (car (car alist)) (cdr (car alist)))))) + (setq alist (cdr alist)))) + (setq l (cdr l))))) + +;;; +;;; EFFA: Epsilon Free Finate Automaton +;;; + +(defvar *FA-table* nil) +(defvar *EFFA-table* nil) + +(defun EFFA-make (FA) + (let* ((start (car FA)) + (*FA-table* (cdr FA)) + (newstart start) + (*EFFA-table* nil)) + (cons newstart (reverse (EFFA-make* start))))) + +(defun EFFA-make* (node) + (let ((place (assoc node *EFFA-table*))) + (cond((null place) + (let ((place (cons node nil))) + (TREX-push place *EFFA-table*) + (setcdr place + (reverse (EFFA-make-alist nil (cdr (assoc node *FA-table*)) + (list node)))) + (let ((alist (cdr place))) + (while alist + (cond((car (car alist)) + (EFFA-make* (cdr (car alist))))) + (setq alist (cdr alist)))))))) + *EFFA-table*) + +(defun EFFA-make-alist (newalist alist set) + (while alist + (let ((node (cdr (car alist)))) + (cond((eq (car (car alist)) ':epsilon) + (cond((not (TREX-memequal node set)) + (TREX-push node set) + (setq newalist + (EFFA-make-alist newalist (cdr (assoc node *FA-table*)) set))))) + (t + (TREX-push (car alist) newalist)))) + (setq alist (cdr alist))) + newalist) + +;;; +;;; DFA: Deterministic Finate Automata +;;; + +(defvar *DFA-node-counter* nil) + +(defvar *DFA-node-definitions* nil + "List of FD-nodes to node number") + +(defvar *DFA-table* nil + "node number to alist") + +(defun DFA-make (EFFA) + (let ((start (car EFFA)) + (*EFFA-table* (cdr EFFA)) + (*DFA-node-counter* 0) + (*DFA-node-definitions* nil ) + (*DFA-table* nil)) + (DFA-make-1 (list start)) + (cons (cdr (assoc (list start) *DFA-node-definitions*)) *DFA-table*))) + +(defun DFA-make-1 (states) + (let ((place (assoc states *DFA-node-definitions*))) + (cond((null place) + (TREX-inc *DFA-node-counter*) + (setq place (cons states *DFA-node-counter*)) + (TREX-push place *DFA-node-definitions*) + (let ((pair (cons *DFA-node-counter* nil))) + (TREX-push pair *DFA-table*) + (setcdr pair (DFA-make-pre-alist (DFA-collect-alist states))) + (let ((alist (cdr pair))) + (while alist + (let ((top (car alist))) + (if (car top) + (setcdr top + (DFA-make-1 (cdr top))))) + (setq alist (cdr alist)))) + ))) + (cdr place))) + +(defun DFA-collect-alist (states) + (let ((result nil)) + (while states + (setq result (append (cdr (assoc (car states) *EFFA-table*)) result)) + (setq states (cdr states))) + result)) + +(defun DFA-make-pre-alist (oldAlist) + (let ((pre-alist nil)) + (while oldAlist + (let ((oldKey (car (car oldAlist)))) + (cond((or (regexp-key-class0 oldKey) + (regexp-key-class1 oldKey) + (regexp-key-class2 oldKey) + (regexp-key-class3 oldKey)) + (let ((key oldKey) + (newAlist nil)) + (while (and oldAlist (equal key (car (car oldAlist)))) + (TREX-push (cdr (car oldAlist)) newAlist) + (setq oldAlist (cdr oldAlist))) + (setq newAlist (nreverse newAlist)) + (TREX-push (cons key newAlist) pre-alist))) + ((regexp-key-class4 oldKey) + (let((alist nil)) + (while (and oldAlist (regexp-key-class4 (car (car oldAlist)))) + (let ((place (assoc (car (car oldAlist)) alist))) + (if place + (setcdr place + (cons (cdr (car oldAlist)) (cdr place))) + (TREX-push (cons (car (car oldAlist)) (list(cdr (car oldAlist)))) alist))) + (setq oldAlist (cdr oldAlist))) + (setq alist (sort alist 'TREX-lessp-car)) + (let ((list alist)) + (while list + (setcdr (car list) (reverse (cdr (car list)))) + (setq list (cdr list))) + (setq pre-alist (append alist pre-alist)) + ))) + ((null oldKey) + (TREX-push (list nil) pre-alist) + (setq oldAlist (cdr oldAlist))) + (t + (setq oldAlist (cdr oldAlist)))))) + (nreverse pre-alist))) + +;;; +;;; DFA-optimize +;;; $B$3$3$G$N:GE,2=$O>H9g=g=x$rJ]B8$9$k!%(B +;;; longer match $B$J$I$r$9$k>l9g$OJQ99$9$kI,MW$,$"$k!%(B + +(defvar *DFA-optimize-debug* nil) + +(defvar *DFA-optimize-groups* nil) +(defvar *DFA-optimize-node* 1) + +(defun DFA-optimize (FA) + (if *DFA-optimize-debug* (terpri)) + (let* ((start (car FA)) + (table (cdr FA)) + (*DFA-optimize-node* 1) + (*DFA-optimize-groups* + (list (cons *DFA-optimize-node* (mapcar 'car table))))) + (while + (catch 'DFA-optimize-changed + (let ((groups *DFA-optimize-groups*)) + (while groups + (if *DFA-optimize-debug* + (princ (format "\nGroups to be checked: %s\n" groups))) + (let* ((Sgroup (car groups)) + (Sgroup-number (car Sgroup)) + (oldgroup (cdr Sgroup)) + (newgroup nil) + (Smembers oldgroup)) + (if *DFA-optimize-debug* + (princ (format " Sgroup-number: %s = %s\n" Sgroup-number Smembers))) + (while Smembers + (let* ((Snumber (car Smembers)) + (Salist (cdr (assoc Snumber table)))) + (if *DFA-optimize-debug* + (princ (format " Snumber: %s\n" Snumber))) + (let ((Tmembers (cdr Smembers))) + (while Tmembers + (if (not (eq Snumber (car Tmembers))) + (let* ((Tnumber (car Tmembers)) + (Talist (cdr (assoc Tnumber table))) + (Salist Salist)) + (if *DFA-optimize-debug* + (princ (format " Tnumber: %s\n" Tnumber))) + (while (and Talist Salist + (equal (car (car Talist)) + (car (car Salist))) ;;; key + (equal (DFA-optimize-group-number + (cdr (car Talist))) + (DFA-optimize-group-number + (cdr (car Salist))) ;;; next group + )) + (if *DFA-optimize-debug* + (progn + (princ (format " Skey: %s -> %s(%s)\n" + (car (car Salist)) + (cdr (car Salist)) + (DFA-optimize-group-number (cdr (car Salist))))) + (princ (format " Tkey: %s -> %s(%s)\n" + (car (car Talist)) + (cdr (car Talist)) + (DFA-optimize-group-number (cdr (car Talist))))))) + (setq Talist (cdr Talist) + Salist (cdr Salist))) + (cond((or Talist Salist) + (setq newgroup (cons Tnumber newgroup) + oldgroup (TREX-delete Tnumber oldgroup)) + (if *DFA-optimize-debug* + (princ(format " oldGroup : %s\n newGroup : %s\n" oldgroup newgroup))))) + )) + (setq Tmembers (cdr Tmembers))))) + (cond (newgroup + (if *DFA-optimize-debug* + (princ (format "Changed :%s --> " Sgroup))) + (setcdr Sgroup oldgroup) + (if *DFA-optimize-debug* + (princ (format "%s" Sgroup))) + (TREX-inc *DFA-optimize-node*) + (if *DFA-optimize-debug* + (princ (format "+%s\n" (cons *DFA-optimize-node* newgroup)))) + (TREX-push (cons *DFA-optimize-node* newgroup) *DFA-optimize-groups*) + (throw 'DFA-optimize-changed t))) + (setq Smembers (cdr Smembers)))) + (setq groups (cdr groups)))))) + ;;; + ;;; + (if *DFA-optimize-debug* + (princ (format "table: %s\n" table))) + (if *DFA-optimize-debug* + (princ (format "groups: %s\n" *DFA-optimize-groups*))) + (let ((newtable nil) + (newstart nil) + (groups *DFA-optimize-groups*)) + + ;;; start node $B$rC5$9(B + (let ((l *DFA-optimize-groups*)) + (while l + (cond((TREX-memequal start (cdr (car l))) + (setq newstart (car (car l))) + (setq l nil)) + (t + (setq l (cdr l)))))) + + ;;; $B?7$7$$(B transTable $B$r:n$k!%(B + (while groups + (let* ((group (car groups)) + (group-number (car group)) + (member-number (car (cdr group))) + (member-alist (cdr (assoc member-number table)))) + (TREX-push (cons group-number + (let ((group-alist nil)) + (while member-alist + (let ((Mkey (car (car member-alist))) + (Mnext (cdr (car member-alist)))) + (TREX-push (cons Mkey (DFA-optimize-group-number Mnext)) + group-alist)) + (setq member-alist (cdr member-alist))) + (nreverse group-alist))) + newtable) + (setq groups (cdr groups)))) + (cons newstart newtable)))) + +(defun DFA-optimize-group-number (node) + (let ((l *DFA-optimize-groups*) (result nil)) + (while l + (cond((TREX-memequal node (cdr (car l))) + (setq result (car (car l)) + l nil)) + (t (setq l (cdr l))))) + result)) + +(defun DFA-optimize-parallel (FA) + (if *DFA-optimize-debug* (terpri)) + (let* ((start (car FA)) + (table (cdr FA)) + (*DFA-optimize-node* 1) + (*DFA-optimize-groups* + (list (cons *DFA-optimize-node* (mapcar 'car table))))) + (while + (catch 'DFA-optimize-changed + (let ((groups *DFA-optimize-groups*)) + (while groups + (if *DFA-optimize-debug* + (princ (format "\nGroups to be checked: %s\n" groups))) + (let* ((Sgroup (car groups)) + (Sgroup-number (car Sgroup)) + (oldgroup (cdr Sgroup)) + (newgroup nil) + (Smembers oldgroup)) + (if *DFA-optimize-debug* + (princ (format " Sgroup-number: %s = %s\n" Sgroup-number Smembers))) + (while Smembers + (let* ((Snumber (car Smembers)) + (Salist (cdr (assoc Snumber table)))) + (if *DFA-optimize-debug* + (princ (format " Snumber: %s\n" Snumber))) + (while Salist + (let* ((Spair (car Salist)) + (Skey (car Spair)) + (Snext (cdr Spair)) + (Snext-group (DFA-optimize-group-number Snext)) + (Tmembers oldgroup)) + (if *DFA-optimize-debug* + (princ (format " Skey: %s -> %s(%s)\n" Skey Snext-group Snext))) + (while Tmembers + (if (not (eq Snumber (car Tmembers))) + (let* ((Tnumber (car Tmembers)) + ;;; $BMW:F8!F$(B + (Tpair (assoc Skey (cdr (assoc Tnumber table)))) + (Tnext (cdr Tpair)) + (Tnext-group (DFA-optimize-group-number (cdr Tpair)))) + (if *DFA-optimize-debug* + (princ (format " Tnumber: %s : %s -> %s(%s)\n" Tnumber (car Tpair) + (DFA-optimize-group-number (cdr Tpair))(cdr Tpair)))) + (cond((and (equal Spair '(nil)) + (equal Tpair '(nil)))) + ((and Skey (equal Snext-group Tnext-group))) + (t + (TREX-push Tnumber newgroup) + (setq oldgroup (TREX-delete Tnumber oldgroup)) + (if *DFA-optimize-debug* + (princ(format (format " oldGroup : %s\n newGroup : %s\n" oldgroup newgroup)))) + )))) + (setq Tmembers (cdr Tmembers))) + (cond (newgroup + (if *DFA-optimize-debug* + (princ (format "Changed :%s --> " Sgroup))) + (setcdr Sgroup oldgroup) + (if *DFA-optimize-debug* + (princ (format "%s" Sgroup))) + (TREX-inc *DFA-optimize-node*) + (if *DFA-optimize-debug* + (princ (format "+%s\n" (cons *DFA-optimize-node* newgroup)))) + (TREX-push (cons *DFA-optimize-node* newgroup) *DFA-optimize-groups*) + (throw 'DFA-optimize-changed t)))) + (setq Salist (cdr Salist)))) + (setq Smembers (cdr Smembers)))) + (setq groups (cdr groups)))))) + ;;; + ;;; + (if *DFA-optimize-debug* + (princ (format "table: %s\n" table))) + (if *DFA-optimize-debug* + (princ (format "groups: %s\n" *DFA-optimize-groups*))) + (let ((newtable nil) + (newstart nil) + (groups *DFA-optimize-groups*)) + + ;;; start node $B$rC5$9(B + (let ((l *DFA-optimize-groups*)) + (while l + (cond((TREX-memequal start (cdr (car l))) + (setq newstart (car (car l))) + (setq l nil)) + (t + (setq l (cdr l)))))) + + ;;; $B?7$7$$(B transTable $B$r:n$k!%(B + (while groups + (let* ((group (car groups)) + (group-number (car group)) + (member-number (car (cdr group))) + (member-alist (cdr (assoc member-number table)))) + (TREX-push (cons group-number + (let ((group-alist nil)) + (while member-alist + (let ((Mkey (car (car member-alist))) + (Mnext (cdr (car member-alist)))) + (TREX-push (cons Mkey + (if (consp Mnext) + (cons (DFA-optimize-group-number (car Mnext)) + (DFA-optimize-group-number (cdr Mnext))) + (DFA-optimize-group-number Mnext))) + group-alist)) + (setq member-alist (cdr member-alist))) + group-alist)) + newtable) + (setq groups (cdr groups)))) + (cons newstart newtable)))) + + + +;;; +;;; Non Empty Finite Automata +;;; + +(defun NEFA-make (EFFA) + (let* ((start (car EFFA)) + (table (cdr EFFA)) + (Salist (cdr (assoc start table)))) + (cond((equal Salist '((nil))) + nil) + ((and (assoc nil Salist) + (progn + (while (and Salist (not (equal start (cdr (car Salist))))) + (setq Salist (cdr Salist))) + Salist)) + (let ((min 10000) + (max -10000) + (l table)) + (while l + (if (< (car (car l)) min) + (setq min (car (car l)))) + (if (< max (car (car l))) + (setq max (car (car l)))) + (setq l (cdr l))) + (let* ((newstart (1- min)) + (newtable (copy-alist table)) + (oldSalist (cdr (assoc start table))) + (newSalist (TREX-delete '(nil) (copy-alist oldSalist)))) + (cons newstart + (cons (cons newstart newSalist) newtable))))) + (t + EFFA)))) + +;;; +;;; Simplify FA +;;; + +(defvar *FA-simplify-table* nil) + +(defun FA-simplify (FA) + (let ((start (car FA)) + (table (cdr FA)) + (newtable nil) + (*FA-simplify-table* nil)) + (FA-simplify-mark start table) + (while *FA-simplify-table* + (TREX-push (assoc (car *FA-simplify-table*) table) newtable) + (setq *FA-simplify-table* (cdr *FA-simplify-table*))) + (cons start newtable))) + +(defun FA-simplify-mark (node table) + (cond ((not (TREX-memequal node *FA-simplify-table*)) + (TREX-push node *FA-simplify-table*) + (let ((alist (cdr (assoc node table)))) + (while alist + (cond((car (car alist)) + (FA-simplify-mark (cdr (car alist)) table))) + (setq alist (cdr alist))))))) + +;;; +;;; Shortest match DFA +;;; + +(defun DFA-shortest-match (DFA) + (let ((start (car DFA)) + (table (cdr DFA)) + (newtable nil)) + (while table + (cond ((assoc nil (cdr (car table))) + (TREX-push (cons (car (car table)) '((nil))) newtable)) + (t + (TREX-push (car table) newtable))) + (setq table (cdr table))) + (cons start newtable))) + +;;; +;;; Fastmap computation +;;; + +(defvar *DFA-fastmap-chars* nil) +(defvar *DFA-fastmap-syntax* nil) +(defvar *DFA-fastmap-category* nil) +(defvar *DFA-fastmap-init* 0 ) +(defvar *DFA-fastmap-pos* 1 ) ;;; SYNTAXSPEC or CATEGORYSPEC +(defvar *DFA-fastmap-neg* 2 ) ;;; NOTSYNTAXSPEC or NOTCATEGORYSPEC + +;;;; $B$9$Y$F$N(B char $B$OB~0l$D$N(B syntaxspec $B$KB0$9$k(B +;;;; ==> syntaxspec(ch) and notsyntaxspec(ch) --> all char +;;;; ==> notsyntaxspec(ch1) and notsyntaxspec(ch2) --> all char +;;;; ==> notsyntaxspec(ch1) and syntaxspec(ch2) == notsyntaxspec(ch1) +;;;; $B$D$^$j(B notsyntaxspec $B$O9b!9#1$D$7$+$J$$!%(B + +;;; Returns [ CODE FASTMAP SYNTAX-FASTMAP CATEGOY-FASTMAP ] + +(defun DFA-code-with-fastmap (DFA) + (TREX-init *DFA-fastmap-chars* (make-vector 256 nil)) + (TREX-init *DFA-fastmap-syntax* (make-vector 256 nil)) + (TREX-init *DFA-fastmap-category* (make-vector 256 nil)) + (let ((code (regexp-code-gen DFA)) + (start (car DFA)) + (*DFA-fastmap-table* (cdr DFA)) + (*DFA-fastmap-mark* nil) + (*DFA-fastmap-special* nil)) + (let ((i 0)) + (while (< i 256) + (aset *DFA-fastmap-chars* i nil) + (aset *DFA-fastmap-syntax* i nil) + (aset *DFA-fastmap-category* i nil) + (TREX-inc i))) + (DFA-fastmap-collect start) + (let ((fastmap (if *DFA-fastmap-special* + nil ;;;(make-string 256 1) + (make-string 256 0))) + (fastmap-entries 0) + (syntax (if *DFA-fastmap-special* + nil + (make-string 256 0))) + (syntax-entries 0) + (notsyntax-entries 0) + (category (if *DFA-fastmap-special* + nil + (make-string 256 0))) + (category-entries 0)) + (let ((result (make-vector 4 nil))) + (aset result 0 code) + (if *DFA-fastmap-special* + (progn + (aset result 1 fastmap) + (aset result 2 syntax) + (aset result 3 category)) + (progn + (let ((i 0)) + (while (< i 256) + (if (aref *DFA-fastmap-chars* i) + (progn + (TREX-inc fastmap-entries) + (aset fastmap i 1))) + (aset syntax i + (cond((null (aref *DFA-fastmap-syntax* i)) + *DFA-fastmap-init*) + ((eq (aref *DFA-fastmap-syntax* i) 'SYNTAXSPEC) + (TREX-inc syntax-entries) + *DFA-fastmap-pos*) + ((eq (aref *DFA-fastmap-syntax* i) 'NOTSYNTAXSPEC) + (TREX-inc notsyntax-entries) + (TREX-inc syntax-entries) + *DFA-fastmap-neg*))) + (aset category i + (cond((null (aref *DFA-fastmap-category* i)) + *DFA-fastmap-init*) + ((eq (aref *DFA-fastmap-category* i) 'CATEGORYSPEC) + (TREX-inc category-entries) + *DFA-fastmap-pos*) + ((eq (aref *DFA-fastmap-category* i) 'NOTCATEGORYSPEC) + (TREX-inc category-entries) + *DFA-fastmap-neg*))) + (TREX-inc i))) + + (cond((<= 2 notsyntax-entries) + (setq fastmap (make-string 256 1) + syntax nil + category nil)) + ((= 1 notsyntax-entries) + (let ((ch 0)) + (while (< ch 256) + (if (= (aref syntax ch) *DFA-fastmap-neg*) + (aset syntax ch *DFA-fastmap-init*) + (aset syntax ch *DFA-fastmap-pos*)) + (TREX-inc ch))))) + (aset result 1 fastmap) + (aset result 2 syntax) + (aset result 3 category))) + result)))) + +(defun DFA-fastmap-collect (node) + (if (TREX-memequal node *DFA-fastmap-mark*) nil + (let ((alist (cdr (assoc node *DFA-fastmap-table*)))) + (TREX-push node *DFA-fastmap-mark*) + (while alist + (let ((key (car (car alist)))) + (cond((numberp key) + (aset *DFA-fastmap-chars* key t)) + ((symbolp key);;; can be null + (setq *DFA-fastmap-special* t)) + (t + (let ((op (car key))) + (cond + ((TREX-memequal op '(START_MEMORY STOP_MEMORY)) + (DFA-fastmap-collect (cdr (car alist)))) + ((TREX-memequal op '(SYNTAXSPEC NOTSYNTAXSPEC)) + (let ((specch (syntax-code-spec (nth 1 key)))) + (cond((null (aref *DFA-fastmap-syntax* (nth 1 key))) + (aset *DFA-fastmap-syntax* specch op)) + ((not (eq (aref *DFA-fastmap-syntax* specch) op)) + (setq *DFA-fastmap-special* t))))) + ((TREX-memequal op '(CATEGORYSPEC NOTCATEGORYSPEC)) + (let ((specch (nth 1 key))) + (cond((null (aref *DFA-fastmap-category* specch)) + (aset *DFA-fastmap-category* specch op)) + ((not (eq (aref *DFA-fastmap-category* specch) op)) + (setq *DFA-fastmap-special* t))))) + ((TREX-memequal op '(CHARSET CHARSET_NOT)) + (let ((list (cdr key))) + (while list + (let ((from nil) (to nil)) + (cond((stringp (car list)) + (setq from (aref (car list) 0) + to (aref (car list) 0))) + (t ;;; :range + (setq from (aref (nth 1 (car list)) 0) + to (aref (nth 2 (car list)) 0)))) + (while (<= from to) + (cond((null (aref *DFA-fastmap-chars* from)) + (aset *DFA-fastmap-chars* from + (if (eq op 'CHARSET_NOT) 'CHARSET_NOT + t)))) + (TREX-inc from))) + (setq list (cdr list)))) + (if (eq op 'CHARSET_NOT) + (let ((i 0)) + (while (< i 256) + (cond((null (aref *DFA-fastmap-chars* i)) + (aset *DFA-fastmap-chars* i t)) + ((eq (aref *DFA-fastmap-chars* i) 'CHARSET_NOT) + (aset *DFA-fastmap-chars* i nil))) + (TREX-inc i))))) + (t + (setq *DFA-fastmap-special* t))))))) + (setq alist (cdr alist)))))) + +;;; +;;; $B@55,I=8=%3!<%I$NL?NaI=(B +;;; + +(if (= regexp-version 19) + (TREX-define-enum + UNUSED ;;; 18 + EXACTN ;;; 18 + ANYCHAR ;;; 18 + CHARSET ;;; 18 + CHARSET_NOT ;;; 18 + START_MEMORY ;;; 18* + STOP_MEMORY ;;; 18* + DUPLICATE ;;; 18 + BEGLINE ;;; 18 + ENDLINE ;;; 18 + BEGBUF ;;; 18 + ENDBUF ;;; 18 + JUMP ;;; 18 + JUMP_PAST_ALT ;;; 19 + ON_FAILURE_JUMP ;;; 18 + ON_FAILURE_KEEP_STRING_JUMP ;;; 19 + ;;;; finalize_jump + ;;;; maybe_finalize_jump + POP_FAILURE_JUMP ;;; 19 + MAYBE_POP_JUMP ;;; 19 + DUMMY_FAILURE_JUMP ;;; 18 + PUSH_DUMMY_FAILURE ;;; 19 + SUCCEED_N ;;; 19 + JUMP_N ;;; 19 + SET_NUMBER_AT ;;; 19 + WORDCHAR ;;; 18 + NOTWORDCHAR ;;; 18 + WORDBEG ;;; 18 + WORDEND ;;; 18 + WORDBOUND ;;; 18 + NOTWORDBOUND ;;; 18 + BEFORE_DOT ;;; 18 + AT_DOT ;;; 18 + AFTER_DOT ;;; 18 + SYNTAXSPEC ;;; 18 + NOTSYNTAXSPEC ;;; 18 + ;;; TREX code + EXACT1 + EXACT2 + EXACT3 + CHARSET_M + CHARSET_M_NOT + CASEN + SUCCESS_SHORT + SUCCESS + POP + EXCEPT0 + EXCEPT1 + CATEGORYSPEC + NOTCATEGORYSPEC + RANGE + RANGE_A + ) + ;; else regexp-version == 18. + (TREX-define-enum + UNUSED + EXACTN + BEGLINE + ENDLINE + JUMP + ON_FAILURE_JUMP + FINALIZE_JUMP + MAYBE_FINALIZE_JUMP + DUMMY_FAILURE_JUMP + ANYCHAR + CHARSET + CHARSET_NOT + START_MEMORY + STOP_MEMORY + DUPLICATE + BEFORE_DOT ;;; not used + AT_DOT ;;; not used + AFTER_DOT ;;; not used + BEGBUF + ENDBUF + WORDCHAR ;;; not used + NOTWORDCHAR ;;; not used + WORDBEG + WORDEND + WORDBOUND + NOTWORDBOUND + SYNTAXSPEC + NOTSYNTAXSPEC +;;; +;;; extended instructions +;;; + EXACT1 + EXACT2 + EXACT3 + CHARSET_M + CHARSET_M_NOT + CASEN + SUCCESS_SHORT ;;; == ON_FAILURE_SUCCESS + SUCCESS + POP + EXCEPT0 ;;; ALLCHAR + EXCEPT1 + CATEGORYSPEC + NOTCATEGORYSPEC + )) + +(defvar ON_FAILURE_SUCCESS SUCCESS_SHORT) + +;;; +;;; ANYCHAR = EXCEPT1 \n +;;; ALLCHAR = EXCEPT0 + + +;;; +;;; $B@55,I=8=>H9g4o$NL?NaBN7O(B +;;; +;;; UNUSED +;;; EXACTN n ch1 ch2 ... chn +;;; BEGLINE +;;; ENDLINE +;;; JUMP disp[2] +;;; +JUMP_PAST_ALT disp[2] +;;; ON_FAILURE_JUMP disp[2] +;;; +ON_FAILURE_KEEP_STRING_JUMP disp[2] +;;; -FINALIZE_JUMP disp[2] +;;; -MAYBE_FINALIZE_JUMP disp[2] +;;; +POP_FAILURE_JUMP disp[2] +;;; +MAYBE_POP_JUMP disp[2] +;;; DUMMY_FAILURE_JUMP disp[2] +;;; +PUSH_DUMMY_FAILURE +;;; +SUCCEED_N disp[2] n[2] +;;; +JUMP_N disp[2] n[2] +;;; +SET_NUMBER_AT disp[2] n[2] +;;; ANYCHAR +;;; CHARSET n b1 b2 ... bn +;;;**CHARSET 0xff l1 l2 cf1 ct1 cf2 ct2 ... cfn ctn +;;; CHARSET_NOT n b1 b2 ... bn +;;;**CHARSET_NOT 0xff l1 l2 cf1 ct1 cf2 ct2 ... cfn ctn +;;; $B0J2<$O$($J$_;a$NDs0F$K$h$k?7$?$J%;%^%s%F%#%C%/%9(B +;; +;;; CHARSET n b1 b2 ... bn (n < 0x80) +;;; CHARSET n+0x80 b1 b2 ... bn +;;; |<-- n bytes -->| +;;; lh lo CHARF1 CHART1 .... CHARFm CHARTm +;;; |<- lh << 8 + lo bytes ->| +;; CHARSET n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn +;; |<- bitmap ->| |<- range table ->| +;; CHARSET_NOT n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn +;; CHARSETM m n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn +;; CHARSETM_NOT m n b1 b2 ... bn lh lo cf1 ct1 cf2 ct2 ... cfn ctn +;; +;; o cfx, ctx $B0J30$O$9$Y$F(B 1byte. cfx, ctx $B$O(B multi byte +;; character. +;; +;; o CHARSET(_NOT) $B$H(B CHARSETM(_NOT) $B$H$N0c$$$O(B, CHARSETM(_NOT) +;; $B$N>l9g$K$O(B bitmap $B$N@hF,$N(B m bytes $B$,>J$+$l$F$$$kE@(B. +;; +;; o b1 ... bn ($B$D$^$j(B bitmap$B$ND9$5(B)$B$O(B, (n & 0x7f) bytes. n $B$N(B +;; $BJ,(B 1byte $B$O4^$^$J$$(B. +;; +;; o lh $B0J2<$O(B n & 0x80 $B$,(B 0 $B$J$iB8:_$7$J$$(B. +;; +;; o lh $B$+$i(B ctn $B$^$G$ND9$5(B($B$D$^$j(B range table $B$ND9$5(B) $B$O(B ((lh +;; << 8) + lo) byte. lh $B$H(B lo $B$N(B 2byte $B$r4^$`(B. ($B>e$N(B n $B$N>l(B +;; $B9g$H0c$$$^$9$,(B, $BE}0l$7$?$[$&$,$$$$$+$J(B?). +;; +;; o cfx $B$O(B multi byte character $B$G(B, cfx $B$H(B ctx $B$N(B leading char +;; $B$OF1$8$G$J$$$H$$$1$J$$(B. $B$^$?(B, cfx $B$N(B leading char $B$O(B 0 $B$G(B +;; $B$"$C$F$O$$$1$J$$(B(range table $B$K(B leading char $B$,(B 0 (ASCII$B$H(B +;; $B$+(B) $B$NJ8;z$,$"$C$F$b(B, $B8=:_$O(B fastmap $B$KH?1G$5$l$J$$$+$i(B). +;; +;;; START_MEMORY regno +;;; STOP_MEMORY regno +;;; o emacs 19 $B$N(B regex.c $B$G$O(B, +;;; START_MEMORY regno groupno +;;; STOP_MEMORY regno groupno +;;; groupno $B$O<+J,$h$j2<$N%l%Y%k$N%0%k!<%W$N?t(B +;;; +;;; DUPLICATE regno +;;; BEFORE_DOT ;;; not used +;;; AT_DOT ;;; not used +;;; AFTER_DOT ;;; not used +;;; BEGBUF +;;; ENDBUF +;;; WORDCHAR ;;; not used +;;; NOTWORDCHAR ;;; not used +;;; WORDBEG +;;; WORDEND +;;; WORDBOUND +;;; NOTWORDBOUND +;;; SYNTAXSPEC ch +;;; NOTSYNTAXSPEC ch + +;;; +;;; $B3HD%L?Na!J(BTREX$B$G;HMQ$9$k$b$N!K(B +;;; +;;; EXACT1 ch +;;; EXACT2 ch1 ch2 +;;; EXACT3 ch1 ch2 ch3 +;;; CHARSETM m n b1 b2 .. bn +;;; charset $B$N(B bitmaps $B$N$&$A@hF,$N(B m bytes $B$r>J$$$?$b$N(B +;;; CHARSETM_NOT m n b1 b2 .. bn +;;; charset_not $B$N(B bitmaps $B$N$&$A@hF,$N(B m bytes $B$r>J$$$?$b$N(B +;;; CASEN n disp[1] disp[2] ... disp[n] l u ind[l] ... ind[u] +;;; $B:G=i$K(B n $B8D$N(B jump relative address(2bytes) $B$,B3$-!$(B +;;; $B<!$K(Bcharacter code l $B$+$i(B m $B$^$G$NJ,$N(Bindex(1byte)$B$,B3$/!%(B +;;; ON_FAILURE_SUCCESS +;;; alternative stack $B$r6u$K$7!$(Bpend $B$r(B push $B$9$k!%(B +;;; SUCCESS +;;; pend $B$X%8%c%s%W$9$k!%(B +;;; POP +;;; alternative stack $B$r(B pop $B$9$k!%(B + +;;; RANGE ch1 ch2 +;;; RANGE_A == RANGE 0xA0 0xFF + + +;;; [^$B&A(B]$B&B(B\|$B&C(B $B$N0UL#!'(B +;;; on_failure_jump L1 +;;; on_failure_jump L2 +;;; $B&A(B +;;; pop +;;; fail +;;; L1: ALLCHAR +;;; $B&B(B +;;; L2: pop +;;; $B&C(B + +;;; +;;; regexp-code-* +;;; + +(defvar *regexp-code-buffer* (get-buffer-create " *regexp-code-buffer*")) + +(defun regexp-code-gen (FA) + (let ((start (car FA)) + (table (cdr FA)) + (*table* (cdr FA)) + (*labels* nil) + (*final* nil) + (*counter* 0)) + (let ((list table)) + (while (and list (null *final*)) + (if (equal '((nil)) (cdr (car list))) + (setq *final* (car (car list)))) + (setq list (cdr list)))) + (cond((null *final*) + (setq *final* (1+ (length table))) + (setq *counter* (1+ *final*))) + (t + (setq *counter* (1+ (length table))))) + (save-excursion + (set-buffer *regexp-code-buffer*) + (let ((kanji-flag nil) + (mc-flag nil)) + (erase-buffer) + (regexp-code-gen* start) + (buffer-substring (point-min) (point-max))) + ))) + +(defun regexp-code-gen* (node) + (cond((= node *final*) + (if (null (assoc node *labels*)) + (TREX-push (cons node (point)) *labels*)) + (insert SUCCESS)) + ((null (assoc node *labels*)) + (TREX-push (cons node (point)) *labels*) + (let ((alist (cdr (assoc node *table*)))) + (cond((equal '((nil)) alist) + (insert SUCCESS)) + (t (regexp-code-gen-alist alist))))) + (t + (let ((disp (- (cdr (assoc node *labels*)) (+ (point) 3)))) + (insert JUMP + (logand disp 255) + (/ (logand disp (* 255 256)) 256)))))) + +(defvar *regexp-charset-table* nil) +(defvar *regexp-case-table* nil) + +(defun regexp-code-gen-alist (alist) + (TREX-init *regexp-charset-table* (make-vector 256 nil)) + (TREX-init *regexp-case-table* (make-vector 256 nil)) + (if (eq (car (car alist)) nil) + nil + (let ((nextalist alist) + (numberkey nil) + (point nil) + (min 256) (max -1) (nexts nil) (nodealist nil)) + (cond((numberp (car (car alist))) + (setq numberkey t) + (let ((i 0)) + (while (< i 256) + (aset *regexp-case-table* i nil) + (TREX-inc i))) + + (while (and nextalist + (numberp (car (car nextalist)))) + (let ((ch (car (car nextalist))) + (next (cdr (car nextalist)))) + (let ((place (assoc next nodealist))) + (if place + (setcdr place + (cons ch (cdr place))) + (TREX-push (cons ch (list next)) nodealist))) + (aset *regexp-case-table* ch next) + (if (< ch min) (setq min ch)) + (if (< max ch) (setq max ch)) + (if (not (TREX-memequal next nexts)) + (TREX-push next nexts))) + (setq nextalist (cdr nextalist)))) + (t (setq nextalist (cdr alist)))) + + (if nextalist + (cond((eq (car (car nextalist)) nil) + (insert ON_FAILURE_SUCCESS )) ;;; SUCCESS_SHORT + (t + (insert ON_FAILURE_JUMP 0 0) + (setq point (point))))) + + (cond(numberkey + (cond((= min max) + ;;; exact1 + (regexp-code-gen-exact (list min) (car nexts))) + + ((= (length nexts) 1) + ;;; charset or charset_not + (if (= (length alist) 256) + (insert EXCEPT0) ;92.10.26 by T.Saneto + (let ((not_min 256) + (not_max -1) + (ch 0) + (mode (car nexts))) + (while (< ch 256) + (cond((null (aref *regexp-case-table* ch)) + (if (< ch not_min) (setq not_min ch)) + (if (< not_max ch) (setq not_max ch)))) + (TREX-inc ch)) + (if (<= (- not_max not_min) (- max min)) + (setq min not_min + max not_max + mode nil)) + (let ((minb (/ min 8)) + (maxb (1+ (/ max 8)))) + (insert (if mode CHARSET_M CHARSET_M_NOT) minb (- maxb minb)) + (let ((b minb)) + (while (< b maxb) + (let ((i 7) (bits 0)) + (while (<= 0 i) + (if (eq (aref *regexp-case-table* (+ (* 8 b) i)) + mode) + ;;;; bits table$B$N=g=x$O<!$NDL$j(B + (TREX-inc bits (aref [1 2 4 8 16 32 64 128] i))) + (TREX-dec i)) + (insert bits)) + (TREX-inc b)))))) + (regexp-code-gen* (car nexts))) + (t + ;;; case + (let ((point nil)) + (insert CASEN) + (insert (length nexts)) + (setq point (point)) + (let ((list nexts)) + (while list + (insert 0 0) + (setq list (cdr list)))) + (insert min max) + (let ((ch min)) + (while (<= ch max) + (if (aref *regexp-case-table* ch) + (insert (1+ (TREX-find (aref *regexp-case-table* ch) nexts))) + (insert 0)) + (TREX-inc ch))) + (let ((list nexts)) + (while list + (if (null (assoc (car list) *labels*)) + (regexp-code-gen* (car list))) + (setq list (cdr list)))) + (save-excursion + (goto-char point) + (let ((list nexts)) + (while list + (delete-char 2) + (let ((disp (- (cdr (assoc (car list) *labels*)) (+ (point) 2)))) + (insert (logand disp 255) + (/ (logand disp (* 255 256)) 256))) + (setq list (cdr list))))) + )))) + ((eq (car (car alist)) ':epsilon) + (regexp-code-gen* (cdr (car alist)))) + (t + (let ((key (car (car alist))) + (next (cdr (car alist)))) + (cond ((symbolp key) + (insert (eval key))) + ((TREX-memequal (car key) '(CHARSET CHARSET_NOT)) + (let ((charset (cdr key)) + (min 128) (max -1) + (mcbytes 0) + (mcchars nil)) + (let ((i 0)) + (while (< i 256) + (aset *regexp-charset-table* i nil) + (TREX-inc i))) + (while charset + (cond((stringp (car charset)) + (cond((eq (length (car charset)) 1) + (aset *regexp-charset-table* (aref (car charset) 0) t) + (if (< (aref (car charset) 0) min) + (setq min (aref (car charset) 0))) + (if (< max (aref (car charset) 0)) + (setq max (aref (car charset) 0))) + ) + (t + (TREX-inc mcbytes (* 2 (length (car charset)))) + (if (null mcchars) (setq mcchars charset)) + ))) + ((consp (car charset)) ;;; range + (cond ((eq (length (nth 1 (car charset))) 1) + (let ((from (aref (nth 1 (car charset)) 0)) + (to (aref (nth 2 (car charset)) 0))) + (if (< from min) (setq min from)) + (if (< max to) (setq max to)) + (while (<= from to) + (aset *regexp-charset-table* from t) + (TREX-inc from))) + ) + (t + (TREX-inc mcbytes + (+ (length (nth 1 (car charset))) (length (nth 2 (car charset))))) + (if (null mcchars) (setq mcchars charset)))))) + (setq charset (cdr charset))) + (cond ((< max min) + (insert (if (eq (car key) 'CHARSET) CHARSET CHARSET_NOT) + (if (< 0 mcbytes) 128 0))) + (t + (let ((minb (/ min 8)) + (maxb (1+ (/ max 8)))) + (insert (if (eq (car key) 'CHARSET) CHARSET_M CHARSET_M_NOT) + minb (+ (if (< 0 mcbytes) 128 0) (- maxb minb))) + (let ((b minb)) + (while (< b maxb) + (let ((i 7) (bits 0)) + (while (<= 0 i) + (if (aref *regexp-charset-table* (+ (* 8 b) i)) + ;;;; bits table$B$N=g=x$O<!$NDL$j(B + (TREX-inc bits (aref [1 2 4 8 16 32 64 128] i))) + (TREX-dec i)) + (insert bits)) + (TREX-inc b)))))) + + (cond( (< 0 mcbytes) + (TREX-inc mcbytes 2) + (insert (/ mcbytes 256) (mod mcbytes 256)) + (while mcchars + (cond((stringp (car mcchars)) + (insert (car mcchars) (car mcchars))) + ((consp (car mcchars)) + (insert (nth 1 (car mcchars)) (nth 2 (car mcchars))))) + (setq mcchars (cdr mcchars))))) + )) + ((= (length key) 1) + (insert (eval (car key)))) + ((= (length key) 2) + (insert (eval (car key)) (nth 1 key))) + ((= (length key) 3) + (insert (eval (car key)) (nth 1 key) (nth 2 key))) + (t + (regexp-error))) + (regexp-code-gen* next)))) + (if point + (let ((disp (- (point) point))) + (save-excursion + (goto-char point) + (delete-char -2) + (insert (logand disp 255) + (/ (logand disp (* 255 256)) 256))) + (regexp-code-gen-alist nextalist)))))) + +(defun regexp-code-gen-exact (chars node) + (let ((alist (cdr (assoc node *table*)))) + (cond((and (null (assoc node *labels*)) + (= (length alist) 1) + (numberp (car (car alist)))) + (regexp-code-gen-exact (cons (car (car alist)) chars) + (cdr (car alist)))) + (t + (regexp-code-gen-exact* (reverse chars)) + (regexp-code-gen* node))))) + +(defun regexp-code-gen-exact* (chars) + (cond((= (length chars) 1) + (insert EXACT1 (car chars))) + ((= (length chars) 2) + (insert EXACT2 (car chars) (nth 1 chars))) + ((= (length chars) 3) + (insert EXACT3 (car chars) (nth 1 chars) (nth 2 chars))) + (t + (insert EXACTN (length chars)) + (let ((list chars)) + (while list + (insert (car list)) + (setq list (cdr list))))))) + +;;; +;;; regexp-code-dump +;;; $B@55,I=8=$N%3!<%I$rI=<($9$k!%(B +;;; + +(defvar *regexp-code-dump* nil) +(defvar *regexp-code-index* nil) + +(defun regexp-code-dump (*regexp-code-dump*) + (terpri) + (let ((*regexp-code-index* 0) + (max (length *regexp-code-dump*))) + (while (< *regexp-code-index* max) + (princ (format "%4d:" *regexp-code-index*)) + (let((op (aref *regexp-code-dump* *regexp-code-index*))) + (cond((= op UNUSED) (regexp-code-dump-0 "unused")) + ((= op EXACTN) + (princ (format "exactn(%d) " (aref *regexp-code-dump* (1+ *regexp-code-index*)))) + (let ((j (+ *regexp-code-index* 2)) + (max (+ *regexp-code-index* 2 (aref *regexp-code-dump* (1+ *regexp-code-index*))))) + (while (< j max) + (princ (format "%c" (aref *regexp-code-dump* j))) + (TREX-inc j)) + (setq *regexp-code-index* j)) + (terpri) + ) + ((= op BEGLINE) (regexp-code-dump-0 "begline")) + ((= op ENDLINE) (regexp-code-dump-0 "endline")) + ((= op JUMP) (regexp-code-dump-jump "jump")) + ((and (= regexp-version 19) + (= op JUMP_PAST_ALT)) + (regexp-code-dump-jump "jump_past_alt")) + ((= op ON_FAILURE_JUMP ) (regexp-code-dump-jump "on_failure_jump")) + ((and (= regexp-version 19) + (= op ON_FAILURE_KEEP_STRING_JUMP)) + (regexp-code-dump-jump "on_failure_keep_string_jump")) + ((and (= regexp-version 18) + (= op FINALIZE_JUMP)) + (regexp-code-dump-jump "finalize_jump")) + ((and (= regexp-version 18) + (= op MAYBE_FINALIZE_JUMP)) + (regexp-code-dump-jump "maybe_finalize_jump")) + ((and (= regexp-version 19) + (= op POP_FAILURE_JUMP)) + (regexp-code-dump-jump "pop_failure_jump")) + ((and (= regexp-version 19) + (= op MAYBE_POP_JUMP)) + (regexp-code-dump-jump "maybe_pop_jump")) + ((= op DUMMY_FAILURE_JUMP) (regexp-code-dump-jump "dummy_failure_jump")) + ((and (= regexp-version 19) + (= op PUSH_DUMMY_FAILURE)) + (regexp-code-dump-0 "push_dummy_failure")) + ((and (= regexp-version 19) + (= op SUCCEED_N)) + (regexp-code-dump-jump-2 "succeed_n")) + ((and (= regexp-version 19) + (= op JUMP_N)) + (regexp-code-dump-jump-2 "jump_n")) + ((and (= regexp-version 19) + (= op SET_NUMBER_AT)) + (regexp-code-dump-jump-2 "SET_NUMBER_AT")) + ((= op ANYCHAR) (regexp-code-dump-0 "anychar")) + ((= op CHARSET) (regexp-code-dump-charset "charset")) + ((= op CHARSET_NOT) (regexp-code-dump-charset "charset_not")) + ((= op START_MEMORY) + (if (= regexp-version 19) + (regexp-code-dump-2 "start_memory") + (regexp-code-dump-1 "start_memory"))) + ((= op STOP_MEMORY) + (if (= regexp-version 19) + (regexp-code-dump-2 "stop_memory") + (regexp-code-dump-1 "stop_memory"))) + ((= op DUPLICATE) (regexp-code-dump-1 "duplicate")) + ((= op BEFORE_DOT) (regexp-code-dump-0 "before_dot")) + ((= op AT_DOT) (regexp-code-dump-0 "at_dot")) + ((= op AFTER_DOT) (regexp-code-dump-0 "after_dot")) + ((= op BEGBUF) (regexp-code-dump-0 "begbuf")) + ((= op ENDBUF) (regexp-code-dump-0 "endbuf")) + ((= op WORDCHAR) (regexp-code-dump-0 "wordchar")) + ((= op NOTWORDCHAR) (regexp-code-dump-0 "notwordchar")) + ((= op WORDBEG) (regexp-code-dump-0 "wordbeg")) + ((= op WORDEND) (regexp-code-dump-0 "wordend")) + ((= op WORDBOUND) (regexp-code-dump-0 "wordbound")) + ((= op NOTWORDBOUND) (regexp-code-dump-0 "notwordbound")) + ((= op SYNTAXSPEC) (regexp-code-dump-syntax "syntaxspec")) + ((= op NOTSYNTAXSPEC) (regexp-code-dump-syntax "notsyntaxspec")) + ((= op EXACT1) (regexp-code-dump-1ch "EXACT1")) + ((= op EXACT2) + (princ (format "EXACT2 %c%c\n" (aref *regexp-code-dump* (1+ *regexp-code-index*)) + (aref *regexp-code-dump* (+ *regexp-code-index* 2)))) + (TREX-inc *regexp-code-index* 3)) + ((= op EXACT3) + (princ (format "EXACT3 %c%c%c\n" + (aref *regexp-code-dump* (1+ *regexp-code-index*)) + (aref *regexp-code-dump* (+ *regexp-code-index* 2)) + (aref *regexp-code-dump* (+ *regexp-code-index* 3)))) + (TREX-inc *regexp-code-index* 4)) + ((= op CHARSET_M) (regexp-code-dump-charset-m "CHARSET_M")) + ((= op CHARSET_M_NOT) (regexp-code-dump-charset-m "CHARSET_M_NOT")) + ((= op CASEN) + (princ (format "CASEN %d\n" (aref *regexp-code-dump* (1+ *regexp-code-index*)))) + (let ((j (+ *regexp-code-index* 2)) + (max (+ *regexp-code-index* 2 (* 2 (aref *regexp-code-dump* (1+ *regexp-code-index*)))))) + (while (< j max) + (princ (format "[%d]::%d\n" (1+ (/ (- j (+ *regexp-code-index* 2)) 2)) + (regexp-get-absolute-address + (+ j 2) (aref *regexp-code-dump* j) + (aref *regexp-code-dump* (1+ j))))) + (TREX-inc j 2)) + (let ((ch (aref *regexp-code-dump* j)) (chmax (aref *regexp-code-dump* (1+ j)))) + (princ (format "%c::%c\n" ch chmax)) + (TREX-inc j 2) + (while (<= ch chmax) + (princ (format "%c=>[%d]\n" ch (aref *regexp-code-dump* j))) + (TREX-inc j) + (TREX-inc ch))) + (setq *regexp-code-index* j))) + ((= op ON_FAILURE_SUCCESS) (regexp-code-dump-0 "ON_FAILURE_SUCCESS")) + ((= op SUCCESS) (regexp-code-dump-0 "SUCCESS")) + ((= op POP) (regexp-code-dump-0 "POP")) + ((= op EXCEPT0) (regexp-code-dump-0 "EXCEPT0")) + ((= op EXCEPT1) (regexp-code-dump-1ch "EXCEPT1")) + ((= op CATEGORYSPEC) (regexp-code-dump-1ch "CATEGORYSPEC")) + ((= op NOTCATEGORYSPEC) (regexp-code-dump-1ch "NOTCATEGORYSPEC")) + (t (princ (format "unknown op=%d\n" op)) + (TREX-inc *regexp-code-index*))))) + (princ (format "%4d:\n" *regexp-code-index*))) + nil + ) + +(defun regexp-code-dump-0 (op) + (princ op) (terpri) + (TREX-inc *regexp-code-index*)) + +(defun regexp-code-dump-1 (op) + (princ (format "%s %d\n" op (aref *regexp-code-dump* (1+ *regexp-code-index*)))) + (TREX-inc *regexp-code-index* 2)) + +(defun regexp-code-dump-2 (op) + (princ (format "%s %d %d\n" + op + (aref *regexp-code-dump* (1+ *regexp-code-index*)) + (aref *regexp-code-dump* (+ *regexp-code-index* 2)) + )) + (TREX-inc *regexp-code-index* 3)) + +(defun regexp-code-dump-syntax (op) + (princ (format "%s %c\n" op (syntax-code-spec (aref *regexp-code-dump* (1+ *regexp-code-index*))))) + (TREX-inc *regexp-code-index* 2)) + +(defun regexp-code-dump-1ch (op) + (princ (format "%s %c\n" op (aref *regexp-code-dump* (1+ *regexp-code-index*)))) + (TREX-inc *regexp-code-index* 2)) + +(defun regexp-get-absolute-address (point b1 b2) + (cond ((< b2 128) + (+ point (+ (* 256 b2) b1))) + (t + (+ point (logior (logxor -1 (+ (* 255 256) 255)) (* 256 b2) b1))))) + +(defun regexp-code-dump-jump (op) + (let* ((b1 (aref *regexp-code-dump* (1+ *regexp-code-index*))) + (b2 (aref *regexp-code-dump* (+ *regexp-code-index* 2))) + (p (regexp-get-absolute-address (+ *regexp-code-index* 3) b1 b2))) + (princ (format "%s %d\n" op p))) + (TREX-inc *regexp-code-index* 3)) + +(defun regexp-code-dump-jump-2 (op) + (let* ((b1 (aref *regexp-code-dump* (1+ *regexp-code-index*))) + (b2 (aref *regexp-code-dump* (+ *regexp-code-index* 2))) + (p (regexp-get-absolute-address (+ *regexp-code-index* 3) b1 b2))) + (princ (format "%s %d %d\n" op p + (+ + (* 256 (aref *regexp-code-dump* (+ *regexp-code-index* 3))) + (aref *regexp-code-dump* (+ *regexp-code-index* 4)))))) + (TREX-inc *regexp-code-index* 5)) + +(defun regexp-code-dump-charset (op) + (let ((n (aref *regexp-code-dump* (1+ *regexp-code-index*)))) + (princ (format "%s %d " op n)) + (let ((j (+ *regexp-code-index* 2)) + (max (+ *regexp-code-index* 2 (if (<= 128 n) (- n 128) n)))) + (while (< j max) + (princ (format "0x%2x " (aref *regexp-code-dump* j))) + (TREX-inc j)) + (cond((<= 128 n) + (let* ((len (+ (* 256 (aref *regexp-code-dump* j)) + (aref *regexp-code-dump* (1+ j)))) + (last (+ j len))) + (princ (format "\n range list[%d-2 bytes]" len)) + (TREX-inc j 2) + (while (< j last) + (let ((ch (sref *regexp-code-dump* j))) + (princ (format " %c" ch)) + (TREX-inc j (char-octets ch)) + (setq ch (sref *regexp-code-dump* j)) + (princ (format "-%c" ch)) + (TREX-inc j (char-octets ch)))) + ))) + (setq *regexp-code-index* j) + (terpri)) + )) + +(defun regexp-code-dump-charset-m (op) + (let ((m (aref *regexp-code-dump* (1+ *regexp-code-index*))) + (n (aref *regexp-code-dump* (+ *regexp-code-index* 2)))) + (princ (format "%s %d %d " op m n)) + (let ((j (+ *regexp-code-index* 3)) + (max (+ *regexp-code-index* 3 (if (<= 128 n) (- n 128) n)))) + (while (< j max) + (princ (format "0x%02x " (aref *regexp-code-dump* j))) + (TREX-inc j)) + (cond((<= 128 n) + (let* ((len (+ (* 256 (aref *regexp-code-dump* j)) + (aref *regexp-code-dump* (1+ j)))) + (last (+ j len))) + (princ (format "\n range list[%d-2 bytes]" len)) + (TREX-inc j 2) + (while (< j last) + (let ((ch (sref *regexp-code-dump* j))) + (princ (format " %c" ch)) + (TREX-inc j (char-octets ch)) + (setq ch (sref *regexp-code-dump* j)) + (princ (format "-%c" ch)) + (TREX-inc j (char-octets ch)))) + ))) + (setq *regexp-code-index* j) + (terpri) + ))) + +;;; +;;; Compile functions +;;; + +(defun TREX-simple-test1 () + (regexp-word-compile + "\\cA+\\cH*\\|\\cK+\\cH*\\|\\cC+\\cH*\\|\\cH+\\|\\sw+")) + +(defun TREX-test1 (pattern) + (let* ((regexp (regexp-parse pattern)) + (fFA (EFFA-make (FA-make regexp))) + (bFA (EFFA-make (FA-inverse fFA))) + (l (cdr fFA)) + (result nil)) + (TREX-push (cons (DFA-optimize (DFA-make fFA)) + (DFA-optimize (DFA-make bFA))) + result) + (while l + (let* ((forward (NEFA-make (EFFA-make (cons (car (car l)) (cdr fFA))))) + (backward (NEFA-make (EFFA-make (cons (car (car l)) (cdr bFA)))))) + (cond((and forward backward) + (TREX-push (cons (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make forward)))) + (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make backward))))) + result)))) + (setq l (cdr l))) + (setq result (reverse result)) + (let ((count 0)) + (while result + (princ (format "\nForward[%2d]:" count)) (FA-dump (car (car result))) + (princ (format "\nBackward[%2d]:" count)) (FA-dump (cdr (car result))) + (TREX-inc count) + (setq result (cdr result)))))) + +(defun TREX-test2 (pattern) + (let* ((regexp (regexp-parse pattern)) + (fFA (EFFA-make (FA-make regexp))) + (l (cdr fFA)) + (result nil)) + (regexp-code-dump (setq result (regexp-code-gen (DFA-optimize (DFA-make fFA))))) + result)) + +;;;###autoload +(defun regexp-compile (pattern) + (regexp-compile-internal pattern nil)) + +;;;###autoload +(defun regexp-word-compile (pattern) + (regexp-compile-internal pattern t)) + +;;; +;;; Returns a list of pair of forward-code and backward-code +;;; + + +(defun regexp-compile-internal (pattern &optional word) + (let* ((*regexp-word-definition* word) + (*regexp-parse-translate* + (if case-fold-search + ;;; DOWNCASE or CANONICAL? + (nth 2 (current-case-table)) + nil)) + (regexp (regexp-parse pattern)) + (fFA (EFFA-make (FA-make (regexp-reform-duplication regexp)))) + (bFA (EFFA-make (FA-make (regexp-reform-duplication (regexp-inverse regexp))))) + (result nil)) + (let ((ofFA (DFA-optimize (DFA-make fFA))) + (obFA (DFA-optimize (DFA-make bFA)))) + (TREX-push (cons (DFA-code-with-fastmap ofFA) + (let* ((START_MEMORY STOP_MEMORY) + (STOP_MEMORY START_MEMORY)) + (DFA-code-with-fastmap obFA))) + result)) + (if word + (let ((l (cdr fFA)) + (bFA (EFFA-make (FA-inverse fFA)))) + (while l + (let* ((forward (NEFA-make (EFFA-make (cons (car (car l)) (cdr fFA))))) + (backward (NEFA-make (EFFA-make (cons (car (car l)) (cdr bFA)))))) + (cond((and forward backward) + (let ((fFA (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make forward))))) + (bFA (DFA-optimize (FA-simplify (DFA-shortest-match (DFA-make backward)))))) + (TREX-push (cons (DFA-code-with-fastmap fFA) + (DFA-code-with-fastmap bFA)) + result))))) + (setq l (cdr l))) + (setq result (nreverse result)))) + result)) + +(defun regexp-compiled-pattern-dump (pattern) + ;;; PATTERN is a vector of [ code fastmap fastmap-syntax fastmap-categoy] + (regexp-code-dump (aref pattern 0)) + (print-fastmap (aref pattern 1) " fastmap[char]") + (print-fastmap (aref pattern 2) " fastmap[synt]") + (print-fastmap (aref pattern 3) " fastmap[cate]") + ) + +(defun regexp-compile-dump (code) + (let ((Fcode (aref (car (car code)) 0)) + (Bcode (aref (cdr (car code)) 0)) + (words (cdr code))) + (princ (format "\nRegular Expression Compiler Dump:\n")) + (princ (format "Forward Search:")) + (regexp-compiled-pattern-dump (car (car code))) + (princ (format "Backward Search:")) + (if Bcode (regexp-compiled-pattern-dump (cdr (car code))) + (princ (format "\n Use the interpreter\n"))) + (if words + (let ((i 1)) + (princ (format "In word conditions:\n")) + (while words + (princ (format "Forward[%d]" i)) + (regexp-compiled-pattern-dump (car (car words))) + (princ (format "Backward[%d]" i)) + (regexp-compiled-pattern-dump (cdr (car words))) + (TREX-inc i) + (setq words (cdr words))))))) + +(defun regexp-compile-and-dump (regexp) + (regexp-compile-dump (regexp-compile regexp))) + + +;;;###autoload +(defmacro define-word-regexp (name regexp) + (` (defconst (, name) '(, (regexp-word-compile regexp))))) + +(put 'define-word-regexp 'byte-hunk-handler ;93.7.16 by S.Tomura + 'macroexpand) + +;;; +;;; Coding system +;;; + +(defmacro define-coding-systems (&rest rest) + (` (define-coding-systems* '(, rest)))) + +(defun define-coding-systems* (names) + (let ((systems + (` (:or (,@ (mapcar (function (lambda (name) (` (:seq (, (regexp-get-definition name)) + (, name))))) + names)))))) + systems)) + +(defun oct (str) (aref str 0)) + +(defvar *TREX-range-from* nil) +(defvar *TREX-range-to* nil) + +(defun TREX-range-make-jisjoint (regexp) + (TREX-init *TREX-range-from* (make-vector 256 nil)) + (TREX-init *TREX-range-to* (make-vector 256 nil)) + (let ((i 0)) + (while (< i 256) + (aset *TREX-range-from* i nil) + (aset *TREX-range-to* i nil) + (TREX-inc i))) + (aset *TREX-range-from* 0 t) + (aset *TREX-range-to* 255 t) + (TREX-range-mark regexp) + (TREX-range-replace regexp)) + +(defun TREX-range-mark (regexp) + (cond + ((consp regexp) + (let ((op (car regexp))) + (cond((eq op ':mark) + (TREX-range-mark (nth 3 regexp))) + ((eq op ':or) + (mapcar 'TREX-range-mark (cdr regexp))) + ((eq op ':seq) + (mapcar 'TREX-range-mark (cdr regexp))) + ((eq op ':optional) + (TREX-range-mark (nth 1 regexp))) + ((eq op ':star) + (TREX-range-mark (nth 1 regexp))) + ((eq op ':plus) + (TREX-range-mark (nth 1 regexp))) + ((eq op ':range) + (TREX-range-mark2 (nth 1 regexp) (nth 2 regexp)))))) + ((stringp regexp) + (TREX-range-mark2 regexp regexp)) + ((numberp regexp) + (TREX-range-mark2 regexp regexp)))) + +(defun TREX-range-mark2 (from to) + (if (stringp from) (setq from (aref from 0))) + (if (stringp to) (setq to (aref to 0))) + (if (< 0 from) (aset *TREX-range-to* (1- from) t)) + (if (< to 255) (aset *TREX-range-from* (1+ to) t)) + (aset *TREX-range-from* from t) + (aset *TREX-range-to* to t)) + +(defun TREX-range-replace (regexp) + (cond + ((consp regexp) + (let ((op (car regexp))) + (cond((eq op ':mark) + (` (:mark (, (nth 1 regexp)) + (, (nth 2 regexp)) + (, (TREX-range-replace (nth 3 regexp)))))) + ((eq op ':or) + (` (:or (,@ (mapcar 'TREX-range-replace (cdr regexp)))))) + ((eq op ':seq) + (` (:seq (,@ (mapcar 'TREX-range-replace (cdr regexp)))))) + ((eq op ':optional) + (` (:optional (,(TREX-range-replace (nth 1 regexp)))))) + ((eq op ':star) + (` (:star (,(TREX-range-replace (nth 1 regexp)))))) + ((eq op ':plus) + (` (:plus (,(TREX-range-replace (nth 1 regexp)))))) + ((eq op ':range) + (let ((from (nth 1 regexp)) + (to (nth 2 regexp)) + i j + (result nil)) + (if (stringp from) (setq from (aref from 0))) + (if (stringp to ) (setq to (aref to 0))) + (setq i from + j from) + (while (<= i to) + (while (not (aref *TREX-range-to* j)) + (TREX-inc j)) + (if (not (= i j)) (TREX-push (` (:range (, i) (, j))) result) + (TREX-push i result)) + (TREX-inc j) + (setq i j)) + (if (= (length result) 1) (car result) + (` (:or (,@ (nreverse result)))))))))) + ((stringp regexp) + (if (= (length regexp) 1) + (aref regexp 0) + regexp)) + ((numberp regexp) + regexp) + (t regexp))) + +(defun FA-sort (FA) + (let ((start (car FA)) + (alist (cdr FA))) + (setq alist (sort alist 'TREX-lessp-car)) + (while alist + (setcdr (car alist) (sort (cdr (car alist)) 'TREX-lessp-car)) + (setcdr (car alist ) (TREX-sort (cdr (car alist)) 'TREX-key-lessp 'cdr)) + (setq alist (cdr alist))) + FA)) + +;;; +;;; CHARSET functions: +;;; +;;; CHARSET ::= RANGE | +;;; (:or RANGE+) | +;;; (:nor RANGE+) +;;; RANGE+ ::= CHAR | +;;; (:range CHAR CHAR) +;;; + +(defun CHARSET-rangep (charset) + (or (numberp charset) + (and (consp charset) (eq (car charset) ':range)))) + +(defun CHARSET-orp (charset) + (and (consp charset) (eq (car charset) ':or))) + +(defun CHARSET-range-from (range) + (if (numberp range) range + (nth 1 range))) + +(defun CHARSET-range-to (range) + (if (numberp range) range + (nth 2 range))) + +(defun CHARSET-range-make (from to) + (if (= from to) from + (list ':range from to))) + +(defun CHARSET-membership (range charset) + (let ((from (CHARSET-range-from range)) + (to (CHARSET-range-to range)) + (flag nil)) + (while (and charset flag1) + (if (< from (CHARSET-range-from (car charset))) + (setq charset (cdr charset)) + (setq flag t))) + (and flag1 (<= to (CHARSET-range-to (car charset)))))) + +(defun CHARSET-not (charset) + (cond((CHARSET-rangep charset) + (list ':nor charset)) + ((CHARSET-orp charset) + (cons ':nor (cdr charset))) + (t + (cons ':or (cdr charset))))) + +(defun CHARSET-union (charset1 charset2) + (cond((CHARSET-rangep charset1) + (cond ((CHARSET-rangep charset2) + (CHARSET-union-range-range charset1 charset2)) + ((CHARSET-orp charset2) + (CHARSET-union-range-or charset1 charset2)) + (t + (CHARSET-union-range-nor charset1 charset2)))) + ((CHARSET-orp charset1) + (cond ((CHARSET-rangep charset2) + (CHARSET-union-range-or charset2 charset1)) + ((CHARSET-orp charset2) + (CHARSET-union-or-or charset1 charset2)) + (t + (CHARSET-union-or-nor charset1 charset2)))) + (t ;;; (CHARSET-norp charset1) + (cond((CHARSET-rangep charset2) + (CHARSET-union-range-nor charset2 charset1)) + ((CHARSET-orp charset2) + (CHARSET-union-or-nor charset2 charset1)) + (t + (CHARSET-union-nor-nor charset1 charset2)))))) + +(defun CHARSET-union-range-range (range1 range2) + (let ((from1 (CHARSET-range-from range1)) + (to1 (CHARSET-range-to range1)) + (from2 (CHARSET-range-from range2)) + (to2 (CHARSET-range-to range2))) + (cond((< to1 from2) + (list ':or range1 range2)) + (t ;;; (<= from2 (1+ to1)) + (cond((<= to1 to2) ;;; (<= from2 to1 to2) + (CHARSET-range-make (min from1 from2) to2)) + ((<= from1 to2) ;;; (<= from1 to2 to1) + (CHARSET-range-make (min from1 from2) to1)) + (t ;;; (<= to2 from1 to1) + (list ':or range2 range1))))))) + +(defun CHARSET-union-range-or (range or) + (cons ':or (CHARSET-union-range-or* range (cdr or)))) + +(defun CHARSET-union-range-or* (range or-body) + (let ((from (CHARSET-range-from range)) + (to (CHARSET-range-to range)) + (part1 nil)) + (let ((flag nil)) + (while (and or-body (null flag)) + (let ((next (car or-body))) + (if (< (CHARSET-range-from next) from) + ;;; from[i] < from + (if (< (CHARSET-range-to next) from) + ;;; to[i] < from + (setq part1 (cons next part1) + or-body (cdr or-body)) + ;;; from[i] < from <= to[i] + (setq from (CHARSET-range-from next) + flag t)) + ;;; from <= from[1] + ;;; to[i-1] < from <= from[i] + (setq flag t))))) + ;;; part1 < from <= from[i] + (if (and part1 (<= (1+ (CHARSET-range-to (car part1))) from)) + (setq from (CHARSET-range-from (car part1)) + part1 (cdr part1))) + ;;; part1 << from <= from[i] + (let ((flag nil)) + (while (and or-body (null flag)) + (let ((next (car or-body))) + (if (< (CHARSET-range-from next) to) + ;;; from[j] < from + (if (< (CHARSET-range-to next) to) + ;;; to[j] < to + (setq or-body (cdr or-body)) + ;;; from[j] < to <= to[j] + (setq to (CHARSET-range-to next) + flag t)) + ;;; to <= from[1] + ;;; to[j-1] < to <= from[j] + (setq flag t))))) + ;;; part2 < to <= from[j] + (if (and or-body (<= (CHARSET-range-from (car or-body)) (1+ to))) + (setq to (CHARSET-range-to (car or-body)) + or-body (cdr or-body))) + ;;; part2 <= to << from[j] + (nconc (reverse part1) + (cons (CHARSET-range-make from to) + or-body)))) + + +(defun CHARSET-union-range-nor (range nor) + (let ((from (CHARSET-range-from range)) + (to (CHARSET-range-to range)) + (nor-body (cdr nor))) + + )) + +(defun CHARSET-union-or-or (or1 or2) + (cons ':or (CHARSET-union-or*-or* (cdr or1) (cdr or2)))) + +(defun CHARSET-union-or*-or* (or1-body or2-body) + (let ((result-body or2-body)) + (while or1-body + (setq result-body + (CHARSET-union-range-or* (car or1-body) result-body)) + (setq or1-body (cdr or1-body))) + result-body)) + +(defun CHARSET-union-or-nor (or nor) + ) + +(defun CHARSET-union-nor-nor (nor1 nor2) + (cons ':nor (CHARSET-intersection-or*-or* (cdr nor1) (cdr nor2)))) + +(defun CHARSET-intersection (charset1 charset2) + (cond((CHARSET-rangep charset1) + (cond ((CHARSET-rangep charset2) + (CHARSET-intersection-range-range charset1 charset2)) + ((CHARSET-orp charset2) + (CHARSET-intersection-range-or charset1 charset2)) + (t + (CHARSET-intersection-range-nor charset1 charset2)))) + ((CHARSET-orp charset1) + (cond ((CHARSET-rangep charset2) + (CHARSET-intersection-range-or charset2 charset1)) + ((CHARSET-orp charset2) + (CHARSET-intersection-or-or charset1 charset2)) + (t + (CHARSET-intersection-or-nor charset1 charset2)))) + (t ;;; (CHARSET-norp charset1) + (cond((CHARSET-rangep charset2) + (CHARSET-intersection-range-nor charset2 charset1)) + ((CHARSET-orp charset2) + (CHARSET-intersection-or-nor charset2 charset1)) + (t + (CHARSET-intersection-nor-nor charset1 charset2)))))) + +(defun CHARSET-intersection-range-or (range or) + (CHARSET-intersection-range-or* range (cdr or))) + +(defun CHARSET-intersection-range-or* (range or-body) + (let ((from (CHARSET-range-from range)) + (to (CHARSET-range-to range)) + (part2 nil)) + (let ((flag nil)) + (while (and or-body (null flag)) + (let ((next (car or-body))) + (if (< (CHARSET-range-from next) from) + ;;; from[i] < from + (if (< (CHARSET-range-to next) from) + ;;; to[i] < from + (setq or-body (cdr or-body)) + ;;; from[i] < from <= to[i] + (setq flag t)) + ;;; from <= from[1] + ;;; to[i-1] < from <= from[i] + (setq flag t))))) + ;;; from[i] < from <= to[i] + ;;; from <= from[1] + ;;; to[i-1] < from <= from[i] + (let ((flag nil)) + (while (and or-body (null flag)) + (let ((next (car or-body))) + (if (<= (CHARSET-range-from next) to) + ;;; from[j] <= to + (if (<= (CHARSET-range-to next) to) + ;;; to[j] <= to + (setq part2 (cons next part2) + or-body (cdr or-body)) + ;;; from[j] <= to < to[j] + (setq part2 (cons next part2) + or-body (cdr or-body) + flag t) + ;;; to < from[1] + ;;; to[j-1] <= to < from[j] + (setq flag t))))) + ;;; from[j] <= to < to[j] + ;;; to < from[1] + ;;; to[j-1] <= to < from[j] + (cond ((null part2) nil) + ((= (length part2) 1) + (list (CHARSET-range-make (max from (CHARSET-range-from (car part2))) + (min to (CHARSET-range-to (car part2)))))) + (t + (setcar part2 (CHARSET-range-make (CHARSET-range-from (car part2)) + (min to (CHARSET-range-to (car part2))))) + (setq part2 (nreverse part2)) + (setcar part2 (CHARSET-range-make (max from (CHARSET-range-from (car part2))) + (CHARSET-range-to (car part2)))) + part2))))) + +(defun CHARSET-intersection-range-nor (range nor) + (CHARSET-intersection-range-nor* range (cdr nor))) + +(defun CHARSET-intersecion-range-nor* (range nor-body) + (let ((from (CHARSET-range-from range)) + (to (CHARSET-range-to range))) + )) + +;;; (and (or a b) c) == (or (and a c) (and b c)) + +(defun CHARSET-intersection-or-or (or1 or2) + (let ((result nil) + (or1-body (cdr or1)) + (or2-body (cdr or2))) + (while or1-body + (setq result (CHARSET-union-or*-or* + (CHARSET-intersection-range-or* (car or1-body) or2-body) + result)) + (setq or1-body (cdr or1-body))) + (if (= (length result) 1) (car result) + (cons ':or result)))) + +(defun CHARSET-intersection-or-nor (or nor) + ) + +;;; (and (not or1) (not or2)) == (not (or or1 or2)) + +(defun CHARSET-intersection-nor-nor (nor1 nor2) + (cons ':nor (CHARSET-union-or*-or* (cdr nor1) (cdr nor2)))) + +(defun FA-compaction (FA) + (let ((start (car FA)) + (alist (cdr FA))) + (setq alist (TREX-sort alist 'TREX-key-lessp 'car)) + (while alist + (let ((table (cdr (car alist))) + (newtable nil) + (keys nil) (next nil)) + (setq table (TREX-sort table '< 'car)) + (while table + (setq next (cdr (car table))) + (TREX-push (car (car table)) keys) + (setq table (cdr table)) + (while (and table (eq next (cdr (car table)))) + (TREX-push (car (car table)) keys) + (setq table (cdr table))) + (setq keys (reverse (sort keys 'TREX-key-lessp))) + (let ((newkeys nil)) + (setq newkeys (car keys) + keys (cdr keys)) + (while keys + (cond((numberp (car keys)) + (cond((numberp (car newkeys)) + (if (= (1+ (car keys)) (car newkeys)) + (setcar newkeys (list ':range (car keys) (car newkeys))) + (TREX-push (car keys) newkeys))) + ((and (consp (car newkeys)) (eq (car (car newkeys)) ':range))))))))))))) + + + +(defun FA-dump2 (table) + (let ((start (car table)) + (l (cdr table))) + (princ (format "\nstart = %d\n" start)) + (while l + (princ (format "%3d: " (car (car l)))) + (let ((alist (cdr (car l)))) + (cond ((numberp (car (car alist))) + (princ (format "\\%03o(%c) -> %s\n" (car (car alist))(car (car alist)) (cdr (car alist))))) + ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC))) + (princ (format "(%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist))))) + ((and (consp (car (car alist))) (eq (car (car (car alist))) ':range)) + (princ (format "(:range \\%03o \\%03o) -> %s\n" + (nth 1 (car (car alist))) (nth 2 (car (car alist))) + (cdr (car alist))))) + (t + (princ (format "%s -> %s\n" (car (car alist)) (cdr (car alist)))))) + (setq alist (cdr alist)) + (while alist + (cond ((numberp (car (car alist))) + (princ (format " \\%03o(%c) -> %s\n" (car (car alist))(car (car alist)) (cdr (car alist))))) + ((and (consp (car (car alist))) (TREX-memequal (car (car (car alist))) '(CATEGORYSPEC NOTCATEGORYSPEC))) + (princ (format " (%s %c) -> %s\n" (car (car (car alist))) (nth 1 (car (car alist))) (cdr (car alist))))) + ((and (consp (car (car alist))) (eq (car (car (car alist))) ':range)) + (princ (format " (:range \\%03o \\%03o) -> %s\n" + (nth 1 (car (car alist))) (nth 2 (car (car alist))) + (cdr (car alist))))) + (t + (princ (format " %s -> %s\n" (car (car alist)) (cdr (car alist)))))) + (setq alist (cdr alist)))) + (setq l (cdr l))))) + +;;;function re-compile REGEXP +;;;Compile REGEXP by GNU Emacs original regexp compiler, +;;;and return information of the compiled code by a vector of length 11: +;;; [ COMPILED-PATTERN (string) +;;; RE-NSUB REGS-ALLOCATED CAN-BE-NULL NEWLINE-ANCHOR (integers) +;;; NO-SUB NOT-BOL NOT-EOL SYNTAX (integers) +;;; FASTMAP TRANSLATE (string) ]. +;;; + +(defun print-compiled-pattern (compiled-code) + (let ((compiled-pattern (aref compiled-code 0)) + (re-nsub (aref compiled-code 1)) + (regs-allocated (aref compiled-code 2)) + (can-be-null (aref compiled-code 3)) + (newline-anchor (aref compiled-code 4)) + (no-sub (aref compiled-code 5)) + (not-bol (aref compiled-code 6)) + (not-eol (aref compiled-code 7)) + (syntax (aref compiled-code 8)) + (fastmap (aref compiled-code 9)) + (translate (aref compiled-code 10))) + (regexp-code-dump compiled-pattern) + ;;; fastmap + (if fastmap (print-fastmap fastmap "fastmap")) + (princ (format "re_nsub: %d\n" re-nsub)) + (princ (format "regs-alloc: %d\n" regs-allocated)) + (princ (format "can-be-null: %d\n" can-be-null)) + (princ (format "newline-anchor: %d\n" newline-anchor)) + (princ (format "no-sub: %d\n" no-sub)) + (princ (format "not-bol: %d\n" not-bol)) + (princ (format "not-eol: %d\n" not-eol)) + (princ (format "syntax: %d\n" syntax)) + (if translate (print-translate translate)) + ;;; translate + nil + )) + +(defun print-fastmap (fastmap name) + (if fastmap + (progn + (princ (format "%s:[" name)) + (let ((max (length fastmap)) + (i 0)) + (while (< i max) + (if (not (= (aref fastmap i) 0)) + (princ (format "%c" i))) + (setq i (1+ i)))) + (princ "]\n")))) + +(defun print-translate (trans) + (if trans + (progn + (princ "translate:\n") + (let ((max (length trans)) + (i 0)) + (while (< i max) + (if (not (= (aref trans i) i)) + (princ (format " %c --> %c" i (aref trans i)))) + (setq i (1+ i)))) + (princ "\n")))) + +(defun re-compile-and-dump (regexp) + (print-compiled-pattern (re-compile regexp))) + + + + + +