Mercurial > hg > xemacs-beta
diff lisp/utils/regexp-opt.el @ 197:acd284d43ca1 r20-3b25
Import from CVS: tag r20-3b25
author | cvs |
---|---|
date | Mon, 13 Aug 2007 10:00:02 +0200 |
parents | 489f57a838ef |
children |
line wrap: on
line diff
--- a/lisp/utils/regexp-opt.el Mon Aug 13 09:59:07 2007 +0200 +++ b/lisp/utils/regexp-opt.el Mon Aug 13 10:00:02 2007 +0200 @@ -5,6 +5,9 @@ ;; Author: Simon Marshall <simon@gnu.ai.mit.edu> ;; Keywords: strings, regexps +;; Modified by Karl M. Hegbloom Sep. 1997 to support the new regexp syntax +;; with shy groups. (benchmarks pending) + ;; This file is part of XEmacs. ;; XEmacs is free software; you can redistribute it and/or modify @@ -43,8 +46,20 @@ ;; "save-current-buffer" "save-match-data" ;; "catch" "throw" "unwind-protect" "condition-case"))) ;; (concat "(" (regexp-opt strings t) "\\>")) +;; +;; => "(\\(?:c\\(?:atch\\|ond\\(?:ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(?:current-buffer\\|excursion\\|match-data\\|restriction\\|window-excursion\\)\\|throw\\|un\\(?:less\\|wind-protect\\)\\|wh\\(?:en\\|ile\\)\\)\\>" +;; +;; +;; (let ((strings '("cond" "if" "when" "unless" "while" +;; "let" "let*" "progn" "prog1" "prog2" +;; "save-restriction" "save-excursion" "save-window-excursion" +;; "save-current-buffer" "save-match-data" +;; "catch" "throw" "unwind-protect" "condition-case"))) +;; (concat "(" (regexp-opt strings t t) "\\>")) +;; ^ ;; => "(\\(c\\(atch\\|ond\\(ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(current-buffer\\|excursion\\|match-data\\|restriction\\|window-excursion\\)\\|throw\\|un\\(less\\|wind-protect\\)\\|wh\\(en\\|ile\\)\\)\\>" ;; +;; ;; Searching using the above example `regexp-opt' regexp takes approximately ;; two-thirds of the time taken using the equivalent `mapconcat' regexp. @@ -81,11 +96,13 @@ ;;; Code: ;;;###autoload -(defun regexp-opt (strings &optional paren) +(defun regexp-opt (strings &optional paren non-shy) "Return a regexp to match a string in STRINGS. Each string should be unique in STRINGS and should not contain any regexps, -quoted or not. If optional PAREN is non-nil, ensure that the returned regexp -is enclosed by at least one regexp grouping construct. +quoted or not. If optional PAREN is non-nil, ensure that the returned +regexp is enclosed by at least one regexp match grouping construct. If +optional NON-SHY is non nil, the inner groupings will use \"\\\\( \\\\)\" grouping, +rather than the default \"\\\\(?: \\\\)\" 'shy', or non-match-capturing groups. The returned regexp is typically more efficient than the equivalent regexp: (let ((open-paren (if PAREN \"\\\\(\" \"\")) (close-paren (if PAREN \"\\\\)\" \"\"))) @@ -97,20 +114,26 @@ ;; Recurse on the sorted list. (let ((max-lisp-eval-depth (* 1024 1024)) (completion-ignore-case nil)) - (regexp-opt-group (sort (copy-sequence strings) 'string-lessp) paren)))) + (regexp-opt-group (sort (copy-sequence strings) 'string-lessp) paren nil non-shy)))) ;;;###autoload -(defun regexp-opt-depth (regexp) +(defun regexp-opt-depth (regexp &optional count-shy-groups-too) "Return the depth of REGEXP. This means the number of regexp grouping constructs (parenthesised expressions) -in REGEXP." +in REGEXP, not counting the \"\\\\(?: \\\\)\" non-match-capturing groups unless +COUNT-SHY-GROUPS-TOO is non-nil. +See `regexp-opt'." (save-match-data ;; Hack to signal an error if REGEXP does not have balanced parentheses. (string-match regexp "") ;; Count the number of open parentheses in REGEXP. - (let ((count 0) start) + (let ((max (1- (length regexp))) + (count 0) start) (while (string-match "\\\\(" regexp start) - (setq count (1+ count) start (match-end 0))) + (setq start (match-end 0)) + (when (or count-shy-groups-too + (not (string= (substring regexp start (min (+ start 2) max)) "?:"))) + (setq count (1+ count)))) count))) ;;; Workhorse functions. @@ -121,11 +144,12 @@ (unless (fboundp 'make-bool-vector) (defalias 'make-bool-vector 'make-vector)) -(defun regexp-opt-group (strings &optional paren lax) +(defun regexp-opt-group (strings &optional paren lax non-shy) ;; ;; Return a regexp to match a string in STRINGS. ;; If PAREN non-nil, output regexp parentheses around returned regexp. ;; If LAX non-nil, don't output parentheses if it doesn't require them. + ;; If NON-SHY non-nil, don't use \\(?: \\) shy groups, use match capturing ones. ;; Merges keywords to avoid backtracking in Emacs' regexp matcher. ;; ;; The basic idea is to find the shortest common prefix, remove it and @@ -136,7 +160,10 @@ ;; until we're sure we need them, and try to remove one-character sequences ;; so we can use character sets rather than grouping parenthesis. ;; - (let* ((open-group (if paren "\\(" "")) + (let* ((open-group (cond + ((and paren non-shy) "\\(") + (paren "\\(?:") + (t ""))) (close-group (if paren "\\)" "")) (open-charset (if lax "" open-group)) (close-charset (if lax "" close-group))) @@ -151,7 +178,7 @@ ;; If there is an empty string, remove it and recurse on the rest. ((= (length (car strings)) 0) (concat open-charset - (regexp-opt-group (cdr strings) t t) "?" + (regexp-opt-group (cdr strings) t t non-shy) "?" close-charset)) ;; ;; If all are one-character strings, just return a character set. @@ -172,7 +199,7 @@ (let* ((length (length prefix)) (suffixes (mapcar (lambda (s) (substring s length)) strings))) (concat open-group - (regexp-quote prefix) (regexp-opt-group suffixes t t) + (regexp-quote prefix) (regexp-opt-group suffixes t t non-shy) close-group))) ;; ;; If there are several one-character strings, remove them and recurse @@ -181,7 +208,7 @@ (let ((rest (let ((completion-regexp-list '("^..+$"))) (all-completions "" (mapcar 'list strings))))) (concat open-group - (regexp-opt-group rest) "\\|" (regexp-opt-charset letters) + (regexp-opt-group rest nil nil non-shy) "\\|" (regexp-opt-charset letters) close-group))) ;; ;; Otherwise, divide the list into those that start with a particular @@ -191,7 +218,7 @@ (half1 (all-completions char (mapcar 'list strings))) (half2 (nthcdr (length half1) strings))) (concat open-group - (regexp-opt-group half1) "\\|" (regexp-opt-group half2) + (regexp-opt-group half1 nil nil non-shy) "\\|" (regexp-opt-group half2 nil nil non-shy) close-group))))))))) (defun regexp-opt-charset (chars)