Mercurial > hg > xemacs-beta
changeset 1425:74cb069b8417
[xemacs-hg @ 2003-04-23 15:42:44 by stephent]
stale match data <87fzo99rje.fsf@tleepslib.sk.tsukuba.ac.jp>
new split-string <87d6jd9qis.fsf@tleepslib.sk.tsukuba.ac.jp>
support (info "(file)node") <87adeh9qa7.fsf@tleepslib.sk.tsukuba.ac.jp>
author | stephent |
---|---|
date | Wed, 23 Apr 2003 15:42:52 +0000 |
parents | c35e2ad2f97d |
children | d5077d949761 |
files | lisp/ChangeLog lisp/info.el lisp/subr.el src/ChangeLog src/regex.c src/search.c tests/ChangeLog tests/automated/lisp-tests.el tests/automated/regexp-tests.el tests/automated/test-harness.el |
diffstat | 10 files changed, 200 insertions(+), 41 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/ChangeLog Tue Apr 22 03:24:13 2003 +0000 +++ b/lisp/ChangeLog Wed Apr 23 15:42:52 2003 +0000 @@ -1,3 +1,13 @@ +2003-04-23 Stephen J. Turnbull <stephen@xemacs.org> + + * subr.el (split-string): Revert to regular behavior, except when + both separators and omit-nulls are nil. + (split-string-default-separators): New constant. + +2003-04-15 Reiner Steib <Reiner.Steib@gmx.de> + + * info.el (info): Support `(filename)nodename' as argument. + 2003-04-14 Steve Youngs <youngs@xemacs.org> * package-admin.el (package-admin-find-top-directory): Use
--- a/lisp/info.el Tue Apr 22 03:24:13 2003 +0000 +++ b/lisp/info.el Wed Apr 23 15:42:52 2003 +0000 @@ -574,6 +574,9 @@ Optional argument FILE specifies the file to examine; the default is the top-level directory of Info. +Called from a program, FILE may specify an Info node of the form +`(FILENAME)NODENAME'. + In interactive use, a prefix argument directs this command to read a file name from the minibuffer." (interactive (if current-prefix-arg @@ -592,7 +595,15 @@ ; (Info-setup-x) ??? What was this going to be? Can anyone tell karlheg? (if file (unwind-protect - (Info-goto-node (concat "(" file ")")) + (progn + (pop-to-buffer "*info*") + ;; If argument already contains parentheses, don't add another set + ;; since the argument will then be parsed improperly. This also + ;; has the added benefit of allowing node names to be included + ;; following the parenthesized filename. + (if (and (stringp file) (string-match "(.*)" file)) + (Info-goto-node file) + (Info-goto-node (concat "(" file ")")))) (and Info-standalone (info))) (if (get-buffer "*info*") (switch-to-buffer "*info*")
--- a/lisp/subr.el Tue Apr 22 03:24:13 2003 +0000 +++ b/lisp/subr.el Wed Apr 23 15:42:52 2003 +0000 @@ -607,19 +607,45 @@ (buffer-substring-no-properties (match-beginning num) (match-end num))))) -(defun split-string (string &optional separators) - "Splits STRING into substrings where there are matches for SEPARATORS. -Each match for SEPARATORS is a splitting point. -The substrings between the splitting points are made into a list +(defconst split-string-default-separators "[ \f\t\n\r\v]+" + "The default value of separators for `split-string'. + +A regexp matching strings of whitespace. May be locale-dependent +\(as yet unimplemented). Should not match non-breaking spaces. + +Warning: binding this to a different value and using it as default is +likely to have undesired semantics.") + +;; specification for `split-string' agreed with rms 2003-04-23 +;; xemacs design <87vfx5vor0.fsf@tleepslib.sk.tsukuba.ac.jp> + +(defun split-string (string &optional separators omit-nulls) + "Splits STRING into substrings bounded by matches for SEPARATORS. + +The beginning and end of STRING, and each match for SEPARATORS, are +splitting points. The substrings matching SEPARATORS are removed, and +the substrings between the splitting points are collected as a list, which is returned. -If SEPARATORS is absent, it defaults to \"[ \\f\\t\\n\\r\\v]+\". + +If SEPARATORS is nil, it defaults to the value of +`split-string-default-separators', normally \"[ \\f\\t\\n\\r\\v]+\". -If there is match for SEPARATORS at the beginning of STRING, we do not -include a null substring for that. Likewise, if there is a match -at the end of STRING, we don't include a null substring for that. +If OMIT-NULLs is t, zero-length substrings are omitted from the list \(so +that for the default value of SEPARATORS leading and trailing whitespace +are effectively trimmed). If nil, all zero-length substrings are retained, +which correctly parses CSV format, for example. + +As a special case, if both SEPARATORS and OMIT-NULLS are nil, white-space +will be trimmed (ie, the effect of `(split-string STRING)' is the same as +`(split-string STRING split-string-default-separators t)'). In the very +rare case that you need to retain zero-length substrings when splitting on +the default separators, use +`(split-string STRING split-string-default-separators)'. Modifies the match data; use `save-match-data' if necessary." - (let ((rexp (or separators "[ \f\t\n\r\v]+")) + + (let ((keep-nulls (if separators (not omit-nulls) nil)) + (rexp (or separators split-string-default-separators)) (start 0) notfirst (list nil)) @@ -628,16 +654,14 @@ (= start (match-beginning 0)) (< start (length string))) (1+ start) start)) - (< (match-beginning 0) (length string))) + (< start (length string))) (setq notfirst t) - (or (eq (match-beginning 0) 0) - (and (eq (match-beginning 0) (match-end 0)) - (eq (match-beginning 0) start)) + (if (or keep-nulls (< start (match-beginning 0))) (setq list (cons (substring string start (match-beginning 0)) list))) (setq start (match-end 0))) - (or (eq start (length string)) + (if (or keep-nulls (< start (length string))) (setq list (cons (substring string start) list)))
--- a/src/ChangeLog Tue Apr 22 03:24:13 2003 +0000 +++ b/src/ChangeLog Wed Apr 23 15:42:52 2003 +0000 @@ -1,3 +1,20 @@ +2003-04-17 Stephen J. Turnbull <stephen@xemacs.org> + + Fix stale match data after failed search bug. + + * search.c (search_buffer): + Don't find null string if repetition count is 0. + (string_match_1): + (looking_at_1): + (search_buffer): + (simple_search): + (boyer_moore): + Clear all search regs as start, not unused ones on success. + * regex.c (re_match_2_internal): Fix comment about clearing regs. + + * search.c (Freplace_match): Document error conditions in docstring. + (These actually error now after a failed match.) + 2003-04-16 Chris Palmer <chris@nodewarrior.org> * sysdll.c (dll_open): Removed Objective C-ism to fix Mac build.
--- a/src/regex.c Tue Apr 22 03:24:13 2003 +0000 +++ b/src/regex.c Wed Apr 23 15:42:52 2003 +0000 @@ -5209,7 +5209,10 @@ It would be possible to require the caller to do this, but we'd have to change the API for this function to reflect that, and - audit all callers. */ + audit all callers. Note: as of 2003-04-17 callers in XEmacs + do clear the registers, but it's safer to leave this code in + because of reallocation. + */ if (regs && regs->num_regs > 0) for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) regs->start[mcnt] = regs->end[mcnt] = -1;
--- a/src/search.c Tue Apr 22 03:24:13 2003 +0000 +++ b/src/search.c Wed Apr 23 15:42:52 2003 +0000 @@ -93,6 +93,30 @@ */ static struct re_registers search_regs; +/* Every function that _may_ set the match data _must_ clear the search + registers on entry. An unsuccessful search should leave the search + registers cleared. Applications that are no-ops by definition (eg, + searches with a repetition count of 0) _must not_ clear the search + registers. + + XEmacs 21.5 up to beta 11 may have permitted the following idiom to + "win" in the sense that the match data was set to the last successful + match's match data, and not cleared as the current implemenation does: + + (while (search_forward "string")) + (use-match-data-of-last-successful-search) + + This no longer can work. You must use save-match-data to preserve the + match data: + + (let (md) + (while (when (search-forward "string") (setq md (match-data)))) + (set-match-data md)) + (use-match-data-of-last-successful-search) + */ +static void set_search_regs (struct buffer *buf, Charbpos beg, Charcount len); +static void clear_search_regs (struct re_registers *regp); + /* The buffer in which the last search was performed, or Qt if the last search was done in a string; Qnil if no searching has been done yet. */ @@ -110,8 +134,6 @@ /* range table for use with skip_chars. Only needed for Mule. */ Lisp_Object Vskip_chars_range_table; -static void set_search_regs (struct buffer *buf, Charbpos beg, Charcount len); -static void clear_unused_search_regs (struct re_registers *regp, int no_sub); static Charbpos simple_search (struct buffer *buf, Ibyte *base_pat, Bytecount len, Bytebpos pos, Bytebpos lim, EMACS_INT n, Lisp_Object trt); @@ -304,6 +326,9 @@ struct syntax_cache scache_struct; struct syntax_cache *scache = &scache_struct; + /* clear search registers *now*. no mercy, not even for errors */ + clear_search_regs (&search_regs); + CHECK_STRING (string); bufp = compile_pattern (string, &search_regs, (!NILP (buf->case_fold_search) @@ -394,6 +419,9 @@ data. Not necessary because we don't call process filters asynchronously (i.e. from within QUIT). */ + /* clear search registers *now*. no mercy, not even for errors */ + clear_search_regs (&search_regs); + CHECK_STRING (regexp); CHECK_STRING (string); @@ -1219,18 +1247,20 @@ data. Not necessary because we don't call process filters asynchronously (i.e. from within QUIT). */ + /* Searching 0 times means noop---don't move, don't touch registers. */ + if (n == 0) + return charbpos; + + /* clear the search regs now */ + clear_search_regs (&search_regs); + /* Null string is found at starting position. */ if (len == 0) { set_search_regs (buf, charbpos, 0); - clear_unused_search_regs (&search_regs, 0); return charbpos; } - /* Searching 0 times means noop---don't move, don't touch registers. */ - if (n == 0) - return charbpos; - pos = charbpos_to_bytebpos (buf, charbpos); lim = charbpos_to_bytebpos (buf, buflim); if (RE && !trivial_regexp_p (string)) @@ -1483,7 +1513,6 @@ end = bytebpos_to_charbpos (buf, pos + buf_len); } set_search_regs (buf, beg, end - beg); - clear_unused_search_regs (&search_regs, 0); return retval; } @@ -1617,6 +1646,10 @@ for (i = 0; i < 0400; i++) simple_translate[i] = (Ibyte) i; i = 0; + + /* clear search regs now */ + clear_search_regs (&search_regs); + while (i != infinity) { Ibyte *ptr = base_pat + i; @@ -1847,7 +1880,6 @@ Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len); set_search_regs (buf, bufstart, bufend - bufstart); - clear_unused_search_regs (&search_regs, 0); } if ((n -= direction) != 0) @@ -1938,7 +1970,6 @@ Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len); set_search_regs (buf, bufstart, bufend - bufstart); - clear_unused_search_regs (&search_regs, 0); } if ((n -= direction) != 0) @@ -1978,21 +2009,17 @@ last_thing_searched = wrap_buffer (buf); } -/* Clear unused search registers so match data will be null. +/* Clear search registers so match data will be null. REGP is a pointer to the register structure to clear, usually the global - search_regs. - NO_SUB is the number of subexpressions to allow for. (Does not count - the whole match, ie, for a string search NO_SUB == 0.) - It is an error if NO_SUB > REGP.num_regs - 1. */ + search_regs. */ static void -clear_unused_search_regs (struct re_registers *regp, int no_sub) +clear_search_regs (struct re_registers *regp) { /* This function has been Mule-ized. */ int i; - assert (no_sub >= 0 && no_sub < regp->num_regs); - for (i = no_sub + 1; i < regp->num_regs; i++) + for (i = 0; i < regp->num_regs; i++) regp->start[i] = regp->end[i] = -1; } @@ -2323,6 +2350,12 @@ the match. It says to replace just that subexpression instead of the whole match. This is useful only after a regular expression search or match since only regular expressions have distinguished subexpressions. + +If no match (including searches) has been conducted, the last match +operation failed, or the requested subexpression was not matched, an +`args-out-of-range' error will be signaled. (If no match has ever been +conducted in this instance of XEmacs, an `invalid-operation' error will +be signaled. This is very rare.) */ (replacement, fixedcase, literal, string, strbuffer)) {
--- a/tests/ChangeLog Tue Apr 22 03:24:13 2003 +0000 +++ b/tests/ChangeLog Wed Apr 23 15:42:52 2003 +0000 @@ -1,3 +1,16 @@ +2003-04-23 Stephen J. Turnbull <stephen@xemacs.org> + + * automated/test-harness.el (test-harness-risk-infloops): + New variable. + + * automated/lisp-tests.el (split-string): Add tests for new API + spec. Conditionally re-enable potential infloops. + +2003-04-17 Stephen J. Turnbull <stephen@xemacs.org> + + * automated/regexp-tests.el (stale match data): Update comment. + (replace-match): Check-Error after failed match. + 2003-04-15 Stephen J. Turnbull <stephen@xemacs.org> * automated/test-harness.el (test-harness-known-bug): Deleted.
--- a/tests/automated/lisp-tests.el Tue Apr 22 03:24:13 2003 +0000 +++ b/tests/automated/lisp-tests.el Wed Apr 23 15:42:52 2003 +0000 @@ -852,10 +852,13 @@ ;;----------------------------------------------------- ;; Test split-string ;;----------------------------------------------------- -;; Hrvoje didn't like these tests so I'm disabling them for now. -sb -;(Assert (equal (split-string "foo" "") '("" "f" "o" "o" ""))) -;(Assert (equal (split-string "foo" "^") '("" "foo"))) -;(Assert (equal (split-string "foo" "$") '("foo" ""))) +;; Keep nulls, explicit SEPARATORS +;; Hrvoje didn't like the next 3 tests so I'm disabling them for now. -sb +;; I assume Hrvoje worried about the possibility of infloops. -sjt +(when test-harness-risk-infloops + (Assert (equal (split-string "foo" "") '("" "f" "o" "o" ""))) + (Assert (equal (split-string "foo" "^") '("" "foo"))) + (Assert (equal (split-string "foo" "$") '("foo" "")))) (Assert (equal (split-string "foo,bar" ",") '("foo" "bar"))) (Assert (equal (split-string ",foo,bar," ",") '("" "foo" "bar" ""))) (Assert (equal (split-string ",foo,bar," "^,") '("" "foo,bar,"))) @@ -865,6 +868,47 @@ (Assert (equal (split-string "foo,,bar,," ",") '("foo" "" "bar" "" ""))) (Assert (equal (split-string "foo,,bar" ",+") '("foo" "bar"))) (Assert (equal (split-string ",foo,,bar," ",+") '("" "foo" "bar" ""))) +;; Omit nulls, explicit SEPARATORS +(when test-harness-risk-infloops + (Assert (equal (split-string "foo" "" t) '("f" "o" "o"))) + (Assert (equal (split-string "foo" "^" t) '("foo"))) + (Assert (equal (split-string "foo" "$" t) '("foo")))) +(Assert (equal (split-string "foo,bar" "," t) '("foo" "bar"))) +(Assert (equal (split-string ",foo,bar," "," t) '("foo" "bar"))) +(Assert (equal (split-string ",foo,bar," "^," t) '("foo,bar,"))) +(Assert (equal (split-string ",foo,bar," ",$" t) '(",foo,bar"))) +(Assert (equal (split-string ",foo,,bar," "," t) '("foo" "bar"))) +(Assert (equal (split-string "foo,,,bar" "," t) '("foo" "bar"))) +(Assert (equal (split-string "foo,,bar,," "," t) '("foo" "bar"))) +(Assert (equal (split-string "foo,,bar" ",+" t) '("foo" "bar"))) +(Assert (equal (split-string ",foo,,bar," ",+" t) '("foo" "bar"))) +;; "Double-default" case +(Assert (equal (split-string "foo bar") '("foo" "bar"))) +(Assert (equal (split-string " foo bar ") '("foo" "bar"))) +(Assert (equal (split-string " foo bar ") '("foo" "bar"))) +(Assert (equal (split-string "foo bar") '("foo" "bar"))) +(Assert (equal (split-string "foo bar ") '("foo" "bar"))) +(Assert (equal (split-string "foobar") '("foobar"))) +;; Semantics are identical to "double-default" case! Fool ya? +(Assert (equal (split-string "foo bar" nil t) '("foo" "bar"))) +(Assert (equal (split-string " foo bar " nil t) '("foo" "bar"))) +(Assert (equal (split-string " foo bar " nil t) '("foo" "bar"))) +(Assert (equal (split-string "foo bar" nil t) '("foo" "bar"))) +(Assert (equal (split-string "foo bar " nil t) '("foo" "bar"))) +(Assert (equal (split-string "foobar" nil t) '("foobar"))) +;; Perverse "anti-double-default" case +(Assert (equal (split-string "foo bar" split-string-default-separators) + '("foo" "bar"))) +(Assert (equal (split-string " foo bar " split-string-default-separators) + '("" "foo" "bar" ""))) +(Assert (equal (split-string " foo bar " split-string-default-separators) + '("" "foo" "bar" ""))) +(Assert (equal (split-string "foo bar" split-string-default-separators) + '("foo" "bar"))) +(Assert (equal (split-string "foo bar " split-string-default-separators) + '("foo" "bar" ""))) +(Assert (equal (split-string "foobar" split-string-default-separators) + '("foobar"))) (Assert (not (string-match "\\(\\.\\=\\)" "."))) (Assert (string= "" (let ((str "test string"))
--- a/tests/automated/regexp-tests.el Tue Apr 22 03:24:13 2003 +0000 +++ b/tests/automated/regexp-tests.el Wed Apr 23 15:42:52 2003 +0000 @@ -205,14 +205,13 @@ ;; (test-regex-charset-mule-paranoid) -;; Test replace-match +;; Test that replace-match errors after a failed match (with-temp-buffer (insert "This is a test buffer.") (goto-char (point-min)) (search-forward "this is a test ") (looking-at "Unmatchable text") - (replace-match "") - (Assert (looking-at "^buffer.$"))) + (Check-Error args-out-of-range (replace-match ""))) ;; Test that trivial regexps reset unused registers ;; Thanks to Martin Sternholm for the report. @@ -282,6 +281,8 @@ ;; More stale match data tests. ;; Thanks to <bjacob@ca.metsci.com>. +;; These tests used to fail because we cleared match data only on success. +;; Fixed 2003-04-17. (Assert (not (progn (string-match "a" "a") (string-match "b" "a") (match-string 0 "a"))))
--- a/tests/automated/test-harness.el Tue Apr 22 03:24:13 2003 +0000 +++ b/tests/automated/test-harness.el Wed Apr 23 15:42:52 2003 +0000 @@ -48,6 +48,9 @@ (and (not noninteractive) (> (device-baud-rate) search-slow-speed)) "*Non-nil means print messages describing progress of emacs-tester.") +(defvar test-harness-risk-infloops nil + "*Non-nil to run tests that may loop infinitely in buggy implementations.") + (defvar test-harness-current-file nil) (defvar emacs-lisp-file-regexp (purecopy "\\.el\\'")