changeset 1425:74cb069b8417

[xemacs-hg @ 2003-04-23 15:42:44 by stephent] stale match data <87fzo99rje.fsf@tleepslib.sk.tsukuba.ac.jp> new split-string <87d6jd9qis.fsf@tleepslib.sk.tsukuba.ac.jp> support (info "(file)node") <87adeh9qa7.fsf@tleepslib.sk.tsukuba.ac.jp>
author stephent
date Wed, 23 Apr 2003 15:42:52 +0000
parents c35e2ad2f97d
children d5077d949761
files lisp/ChangeLog lisp/info.el lisp/subr.el src/ChangeLog src/regex.c src/search.c tests/ChangeLog tests/automated/lisp-tests.el tests/automated/regexp-tests.el tests/automated/test-harness.el
diffstat 10 files changed, 200 insertions(+), 41 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Tue Apr 22 03:24:13 2003 +0000
+++ b/lisp/ChangeLog	Wed Apr 23 15:42:52 2003 +0000
@@ -1,3 +1,13 @@
+2003-04-23  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* subr.el (split-string): Revert to regular behavior, except when
+	both separators and omit-nulls are nil.
+	(split-string-default-separators): New constant.
+
+2003-04-15  Reiner Steib  <Reiner.Steib@gmx.de>
+
+	* info.el (info): Support `(filename)nodename' as argument.
+
 2003-04-14  Steve Youngs  <youngs@xemacs.org>
 
 	* package-admin.el (package-admin-find-top-directory): Use
--- a/lisp/info.el	Tue Apr 22 03:24:13 2003 +0000
+++ b/lisp/info.el	Wed Apr 23 15:42:52 2003 +0000
@@ -574,6 +574,9 @@
 Optional argument FILE specifies the file to examine;
 the default is the top-level directory of Info.
 
+Called from a program, FILE may specify an Info node of the form
+`(FILENAME)NODENAME'.
+
 In interactive use, a prefix argument directs this command
 to read a file name from the minibuffer."
   (interactive (if current-prefix-arg
@@ -592,7 +595,15 @@
 ;  (Info-setup-x) ??? What was this going to be?  Can anyone tell karlheg?
   (if file
       (unwind-protect
-	  (Info-goto-node (concat "(" file ")"))
+	  (progn
+	   (pop-to-buffer "*info*")
+	   ;; If argument already contains parentheses, don't add another set
+	   ;; since the argument will then be parsed improperly.  This also
+	   ;; has the added benefit of allowing node names to be included
+	   ;; following the parenthesized filename.
+	   (if (and (stringp file) (string-match "(.*)" file))
+	       (Info-goto-node file)
+	     (Info-goto-node (concat "(" file ")"))))
 	(and Info-standalone (info)))
     (if (get-buffer "*info*")
 	(switch-to-buffer "*info*")
--- a/lisp/subr.el	Tue Apr 22 03:24:13 2003 +0000
+++ b/lisp/subr.el	Wed Apr 23 15:42:52 2003 +0000
@@ -607,19 +607,45 @@
 	(buffer-substring-no-properties (match-beginning num)
 					(match-end num)))))
 
-(defun split-string (string &optional separators)
-  "Splits STRING into substrings where there are matches for SEPARATORS.
-Each match for SEPARATORS is a splitting point.
-The substrings between the splitting points are made into a list
+(defconst split-string-default-separators "[ \f\t\n\r\v]+"
+  "The default value of separators for `split-string'.
+
+A regexp matching strings of whitespace.  May be locale-dependent
+\(as yet unimplemented).  Should not match non-breaking spaces.
+
+Warning: binding this to a different value and using it as default is
+likely to have undesired semantics.")
+
+;; specification for `split-string' agreed with rms 2003-04-23
+;; xemacs design <87vfx5vor0.fsf@tleepslib.sk.tsukuba.ac.jp>
+
+(defun split-string (string &optional separators omit-nulls)
+  "Splits STRING into substrings bounded by matches for SEPARATORS.
+
+The beginning and end of STRING, and each match for SEPARATORS, are
+splitting points.  The substrings matching SEPARATORS are removed, and
+the substrings between the splitting points are collected as a list,
 which is returned.
-If SEPARATORS is absent, it defaults to \"[ \\f\\t\\n\\r\\v]+\".
+
+If SEPARATORS is nil, it defaults to the value of
+`split-string-default-separators', normally \"[ \\f\\t\\n\\r\\v]+\".
 
-If there is match for SEPARATORS at the beginning of STRING, we do not
-include a null substring for that.  Likewise, if there is a match
-at the end of STRING, we don't include a null substring for that.
+If OMIT-NULLs is t, zero-length substrings are omitted from the list \(so
+that for the default value of SEPARATORS leading and trailing whitespace
+are effectively trimmed).  If nil, all zero-length substrings are retained,
+which correctly parses CSV format, for example.
+
+As a special case, if both SEPARATORS and OMIT-NULLS are nil, white-space
+will be trimmed (ie, the effect of `(split-string STRING)' is the same as
+`(split-string STRING split-string-default-separators t)').  In the very
+rare case that you need to retain zero-length substrings when splitting on
+the default separators, use
+`(split-string STRING split-string-default-separators)'.
 
 Modifies the match data; use `save-match-data' if necessary."
-  (let ((rexp (or separators "[ \f\t\n\r\v]+"))
+
+  (let ((keep-nulls (if separators (not omit-nulls) nil))
+	(rexp (or separators split-string-default-separators))
 	(start 0)
 	notfirst
 	(list nil))
@@ -628,16 +654,14 @@
 				       (= start (match-beginning 0))
 				       (< start (length string)))
 				  (1+ start) start))
-		(< (match-beginning 0) (length string)))
+		(< start (length string)))
       (setq notfirst t)
-      (or (eq (match-beginning 0) 0)
-	  (and (eq (match-beginning 0) (match-end 0))
-	       (eq (match-beginning 0) start))
+      (if (or keep-nulls (< start (match-beginning 0)))
 	  (setq list
 		(cons (substring string start (match-beginning 0))
 		      list)))
       (setq start (match-end 0)))
-    (or (eq start (length string))
+    (if (or keep-nulls (< start (length string)))
 	(setq list
 	      (cons (substring string start)
 		    list)))
--- a/src/ChangeLog	Tue Apr 22 03:24:13 2003 +0000
+++ b/src/ChangeLog	Wed Apr 23 15:42:52 2003 +0000
@@ -1,3 +1,20 @@
+2003-04-17  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	Fix stale match data after failed search bug.
+
+	* search.c (search_buffer):
+	Don't find null string if repetition count is 0.
+	(string_match_1): 
+	(looking_at_1): 
+	(search_buffer): 
+	(simple_search):
+	(boyer_moore):
+	Clear all search regs as start, not unused ones on success.
+	* regex.c (re_match_2_internal): Fix comment about clearing regs.
+
+	* search.c (Freplace_match): Document error conditions in docstring.
+	(These actually error now after a failed match.)
+
 2003-04-16  Chris Palmer  <chris@nodewarrior.org>
 
 	* sysdll.c (dll_open): Removed Objective C-ism to fix Mac build.
--- a/src/regex.c	Tue Apr 22 03:24:13 2003 +0000
+++ b/src/regex.c	Wed Apr 23 15:42:52 2003 +0000
@@ -5209,7 +5209,10 @@
 
 	       It would be possible to require the caller to do this, but we'd
 	       have to change the API for this function to reflect that, and
-	       audit all callers. */
+	       audit all callers.  Note: as of 2003-04-17 callers in XEmacs
+	       do clear the registers, but it's safer to leave this code in
+	       because of reallocation.
+	    */
 	    if (regs && regs->num_regs > 0)
 	      for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++)
 		regs->start[mcnt] = regs->end[mcnt] = -1;
--- a/src/search.c	Tue Apr 22 03:24:13 2003 +0000
+++ b/src/search.c	Wed Apr 23 15:42:52 2003 +0000
@@ -93,6 +93,30 @@
    */
 static struct re_registers search_regs;
 
+/* Every function that _may_ set the match data _must_ clear the search
+   registers on entry.  An unsuccessful search should leave the search
+   registers cleared.  Applications that are no-ops by definition (eg,
+   searches with a repetition count of 0) _must not_ clear the search
+   registers.
+
+   XEmacs 21.5 up to beta 11 may have permitted the following idiom to
+   "win" in the sense that the match data was set to the last successful
+   match's match data, and not cleared as the current implemenation does:
+
+   (while (search_forward "string"))
+   (use-match-data-of-last-successful-search)
+
+   This no longer can work.  You must use save-match-data to preserve the
+   match data:
+
+   (let (md)
+     (while (when (search-forward "string") (setq md (match-data))))
+     (set-match-data md))
+   (use-match-data-of-last-successful-search)
+   */
+static void set_search_regs (struct buffer *buf, Charbpos beg, Charcount len);
+static void clear_search_regs (struct re_registers *regp);
+
 /* The buffer in which the last search was performed, or
    Qt if the last search was done in a string;
    Qnil if no searching has been done yet.  */
@@ -110,8 +134,6 @@
 /* range table for use with skip_chars.  Only needed for Mule. */
 Lisp_Object Vskip_chars_range_table;
 
-static void set_search_regs (struct buffer *buf, Charbpos beg, Charcount len);
-static void clear_unused_search_regs (struct re_registers *regp, int no_sub);
 static Charbpos simple_search (struct buffer *buf, Ibyte *base_pat,
 			       Bytecount len, Bytebpos pos, Bytebpos lim,
 			       EMACS_INT n, Lisp_Object trt);
@@ -304,6 +326,9 @@
   struct syntax_cache scache_struct;
   struct syntax_cache *scache = &scache_struct;
   
+  /* clear search registers *now*.  no mercy, not even for errors */
+  clear_search_regs (&search_regs);
+
   CHECK_STRING (string);
   bufp = compile_pattern (string, &search_regs,
 			  (!NILP (buf->case_fold_search)
@@ -394,6 +419,9 @@
      data.  Not necessary because we don't call process filters
      asynchronously (i.e. from within QUIT). */
 
+  /* clear search registers *now*.  no mercy, not even for errors */
+  clear_search_regs (&search_regs);
+
   CHECK_STRING (regexp);
   CHECK_STRING (string);
 
@@ -1219,18 +1247,20 @@
      data.  Not necessary because we don't call process filters
      asynchronously (i.e. from within QUIT). */
 
+  /* Searching 0 times means noop---don't move, don't touch registers.  */
+  if (n == 0)
+    return charbpos;
+
+  /* clear the search regs now */
+  clear_search_regs (&search_regs);
+
   /* Null string is found at starting position.  */
   if (len == 0)
     {
       set_search_regs (buf, charbpos, 0);
-      clear_unused_search_regs (&search_regs, 0);
       return charbpos;
     }
 
-  /* Searching 0 times means noop---don't move, don't touch registers.  */
-  if (n == 0)
-    return charbpos;
-
   pos = charbpos_to_bytebpos (buf, charbpos);
   lim = charbpos_to_bytebpos (buf, buflim);
   if (RE && !trivial_regexp_p (string))
@@ -1483,7 +1513,6 @@
 	  end = bytebpos_to_charbpos (buf, pos + buf_len);
 	}
       set_search_regs (buf, beg, end - beg);
-      clear_unused_search_regs (&search_regs, 0);
 
       return retval;
     }
@@ -1617,6 +1646,10 @@
   for (i = 0; i < 0400; i++)
     simple_translate[i] = (Ibyte) i;
   i = 0;
+
+  /* clear search regs now */
+  clear_search_regs (&search_regs);
+
   while (i != infinity)
     {
       Ibyte *ptr = base_pat + i;
@@ -1847,7 +1880,6 @@
 		    Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len);
 
 		    set_search_regs (buf, bufstart, bufend - bufstart);
-		    clear_unused_search_regs (&search_regs, 0);
 		  }
 
 		  if ((n -= direction) != 0)
@@ -1938,7 +1970,6 @@
 		    Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len);
 
 		    set_search_regs (buf, bufstart, bufend - bufstart);
-		    clear_unused_search_regs (&search_regs, 0);
 		  }
 
 		  if ((n -= direction) != 0)
@@ -1978,21 +2009,17 @@
   last_thing_searched = wrap_buffer (buf);
 }
 
-/* Clear unused search registers so match data will be null.
+/* Clear search registers so match data will be null.
    REGP is a pointer to the register structure to clear, usually the global
-   search_regs.
-   NO_SUB is the number of subexpressions to allow for.  (Does not count
-   the whole match, ie, for a string search NO_SUB == 0.)
-   It is an error if NO_SUB > REGP.num_regs - 1. */
+   search_regs. */
 
 static void
-clear_unused_search_regs (struct re_registers *regp, int no_sub)
+clear_search_regs (struct re_registers *regp)
 {
   /* This function has been Mule-ized. */
   int i;
 
-  assert (no_sub >= 0 && no_sub < regp->num_regs);
-  for (i = no_sub + 1; i < regp->num_regs; i++)
+  for (i = 0; i < regp->num_regs; i++)
     regp->start[i] = regp->end[i] = -1;
 }
 
@@ -2323,6 +2350,12 @@
 the match.  It says to replace just that subexpression instead of the
 whole match.  This is useful only after a regular expression search or
 match since only regular expressions have distinguished subexpressions.
+
+If no match (including searches) has been conducted, the last match
+operation failed, or the requested subexpression was not matched, an
+`args-out-of-range' error will be signaled.  (If no match has ever been
+conducted in this instance of XEmacs, an `invalid-operation' error will
+be signaled.  This is very rare.)
 */
        (replacement, fixedcase, literal, string, strbuffer))
 {
--- a/tests/ChangeLog	Tue Apr 22 03:24:13 2003 +0000
+++ b/tests/ChangeLog	Wed Apr 23 15:42:52 2003 +0000
@@ -1,3 +1,16 @@
+2003-04-23  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* automated/test-harness.el (test-harness-risk-infloops):
+	New variable.
+
+	* automated/lisp-tests.el (split-string): Add tests for new API
+	spec.  Conditionally re-enable potential infloops.
+
+2003-04-17  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* automated/regexp-tests.el (stale match data): Update comment.
+	(replace-match): Check-Error after failed match.
+
 2003-04-15  Stephen J. Turnbull  <stephen@xemacs.org>
 
 	* automated/test-harness.el (test-harness-known-bug): Deleted.
--- a/tests/automated/lisp-tests.el	Tue Apr 22 03:24:13 2003 +0000
+++ b/tests/automated/lisp-tests.el	Wed Apr 23 15:42:52 2003 +0000
@@ -852,10 +852,13 @@
 ;;-----------------------------------------------------
 ;; Test split-string
 ;;-----------------------------------------------------
-;; Hrvoje didn't like these tests so I'm disabling them for now. -sb
-;(Assert (equal (split-string "foo" "") '("" "f" "o" "o" "")))
-;(Assert (equal (split-string "foo" "^") '("" "foo")))
-;(Assert (equal (split-string "foo" "$") '("foo" "")))
+;; Keep nulls, explicit SEPARATORS
+;; Hrvoje didn't like the next 3 tests so I'm disabling them for now. -sb
+;; I assume Hrvoje worried about the possibility of infloops. -sjt
+(when test-harness-risk-infloops
+  (Assert (equal (split-string "foo" "") '("" "f" "o" "o" "")))
+  (Assert (equal (split-string "foo" "^") '("" "foo")))
+  (Assert (equal (split-string "foo" "$") '("foo" ""))))
 (Assert (equal (split-string "foo,bar" ",") '("foo" "bar")))
 (Assert (equal (split-string ",foo,bar," ",") '("" "foo" "bar" "")))
 (Assert (equal (split-string ",foo,bar," "^,") '("" "foo,bar,")))
@@ -865,6 +868,47 @@
 (Assert (equal (split-string "foo,,bar,," ",") '("foo" "" "bar" "" "")))
 (Assert (equal (split-string "foo,,bar" ",+") '("foo" "bar")))
 (Assert (equal (split-string ",foo,,bar," ",+") '("" "foo" "bar" "")))
+;; Omit nulls, explicit SEPARATORS
+(when test-harness-risk-infloops
+  (Assert (equal (split-string "foo" "" t) '("f" "o" "o")))
+  (Assert (equal (split-string "foo" "^" t) '("foo")))
+  (Assert (equal (split-string "foo" "$" t) '("foo"))))
+(Assert (equal (split-string "foo,bar" "," t) '("foo" "bar")))
+(Assert (equal (split-string ",foo,bar," "," t) '("foo" "bar")))
+(Assert (equal (split-string ",foo,bar," "^," t) '("foo,bar,")))
+(Assert (equal (split-string ",foo,bar," ",$" t) '(",foo,bar")))
+(Assert (equal (split-string ",foo,,bar," "," t) '("foo" "bar")))
+(Assert (equal (split-string "foo,,,bar" "," t) '("foo" "bar")))
+(Assert (equal (split-string "foo,,bar,," "," t) '("foo" "bar")))
+(Assert (equal (split-string "foo,,bar" ",+" t) '("foo" "bar")))
+(Assert (equal (split-string ",foo,,bar," ",+" t) '("foo" "bar")))
+;; "Double-default" case
+(Assert (equal (split-string "foo bar") '("foo" "bar")))
+(Assert (equal (split-string " foo bar ") '("foo" "bar")))
+(Assert (equal (split-string " foo  bar ") '("foo" "bar")))
+(Assert (equal (split-string "foo   bar") '("foo" "bar")))
+(Assert (equal (split-string "foo  bar  ") '("foo" "bar")))
+(Assert (equal (split-string "foobar") '("foobar")))
+;; Semantics are identical to "double-default" case!  Fool ya?
+(Assert (equal (split-string "foo bar" nil t) '("foo" "bar")))
+(Assert (equal (split-string " foo bar " nil t) '("foo" "bar")))
+(Assert (equal (split-string " foo  bar " nil t) '("foo" "bar")))
+(Assert (equal (split-string "foo   bar" nil t) '("foo" "bar")))
+(Assert (equal (split-string "foo  bar  " nil t) '("foo" "bar")))
+(Assert (equal (split-string "foobar" nil t) '("foobar")))
+;; Perverse "anti-double-default" case
+(Assert (equal (split-string "foo bar" split-string-default-separators)
+	       '("foo" "bar")))
+(Assert (equal (split-string " foo bar " split-string-default-separators)
+	       '("" "foo" "bar" "")))
+(Assert (equal (split-string " foo  bar " split-string-default-separators)
+	       '("" "foo" "bar" "")))
+(Assert (equal (split-string "foo   bar" split-string-default-separators)
+	       '("foo" "bar")))
+(Assert (equal (split-string "foo  bar  " split-string-default-separators)
+	       '("foo" "bar" "")))
+(Assert (equal (split-string "foobar" split-string-default-separators)
+	       '("foobar")))
 
 (Assert (not (string-match "\\(\\.\\=\\)" ".")))
 (Assert (string= "" (let ((str "test string"))
--- a/tests/automated/regexp-tests.el	Tue Apr 22 03:24:13 2003 +0000
+++ b/tests/automated/regexp-tests.el	Wed Apr 23 15:42:52 2003 +0000
@@ -205,14 +205,13 @@
 
 ;; (test-regex-charset-mule-paranoid)
 
-;; Test replace-match
+;; Test that replace-match errors after a failed match
 (with-temp-buffer
   (insert "This is a test buffer.")
   (goto-char (point-min))
   (search-forward "this is a test ")
   (looking-at "Unmatchable text")
-  (replace-match "")
-  (Assert (looking-at "^buffer.$")))
+  (Check-Error args-out-of-range (replace-match "")))
 
 ;; Test that trivial regexps reset unused registers
 ;; Thanks to Martin Sternholm for the report.
@@ -282,6 +281,8 @@
 
 ;; More stale match data tests.
 ;; Thanks to <bjacob@ca.metsci.com>.
+;; These tests used to fail because we cleared match data only on success.
+;; Fixed 2003-04-17.
 (Assert (not (progn (string-match "a" "a")
 		    (string-match "b" "a")
 		    (match-string 0 "a"))))
--- a/tests/automated/test-harness.el	Tue Apr 22 03:24:13 2003 +0000
+++ b/tests/automated/test-harness.el	Wed Apr 23 15:42:52 2003 +0000
@@ -48,6 +48,9 @@
   (and (not noninteractive) (> (device-baud-rate) search-slow-speed))
   "*Non-nil means print messages describing progress of emacs-tester.")
 
+(defvar test-harness-risk-infloops nil
+  "*Non-nil to run tests that may loop infinitely in buggy implementations.")
+
 (defvar test-harness-current-file nil)
 
 (defvar emacs-lisp-file-regexp (purecopy "\\.el\\'")