changeset 1468:0f42689481f0

[xemacs-hg @ 2003-05-09 14:52:37 by stephent] revert clobber match data <873cjoi38t.fsf@tleepslib.sk.tsukuba.ac.jp>
author stephent
date Fri, 09 May 2003 14:52:43 +0000
parents e5da225ea2ca
children 51cfeb2ed271
files man/ChangeLog man/lispref/searching.texi src/ChangeLog src/regex.c src/regex.h src/search.c
diffstat 6 files changed, 155 insertions(+), 68 deletions(-) [+]
line wrap: on
line diff
--- a/man/ChangeLog	Fri May 09 09:50:42 2003 +0000
+++ b/man/ChangeLog	Fri May 09 14:52:43 2003 +0000
@@ -1,3 +1,7 @@
+2003-05-09  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* lispref/searching.texi (Match Data): Failed match preserves data.
+
 2003-04-28  Stephen J. Turnbull  <stephen@xemacs.org>
 
 	* xemacs-faq.texi (Q6.4.3): New: auxiliary programs for Windows.
--- a/man/lispref/searching.texi	Fri May 09 09:50:42 2003 +0000
+++ b/man/lispref/searching.texi	Fri May 09 14:52:43 2003 +0000
@@ -1058,11 +1058,19 @@
 message, and then extract parts of the match under control of the
 pattern.
 
-  Because the match data normally describe the most recent search only,
-you must be careful not to do another search inadvertently between the
-search you wish to refer back to and the use of the match data.  If you
-can't avoid another intervening search, you must save and restore the
-match data around it, to prevent it from being overwritten.
+  Because the match data normally describe the most recent successful
+search only, you must be careful not to do another search inadvertently
+between the search you wish to refer back to and the use of the match
+data.  If you can't avoid another intervening search, you must save and
+restore the match data around it, to prevent it from being overwritten.
+
+To make it possible to write iterative or recursive code that repeatedly
+searches, and uses the data from the last successful search when no more
+matches can be found, a search or match which fails will preserve the
+match data from the last successful search.  (You must not depend on
+match data being preserved in case the search or match signals an
+error.)  If for some reason you need to clear the match data, you may
+use @code{(store-match-data nil)}.
 
 @menu
 * Simple Match Data::     Accessing single items of match data,
--- a/src/ChangeLog	Fri May 09 09:50:42 2003 +0000
+++ b/src/ChangeLog	Fri May 09 14:52:43 2003 +0000
@@ -1,3 +1,35 @@
+2003-05-09  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* search.c (search_regs): Make comment reflect reality.
+	(clear_search_regs): Unconditionally clear global regs.
+	(set_search_regs): Call clear_search_regs to clear unused regs.
+	(looking_at_1):
+	(string_match_1):
+	(search_buffer):
+	(simple_search):
+	(boyer_moore):
+	Don't call clear_search_regs directly.
+	(Flooking_at):
+	(Fposix_looking_at):
+	(Fstring_match):
+	(Fposix_string_match):
+	(Fsearch_backward):
+	(Fsearch_forward):
+	(Fword_search_backward):
+	(Fword_search_forward):
+	(Fre_search_backward): 
+	(Fre_search_forward): 
+	(Fposix_search_backward):
+	(Fposix_search_forward):
+	Document that search/match failure preserves match data.
+	(Freplace_match):
+	(Fstore_match_data):
+	Improve docstrings.
+
+	* regex.c (re_match_2_internal): Init syn1, syn2 to quiet compiler.
+
+	* regex.h (struct re_registers): Add comment.
+
 2003-05-09  Stephen J. Turnbull  <stephen@xemacs.org>
 
 	Various cleanups against the possibility that <sys/stat.h> does
--- a/src/regex.c	Fri May 09 09:50:42 2003 +0000
+++ b/src/regex.c	Fri May 09 14:52:43 2003 +0000
@@ -6080,7 +6080,8 @@
 	       emch1, emch2 is the character at d, and syn2 is the
 	       syntax of emch2. */
 	    Ichar emch1, emch2;
-	    int syn1, syn2;
+	    int syn1 = 0,
+	        syn2 = 0;
 	    re_char *d_before, *d_after;
 	    int result,
 		at_beg = AT_STRINGS_BEG (d),
--- a/src/regex.h	Fri May 09 09:50:42 2003 +0000
+++ b/src/regex.h	Fri May 09 14:52:43 2003 +0000
@@ -418,7 +418,7 @@
    regex.texinfo for a full description of what registers match.  */
 struct re_registers
 {
-  int num_regs;
+  int num_regs;			/* number of registers allocated */
   regoff_t *start;
   regoff_t *end;
 };
--- a/src/search.c	Fri May 09 09:50:42 2003 +0000
+++ b/src/search.c	Fri May 09 14:52:43 2003 +0000
@@ -93,29 +93,20 @@
    */
 static struct re_registers search_regs;
 
-/* Every function that _may_ set the match data _must_ clear the search
-   registers on entry.  An unsuccessful search should leave the search
-   registers cleared.  Applications that are no-ops by definition (eg,
-   searches with a repetition count of 0) _must not_ clear the search
+/* Every function that sets the match data _must_ clear unused search
+   registers on success.  An unsuccessful search or match _must_ preserve
+   the search registers.  The traditional documentation implied that
+   any match operation might trash the registers, but in fact failures
+   have always preserved the match data (in GNU Emacs as well).  Some
+   plausible code depends on this behavior (cf. `w3-configuration-data'
+   in library "w3-cfg").
+
+   Ordinary string searches use set_search_regs to set the whole-string
+   match.  That function takes care of clearing the unused subexpression
    registers.
-
-   XEmacs 21.5 up to beta 11 may have permitted the following idiom to
-   "win" in the sense that the match data was set to the last successful
-   match's match data, and not cleared as the current implemenation does:
-
-   (while (search_forward "string"))
-   (use-match-data-of-last-successful-search)
-
-   This no longer can work.  You must use save-match-data to preserve the
-   match data:
-
-   (let (md)
-     (while (when (search-forward "string") (setq md (match-data))))
-     (set-match-data md))
-   (use-match-data-of-last-successful-search)
    */
 static void set_search_regs (struct buffer *buf, Charbpos beg, Charcount len);
-static void clear_search_regs (struct re_registers *regp);
+static void clear_search_regs (void);
 
 /* The buffer in which the last search was performed, or
    Qt if the last search was done in a string;
@@ -326,9 +317,6 @@
   struct syntax_cache scache_struct;
   struct syntax_cache *scache = &scache_struct;
   
-  /* clear search registers *now*.  no mercy, not even for errors */
-  clear_search_regs (&search_regs);
-
   CHECK_STRING (string);
   bufp = compile_pattern (string, &search_regs,
 			  (!NILP (buf->case_fold_search)
@@ -382,9 +370,11 @@
 
 DEFUN ("looking-at", Flooking_at, 1, 2, 0, /*
 Return t if text after point matches regular expression REGEXP.
-This function modifies the match data that `match-beginning',
-`match-end' and `match-data' access; save and restore the match
-data if you want to preserve them.
+When the match is successful, this function modifies the match data
+that `match-beginning', `match-end' and `match-data' access; save the
+match data with `match-data' and restore it with `store-match-data' if
+you want to preserve them.  If the match fails, the match data from the
+previous successful match is preserved.
 
 Optional argument BUFFER defaults to the current buffer.
 */
@@ -396,9 +386,11 @@
 DEFUN ("posix-looking-at", Fposix_looking_at, 1, 2, 0, /*
 Return t if text after point matches regular expression REGEXP.
 Find the longest match, in accord with Posix regular expression rules.
-This function modifies the match data that `match-beginning',
-`match-end' and `match-data' access; save and restore the match
-data if you want to preserve them.
+When the match is successful, this function modifies the match data
+that `match-beginning', `match-end' and `match-data' access; save the
+match data with `match-data' and restore it with `store-match-data' if
+you want to preserve them.  If the match fails, the match data from the
+previous successful match is preserved.
 
 Optional argument BUFFER defaults to the current buffer.
 */
@@ -419,9 +411,6 @@
      data.  Not necessary because we don't call process filters
      asynchronously (i.e. from within QUIT). */
 
-  /* clear search registers *now*.  no mercy, not even for errors */
-  clear_search_regs (&search_regs);
-
   CHECK_STRING (regexp);
   CHECK_STRING (string);
 
@@ -491,9 +480,14 @@
          (string-match "^foo.*bar" string))
 
    but the case, syntax, and category tables come from the standard tables,
-   which are accessed through functions `default-{case,syntax,category}-table' and serve as the parents of the
-   tables in particular buffer
-
+   which are accessed through functions `default-{case,syntax,category}-table'
+   and serve as the parents of the tables in a particular buffer.
+
+When the match is successful, this function modifies the match data
+that `match-beginning', `match-end' and `match-data' access; save the
+match data with `match-data' and restore it with `store-match-data' if
+you want to preserve them.  If the match fails, the match data from the
+previous successful match is preserved.
 */
        (regexp, string, start, buffer))
 {
@@ -513,6 +507,12 @@
 Optional arg BUFFER controls how case folding is done (according to
 the value of `case-fold-search' in that buffer and that buffer's case
 tables) and defaults to the current buffer.
+
+When the match is successful, this function modifies the match data
+that `match-beginning', `match-end' and `match-data' access; save the
+match data with `match-data' and restore it with `store-match-data' if
+you want to preserve them.  If the match fails, the match data from the
+previous successful match is preserved.
 */
        (regexp, string, start, buffer))
 {
@@ -1251,9 +1251,6 @@
   if (n == 0)
     return charbpos;
 
-  /* clear the search regs now */
-  clear_search_regs (&search_regs);
-
   /* Null string is found at starting position.  */
   if (len == 0)
     {
@@ -1647,9 +1644,6 @@
     simple_translate[i] = (Ibyte) i;
   i = 0;
 
-  /* clear search regs now */
-  clear_search_regs (&search_regs);
-
   while (i != infinity)
     {
       Ibyte *ptr = base_pat + i;
@@ -2004,23 +1998,22 @@
       search_regs.num_regs = 1;
     }
 
+  clear_search_regs ();
   search_regs.start[0] = beg;
   search_regs.end[0] = beg + len;
   last_thing_searched = wrap_buffer (buf);
 }
 
-/* Clear search registers so match data will be null.
-   REGP is a pointer to the register structure to clear, usually the global
-   search_regs. */
+/* Clear search registers so match data will be null. */
 
 static void
-clear_search_regs (struct re_registers *regp)
+clear_search_regs (void)
 {
   /* This function has been Mule-ized. */
   int i;
 
-  for (i = 0; i < regp->num_regs; i++)
-    regp->start[i] = regp->end[i] = -1;
+  for (i = 0; i < search_regs.num_regs; i++)
+    search_regs.start[i] = search_regs.end[i] = -1;
 }
 
 
@@ -2106,7 +2099,13 @@
 Optional fifth argument BUFFER specifies the buffer to search in and
 defaults to the current buffer.
 
-See also the functions `match-beginning', `match-end' and `replace-match'.
+When the match is successful, this function modifies the match data
+that `match-beginning', `match-end' and `match-data' access; save the
+match data with `match-data' and restore it with `store-match-data' if
+you want to preserve them.  If the match fails, the match data from the
+previous successful match is preserved.
+
+See also the function `replace-match'.
 */
        (string, limit, noerror, count, buffer))
 {
@@ -2131,7 +2130,13 @@
 Optional fifth argument BUFFER specifies the buffer to search in and
 defaults to the current buffer.
 
-See also the functions `match-beginning', `match-end' and `replace-match'.
+When the match is successful, this function modifies the match data
+that `match-beginning', `match-end' and `match-data' access; save the
+match data with `match-data' and restore it with `store-match-data' if
+you want to preserve them.  If the match fails, the match data from the
+previous successful match is preserved.
+
+See also the function `replace-match'.
 */
        (string, limit, noerror, count, buffer))
 {
@@ -2157,7 +2162,13 @@
 Optional fifth argument BUFFER specifies the buffer to search in and
 defaults to the current buffer.
 
-See also the functions `match-beginning', `match-end' and `replace-match'.
+When the match is successful, this function modifies the match data
+that `match-beginning', `match-end' and `match-data' access; save the
+match data with `match-data' and restore it with `store-match-data' if
+you want to preserve them.  If the match fails, the match data from the
+previous successful match is preserved.
+
+See also the function `replace-match'.
 */
        (string, limit, noerror, count, buffer))
 {
@@ -2183,7 +2194,13 @@
 Optional fifth argument BUFFER specifies the buffer to search in and
 defaults to the current buffer.
 
-See also the functions `match-beginning', `match-end' and `replace-match'.
+When the match is successful, this function modifies the match data
+that `match-beginning', `match-end' and `match-data' access; save the
+match data with `match-data' and restore it with `store-match-data' if
+you want to preserve them.  If the match fails, the match data from the
+previous successful match is preserved.
+
+See also the function `replace-match'.
 */
        (string, limit, noerror, count, buffer))
 {
@@ -2212,7 +2229,13 @@
 Optional fifth argument BUFFER specifies the buffer to search in and
 defaults to the current buffer.
 
-See also the functions `match-beginning', `match-end' and `replace-match'.
+When the match is successful, this function modifies the match data
+that `match-beginning', `match-end' and `match-data' access; save the
+match data with `match-data' and restore it with `store-match-data' if
+you want to preserve them.  If the match fails, the match data from the
+previous successful match is preserved.
+
+See also the function `replace-match'.
 */
        (regexp, limit, noerror, count, buffer))
 {
@@ -2237,7 +2260,13 @@
 Optional fifth argument BUFFER specifies the buffer to search in and
 defaults to the current buffer.
 
-See also the functions `match-beginning', `match-end' and `replace-match'.
+When the match is successful, this function modifies the match data
+that `match-beginning', `match-end' and `match-data' access; save the
+match data with `match-data' and restore it with `store-match-data' if
+you want to preserve them.  If the match fails, the match data from the
+previous successful match is preserved.
+
+See also the function `replace-match'.
 */
        (regexp, limit, noerror, count, buffer))
 {
@@ -2266,7 +2295,13 @@
 Optional fifth argument BUFFER specifies the buffer to search in and
 defaults to the current buffer.
 
-See also the functions `match-beginning', `match-end' and `replace-match'.
+When the match is successful, this function modifies the match data
+that `match-beginning', `match-end' and `match-data' access; save the
+match data with `match-data' and restore it with `store-match-data' if
+you want to preserve them.  If the match fails, the match data from the
+previous successful match is preserved.
+
+See also the function `replace-match'.
 */
        (regexp, limit, noerror, count, buffer))
 {
@@ -2292,7 +2327,13 @@
 Optional fifth argument BUFFER specifies the buffer to search in and
 defaults to the current buffer.
 
-See also the functions `match-beginning', `match-end' and `replace-match'.
+When the match is successful, this function modifies the match data
+that `match-beginning', `match-end' and `match-data' access; save the
+match data with `match-data' and restore it with `store-match-data' if
+you want to preserve them.  If the match fails, the match data from the
+previous successful match is preserved.
+
+See also the function `replace-match'.
 */
        (regexp, limit, noerror, count, buffer))
 {
@@ -2351,11 +2392,11 @@
 whole match.  This is useful only after a regular expression search or
 match since only regular expressions have distinguished subexpressions.
 
-If no match (including searches) has been conducted, the last match
-operation failed, or the requested subexpression was not matched, an
-`args-out-of-range' error will be signaled.  (If no match has ever been
-conducted in this instance of XEmacs, an `invalid-operation' error will
-be signaled.  This is very rare.)
+If no match (including searches) has been conducted or the requested
+subexpression was not matched, an `args-out-of-range' error will be
+signaled.  (If no match has ever been conducted in this instance of
+XEmacs, an `invalid-operation' error will be signaled.  This is very
+rare.)
 */
        (replacement, fixedcase, literal, string, strbuffer))
 {
@@ -2919,7 +2960,8 @@
 
 DEFUN ("store-match-data", Fstore_match_data, 1, 1, 0, /*
 Set internal data on last search match from elements of LIST.
-LIST should have been created by calling `match-data' previously.
+LIST should have been created by calling `match-data' previously,
+or be nil, to clear the internal match data.
 */
        (list))
 {