changeset 5035:b1e48555be7d

Add a new optional ESCAPE-CHAR argument to #'split-string-by-char. src/ChangeLog addition: 2010-02-07 Aidan Kehoe <kehoea@parhasard.net> * fns.c (split_string_by_ichar_1): Extend this to take UNESCAPE and ESCAPECHAR arguments. (split_external_path, split_env_path, Fsplit_string_by_char) (Fsplit_path): Pass the new arguments to split_string_by_ichar_1(); take a new optional argument, ESCAPE-CHAR, in #'split-string-by-char, allowing SEPCHAR to be escaped. tests/ChangeLog addition: 2010-02-07 Aidan Kehoe <kehoea@parhasard.net> * automated/lisp-tests.el (split-string-by-char): Test this function, and its new ESCAPE-CHAR argument.
author Aidan Kehoe <kehoea@parhasard.net>
date Sun, 07 Feb 2010 12:24:03 +0000
parents 1b96882bdf37
children 9624523604c5
files src/ChangeLog src/fns.c tests/ChangeLog tests/automated/lisp-tests.el
diffstat 4 files changed, 221 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Fri Feb 19 23:21:27 2010 +0000
+++ b/src/ChangeLog	Sun Feb 07 12:24:03 2010 +0000
@@ -3451,6 +3451,16 @@
 	reasons.
 	
 
+2010-02-07  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* fns.c (split_string_by_ichar_1): Extend this to take UNESCAPE
+	and ESCAPECHAR arguments.
+	(split_external_path, split_env_path, Fsplit_string_by_char)
+	(Fsplit_path):
+	Pass the new arguments to split_string_by_ichar_1(); take a new
+	optional argument, ESCAPE-CHAR, in #'split-string-by-char,
+	allowing SEPCHAR to be escaped.
+
 2010-01-09  Didier Verna  <didier@xemacs.org>
 
 	* glyphs.c (query_string_font): Use proper domain for cachel
--- a/src/fns.c	Fri Feb 19 23:21:27 2010 +0000
+++ b/src/fns.c	Sun Feb 07 12:24:03 2010 +0000
@@ -1053,31 +1053,129 @@
 }
 
 /* Split STRING into a list of substrings.  The substrings are the
-   parts of original STRING separated by SEPCHAR.  */
+   parts of original STRING separated by SEPCHAR.
+
+   If UNESCAPE is non-zero, ESCAPECHAR specifies a character that will quote
+   SEPCHAR, and cause it not to split STRING. A double ESCAPECHAR is
+   necessary for ESCAPECHAR to appear once in a substring. */
+
 static Lisp_Object
 split_string_by_ichar_1 (const Ibyte *string, Bytecount size,
-			  Ichar sepchar)
+                         Ichar sepchar, int unescape, Ichar escapechar)
 {
   Lisp_Object result = Qnil;
   const Ibyte *end = string + size;
 
-  while (1)
+  if (unescape)
     {
-      const Ibyte *p = string;
-      while (p < end)
-	{
-	  if (itext_ichar (p) == sepchar)
-	    break;
-	  INC_IBYTEPTR (p);
-	}
-      result = Fcons (make_string (string, p - string), result);
-      if (p < end)
-	{
-	  string = p;
-	  INC_IBYTEPTR (string);	/* skip sepchar */
-	}
-      else
-	break;
+      Ibyte unescape_buffer[64], *unescape_buffer_ptr = unescape_buffer,
+        escaped[MAX_ICHAR_LEN], *unescape_cursor;
+      int deleting_escapes, previous_escaped, escaped_len; 
+      Ichar pchar, unescape_buffer_size = countof (unescape_buffer);
+
+      escaped_len = set_itext_ichar (escaped, escapechar);
+
+      while (1)
+        {
+          const Ibyte *p = string, *cursor;
+          deleting_escapes = 0;
+          previous_escaped = 0;
+
+          while (p < end)
+            {
+              pchar = itext_ichar (p);
+
+              if (pchar == sepchar)
+                {
+                  if (!previous_escaped)
+                    {
+                      break;
+                    }
+                }
+              else if (pchar == escapechar
+                       /* Doubled escapes don't escape: */
+                       && !previous_escaped)
+                {
+                  ++deleting_escapes;
+                  previous_escaped = 1;
+                }
+              else
+                {
+                  previous_escaped = 0;
+                }
+
+              INC_IBYTEPTR (p);
+            }
+
+          if (deleting_escapes)
+            {
+              if (((p - string) - (escaped_len * deleting_escapes))
+                  > unescape_buffer_size)
+                {
+                  unescape_buffer_size =
+                    ((p - string) - (escaped_len * deleting_escapes)) * 1.5;
+                  unescape_buffer_ptr = alloca_ibytes (unescape_buffer_size);
+                }
+
+              cursor = string;
+              unescape_cursor = unescape_buffer_ptr;
+              previous_escaped = 0;
+
+              while (cursor < p)
+                {
+                  pchar = itext_ichar (cursor);
+
+                  if (pchar != escapechar || previous_escaped)
+                    {
+                      memcpy (unescape_cursor, cursor,
+                              itext_ichar_len (cursor));
+                      INC_IBYTEPTR (unescape_cursor);
+                    }
+
+                  previous_escaped = !previous_escaped
+                    && (pchar == escapechar);
+
+                  INC_IBYTEPTR (cursor);
+                }
+
+              result = Fcons (make_string (unescape_buffer_ptr,
+                                           unescape_cursor
+                                           - unescape_buffer_ptr),
+                              result);
+            }
+          else
+            {
+              result = Fcons (make_string (string, p - string), result);
+            }
+          if (p < end)
+            {
+              string = p;
+              INC_IBYTEPTR (string);	/* skip sepchar */
+            }
+          else
+            break;
+        }
+    }
+  else
+    {
+      while (1)
+        {
+          const Ibyte *p = string;
+          while (p < end)
+            {
+              if (itext_ichar (p) == sepchar)
+                break;
+              INC_IBYTEPTR (p);
+            }
+          result = Fcons (make_string (string, p - string), result);
+          if (p < end)
+            {
+              string = p;
+              INC_IBYTEPTR (string);	/* skip sepchar */
+            }
+          else
+            break;
+        }
     }
   return Fnreverse (result);
 }
@@ -1102,7 +1200,7 @@
   if (!newlen)
     return Qnil;
 
-  return split_string_by_ichar_1 (newpath, newlen, SEPCHAR);
+  return split_string_by_ichar_1 (newpath, newlen, SEPCHAR, 0, 0);
 }
 
 Lisp_Object
@@ -1115,22 +1213,34 @@
     path = default_;
   if (!path)
     return Qnil;
-  return split_string_by_ichar_1 (path, qxestrlen (path), SEPCHAR);
+  return split_string_by_ichar_1 (path, qxestrlen (path), SEPCHAR, 0, 0);
 }
 
 /* Ben thinks this function should not exist or be exported to Lisp.
    We use it to define split-path-string in subr.el (not!).  */
 
-DEFUN ("split-string-by-char", Fsplit_string_by_char, 2, 2, 0, /*
+DEFUN ("split-string-by-char", Fsplit_string_by_char, 2, 3, 0, /*
 Split STRING into a list of substrings originally separated by SEPCHAR.
+
+With optional ESCAPE-CHAR, any instances of SEPCHAR preceded by that
+character will not split the string, and a double instance of ESCAPE-CHAR
+will be necessary for a single ESCAPE-CHAR to appear in the output string.
 */
-       (string, sepchar))
+       (string, sepchar, escape_char))
 {
+  Ichar escape_ichar = 0;
+
   CHECK_STRING (string);
   CHECK_CHAR (sepchar);
+  if (!NILP (escape_char))
+    {
+      CHECK_CHAR (escape_char);
+      escape_ichar = XCHAR (escape_char);
+    }
   return split_string_by_ichar_1 (XSTRING_DATA (string),
-				   XSTRING_LENGTH (string),
-				   XCHAR (sepchar));
+                                  XSTRING_LENGTH (string),
+                                  XCHAR (sepchar),
+                                  !NILP (escape_char), escape_ichar);
 }
 
 /* #### This was supposed to be in subr.el, but is used VERY early in
@@ -1154,7 +1264,7 @@
 
   return (split_string_by_ichar_1
 	  (XSTRING_DATA (path), XSTRING_LENGTH (path),
-	   itext_ichar (XSTRING_DATA (Vpath_separator))));
+	   itext_ichar (XSTRING_DATA (Vpath_separator)), 0, 0));
 }
 
 
--- a/tests/ChangeLog	Fri Feb 19 23:21:27 2010 +0000
+++ b/tests/ChangeLog	Sun Feb 07 12:24:03 2010 +0000
@@ -274,6 +274,11 @@
 	* automated/mule-tests.el (featurep):
 	Use utf-8 as file-name-coding-system under Cygwin 1.7+.
 
+2010-02-07  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* automated/lisp-tests.el (split-string-by-char):
+	Test this function, and its new ESCAPE-CHAR argument.
+
 2010-01-01  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* automated/lisp-tests.el: 
--- a/tests/automated/lisp-tests.el	Fri Feb 19 23:21:27 2010 +0000
+++ b/tests/automated/lisp-tests.el	Sun Feb 07 12:24:03 2010 +0000
@@ -1,4 +1,4 @@
-;; Copyright (C) 1998 Free Software Foundation, Inc.
+;; Copyright (C) 1998 Free Software Foundation, Inc. -*- coding: iso-8859-1 -*-
 
 ;; Author: Martin Buchholz <martin@xemacs.org>
 ;; Maintainer: Martin Buchholz <martin@xemacs.org>
@@ -1077,6 +1077,76 @@
 	       '("foobar"))
 
 ;;-----------------------------------------------------
+;; Test split-string-by-char
+;;-----------------------------------------------------
+
+(Assert
+ (equal
+  (split-string-by-char
+   #r"re\:ee:this\\is\\text\\\\:oo\ps:
+Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der
+bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser,
+worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die
+unreinen\: Geister brüten.\\
+tocopherol
+Vitamin E: any or all of a group of closely related fat-soluble compounds
+that occur especially in plant oils and are anti-oxidants essential in the
+diets of many animals and probably of man. "
+  ?: ?\\)
+  '("re:ee" "this\\is\\text\\\\" "oops" "
+Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der
+bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser,
+worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die
+unreinen: Geister brüten.\\
+tocopherol
+Vitamin E" " any or all of a group of closely related fat-soluble compounds
+that occur especially in plant oils and are anti-oxidants essential in the
+diets of many animals and probably of man. ")))
+(Assert
+ (equal
+  (split-string-by-char
+   #r"re\:ee:this\\is\\text\\\\:oo\ps:
+Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der
+bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser,
+worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die
+unreinen\: Geister brüten.\\
+tocopherol
+Vitamin E: any or all of a group of closely related fat-soluble compounds
+that occur especially in plant oils and are anti-oxidants essential in the
+diets of many animals and probably of man. "
+   ?: ?\x00)
+  '("re\\" "ee" "this\\\\is\\\\text\\\\\\\\" "oo\\ps" "
+Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der
+bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser,
+worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die
+unreinen\\" " Geister brüten.\\\\
+tocopherol
+Vitamin E" " any or all of a group of closely related fat-soluble compounds
+that occur especially in plant oils and are anti-oxidants essential in the
+diets of many animals and probably of man. ")))
+(Assert
+ (equal
+  (split-string-by-char
+   #r"re\:ee:this\\is\\text\\\\:oo\ps:
+Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der
+bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser,
+worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die
+unreinen\: Geister brüten.\\
+tocopherol
+Vitamin E: any or all of a group of closely related fat-soluble compounds
+that occur especially in plant oils and are anti-oxidants essential in the
+diets of many animals and probably of man. " ?\\)
+  '("re" ":ee:this" "" "is" "" "text" "" "" "" ":oo" "ps:
+Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der
+bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser,
+worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die
+unreinen" ": Geister brüten." "" "
+tocopherol
+Vitamin E: any or all of a group of closely related fat-soluble compounds
+that occur especially in plant oils and are anti-oxidants essential in the
+diets of many animals and probably of man. ")))
+
+;;-----------------------------------------------------
 ;; Test near-text buffer functions.
 ;;-----------------------------------------------------
 (with-temp-buffer