Mercurial > hg > xemacs-beta
changeset 5035:b1e48555be7d
Add a new optional ESCAPE-CHAR argument to #'split-string-by-char.
src/ChangeLog addition:
2010-02-07 Aidan Kehoe <kehoea@parhasard.net>
* fns.c (split_string_by_ichar_1): Extend this to take UNESCAPE
and ESCAPECHAR arguments.
(split_external_path, split_env_path, Fsplit_string_by_char)
(Fsplit_path):
Pass the new arguments to split_string_by_ichar_1(); take a new
optional argument, ESCAPE-CHAR, in #'split-string-by-char,
allowing SEPCHAR to be escaped.
tests/ChangeLog addition:
2010-02-07 Aidan Kehoe <kehoea@parhasard.net>
* automated/lisp-tests.el (split-string-by-char):
Test this function, and its new ESCAPE-CHAR argument.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Sun, 07 Feb 2010 12:24:03 +0000 |
parents | 1b96882bdf37 |
children | 9624523604c5 |
files | src/ChangeLog src/fns.c tests/ChangeLog tests/automated/lisp-tests.el |
diffstat | 4 files changed, 221 insertions(+), 26 deletions(-) [+] |
line wrap: on
line diff
--- a/src/ChangeLog Fri Feb 19 23:21:27 2010 +0000 +++ b/src/ChangeLog Sun Feb 07 12:24:03 2010 +0000 @@ -3451,6 +3451,16 @@ reasons. +2010-02-07 Aidan Kehoe <kehoea@parhasard.net> + + * fns.c (split_string_by_ichar_1): Extend this to take UNESCAPE + and ESCAPECHAR arguments. + (split_external_path, split_env_path, Fsplit_string_by_char) + (Fsplit_path): + Pass the new arguments to split_string_by_ichar_1(); take a new + optional argument, ESCAPE-CHAR, in #'split-string-by-char, + allowing SEPCHAR to be escaped. + 2010-01-09 Didier Verna <didier@xemacs.org> * glyphs.c (query_string_font): Use proper domain for cachel
--- a/src/fns.c Fri Feb 19 23:21:27 2010 +0000 +++ b/src/fns.c Sun Feb 07 12:24:03 2010 +0000 @@ -1053,31 +1053,129 @@ } /* Split STRING into a list of substrings. The substrings are the - parts of original STRING separated by SEPCHAR. */ + parts of original STRING separated by SEPCHAR. + + If UNESCAPE is non-zero, ESCAPECHAR specifies a character that will quote + SEPCHAR, and cause it not to split STRING. A double ESCAPECHAR is + necessary for ESCAPECHAR to appear once in a substring. */ + static Lisp_Object split_string_by_ichar_1 (const Ibyte *string, Bytecount size, - Ichar sepchar) + Ichar sepchar, int unescape, Ichar escapechar) { Lisp_Object result = Qnil; const Ibyte *end = string + size; - while (1) + if (unescape) { - const Ibyte *p = string; - while (p < end) - { - if (itext_ichar (p) == sepchar) - break; - INC_IBYTEPTR (p); - } - result = Fcons (make_string (string, p - string), result); - if (p < end) - { - string = p; - INC_IBYTEPTR (string); /* skip sepchar */ - } - else - break; + Ibyte unescape_buffer[64], *unescape_buffer_ptr = unescape_buffer, + escaped[MAX_ICHAR_LEN], *unescape_cursor; + int deleting_escapes, previous_escaped, escaped_len; + Ichar pchar, unescape_buffer_size = countof (unescape_buffer); + + escaped_len = set_itext_ichar (escaped, escapechar); + + while (1) + { + const Ibyte *p = string, *cursor; + deleting_escapes = 0; + previous_escaped = 0; + + while (p < end) + { + pchar = itext_ichar (p); + + if (pchar == sepchar) + { + if (!previous_escaped) + { + break; + } + } + else if (pchar == escapechar + /* Doubled escapes don't escape: */ + && !previous_escaped) + { + ++deleting_escapes; + previous_escaped = 1; + } + else + { + previous_escaped = 0; + } + + INC_IBYTEPTR (p); + } + + if (deleting_escapes) + { + if (((p - string) - (escaped_len * deleting_escapes)) + > unescape_buffer_size) + { + unescape_buffer_size = + ((p - string) - (escaped_len * deleting_escapes)) * 1.5; + unescape_buffer_ptr = alloca_ibytes (unescape_buffer_size); + } + + cursor = string; + unescape_cursor = unescape_buffer_ptr; + previous_escaped = 0; + + while (cursor < p) + { + pchar = itext_ichar (cursor); + + if (pchar != escapechar || previous_escaped) + { + memcpy (unescape_cursor, cursor, + itext_ichar_len (cursor)); + INC_IBYTEPTR (unescape_cursor); + } + + previous_escaped = !previous_escaped + && (pchar == escapechar); + + INC_IBYTEPTR (cursor); + } + + result = Fcons (make_string (unescape_buffer_ptr, + unescape_cursor + - unescape_buffer_ptr), + result); + } + else + { + result = Fcons (make_string (string, p - string), result); + } + if (p < end) + { + string = p; + INC_IBYTEPTR (string); /* skip sepchar */ + } + else + break; + } + } + else + { + while (1) + { + const Ibyte *p = string; + while (p < end) + { + if (itext_ichar (p) == sepchar) + break; + INC_IBYTEPTR (p); + } + result = Fcons (make_string (string, p - string), result); + if (p < end) + { + string = p; + INC_IBYTEPTR (string); /* skip sepchar */ + } + else + break; + } } return Fnreverse (result); } @@ -1102,7 +1200,7 @@ if (!newlen) return Qnil; - return split_string_by_ichar_1 (newpath, newlen, SEPCHAR); + return split_string_by_ichar_1 (newpath, newlen, SEPCHAR, 0, 0); } Lisp_Object @@ -1115,22 +1213,34 @@ path = default_; if (!path) return Qnil; - return split_string_by_ichar_1 (path, qxestrlen (path), SEPCHAR); + return split_string_by_ichar_1 (path, qxestrlen (path), SEPCHAR, 0, 0); } /* Ben thinks this function should not exist or be exported to Lisp. We use it to define split-path-string in subr.el (not!). */ -DEFUN ("split-string-by-char", Fsplit_string_by_char, 2, 2, 0, /* +DEFUN ("split-string-by-char", Fsplit_string_by_char, 2, 3, 0, /* Split STRING into a list of substrings originally separated by SEPCHAR. + +With optional ESCAPE-CHAR, any instances of SEPCHAR preceded by that +character will not split the string, and a double instance of ESCAPE-CHAR +will be necessary for a single ESCAPE-CHAR to appear in the output string. */ - (string, sepchar)) + (string, sepchar, escape_char)) { + Ichar escape_ichar = 0; + CHECK_STRING (string); CHECK_CHAR (sepchar); + if (!NILP (escape_char)) + { + CHECK_CHAR (escape_char); + escape_ichar = XCHAR (escape_char); + } return split_string_by_ichar_1 (XSTRING_DATA (string), - XSTRING_LENGTH (string), - XCHAR (sepchar)); + XSTRING_LENGTH (string), + XCHAR (sepchar), + !NILP (escape_char), escape_ichar); } /* #### This was supposed to be in subr.el, but is used VERY early in @@ -1154,7 +1264,7 @@ return (split_string_by_ichar_1 (XSTRING_DATA (path), XSTRING_LENGTH (path), - itext_ichar (XSTRING_DATA (Vpath_separator)))); + itext_ichar (XSTRING_DATA (Vpath_separator)), 0, 0)); }
--- a/tests/ChangeLog Fri Feb 19 23:21:27 2010 +0000 +++ b/tests/ChangeLog Sun Feb 07 12:24:03 2010 +0000 @@ -274,6 +274,11 @@ * automated/mule-tests.el (featurep): Use utf-8 as file-name-coding-system under Cygwin 1.7+. +2010-02-07 Aidan Kehoe <kehoea@parhasard.net> + + * automated/lisp-tests.el (split-string-by-char): + Test this function, and its new ESCAPE-CHAR argument. + 2010-01-01 Aidan Kehoe <kehoea@parhasard.net> * automated/lisp-tests.el:
--- a/tests/automated/lisp-tests.el Fri Feb 19 23:21:27 2010 +0000 +++ b/tests/automated/lisp-tests.el Sun Feb 07 12:24:03 2010 +0000 @@ -1,4 +1,4 @@ -;; Copyright (C) 1998 Free Software Foundation, Inc. +;; Copyright (C) 1998 Free Software Foundation, Inc. -*- coding: iso-8859-1 -*- ;; Author: Martin Buchholz <martin@xemacs.org> ;; Maintainer: Martin Buchholz <martin@xemacs.org> @@ -1077,6 +1077,76 @@ '("foobar")) ;;----------------------------------------------------- +;; Test split-string-by-char +;;----------------------------------------------------- + +(Assert + (equal + (split-string-by-char + #r"re\:ee:this\\is\\text\\\\:oo\ps: +Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der +bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser, +worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die +unreinen\: Geister brüten.\\ +tocopherol +Vitamin E: any or all of a group of closely related fat-soluble compounds +that occur especially in plant oils and are anti-oxidants essential in the +diets of many animals and probably of man. " + ?: ?\\) + '("re:ee" "this\\is\\text\\\\" "oops" " +Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der +bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser, +worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die +unreinen: Geister brüten.\\ +tocopherol +Vitamin E" " any or all of a group of closely related fat-soluble compounds +that occur especially in plant oils and are anti-oxidants essential in the +diets of many animals and probably of man. "))) +(Assert + (equal + (split-string-by-char + #r"re\:ee:this\\is\\text\\\\:oo\ps: +Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der +bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser, +worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die +unreinen\: Geister brüten.\\ +tocopherol +Vitamin E: any or all of a group of closely related fat-soluble compounds +that occur especially in plant oils and are anti-oxidants essential in the +diets of many animals and probably of man. " + ?: ?\x00) + '("re\\" "ee" "this\\\\is\\\\text\\\\\\\\" "oo\\ps" " +Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der +bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser, +worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die +unreinen\\" " Geister brüten.\\\\ +tocopherol +Vitamin E" " any or all of a group of closely related fat-soluble compounds +that occur especially in plant oils and are anti-oxidants essential in the +diets of many animals and probably of man. "))) +(Assert + (equal + (split-string-by-char + #r"re\:ee:this\\is\\text\\\\:oo\ps: +Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der +bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser, +worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die +unreinen\: Geister brüten.\\ +tocopherol +Vitamin E: any or all of a group of closely related fat-soluble compounds +that occur especially in plant oils and are anti-oxidants essential in the +diets of many animals and probably of man. " ?\\) + '("re" ":ee:this" "" "is" "" "text" "" "" "" ":oo" "ps: +Eine Sprache, die stagnirt, ist zu vergleichen mit einem See, dem der +bisherige Quellenzufluß versiegt oder abgeleitet wird. Aus dem Wasser, +worüber der Geist Gottes schwebte, wird Sumpf und Moder, worüber die +unreinen" ": Geister brüten." "" " +tocopherol +Vitamin E: any or all of a group of closely related fat-soluble compounds +that occur especially in plant oils and are anti-oxidants essential in the +diets of many animals and probably of man. "))) + +;;----------------------------------------------------- ;; Test near-text buffer functions. ;;----------------------------------------------------- (with-temp-buffer