comparison src/fns.c @ 5035:b1e48555be7d

Add a new optional ESCAPE-CHAR argument to #'split-string-by-char. src/ChangeLog addition: 2010-02-07 Aidan Kehoe <kehoea@parhasard.net> * fns.c (split_string_by_ichar_1): Extend this to take UNESCAPE and ESCAPECHAR arguments. (split_external_path, split_env_path, Fsplit_string_by_char) (Fsplit_path): Pass the new arguments to split_string_by_ichar_1(); take a new optional argument, ESCAPE-CHAR, in #'split-string-by-char, allowing SEPCHAR to be escaped. tests/ChangeLog addition: 2010-02-07 Aidan Kehoe <kehoea@parhasard.net> * automated/lisp-tests.el (split-string-by-char): Test this function, and its new ESCAPE-CHAR argument.
author Aidan Kehoe <kehoea@parhasard.net>
date Sun, 07 Feb 2010 12:24:03 +0000
parents 1b96882bdf37
children 9624523604c5
comparison
equal deleted inserted replaced
5034:1b96882bdf37 5035:b1e48555be7d
1051 return Qnil; 1051 return Qnil;
1052 } 1052 }
1053 } 1053 }
1054 1054
1055 /* Split STRING into a list of substrings. The substrings are the 1055 /* Split STRING into a list of substrings. The substrings are the
1056 parts of original STRING separated by SEPCHAR. */ 1056 parts of original STRING separated by SEPCHAR.
1057
1058 If UNESCAPE is non-zero, ESCAPECHAR specifies a character that will quote
1059 SEPCHAR, and cause it not to split STRING. A double ESCAPECHAR is
1060 necessary for ESCAPECHAR to appear once in a substring. */
1061
1057 static Lisp_Object 1062 static Lisp_Object
1058 split_string_by_ichar_1 (const Ibyte *string, Bytecount size, 1063 split_string_by_ichar_1 (const Ibyte *string, Bytecount size,
1059 Ichar sepchar) 1064 Ichar sepchar, int unescape, Ichar escapechar)
1060 { 1065 {
1061 Lisp_Object result = Qnil; 1066 Lisp_Object result = Qnil;
1062 const Ibyte *end = string + size; 1067 const Ibyte *end = string + size;
1063 1068
1064 while (1) 1069 if (unescape)
1065 { 1070 {
1066 const Ibyte *p = string; 1071 Ibyte unescape_buffer[64], *unescape_buffer_ptr = unescape_buffer,
1067 while (p < end) 1072 escaped[MAX_ICHAR_LEN], *unescape_cursor;
1068 { 1073 int deleting_escapes, previous_escaped, escaped_len;
1069 if (itext_ichar (p) == sepchar) 1074 Ichar pchar, unescape_buffer_size = countof (unescape_buffer);
1070 break; 1075
1071 INC_IBYTEPTR (p); 1076 escaped_len = set_itext_ichar (escaped, escapechar);
1072 } 1077
1073 result = Fcons (make_string (string, p - string), result); 1078 while (1)
1074 if (p < end) 1079 {
1075 { 1080 const Ibyte *p = string, *cursor;
1076 string = p; 1081 deleting_escapes = 0;
1077 INC_IBYTEPTR (string); /* skip sepchar */ 1082 previous_escaped = 0;
1078 } 1083
1079 else 1084 while (p < end)
1080 break; 1085 {
1086 pchar = itext_ichar (p);
1087
1088 if (pchar == sepchar)
1089 {
1090 if (!previous_escaped)
1091 {
1092 break;
1093 }
1094 }
1095 else if (pchar == escapechar
1096 /* Doubled escapes don't escape: */
1097 && !previous_escaped)
1098 {
1099 ++deleting_escapes;
1100 previous_escaped = 1;
1101 }
1102 else
1103 {
1104 previous_escaped = 0;
1105 }
1106
1107 INC_IBYTEPTR (p);
1108 }
1109
1110 if (deleting_escapes)
1111 {
1112 if (((p - string) - (escaped_len * deleting_escapes))
1113 > unescape_buffer_size)
1114 {
1115 unescape_buffer_size =
1116 ((p - string) - (escaped_len * deleting_escapes)) * 1.5;
1117 unescape_buffer_ptr = alloca_ibytes (unescape_buffer_size);
1118 }
1119
1120 cursor = string;
1121 unescape_cursor = unescape_buffer_ptr;
1122 previous_escaped = 0;
1123
1124 while (cursor < p)
1125 {
1126 pchar = itext_ichar (cursor);
1127
1128 if (pchar != escapechar || previous_escaped)
1129 {
1130 memcpy (unescape_cursor, cursor,
1131 itext_ichar_len (cursor));
1132 INC_IBYTEPTR (unescape_cursor);
1133 }
1134
1135 previous_escaped = !previous_escaped
1136 && (pchar == escapechar);
1137
1138 INC_IBYTEPTR (cursor);
1139 }
1140
1141 result = Fcons (make_string (unescape_buffer_ptr,
1142 unescape_cursor
1143 - unescape_buffer_ptr),
1144 result);
1145 }
1146 else
1147 {
1148 result = Fcons (make_string (string, p - string), result);
1149 }
1150 if (p < end)
1151 {
1152 string = p;
1153 INC_IBYTEPTR (string); /* skip sepchar */
1154 }
1155 else
1156 break;
1157 }
1158 }
1159 else
1160 {
1161 while (1)
1162 {
1163 const Ibyte *p = string;
1164 while (p < end)
1165 {
1166 if (itext_ichar (p) == sepchar)
1167 break;
1168 INC_IBYTEPTR (p);
1169 }
1170 result = Fcons (make_string (string, p - string), result);
1171 if (p < end)
1172 {
1173 string = p;
1174 INC_IBYTEPTR (string); /* skip sepchar */
1175 }
1176 else
1177 break;
1178 }
1081 } 1179 }
1082 return Fnreverse (result); 1180 return Fnreverse (result);
1083 } 1181 }
1084 1182
1085 /* The same as the above, except PATH is an external C string (it is 1183 /* The same as the above, except PATH is an external C string (it is
1100 depend on split_external_path("") returning nil instead of an empty 1198 depend on split_external_path("") returning nil instead of an empty
1101 string? */ 1199 string? */
1102 if (!newlen) 1200 if (!newlen)
1103 return Qnil; 1201 return Qnil;
1104 1202
1105 return split_string_by_ichar_1 (newpath, newlen, SEPCHAR); 1203 return split_string_by_ichar_1 (newpath, newlen, SEPCHAR, 0, 0);
1106 } 1204 }
1107 1205
1108 Lisp_Object 1206 Lisp_Object
1109 split_env_path (const CIbyte *evarname, const Ibyte *default_) 1207 split_env_path (const CIbyte *evarname, const Ibyte *default_)
1110 { 1208 {
1113 path = egetenv (evarname); 1211 path = egetenv (evarname);
1114 if (!path) 1212 if (!path)
1115 path = default_; 1213 path = default_;
1116 if (!path) 1214 if (!path)
1117 return Qnil; 1215 return Qnil;
1118 return split_string_by_ichar_1 (path, qxestrlen (path), SEPCHAR); 1216 return split_string_by_ichar_1 (path, qxestrlen (path), SEPCHAR, 0, 0);
1119 } 1217 }
1120 1218
1121 /* Ben thinks this function should not exist or be exported to Lisp. 1219 /* Ben thinks this function should not exist or be exported to Lisp.
1122 We use it to define split-path-string in subr.el (not!). */ 1220 We use it to define split-path-string in subr.el (not!). */
1123 1221
1124 DEFUN ("split-string-by-char", Fsplit_string_by_char, 2, 2, 0, /* 1222 DEFUN ("split-string-by-char", Fsplit_string_by_char, 2, 3, 0, /*
1125 Split STRING into a list of substrings originally separated by SEPCHAR. 1223 Split STRING into a list of substrings originally separated by SEPCHAR.
1126 */ 1224
1127 (string, sepchar)) 1225 With optional ESCAPE-CHAR, any instances of SEPCHAR preceded by that
1128 { 1226 character will not split the string, and a double instance of ESCAPE-CHAR
1227 will be necessary for a single ESCAPE-CHAR to appear in the output string.
1228 */
1229 (string, sepchar, escape_char))
1230 {
1231 Ichar escape_ichar = 0;
1232
1129 CHECK_STRING (string); 1233 CHECK_STRING (string);
1130 CHECK_CHAR (sepchar); 1234 CHECK_CHAR (sepchar);
1235 if (!NILP (escape_char))
1236 {
1237 CHECK_CHAR (escape_char);
1238 escape_ichar = XCHAR (escape_char);
1239 }
1131 return split_string_by_ichar_1 (XSTRING_DATA (string), 1240 return split_string_by_ichar_1 (XSTRING_DATA (string),
1132 XSTRING_LENGTH (string), 1241 XSTRING_LENGTH (string),
1133 XCHAR (sepchar)); 1242 XCHAR (sepchar),
1243 !NILP (escape_char), escape_ichar);
1134 } 1244 }
1135 1245
1136 /* #### This was supposed to be in subr.el, but is used VERY early in 1246 /* #### This was supposed to be in subr.el, but is used VERY early in
1137 the bootstrap process, so it goes here. Damn. */ 1247 the bootstrap process, so it goes here. Damn. */
1138 1248
1152 "`path-separator' should be set to a single-character string", 1262 "`path-separator' should be set to a single-character string",
1153 Vpath_separator); 1263 Vpath_separator);
1154 1264
1155 return (split_string_by_ichar_1 1265 return (split_string_by_ichar_1
1156 (XSTRING_DATA (path), XSTRING_LENGTH (path), 1266 (XSTRING_DATA (path), XSTRING_LENGTH (path),
1157 itext_ichar (XSTRING_DATA (Vpath_separator)))); 1267 itext_ichar (XSTRING_DATA (Vpath_separator)), 0, 0));
1158 } 1268 }
1159 1269
1160 1270
1161 DEFUN ("nthcdr", Fnthcdr, 2, 2, 0, /* 1271 DEFUN ("nthcdr", Fnthcdr, 2, 2, 0, /*
1162 Take cdr N times on LIST, and return the result. 1272 Take cdr N times on LIST, and return the result.