Mercurial > hg > xemacs-beta
comparison src/fns.c @ 5035:b1e48555be7d
Add a new optional ESCAPE-CHAR argument to #'split-string-by-char.
src/ChangeLog addition:
2010-02-07 Aidan Kehoe <kehoea@parhasard.net>
* fns.c (split_string_by_ichar_1): Extend this to take UNESCAPE
and ESCAPECHAR arguments.
(split_external_path, split_env_path, Fsplit_string_by_char)
(Fsplit_path):
Pass the new arguments to split_string_by_ichar_1(); take a new
optional argument, ESCAPE-CHAR, in #'split-string-by-char,
allowing SEPCHAR to be escaped.
tests/ChangeLog addition:
2010-02-07 Aidan Kehoe <kehoea@parhasard.net>
* automated/lisp-tests.el (split-string-by-char):
Test this function, and its new ESCAPE-CHAR argument.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Sun, 07 Feb 2010 12:24:03 +0000 |
parents | 1b96882bdf37 |
children | 9624523604c5 |
comparison
equal
deleted
inserted
replaced
5034:1b96882bdf37 | 5035:b1e48555be7d |
---|---|
1051 return Qnil; | 1051 return Qnil; |
1052 } | 1052 } |
1053 } | 1053 } |
1054 | 1054 |
1055 /* Split STRING into a list of substrings. The substrings are the | 1055 /* Split STRING into a list of substrings. The substrings are the |
1056 parts of original STRING separated by SEPCHAR. */ | 1056 parts of original STRING separated by SEPCHAR. |
1057 | |
1058 If UNESCAPE is non-zero, ESCAPECHAR specifies a character that will quote | |
1059 SEPCHAR, and cause it not to split STRING. A double ESCAPECHAR is | |
1060 necessary for ESCAPECHAR to appear once in a substring. */ | |
1061 | |
1057 static Lisp_Object | 1062 static Lisp_Object |
1058 split_string_by_ichar_1 (const Ibyte *string, Bytecount size, | 1063 split_string_by_ichar_1 (const Ibyte *string, Bytecount size, |
1059 Ichar sepchar) | 1064 Ichar sepchar, int unescape, Ichar escapechar) |
1060 { | 1065 { |
1061 Lisp_Object result = Qnil; | 1066 Lisp_Object result = Qnil; |
1062 const Ibyte *end = string + size; | 1067 const Ibyte *end = string + size; |
1063 | 1068 |
1064 while (1) | 1069 if (unescape) |
1065 { | 1070 { |
1066 const Ibyte *p = string; | 1071 Ibyte unescape_buffer[64], *unescape_buffer_ptr = unescape_buffer, |
1067 while (p < end) | 1072 escaped[MAX_ICHAR_LEN], *unescape_cursor; |
1068 { | 1073 int deleting_escapes, previous_escaped, escaped_len; |
1069 if (itext_ichar (p) == sepchar) | 1074 Ichar pchar, unescape_buffer_size = countof (unescape_buffer); |
1070 break; | 1075 |
1071 INC_IBYTEPTR (p); | 1076 escaped_len = set_itext_ichar (escaped, escapechar); |
1072 } | 1077 |
1073 result = Fcons (make_string (string, p - string), result); | 1078 while (1) |
1074 if (p < end) | 1079 { |
1075 { | 1080 const Ibyte *p = string, *cursor; |
1076 string = p; | 1081 deleting_escapes = 0; |
1077 INC_IBYTEPTR (string); /* skip sepchar */ | 1082 previous_escaped = 0; |
1078 } | 1083 |
1079 else | 1084 while (p < end) |
1080 break; | 1085 { |
1086 pchar = itext_ichar (p); | |
1087 | |
1088 if (pchar == sepchar) | |
1089 { | |
1090 if (!previous_escaped) | |
1091 { | |
1092 break; | |
1093 } | |
1094 } | |
1095 else if (pchar == escapechar | |
1096 /* Doubled escapes don't escape: */ | |
1097 && !previous_escaped) | |
1098 { | |
1099 ++deleting_escapes; | |
1100 previous_escaped = 1; | |
1101 } | |
1102 else | |
1103 { | |
1104 previous_escaped = 0; | |
1105 } | |
1106 | |
1107 INC_IBYTEPTR (p); | |
1108 } | |
1109 | |
1110 if (deleting_escapes) | |
1111 { | |
1112 if (((p - string) - (escaped_len * deleting_escapes)) | |
1113 > unescape_buffer_size) | |
1114 { | |
1115 unescape_buffer_size = | |
1116 ((p - string) - (escaped_len * deleting_escapes)) * 1.5; | |
1117 unescape_buffer_ptr = alloca_ibytes (unescape_buffer_size); | |
1118 } | |
1119 | |
1120 cursor = string; | |
1121 unescape_cursor = unescape_buffer_ptr; | |
1122 previous_escaped = 0; | |
1123 | |
1124 while (cursor < p) | |
1125 { | |
1126 pchar = itext_ichar (cursor); | |
1127 | |
1128 if (pchar != escapechar || previous_escaped) | |
1129 { | |
1130 memcpy (unescape_cursor, cursor, | |
1131 itext_ichar_len (cursor)); | |
1132 INC_IBYTEPTR (unescape_cursor); | |
1133 } | |
1134 | |
1135 previous_escaped = !previous_escaped | |
1136 && (pchar == escapechar); | |
1137 | |
1138 INC_IBYTEPTR (cursor); | |
1139 } | |
1140 | |
1141 result = Fcons (make_string (unescape_buffer_ptr, | |
1142 unescape_cursor | |
1143 - unescape_buffer_ptr), | |
1144 result); | |
1145 } | |
1146 else | |
1147 { | |
1148 result = Fcons (make_string (string, p - string), result); | |
1149 } | |
1150 if (p < end) | |
1151 { | |
1152 string = p; | |
1153 INC_IBYTEPTR (string); /* skip sepchar */ | |
1154 } | |
1155 else | |
1156 break; | |
1157 } | |
1158 } | |
1159 else | |
1160 { | |
1161 while (1) | |
1162 { | |
1163 const Ibyte *p = string; | |
1164 while (p < end) | |
1165 { | |
1166 if (itext_ichar (p) == sepchar) | |
1167 break; | |
1168 INC_IBYTEPTR (p); | |
1169 } | |
1170 result = Fcons (make_string (string, p - string), result); | |
1171 if (p < end) | |
1172 { | |
1173 string = p; | |
1174 INC_IBYTEPTR (string); /* skip sepchar */ | |
1175 } | |
1176 else | |
1177 break; | |
1178 } | |
1081 } | 1179 } |
1082 return Fnreverse (result); | 1180 return Fnreverse (result); |
1083 } | 1181 } |
1084 | 1182 |
1085 /* The same as the above, except PATH is an external C string (it is | 1183 /* The same as the above, except PATH is an external C string (it is |
1100 depend on split_external_path("") returning nil instead of an empty | 1198 depend on split_external_path("") returning nil instead of an empty |
1101 string? */ | 1199 string? */ |
1102 if (!newlen) | 1200 if (!newlen) |
1103 return Qnil; | 1201 return Qnil; |
1104 | 1202 |
1105 return split_string_by_ichar_1 (newpath, newlen, SEPCHAR); | 1203 return split_string_by_ichar_1 (newpath, newlen, SEPCHAR, 0, 0); |
1106 } | 1204 } |
1107 | 1205 |
1108 Lisp_Object | 1206 Lisp_Object |
1109 split_env_path (const CIbyte *evarname, const Ibyte *default_) | 1207 split_env_path (const CIbyte *evarname, const Ibyte *default_) |
1110 { | 1208 { |
1113 path = egetenv (evarname); | 1211 path = egetenv (evarname); |
1114 if (!path) | 1212 if (!path) |
1115 path = default_; | 1213 path = default_; |
1116 if (!path) | 1214 if (!path) |
1117 return Qnil; | 1215 return Qnil; |
1118 return split_string_by_ichar_1 (path, qxestrlen (path), SEPCHAR); | 1216 return split_string_by_ichar_1 (path, qxestrlen (path), SEPCHAR, 0, 0); |
1119 } | 1217 } |
1120 | 1218 |
1121 /* Ben thinks this function should not exist or be exported to Lisp. | 1219 /* Ben thinks this function should not exist or be exported to Lisp. |
1122 We use it to define split-path-string in subr.el (not!). */ | 1220 We use it to define split-path-string in subr.el (not!). */ |
1123 | 1221 |
1124 DEFUN ("split-string-by-char", Fsplit_string_by_char, 2, 2, 0, /* | 1222 DEFUN ("split-string-by-char", Fsplit_string_by_char, 2, 3, 0, /* |
1125 Split STRING into a list of substrings originally separated by SEPCHAR. | 1223 Split STRING into a list of substrings originally separated by SEPCHAR. |
1126 */ | 1224 |
1127 (string, sepchar)) | 1225 With optional ESCAPE-CHAR, any instances of SEPCHAR preceded by that |
1128 { | 1226 character will not split the string, and a double instance of ESCAPE-CHAR |
1227 will be necessary for a single ESCAPE-CHAR to appear in the output string. | |
1228 */ | |
1229 (string, sepchar, escape_char)) | |
1230 { | |
1231 Ichar escape_ichar = 0; | |
1232 | |
1129 CHECK_STRING (string); | 1233 CHECK_STRING (string); |
1130 CHECK_CHAR (sepchar); | 1234 CHECK_CHAR (sepchar); |
1235 if (!NILP (escape_char)) | |
1236 { | |
1237 CHECK_CHAR (escape_char); | |
1238 escape_ichar = XCHAR (escape_char); | |
1239 } | |
1131 return split_string_by_ichar_1 (XSTRING_DATA (string), | 1240 return split_string_by_ichar_1 (XSTRING_DATA (string), |
1132 XSTRING_LENGTH (string), | 1241 XSTRING_LENGTH (string), |
1133 XCHAR (sepchar)); | 1242 XCHAR (sepchar), |
1243 !NILP (escape_char), escape_ichar); | |
1134 } | 1244 } |
1135 | 1245 |
1136 /* #### This was supposed to be in subr.el, but is used VERY early in | 1246 /* #### This was supposed to be in subr.el, but is used VERY early in |
1137 the bootstrap process, so it goes here. Damn. */ | 1247 the bootstrap process, so it goes here. Damn. */ |
1138 | 1248 |
1152 "`path-separator' should be set to a single-character string", | 1262 "`path-separator' should be set to a single-character string", |
1153 Vpath_separator); | 1263 Vpath_separator); |
1154 | 1264 |
1155 return (split_string_by_ichar_1 | 1265 return (split_string_by_ichar_1 |
1156 (XSTRING_DATA (path), XSTRING_LENGTH (path), | 1266 (XSTRING_DATA (path), XSTRING_LENGTH (path), |
1157 itext_ichar (XSTRING_DATA (Vpath_separator)))); | 1267 itext_ichar (XSTRING_DATA (Vpath_separator)), 0, 0)); |
1158 } | 1268 } |
1159 | 1269 |
1160 | 1270 |
1161 DEFUN ("nthcdr", Fnthcdr, 2, 2, 0, /* | 1271 DEFUN ("nthcdr", Fnthcdr, 2, 2, 0, /* |
1162 Take cdr N times on LIST, and return the result. | 1272 Take cdr N times on LIST, and return the result. |