comparison src/text.c @ 2421:ab71ad6ff3dd

[xemacs-hg @ 2004-12-06 03:50:53 by ben] (none)

README.packages: Document use of --package-prefix. Fix error in specifying standard package location.

make-docfile.c: Use QXE_PATH_MAX.

info.el: Correct doc string giving example package path.

menubar-items.el: Move Prefix Rectangle command up one level.

xemacs/packages.texi: Add long form of Lisp Reference Manual to links. Add links pointing to Lisp Reference Manual for more detailed package discussion.

lispref/range-tables.texi: Document range-table changes.

internals/internals.texi: Update history section.

elhash.c, elhash.h, profile.c: Create inchash_eq() to allow direct incrementing of hash-table entry. Use in profile.c to try to reduce profiling overhead. Increase initial size of profile hash tables to reduce profiling overhead.

buffer.c, device-msw.c, dialog-msw.c, dired-msw.c, editfns.c, event-msw.c, events.c, glyphs-msw.c, keymap.c, objects-msw.c, process-nt.c, syswindows.h, text.c, text.h, unexnt.c: Rename xetcs* -> qxetcs* for consistency with qxestr*. Rename ei*_c(_*) -> ei*_ascii(_*) since they work with ASCII-only strings not "C strings", whatever those are. This is the last place where "c" was incorrectly being used for "ascii".

dialog-msw.c, dumper.c, event-msw.c, fileio.c, glyphs-gtk.c, glyphs-x.c, nt.c, process-nt.c, realpath.c, sysdep.c, sysfile.h, unexcw.c, unexnext.c, unexnt.c: Try to avoid differences in systems that do or do not include final null byte in PATH_MAX. Create PATH_MAX_INTERNAL and PATH_MAX_EXTERNAL and use them everywhere. Rewrite code in dumper.c to avoid use of PATH_MAX. When necessary in nt.c, use _MAX_PATH instead of MAX_PATH to be consistent with other places.

text.c: Code to short-circuit when binary or Unicode was not working due to EOL wrapping. Fix this code to work when either no EOL autodetection or no CR's or LF's in the text.

lisp.h, rangetab.c, rangetab.h, regex.c, search.c: Implement different types of ranges (open/closed start and end). Change default to be start-closed, end-open.
author ben
date Mon, 06 Dec 2004 03:52:23 +0000
parents 6b957313bd8e
children 3d8143fc88e1
comparison
equal deleted inserted replaced
2420:ad56e5a6d09f 2421:ab71ad6ff3dd
2134 } 2134 }
2135 2135
2136 int 2136 int
2137 eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff, 2137 eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff,
2138 Bytecount len, Charcount charlen, const Ibyte *data, 2138 Bytecount len, Charcount charlen, const Ibyte *data,
2139 const Eistring *ei2, int is_c, int fold_case) 2139 const Eistring *ei2, int is_ascii, int fold_case)
2140 { 2140 {
2141 assert ((off < 0) != (charoff < 0)); 2141 assert ((off < 0) != (charoff < 0));
2142 if (off < 0) 2142 if (off < 0)
2143 { 2143 {
2144 off = charcount_to_bytecount (ei->data_, charoff); 2144 off = charcount_to_bytecount (ei->data_, charoff);
2151 len = ei->bytelen_ - off; 2151 len = ei->bytelen_ - off;
2152 2152
2153 assert (off >= 0 && off <= ei->bytelen_); 2153 assert (off >= 0 && off <= ei->bytelen_);
2154 assert (len >= 0 && off + len <= ei->bytelen_); 2154 assert (len >= 0 && off + len <= ei->bytelen_);
2155 assert ((data == 0) != (ei == 0)); 2155 assert ((data == 0) != (ei == 0));
2156 assert ((is_c != 0) == (data != 0)); 2156 assert ((is_ascii != 0) == (data != 0));
2157 assert (fold_case >= 0 && fold_case <= 2); 2157 assert (fold_case >= 0 && fold_case <= 2);
2158 2158
2159 { 2159 {
2160 Bytecount dstlen; 2160 Bytecount dstlen;
2161 const Ibyte *src = ei->data_, *dst; 2161 const Ibyte *src = ei->data_, *dst;
2169 { 2169 {
2170 dst = ei2->data_; 2170 dst = ei2->data_;
2171 dstlen = ei2->bytelen_; 2171 dstlen = ei2->bytelen_;
2172 } 2172 }
2173 2173
2174 if (is_c) 2174 if (is_ascii)
2175 ASSERT_ASCTEXT_ASCII_LEN ((Ascbyte *) dst, dstlen); 2175 ASSERT_ASCTEXT_ASCII_LEN ((Ascbyte *) dst, dstlen);
2176 2176
2177 return (fold_case == 0 ? qxememcmp4 (src, len, dst, dstlen) : 2177 return (fold_case == 0 ? qxememcmp4 (src, len, dst, dstlen) :
2178 fold_case == 1 ? qxememcasecmp4 (src, len, dst, dstlen) : 2178 fold_case == 1 ? qxememcasecmp4 (src, len, dst, dstlen) :
2179 qxetextcasecmp (src, len, dst, dstlen)); 2179 qxetextcasecmp (src, len, dst, dstlen));
4251 /* It's guaranteed that many callers are not prepared for GC here, 4251 /* It's guaranteed that many callers are not prepared for GC here,
4252 esp. given that this code conversion occurs in many very hidden 4252 esp. given that this code conversion occurs in many very hidden
4253 places. */ 4253 places. */
4254 int count; 4254 int count;
4255 Ibyte_dynarr *conversion_in_dynarr; 4255 Ibyte_dynarr *conversion_in_dynarr;
4256 Lisp_Object underlying_cs;
4256 PROFILE_DECLARE (); 4257 PROFILE_DECLARE ();
4257 4258
4258 assert (!inhibit_non_essential_conversion_operations); 4259 assert (!inhibit_non_essential_conversion_operations);
4259 PROFILE_RECORD_ENTERING_SECTION (QSin_internal_external_conversion); 4260 PROFILE_RECORD_ENTERING_SECTION (QSin_internal_external_conversion);
4260 4261
4275 Dynarr_reset (conversion_in_dynarr); 4276 Dynarr_reset (conversion_in_dynarr);
4276 4277
4277 internal_bind_int (&dfc_convert_to_internal_format_in_use, 4278 internal_bind_int (&dfc_convert_to_internal_format_in_use,
4278 dfc_convert_to_internal_format_in_use + 1); 4279 dfc_convert_to_internal_format_in_use + 1);
4279 4280
4280 coding_system = get_coding_system_for_text_file (coding_system, 1); 4281 /* The second call does the equivalent of both calls, but we need
4282 the result after the first call (which wraps just a to-text
4283 converter) as well as the result after the second call (which
4284 also wraps an EOL-detection converter). */
4285 underlying_cs = get_coding_system_for_text_file (coding_system, 0);
4286 coding_system = get_coding_system_for_text_file (underlying_cs, 1);
4281 4287
4282 if (source_type != DFC_TYPE_LISP_LSTREAM && 4288 if (source_type != DFC_TYPE_LISP_LSTREAM &&
4283 sink_type != DFC_TYPE_LISP_LSTREAM && 4289 sink_type != DFC_TYPE_LISP_LSTREAM &&
4284 coding_system_is_binary (coding_system)) 4290 coding_system_is_binary (underlying_cs))
4285 { 4291 {
4286 #ifdef MULE 4292 #ifdef MULE
4287 const Ibyte *ptr = (const Ibyte *) source->data.ptr; 4293 const Ibyte *ptr;
4288 Bytecount len = source->data.len; 4294 Bytecount len = source->data.len;
4289 const Ibyte *end = ptr + len; 4295 const Ibyte *end;
4290 4296
4291 for (; ptr < end; ptr++) 4297 /* Make sure no EOL conversion is needed. With a little work we
4298 could handle EOL conversion as well but it may not be needed as an
4299 optimization. */
4300 if (!EQ (coding_system, underlying_cs))
4301 {
4302 for (ptr = (const Ibyte *) source->data.ptr, end = ptr + len;
4303 ptr < end; ptr++)
4304 {
4305 if (*ptr == '\r' || *ptr == '\n')
4306 goto the_hard_way;
4307 }
4308 }
4309
4310 for (ptr = (const Ibyte *) source->data.ptr, end = ptr + len;
4311 ptr < end; ptr++)
4292 { 4312 {
4293 Ibyte c = *ptr; 4313 Ibyte c = *ptr;
4294 4314
4295 if (byte_ascii_p (c)) 4315 if (byte_ascii_p (c))
4296 Dynarr_add (conversion_in_dynarr, c); 4316 Dynarr_add (conversion_in_dynarr, c);
4312 #ifdef WIN32_ANY 4332 #ifdef WIN32_ANY
4313 /* Optimize the common case involving Unicode where only ASCII/Latin-1 is 4333 /* Optimize the common case involving Unicode where only ASCII/Latin-1 is
4314 involved */ 4334 involved */
4315 else if (source_type != DFC_TYPE_LISP_LSTREAM && 4335 else if (source_type != DFC_TYPE_LISP_LSTREAM &&
4316 sink_type != DFC_TYPE_LISP_LSTREAM && 4336 sink_type != DFC_TYPE_LISP_LSTREAM &&
4317 dfc_coding_system_is_unicode (coding_system)) 4337 dfc_coding_system_is_unicode (underlying_cs))
4318 { 4338 {
4319 const Ibyte *ptr = (const Ibyte *) source->data.ptr + 1; 4339 const Ibyte *ptr;
4320 Bytecount len = source->data.len; 4340 Bytecount len = source->data.len;
4321 const Ibyte *end = ptr + len; 4341 const Ibyte *end;
4322 4342
4323 if (len & 1) 4343 if (len & 1)
4324 goto the_hard_way; 4344 goto the_hard_way;
4325 4345
4326 for (; ptr < end; ptr += 2) 4346 /* Make sure only ASCII/Latin-1 is involved */
4347 for (ptr = (const Ibyte *) source->data.ptr + 1, end = ptr + len;
4348 ptr < end; ptr += 2)
4327 { 4349 {
4328 if (*ptr) 4350 if (*ptr)
4329 goto the_hard_way; 4351 goto the_hard_way;
4330 } 4352 }
4331 4353
4332 ptr = (const Ibyte *) source->data.ptr; 4354 /* Make sure no EOL conversion is needed. With a little work we
4333 end = ptr + len; 4355 could handle EOL conversion as well but it may not be needed as an
4334 4356 optimization. */
4335 for (; ptr < end; ptr += 2) 4357 if (!EQ (coding_system, underlying_cs))
4358 {
4359 for (ptr = (const Ibyte *) source->data.ptr, end = ptr + len;
4360 ptr < end; ptr += 2)
4361 {
4362 if (*ptr == '\r' || *ptr == '\n')
4363 goto the_hard_way;
4364 }
4365 }
4366
4367 for (ptr = (const Ibyte *) source->data.ptr, end = ptr + len;
4368 ptr < end; ptr += 2)
4336 { 4369 {
4337 Ibyte c = *ptr; 4370 Ibyte c = *ptr;
4338 4371
4339 if (byte_ascii_p (c)) 4372 if (byte_ascii_p (c))
4340 Dynarr_add (conversion_in_dynarr, c); 4373 Dynarr_add (conversion_in_dynarr, c);
4358 Lisp_Object streams_to_delete[3]; 4391 Lisp_Object streams_to_delete[3];
4359 int delete_count; 4392 int delete_count;
4360 Lisp_Object instream, outstream; 4393 Lisp_Object instream, outstream;
4361 Lstream *reader, *writer; 4394 Lstream *reader, *writer;
4362 4395
4363 #ifdef WIN32_ANY 4396 #if defined (WIN32_ANY) || defined (MULE)
4364 the_hard_way: 4397 the_hard_way:
4365 #endif /* WIN32_ANY */ 4398 #endif
4366 delete_count = 0; 4399 delete_count = 0;
4367 if (source_type == DFC_TYPE_LISP_LSTREAM) 4400 if (source_type == DFC_TYPE_LISP_LSTREAM)
4368 instream = source->lisp_object; 4401 instream = source->lisp_object;
4369 else 4402 else
4370 { 4403 {