Mercurial > hg > xemacs-beta

diff src/text.c @ 2421:ab71ad6ff3dd
[xemacs-hg @ 2004-12-06 03:50:53 by ben] (none) README.packages: Document use of --package-prefix. Fix error in specifying standard package location. make-docfile.c: Use QXE_PATH_MAX. info.el: Correct doc string giving example package path. menubar-items.el: Move Prefix Rectangle command up one level. xemacs/packages.texi: Add long form of Lisp Reference Manual to links. Add links pointing to Lisp Reference Manual for more detailed package discussion. lispref/range-tables.texi: Document range-table changes. internals/internals.texi: Update history section. elhash.c, elhash.h, profile.c: Create inchash_eq() to allow direct incrementing of hash-table entry. Use in profile.c to try to reduce profiling overhead. Increase initial size of profile hash tables to reduce profiling overhead. buffer.c, device-msw.c, dialog-msw.c, dired-msw.c, editfns.c, event-msw.c, events.c, glyphs-msw.c, keymap.c, objects-msw.c, process-nt.c, syswindows.h, text.c, text.h, unexnt.c: Rename xetcs* -> qxetcs* for consistency with qxestr*. Rename ei*_c(_*) -> ei*_ascii(_*) since they work with ASCII-only strings not "C strings", whatever those are. This is the last place where "c" was incorrectly being used for "ascii". dialog-msw.c, dumper.c, event-msw.c, fileio.c, glyphs-gtk.c, glyphs-x.c, nt.c, process-nt.c, realpath.c, sysdep.c, sysfile.h, unexcw.c, unexnext.c, unexnt.c: Try to avoid differences in systems that do or do not include final null byte in PATH_MAX. Create PATH_MAX_INTERNAL and PATH_MAX_EXTERNAL and use them everywhere. Rewrite code in dumper.c to avoid use of PATH_MAX. When necessary in nt.c, use _MAX_PATH instead of MAX_PATH to be consistent with other places. text.c: Code to short-circuit when binary or Unicode was not working due to EOL wrapping. Fix this code to work when either no EOL autodetection or no CR's or LF's in the text. lisp.h, rangetab.c, rangetab.h, regex.c, search.c: Implement different types of ranges (open/closed start and end). Change default to be start-closed, end-open.
author: ben
date: Mon, 06 Dec 2004 03:52:23 +0000
parents: 6b957313bd8e
children: 3d8143fc88e1
--- a/src/text.c	Mon Dec 06 03:46:07 2004 +0000
+++ b/src/text.c	Mon Dec 06 03:52:23 2004 +0000
@@ -2136,7 +2136,7 @@
 int
 eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff,
 	 Bytecount len, Charcount charlen, const Ibyte *data,
-	 const Eistring *ei2, int is_c, int fold_case)
+	 const Eistring *ei2, int is_ascii, int fold_case)
 {
   assert ((off < 0) != (charoff < 0));
   if (off < 0)
@@ -2153,7 +2153,7 @@
   assert (off >= 0 && off <= ei->bytelen_);
   assert (len >= 0 && off + len <= ei->bytelen_);
   assert ((data == 0) != (ei == 0)); 
-  assert ((is_c != 0) == (data != 0));
+  assert ((is_ascii != 0) == (data != 0));
   assert (fold_case >= 0 && fold_case <= 2);
 
   {
@@ -2171,7 +2171,7 @@
 	dstlen = ei2->bytelen_;
       }
 
-    if (is_c)
+    if (is_ascii)
       ASSERT_ASCTEXT_ASCII_LEN ((Ascbyte *) dst, dstlen);
 
     return (fold_case == 0 ? qxememcmp4 (src, len, dst, dstlen) :
@@ -4253,6 +4253,7 @@
      places. */
   int count;
   Ibyte_dynarr *conversion_in_dynarr;
+  Lisp_Object underlying_cs;
   PROFILE_DECLARE ();
 
   assert (!inhibit_non_essential_conversion_operations);
@@ -4277,18 +4278,37 @@
   internal_bind_int (&dfc_convert_to_internal_format_in_use,
 		     dfc_convert_to_internal_format_in_use + 1);
 
-  coding_system = get_coding_system_for_text_file (coding_system, 1);
+  /* The second call does the equivalent of both calls, but we need
+     the result after the first call (which wraps just a to-text
+     converter) as well as the result after the second call (which
+     also wraps an EOL-detection converter). */
+  underlying_cs = get_coding_system_for_text_file (coding_system, 0);
+  coding_system = get_coding_system_for_text_file (underlying_cs, 1);
 
   if (source_type != DFC_TYPE_LISP_LSTREAM &&
       sink_type   != DFC_TYPE_LISP_LSTREAM &&
-      coding_system_is_binary (coding_system))
+      coding_system_is_binary (underlying_cs))
     {
 #ifdef MULE
-      const Ibyte *ptr = (const Ibyte *) source->data.ptr;
+      const Ibyte *ptr;
       Bytecount len = source->data.len;
-      const Ibyte *end = ptr + len;
-
-      for (; ptr < end; ptr++)
+      const Ibyte *end;
+
+      /* Make sure no EOL conversion is needed.  With a little work we
+	 could handle EOL conversion as well but it may not be needed as an
+	 optimization. */
+      if (!EQ (coding_system, underlying_cs))
+	{
+	  for (ptr = (const Ibyte *) source->data.ptr, end = ptr + len;
+	       ptr < end; ptr++)
+	    {
+	      if (*ptr == '\r' || *ptr == '\n')
+		goto the_hard_way;
+	    }
+	}
+
+      for (ptr = (const Ibyte *) source->data.ptr, end = ptr + len;
+	   ptr < end; ptr++)
         {
           Ibyte c = *ptr;
 
@@ -4314,25 +4334,38 @@
      involved */
   else if (source_type != DFC_TYPE_LISP_LSTREAM &&
 	   sink_type   != DFC_TYPE_LISP_LSTREAM &&
-	   dfc_coding_system_is_unicode (coding_system))
+	   dfc_coding_system_is_unicode (underlying_cs))
     {
-      const Ibyte *ptr = (const Ibyte *) source->data.ptr + 1;
+      const Ibyte *ptr;
       Bytecount len = source->data.len;
-      const Ibyte *end = ptr + len;
+      const Ibyte *end;
 
       if (len & 1)
 	goto the_hard_way;
 
-      for (; ptr < end; ptr += 2)
+      /* Make sure only ASCII/Latin-1 is involved */
+      for (ptr = (const Ibyte *) source->data.ptr + 1, end = ptr + len;
+	   ptr < end; ptr += 2)
 	{
 	  if (*ptr)
 	    goto the_hard_way;
 	}
 
-      ptr = (const Ibyte *) source->data.ptr;
-      end = ptr + len;
-
-      for (; ptr < end; ptr += 2)
+      /* Make sure no EOL conversion is needed.  With a little work we
+	 could handle EOL conversion as well but it may not be needed as an
+	 optimization. */
+      if (!EQ (coding_system, underlying_cs))
+	{
+	  for (ptr = (const Ibyte *) source->data.ptr, end = ptr + len;
+	       ptr < end; ptr += 2)
+	    {
+	      if (*ptr == '\r' || *ptr == '\n')
+		goto the_hard_way;
+	    }
+	}
+
+      for (ptr = (const Ibyte *) source->data.ptr, end = ptr + len;
+	   ptr < end; ptr += 2)
 	{
           Ibyte c = *ptr;
 
@@ -4360,9 +4393,9 @@
       Lisp_Object instream, outstream;
       Lstream *reader, *writer;
 
-#ifdef WIN32_ANY
+#if defined (WIN32_ANY) || defined (MULE)
     the_hard_way:
-#endif /* WIN32_ANY */
+#endif
       delete_count = 0;
       if (source_type == DFC_TYPE_LISP_LSTREAM)
 	instream = source->lisp_object;
author	ben
date	Mon, 06 Dec 2004 03:52:23 +0000
parents	6b957313bd8e
children	3d8143fc88e1