changeset 5774:7a538e1a4676

Use skip_ascii() in no_conversion_convert() when encoding. src/ChangeLog addition: 2013-12-19 Aidan Kehoe <kehoea@parhasard.net> * text.c: * text.h: * text.h (skip_ascii): Move skip_ascii (), the very fast inline function from the bytecount-to-charcount code, to text.h, to allow the coding systems to use it too as needed. * file-coding.c (no_conversion_convert): Use skip_ascii() as appropriate here, halving the time taken to write large files in my tests (again, relevant to VM buffers, but not a panacea for our issues with them).
author Aidan Kehoe <kehoea@parhasard.net>
date Thu, 19 Dec 2013 18:13:11 +0000
parents 94a6b8fbd56e
children 4004c3266c09
files src/ChangeLog src/file-coding.c src/text.c src/text.h
diffstat 4 files changed, 111 insertions(+), 81 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Tue Dec 17 20:49:52 2013 +0200
+++ b/src/ChangeLog	Thu Dec 19 18:13:11 2013 +0000
@@ -1,3 +1,16 @@
+2013-12-19  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* text.c:
+	* text.h:
+	* text.h (skip_ascii):
+	Move skip_ascii (), the very fast inline function from the
+	bytecount-to-charcount code, to text.h, to allow the coding
+	systems to use it too as needed.
+	* file-coding.c (no_conversion_convert):
+	Use skip_ascii() as appropriate here, halving the time taken to
+	write large files in my tests (again, relevant to VM buffers, but
+	not a panacea for our issues with them).
+
 2013-12-17  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* lisp.h:
--- a/src/file-coding.c	Tue Dec 17 20:49:52 2013 +0200
+++ b/src/file-coding.c	Thu Dec 19 18:13:11 2013 +0000
@@ -2851,18 +2851,29 @@
     }
   else
     {
-
-      while (n--)
+      const Ibyte *bend = (const Ibyte *)src + n;
+
+      while (n > 0)
 	{
-	  c = *src++;
-	  if (byte_ascii_p (c))
+	  if (byte_ascii_p (*src))
 	    {
-	      assert (ch == 0);
-	      Dynarr_add (dst, c);
+              const Ibyte *nonascii = skip_ascii ((Ibyte *)src, bend);
+
+              Dynarr_add_many (dst, src, nonascii - src);
+              n -= nonascii - src;
+
+              src = nonascii;
+              if (n < 1)
+                {
+                  break;
+                }
 	    }
+
+	  n--, c = *src++;
+
 #ifdef MULE
-	  else if (ibyte_leading_byte_p (c))
-	    {
+	  if (ibyte_leading_byte_p (c))
+ 	    {
 	      assert (ch == 0);
 	      if (c == LEADING_BYTE_LATIN_ISO8859_1 ||
 		  c == LEADING_BYTE_CONTROL_1)
--- a/src/text.c	Tue Dec 17 20:49:52 2013 +0200
+++ b/src/text.c	Thu Dec 19 18:13:11 2013 +0000
@@ -2204,79 +2204,6 @@
 
 #ifdef MULE
 
-#ifdef EFFICIENT_INT_128_BIT
-# define STRIDE_TYPE INT_128_BIT
-# define HIGH_BIT_MASK \
-    MAKE_128_BIT_UNSIGNED_CONSTANT (0x80808080808080808080808080808080)
-#elif defined (EFFICIENT_INT_64_BIT)
-# define STRIDE_TYPE INT_64_BIT
-# define HIGH_BIT_MASK MAKE_64_BIT_UNSIGNED_CONSTANT (0x8080808080808080)
-#else
-# define STRIDE_TYPE INT_32_BIT
-# define HIGH_BIT_MASK MAKE_32_BIT_UNSIGNED_CONSTANT (0x80808080)
-#endif
-
-#define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1))
-#define ALIGN_MASK (~ ALIGN_BITS)
-#define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0)
-#define STRIDE sizeof (STRIDE_TYPE)
-
-/* Skip as many ASCII bytes as possible in the memory block [PTR, END).
-   Return pointer to the first non-ASCII byte.  optimized for long
-   stretches of ASCII. */
-inline static const Ibyte *
-skip_ascii (const Ibyte *ptr, const Ibyte *end)
-{
-  const unsigned STRIDE_TYPE *ascii_end;
-
-  /* Need to do in 3 sections -- before alignment start, aligned chunk,
-     after alignment end. */
-  while (!ALIGNED (ptr))
-    {
-      if (ptr == end || !byte_ascii_p (*ptr))
-	return ptr;
-      ptr++;
-    }
-  ascii_end = (const unsigned STRIDE_TYPE *) ptr;
-  /* This loop screams, because we can detect ASCII
-     characters 4 or 8 at a time. */
-  while ((const Ibyte *) ascii_end + STRIDE <= end
-	 && !(*ascii_end & HIGH_BIT_MASK))
-    ascii_end++;
-  ptr = (Ibyte *) ascii_end;
-  while (ptr < end && byte_ascii_p (*ptr))
-    ptr++;
-  return ptr;
-}
-
-/* Skip as many ASCII bytes as possible in the memory block [END, PTR),
-   going downwards.  Return pointer to the location above the first
-   non-ASCII byte.  Optimized for long stretches of ASCII. */
-inline static const Ibyte *
-skip_ascii_down (const Ibyte *ptr, const Ibyte *end)
-{
-  const unsigned STRIDE_TYPE *ascii_end;
-
-  /* Need to do in 3 sections -- before alignment start, aligned chunk,
-     after alignment end. */
-  while (!ALIGNED (ptr))
-    {
-      if (ptr == end || !byte_ascii_p (*(ptr - 1)))
-	return ptr;
-      ptr--;
-    }
-  ascii_end = (const unsigned STRIDE_TYPE *) ptr - 1;
-  /* This loop screams, because we can detect ASCII
-     characters 4 or 8 at a time. */
-  while ((const Ibyte *) ascii_end >= end
-	 && !(*ascii_end & HIGH_BIT_MASK))
-    ascii_end--;
-  ptr = (Ibyte *) (ascii_end + 1);
-  while (ptr > end && byte_ascii_p (*(ptr - 1)))
-    ptr--;
-  return ptr;
-}
-
 /* Function equivalents of bytecount_to_charcount/charcount_to_bytecount.
    These work on strings of all sizes but are more efficient than a simple
    loop on large strings and probably less efficient on sufficiently small
--- a/src/text.h	Tue Dec 17 20:49:52 2013 +0200
+++ b/src/text.h	Thu Dec 19 18:13:11 2013 +0000
@@ -831,12 +831,91 @@
     }
 }
 
+#ifdef EFFICIENT_INT_128_BIT
+# define STRIDE_TYPE INT_128_BIT
+# define HIGH_BIT_MASK \
+    MAKE_128_BIT_UNSIGNED_CONSTANT (0x80808080808080808080808080808080)
+#elif defined (EFFICIENT_INT_64_BIT)
+# define STRIDE_TYPE INT_64_BIT
+# define HIGH_BIT_MASK MAKE_64_BIT_UNSIGNED_CONSTANT (0x8080808080808080)
+#else
+# define STRIDE_TYPE INT_32_BIT
+# define HIGH_BIT_MASK MAKE_32_BIT_UNSIGNED_CONSTANT (0x80808080)
+#endif
+
+#define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1))
+#define ALIGN_MASK (~ ALIGN_BITS)
+#define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0)
+#define STRIDE sizeof (STRIDE_TYPE)
+
+/* Skip as many ASCII bytes as possible in the memory block [PTR, END).
+   Return pointer to the first non-ASCII byte, or END if there is none.
+   Optimized for long stretches of ASCII. */
+DECLARE_INLINE_HEADER (
+const Ibyte *
+skip_ascii (const Ibyte *ptr, const Ibyte *end)
+)
+{
+  const unsigned STRIDE_TYPE *ascii_end;
+
+  /* Need to do in 3 sections -- before alignment start, aligned chunk,
+     after alignment end. */
+  while (!ALIGNED (ptr))
+    {
+      if (ptr == end || !byte_ascii_p (*ptr))
+	return ptr;
+      ptr++;
+    }
+  ascii_end = (const unsigned STRIDE_TYPE *) ptr;
+  /* This loop screams, because we can detect ASCII
+     characters 4, 8 or 16 at a time. */
+  while ((const Ibyte *) ascii_end + STRIDE <= end
+	 && !(*ascii_end & HIGH_BIT_MASK))
+    ascii_end++;
+  ptr = (Ibyte *) ascii_end;
+  while (ptr < end && byte_ascii_p (*ptr))
+    ptr++;
+  return ptr;
+}
+
+/* Skip as many ASCII bytes as possible in the memory block [END, PTR),
+   going downwards.  Return pointer to the location above the first
+   non-ASCII byte.  Optimized for long stretches of ASCII. */
+DECLARE_INLINE_HEADER (
+const Ibyte *
+skip_ascii_down (const Ibyte *ptr, const Ibyte *end)
+)
+{
+  const unsigned STRIDE_TYPE *ascii_end;
+
+  /* Need to do in 3 sections -- before alignment start, aligned chunk,
+     after alignment end. */
+  while (!ALIGNED (ptr))
+    {
+      if (ptr == end || !byte_ascii_p (*(ptr - 1)))
+	return ptr;
+      ptr--;
+    }
+  ascii_end = (const unsigned STRIDE_TYPE *) ptr - 1;
+  /* This loop screams, because we can detect ASCII
+     characters 4, 8 or 16 at a time. */
+  while ((const Ibyte *) ascii_end >= end
+	 && !(*ascii_end & HIGH_BIT_MASK))
+    ascii_end--;
+  ptr = (Ibyte *) (ascii_end + 1);
+  while (ptr > end && byte_ascii_p (*(ptr - 1)))
+    ptr--;
+  return ptr;
+}
+
 #else
 
 #define bytecount_to_charcount(ptr, len) ((Charcount) (len))
 #define bytecount_to_charcount_fmt(ptr, len, fmt) ((Charcount) (len))
 #define charcount_to_bytecount(ptr, len) ((Bytecount) (len))
 #define charcount_to_bytecount_fmt(ptr, len, fmt) ((Bytecount) (len))
+#define skip_ascii(ptr, end) end
+#define skip_ascii_down(ptr, end) end
 
 #endif /* MULE */