xemacs-beta: src/insdel.c comparison

comparison src/insdel.c @ 400:a86b2b5e0111 r21-2-30

Import from CVS: tag r21-2-30

author	cvs
date	Mon, 13 Aug 2007 11:14:34 +0200
parents	74fd4e045ea6
children	b8cc9ab3f761

comparison

equal deleted inserted replaced

-:376370fb5946
+:a86b2b5e0111
 bytecount_to_charcount (const Bufbyte *ptr, Bytecount len)
 {
 Charcount count = 0;
 const Bufbyte *end = ptr + len;
-#if (LONGBITS == 32 || LONGBITS == 64)
+#if SIZEOF_LONG == 8
+# define STRIDE_TYPE long
-# if (LONGBITS == 32)
+# define HIGH_BIT_MASK 0x8080808080808080UL
-#  define LONG_BYTES 4
+#elif SIZEOF_LONG_LONG == 8 && !(defined (i386) || defined (__i386__))
-#  define ALIGN_MASK 0xFFFFFFFCU
+# define STRIDE_TYPE long long
-#  define HIGH_BIT_MASK 0x80808080U
+# define HIGH_BIT_MASK 0x8080808080808080ULL
-# else
+#elif SIZEOF_LONG == 4
-#  define LONG_BYTES 8
+# define STRIDE_TYPE long
-#  define ALIGN_MASK 0xFFFFFFFFFFFFFFF8UL
+# define HIGH_BIT_MASK 0x80808080UL
-/* I had a dream, I was being overrun with early Intel processors ... */
+#else
-#  define HIGH_BIT_MASK 0x8080808080808080UL
+# error Add support for 128-bit systems here
-# endif
+#endif
-/* When we have a large number of bytes to scan, we can be trickier
+#define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1))
-and significantly faster by scanning them in chunks of the CPU word
+#define ALIGN_MASK (~ ALIGN_BITS)
-size (assuming that they're all ASCII -- we cut out as soon as
+#define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0)
-we find something non-ASCII). */
+#define STRIDE sizeof (STRIDE_TYPE)
-if (len >= 12)
-{
-/* Determine the section in the middle of the string that's
-	 amenable to this treatment.  Everything has to be aligned
-	 on CPU word boundaries. */
-const Bufbyte *aligned_ptr =
-	(const Bufbyte *) (((unsigned long) (ptr + LONG_BYTES - 1)) &
-			   ALIGN_MASK);
-const Bufbyte *aligned_end =
-	(const Bufbyte *) (((unsigned long) end) & ALIGN_MASK);
-/* Handle unaligned stuff at the beginning. */
-while (ptr < aligned_ptr)
-	{
-	  if (!BYTE_ASCII_P (*ptr))
-	    goto bail;
-	  count++, ptr++;
-	}
-/* Now do it. */
-while (ptr < aligned_end)
-	{
-	  if ((* (unsigned long *) ptr) & HIGH_BIT_MASK)
-	    goto bail;
-	  ptr += LONG_BYTES;
-	  count += LONG_BYTES;
-	}
-}
-#endif /* LONGBITS == 32 || LONGBITS == 64 */
-bail:
 while (ptr < end)
 {
-count++;
+if (BYTE_ASCII_P (*ptr))
-INC_CHARPTR (ptr);
+	{
-}
+	  /* optimize for long stretches of ASCII */
+	  if (! ALIGNED (ptr))
+	    ptr++, count++;
+	  else
+	    {
+	      const unsigned STRIDE_TYPE *ascii_end =
+		(const unsigned STRIDE_TYPE *) ptr;
+	      /* This loop screams, because we can typically
+		 detect ASCII characters 8 at a time. */
+	      while ((const Bufbyte *) ascii_end + STRIDE <= end
+		     && !(*ascii_end & HIGH_BIT_MASK))
+		ascii_end++;
+	      if ((Bufbyte *) ascii_end == ptr)
+		ptr++, count++;
+	      else
+		{
+		  count += (Bufbyte *) ascii_end - ptr;
+		  ptr = (Bufbyte *) ascii_end;
+		}
+	    }
+	}
+else
+	{
+	  /* optimize for successive characters from the same charset */
+	  Bufbyte leading_byte = *ptr;
+	  size_t bytes = REP_BYTES_BY_FIRST_BYTE (leading_byte);
+	  while ((ptr < end) && (*ptr == leading_byte))
+	    ptr += bytes, count++;
+	}
+}
 #ifdef ERROR_CHECK_BUFPOS
 /* Bomb out if the specified substring ends in the middle
 of a character.  Note that we might have already gotten
 a core dump above from an invalid reference, but at least
 we will get no farther than here. */

Mercurial > hg > xemacs-beta

comparison src/insdel.c @ 400:a86b2b5e0111 r21-2-30