Mercurial > hg > xemacs-beta
changeset 5774:7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
src/ChangeLog addition:
2013-12-19 Aidan Kehoe <kehoea@parhasard.net>
* text.c:
* text.h:
* text.h (skip_ascii):
Move skip_ascii (), the very fast inline function from the
bytecount-to-charcount code, to text.h, to allow the coding
systems to use it too as needed.
* file-coding.c (no_conversion_convert):
Use skip_ascii() as appropriate here, halving the time taken to
write large files in my tests (again, relevant to VM buffers, but
not a panacea for our issues with them).
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Thu, 19 Dec 2013 18:13:11 +0000 |
parents | 94a6b8fbd56e |
children | 4004c3266c09 |
files | src/ChangeLog src/file-coding.c src/text.c src/text.h |
diffstat | 4 files changed, 111 insertions(+), 81 deletions(-) [+] |
line wrap: on
line diff
--- a/src/ChangeLog Tue Dec 17 20:49:52 2013 +0200 +++ b/src/ChangeLog Thu Dec 19 18:13:11 2013 +0000 @@ -1,3 +1,16 @@ +2013-12-19 Aidan Kehoe <kehoea@parhasard.net> + + * text.c: + * text.h: + * text.h (skip_ascii): + Move skip_ascii (), the very fast inline function from the + bytecount-to-charcount code, to text.h, to allow the coding + systems to use it too as needed. + * file-coding.c (no_conversion_convert): + Use skip_ascii() as appropriate here, halving the time taken to + write large files in my tests (again, relevant to VM buffers, but + not a panacea to our issues with them.) + 2013-12-17 Aidan Kehoe <kehoea@parhasard.net> * lisp.h:
--- a/src/file-coding.c Tue Dec 17 20:49:52 2013 +0200 +++ b/src/file-coding.c Thu Dec 19 18:13:11 2013 +0000 @@ -2851,18 +2851,29 @@ } else { - - while (n--) + const Ibyte *bend = (const Ibyte *)src + n; + + while (n > 0) { - c = *src++; - if (byte_ascii_p (c)) + if (byte_ascii_p (*src)) { - assert (ch == 0); - Dynarr_add (dst, c); + const Ibyte *nonascii = skip_ascii ((Ibyte *)src, bend); + + Dynarr_add_many (dst, src, nonascii - src); + n -= nonascii - src; + + src = nonascii; + if (n < 1) + { + break; + } } + + n--, c = *src++; + #ifdef MULE - else if (ibyte_leading_byte_p (c)) - { + if (ibyte_leading_byte_p (c)) + { assert (ch == 0); if (c == LEADING_BYTE_LATIN_ISO8859_1 || c == LEADING_BYTE_CONTROL_1)
--- a/src/text.c Tue Dec 17 20:49:52 2013 +0200 +++ b/src/text.c Thu Dec 19 18:13:11 2013 +0000 @@ -2204,79 +2204,6 @@ #ifdef MULE -#ifdef EFFICIENT_INT_128_BIT -# define STRIDE_TYPE INT_128_BIT -# define HIGH_BIT_MASK \ - MAKE_128_BIT_UNSIGNED_CONSTANT (0x80808080808080808080808080808080) -#elif defined (EFFICIENT_INT_64_BIT) -# define STRIDE_TYPE INT_64_BIT -# define HIGH_BIT_MASK MAKE_64_BIT_UNSIGNED_CONSTANT (0x8080808080808080) -#else -# define STRIDE_TYPE INT_32_BIT -# define HIGH_BIT_MASK MAKE_32_BIT_UNSIGNED_CONSTANT (0x80808080) -#endif - -#define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1)) -#define ALIGN_MASK (~ ALIGN_BITS) -#define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0) -#define STRIDE sizeof (STRIDE_TYPE) - -/* Skip as many ASCII bytes as possible in the memory block [PTR, END). - Return pointer to the first non-ASCII byte. optimized for long - stretches of ASCII. */ -inline static const Ibyte * -skip_ascii (const Ibyte *ptr, const Ibyte *end) -{ - const unsigned STRIDE_TYPE *ascii_end; - - /* Need to do in 3 sections -- before alignment start, aligned chunk, - after alignment end. */ - while (!ALIGNED (ptr)) - { - if (ptr == end || !byte_ascii_p (*ptr)) - return ptr; - ptr++; - } - ascii_end = (const unsigned STRIDE_TYPE *) ptr; - /* This loop screams, because we can detect ASCII - characters 4 or 8 at a time. */ - while ((const Ibyte *) ascii_end + STRIDE <= end - && !(*ascii_end & HIGH_BIT_MASK)) - ascii_end++; - ptr = (Ibyte *) ascii_end; - while (ptr < end && byte_ascii_p (*ptr)) - ptr++; - return ptr; -} - -/* Skip as many ASCII bytes as possible in the memory block [END, PTR), - going downwards. Return pointer to the location above the first - non-ASCII byte. Optimized for long stretches of ASCII. */ -inline static const Ibyte * -skip_ascii_down (const Ibyte *ptr, const Ibyte *end) -{ - const unsigned STRIDE_TYPE *ascii_end; - - /* Need to do in 3 sections -- before alignment start, aligned chunk, - after alignment end. 
*/ - while (!ALIGNED (ptr)) - { - if (ptr == end || !byte_ascii_p (*(ptr - 1))) - return ptr; - ptr--; - } - ascii_end = (const unsigned STRIDE_TYPE *) ptr - 1; - /* This loop screams, because we can detect ASCII - characters 4 or 8 at a time. */ - while ((const Ibyte *) ascii_end >= end - && !(*ascii_end & HIGH_BIT_MASK)) - ascii_end--; - ptr = (Ibyte *) (ascii_end + 1); - while (ptr > end && byte_ascii_p (*(ptr - 1))) - ptr--; - return ptr; -} - /* Function equivalents of bytecount_to_charcount/charcount_to_bytecount. These work on strings of all sizes but are more efficient than a simple loop on large strings and probably less efficient on sufficiently small
--- a/src/text.h Tue Dec 17 20:49:52 2013 +0200 +++ b/src/text.h Thu Dec 19 18:13:11 2013 +0000 @@ -831,12 +831,91 @@ } } +#ifdef EFFICIENT_INT_128_BIT +# define STRIDE_TYPE INT_128_BIT +# define HIGH_BIT_MASK \ + MAKE_128_BIT_UNSIGNED_CONSTANT (0x80808080808080808080808080808080) +#elif defined (EFFICIENT_INT_64_BIT) +# define STRIDE_TYPE INT_64_BIT +# define HIGH_BIT_MASK MAKE_64_BIT_UNSIGNED_CONSTANT (0x8080808080808080) +#else +# define STRIDE_TYPE INT_32_BIT +# define HIGH_BIT_MASK MAKE_32_BIT_UNSIGNED_CONSTANT (0x80808080) +#endif + +#define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1)) +#define ALIGN_MASK (~ ALIGN_BITS) +#define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0) +#define STRIDE sizeof (STRIDE_TYPE) + +/* Skip as many ASCII bytes as possible in the memory block [PTR, END). + Return pointer to the first non-ASCII byte. optimized for long + stretches of ASCII. */ +DECLARE_INLINE_HEADER ( +const Ibyte * +skip_ascii (const Ibyte *ptr, const Ibyte *end) +) +{ + const unsigned STRIDE_TYPE *ascii_end; + + /* Need to do in 3 sections -- before alignment start, aligned chunk, + after alignment end. */ + while (!ALIGNED (ptr)) + { + if (ptr == end || !byte_ascii_p (*ptr)) + return ptr; + ptr++; + } + ascii_end = (const unsigned STRIDE_TYPE *) ptr; + /* This loop screams, because we can detect ASCII + characters 4 or 8 at a time. */ + while ((const Ibyte *) ascii_end + STRIDE <= end + && !(*ascii_end & HIGH_BIT_MASK)) + ascii_end++; + ptr = (Ibyte *) ascii_end; + while (ptr < end && byte_ascii_p (*ptr)) + ptr++; + return ptr; +} + +/* Skip as many ASCII bytes as possible in the memory block [END, PTR), + going downwards. Return pointer to the location above the first + non-ASCII byte. Optimized for long stretches of ASCII. 
*/ +DECLARE_INLINE_HEADER ( +const Ibyte * +skip_ascii_down (const Ibyte *ptr, const Ibyte *end) +) +{ + const unsigned STRIDE_TYPE *ascii_end; + + /* Need to do in 3 sections -- before alignment start, aligned chunk, + after alignment end. */ + while (!ALIGNED (ptr)) + { + if (ptr == end || !byte_ascii_p (*(ptr - 1))) + return ptr; + ptr--; + } + ascii_end = (const unsigned STRIDE_TYPE *) ptr - 1; + /* This loop screams, because we can detect ASCII + characters 4 or 8 at a time. */ + while ((const Ibyte *) ascii_end >= end + && !(*ascii_end & HIGH_BIT_MASK)) + ascii_end--; + ptr = (Ibyte *) (ascii_end + 1); + while (ptr > end && byte_ascii_p (*(ptr - 1))) + ptr--; + return ptr; +} + #else #define bytecount_to_charcount(ptr, len) ((Charcount) (len)) #define bytecount_to_charcount_fmt(ptr, len, fmt) ((Charcount) (len)) #define charcount_to_bytecount(ptr, len) ((Bytecount) (len)) #define charcount_to_bytecount_fmt(ptr, len, fmt) ((Bytecount) (len)) +#define skip_ascii(ptr, end) end +#define skip_ascii_down(ptr, end) end #endif /* MULE */