comparison src/text.c @ 5774:7a538e1a4676

Use skip_ascii() in no_conversion_convert() when encoding. src/ChangeLog addition: 2013-12-19 Aidan Kehoe <kehoea@parhasard.net> * text.c: * text.h: * text.h (skip_ascii): Move skip_ascii (), the very fast inline function from the bytecount-to-charcount code, to text.h, to allow the coding systems to use it too as needed. * file-coding.c (no_conversion_convert): Use skip_ascii() as appropriate here, halving the time taken to write large files in my tests (again, relevant to VM buffers, but not a panacea to our issues with them.)
author Aidan Kehoe <kehoea@parhasard.net>
date Thu, 19 Dec 2013 18:13:11 +0000
parents 56144c8593a8
children 0cb4f494a548
comparison
equal deleted inserted replaced
5773:94a6b8fbd56e 5774:7a538e1a4676
2202 2202
2203 /* Optimization. Do it. Live it. Love it. */ 2203 /* Optimization. Do it. Live it. Love it. */
2204 2204
2205 #ifdef MULE 2205 #ifdef MULE
2206 2206
2207 #ifdef EFFICIENT_INT_128_BIT
2208 # define STRIDE_TYPE INT_128_BIT
2209 # define HIGH_BIT_MASK \
2210 MAKE_128_BIT_UNSIGNED_CONSTANT (0x80808080808080808080808080808080)
2211 #elif defined (EFFICIENT_INT_64_BIT)
2212 # define STRIDE_TYPE INT_64_BIT
2213 # define HIGH_BIT_MASK MAKE_64_BIT_UNSIGNED_CONSTANT (0x8080808080808080)
2214 #else
2215 # define STRIDE_TYPE INT_32_BIT
2216 # define HIGH_BIT_MASK MAKE_32_BIT_UNSIGNED_CONSTANT (0x80808080)
2217 #endif
2218 /* STRIDE_TYPE (chosen above: 128-bit if EFFICIENT_INT_128_BIT, else 64-bit if EFFICIENT_INT_64_BIT, else 32-bit) is the word type examined per step. HIGH_BIT_MASK has 0x80 in every byte of that word, so (word & HIGH_BIT_MASK) is nonzero exactly when some byte has its top bit set. */
2219 #define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1)) /* Address bits that ALIGNED requires to be zero; presumably ALIGNOF yields a power of two -- confirm. */
2220 #define ALIGN_MASK (~ ALIGN_BITS) /* Bitwise complement of ALIGN_BITS. */
2221 #define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0) /* True when PTR's address has none of ALIGN_BITS set. */
2222 #define STRIDE sizeof (STRIDE_TYPE) /* Number of bytes covered by one STRIDE_TYPE access. */
2223
2224 /* Skip as many ASCII bytes as possible in the memory block [PTR, END).
2225 Return a pointer to the first non-ASCII byte, or END if no such byte is found. Optimized for long
2226 stretches of ASCII. */
2227 inline static const Ibyte *
2228 skip_ascii (const Ibyte *ptr, const Ibyte *end)
2229 {
2230 const unsigned STRIDE_TYPE *ascii_end;
2231
2232 /* Work in three sections: the bytes before the first STRIDE_TYPE-aligned address, whole aligned words,
2233 and the bytes left over after the last whole word. Section 1: step bytewise until PTR is aligned, returning early at END or at a non-ASCII byte. */
2234 while (!ALIGNED (ptr))
2235 {
2236 if (ptr == end || !byte_ascii_p (*ptr))
2237 return ptr;
2238 ptr++;
2239 }
2240 ascii_end = (const unsigned STRIDE_TYPE *) ptr;
2241 /* Section 2: examine STRIDE bytes per iteration. A word is skipped only when it lies entirely below END and has no HIGH_BIT_MASK bit set
2242 (assumes byte_ascii_p is equivalent to "top bit clear" -- confirm against its definition). */
2243 while ((const Ibyte *) ascii_end + STRIDE <= end
2244 && !(*ascii_end & HIGH_BIT_MASK))
2245 ascii_end++;
2246 ptr = (Ibyte *) ascii_end; /* Section 3: resume bytewise from the first word that was not skipped. */
2247 while (ptr < end && byte_ascii_p (*ptr))
2248 ptr++;
2249 return ptr;
2250 }
2251
2252 /* Skip as many ASCII bytes as possible in the memory block [END, PTR),
2253 going downwards. Return a pointer to the location just above the first
2254 non-ASCII byte encountered, or END if no such byte is found. Optimized for long stretches of ASCII. */
2255 inline static const Ibyte *
2256 skip_ascii_down (const Ibyte *ptr, const Ibyte *end)
2257 {
2258 const unsigned STRIDE_TYPE *ascii_end;
2259
2260 /* Work in three sections: the bytes above the highest STRIDE_TYPE-aligned address, whole aligned words,
2261 and the bytes left over below the last whole word. Section 1: step down bytewise until PTR is aligned, returning early at END or when the byte just below PTR is non-ASCII. */
2262 while (!ALIGNED (ptr))
2263 {
2264 if (ptr == end || !byte_ascii_p (*(ptr - 1)))
2265 return ptr;
2266 ptr--;
2267 }
2268 ascii_end = (const unsigned STRIDE_TYPE *) ptr - 1;
2269 /* Section 2: examine STRIDE bytes per iteration, moving down. A word is skipped only when it starts at or above END and has no HIGH_BIT_MASK bit set
2270 (assumes byte_ascii_p is equivalent to "top bit clear" -- confirm). The bound is tested first, so a word starting below END is never read. */
2271 while ((const Ibyte *) ascii_end >= end
2272 && !(*ascii_end & HIGH_BIT_MASK))
2273 ascii_end--;
2274 ptr = (Ibyte *) (ascii_end + 1); /* Section 3: resume bytewise from just above the first word that was not skipped. */
2275 while (ptr > end && byte_ascii_p (*(ptr - 1)))
2276 ptr--;
2277 return ptr;
2278 }
2279
2280 /* Function equivalents of bytecount_to_charcount/charcount_to_bytecount. 2207 /* Function equivalents of bytecount_to_charcount/charcount_to_bytecount.
2281 These work on strings of all sizes but are more efficient than a simple 2208 These work on strings of all sizes but are more efficient than a simple
2282 loop on large strings and probably less efficient on sufficiently small 2209 loop on large strings and probably less efficient on sufficiently small
2283 strings. */ 2210 strings. */
2284 2211