Mercurial > hg > xemacs-beta
comparison src/text.c @ 5774:7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
src/ChangeLog addition:
2013-12-19 Aidan Kehoe <kehoea@parhasard.net>
* text.c:
* text.h:
* text.h (skip_ascii):
Move skip_ascii (), the very fast inline function from the
bytecount-to-charcount code, to text.h, to allow the coding
systems to use it too as needed.
* file-coding.c (no_conversion_convert):
Use skip_ascii() as appropriate here, halving the time taken to
write large files in my tests (again, relevant to VM buffers, but
not a panacea to our issues with them.)
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Thu, 19 Dec 2013 18:13:11 +0000 |
parents | 56144c8593a8 |
children | 0cb4f494a548 |
Comparison legend:
- equal
- deleted
- inserted
- replaced
5773:94a6b8fbd56e | 5774:7a538e1a4676 |
---|---|
2202 | 2202 |
2203 /* Optimization. Do it. Live it. Love it. */ | 2203 /* Optimization. Do it. Live it. Love it. */ |
2204 | 2204 |
2205 #ifdef MULE | 2205 #ifdef MULE |
2206 | 2206 |
2207 #ifdef EFFICIENT_INT_128_BIT | |
2208 # define STRIDE_TYPE INT_128_BIT | |
2209 # define HIGH_BIT_MASK \ | |
2210 MAKE_128_BIT_UNSIGNED_CONSTANT (0x80808080808080808080808080808080) | |
2211 #elif defined (EFFICIENT_INT_64_BIT) | |
2212 # define STRIDE_TYPE INT_64_BIT | |
2213 # define HIGH_BIT_MASK MAKE_64_BIT_UNSIGNED_CONSTANT (0x8080808080808080) | |
2214 #else | |
2215 # define STRIDE_TYPE INT_32_BIT | |
2216 # define HIGH_BIT_MASK MAKE_32_BIT_UNSIGNED_CONSTANT (0x80808080) | |
2217 #endif | |
2218 | |
2219 #define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1)) | |
2220 #define ALIGN_MASK (~ ALIGN_BITS) | |
2221 #define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0) | |
2222 #define STRIDE sizeof (STRIDE_TYPE) | |
2223 | |
2224 /* Skip as many ASCII bytes as possible in the memory block [PTR, END). | |
2225 Return pointer to the first non-ASCII byte. Optimized for long | |
2226 stretches of ASCII. */ | |
2227 inline static const Ibyte * | |
2228 skip_ascii (const Ibyte *ptr, const Ibyte *end) | |
2229 { | |
2230 const unsigned STRIDE_TYPE *ascii_end; | |
2231 | |
2232 /* Need to do in 3 sections -- before alignment start, aligned chunk, | |
2233 after alignment end. */ | |
2234 while (!ALIGNED (ptr)) | |
2235 { | |
2236 if (ptr == end || !byte_ascii_p (*ptr)) | |
2237 return ptr; | |
2238 ptr++; | |
2239 } | |
2240 ascii_end = (const unsigned STRIDE_TYPE *) ptr; | |
2241 /* This loop screams, because we can detect ASCII | |
2242 characters 4, 8 or 16 at a time. */ | |
2243 while ((const Ibyte *) ascii_end + STRIDE <= end | |
2244 && !(*ascii_end & HIGH_BIT_MASK)) | |
2245 ascii_end++; | |
2246 ptr = (Ibyte *) ascii_end; | |
2247 while (ptr < end && byte_ascii_p (*ptr)) | |
2248 ptr++; | |
2249 return ptr; | |
2250 } | |
2251 | |
2252 /* Skip as many ASCII bytes as possible in the memory block [END, PTR), | |
2253 going downwards. Return pointer to the location above the first | |
2254 non-ASCII byte. Optimized for long stretches of ASCII. */ | |
2255 inline static const Ibyte * | |
2256 skip_ascii_down (const Ibyte *ptr, const Ibyte *end) | |
2257 { | |
2258 const unsigned STRIDE_TYPE *ascii_end; | |
2259 | |
2260 /* Need to do in 3 sections -- before alignment start, aligned chunk, | |
2261 after alignment end. */ | |
2262 while (!ALIGNED (ptr)) | |
2263 { | |
2264 if (ptr == end || !byte_ascii_p (*(ptr - 1))) | |
2265 return ptr; | |
2266 ptr--; | |
2267 } | |
2268 ascii_end = (const unsigned STRIDE_TYPE *) ptr - 1; | |
2269 /* This loop screams, because we can detect ASCII | |
2270 characters 4, 8 or 16 at a time. */ | |
2271 while ((const Ibyte *) ascii_end >= end | |
2272 && !(*ascii_end & HIGH_BIT_MASK)) | |
2273 ascii_end--; | |
2274 ptr = (Ibyte *) (ascii_end + 1); | |
2275 while (ptr > end && byte_ascii_p (*(ptr - 1))) | |
2276 ptr--; | |
2277 return ptr; | |
2278 } | |
2279 | |
2280 /* Function equivalents of bytecount_to_charcount/charcount_to_bytecount. | 2207 /* Function equivalents of bytecount_to_charcount/charcount_to_bytecount. |
2281 These work on strings of all sizes but are more efficient than a simple | 2208 These work on strings of all sizes but are more efficient than a simple |
2282 loop on large strings and probably less efficient on sufficiently small | 2209 loop on large strings and probably less efficient on sufficiently small |
2283 strings. */ | 2210 strings. */ |
2284 | 2211 |