comparison src/insdel.c @ 400:a86b2b5e0111 r21-2-30

Import from CVS: tag r21-2-30
author cvs
date Mon, 13 Aug 2007 11:14:34 +0200
parents 74fd4e045ea6
children b8cc9ab3f761
comparison
equal deleted inserted replaced
399:376370fb5946 400:a86b2b5e0111
307 bytecount_to_charcount (const Bufbyte *ptr, Bytecount len) 307 bytecount_to_charcount (const Bufbyte *ptr, Bytecount len)
308 { 308 {
309 Charcount count = 0; 309 Charcount count = 0;
310 const Bufbyte *end = ptr + len; 310 const Bufbyte *end = ptr + len;
311 311
312 #if (LONGBITS == 32 || LONGBITS == 64) 312 #if SIZEOF_LONG == 8
313 313 # define STRIDE_TYPE long
314 # if (LONGBITS == 32) 314 # define HIGH_BIT_MASK 0x8080808080808080UL
315 # define LONG_BYTES 4 315 #elif SIZEOF_LONG_LONG == 8 && !(defined (i386) || defined (__i386__))
316 # define ALIGN_MASK 0xFFFFFFFCU 316 # define STRIDE_TYPE long long
317 # define HIGH_BIT_MASK 0x80808080U 317 # define HIGH_BIT_MASK 0x8080808080808080ULL
318 # else 318 #elif SIZEOF_LONG == 4
319 # define LONG_BYTES 8 319 # define STRIDE_TYPE long
320 # define ALIGN_MASK 0xFFFFFFFFFFFFFFF8UL 320 # define HIGH_BIT_MASK 0x80808080UL
321 /* I had a dream, I was being overrun with early Intel processors ... */ 321 #else
322 # define HIGH_BIT_MASK 0x8080808080808080UL 322 # error Add support for 128-bit systems here
323 # endif 323 #endif
324 324
325 /* When we have a large number of bytes to scan, we can be trickier 325 #define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1))
326 and significantly faster by scanning them in chunks of the CPU word 326 #define ALIGN_MASK (~ ALIGN_BITS)
327 size (assuming that they're all ASCII -- we cut out as soon as 327 #define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0)
328 we find something non-ASCII). */ 328 #define STRIDE sizeof (STRIDE_TYPE)
329 if (len >= 12) 329
330 {
331 /* Determine the section in the middle of the string that's
332 amenable to this treatment. Everything has to be aligned
333 on CPU word boundaries. */
334 const Bufbyte *aligned_ptr =
335 (const Bufbyte *) (((unsigned long) (ptr + LONG_BYTES - 1)) &
336 ALIGN_MASK);
337 const Bufbyte *aligned_end =
338 (const Bufbyte *) (((unsigned long) end) & ALIGN_MASK);
339
340 /* Handle unaligned stuff at the beginning. */
341 while (ptr < aligned_ptr)
342 {
343 if (!BYTE_ASCII_P (*ptr))
344 goto bail;
345 count++, ptr++;
346 }
347 /* Now do it. */
348 while (ptr < aligned_end)
349 {
350
351 if ((* (unsigned long *) ptr) & HIGH_BIT_MASK)
352 goto bail;
353 ptr += LONG_BYTES;
354 count += LONG_BYTES;
355 }
356 }
357
358 #endif /* LONGBITS == 32 || LONGBITS == 64 */
359
360 bail:
361 while (ptr < end) 330 while (ptr < end)
362 { 331 {
363 count++; 332 if (BYTE_ASCII_P (*ptr))
364 INC_CHARPTR (ptr); 333 {
365 } 334 /* optimize for long stretches of ASCII */
335 if (! ALIGNED (ptr))
336 ptr++, count++;
337 else
338 {
339 const unsigned STRIDE_TYPE *ascii_end =
340 (const unsigned STRIDE_TYPE *) ptr;
341 /* This loop screams, because we can typically
342 detect ASCII characters 8 at a time. */
343 while ((const Bufbyte *) ascii_end + STRIDE <= end
344 && !(*ascii_end & HIGH_BIT_MASK))
345 ascii_end++;
346 if ((Bufbyte *) ascii_end == ptr)
347 ptr++, count++;
348 else
349 {
350 count += (Bufbyte *) ascii_end - ptr;
351 ptr = (Bufbyte *) ascii_end;
352 }
353 }
354 }
355 else
356 {
357 /* optimize for successive characters from the same charset */
358 Bufbyte leading_byte = *ptr;
359 size_t bytes = REP_BYTES_BY_FIRST_BYTE (leading_byte);
360 while ((ptr < end) && (*ptr == leading_byte))
361 ptr += bytes, count++;
362 }
363 }
364
366 #ifdef ERROR_CHECK_BUFPOS 365 #ifdef ERROR_CHECK_BUFPOS
367 /* Bomb out if the specified substring ends in the middle 366 /* Bomb out if the specified substring ends in the middle
368 of a character. Note that we might have already gotten 367 of a character. Note that we might have already gotten
369 a core dump above from an invalid reference, but at least 368 a core dump above from an invalid reference, but at least
370 we will get no farther than here. */ 369 we will get no farther than here. */