Mercurial > hg > xemacs-beta
comparison src/insdel.c @ 400:a86b2b5e0111 r21-2-30
Import from CVS: tag r21-2-30
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:14:34 +0200 |
parents | 74fd4e045ea6 |
children | b8cc9ab3f761 |
comparison
equal
deleted
inserted
replaced
399:376370fb5946 | 400:a86b2b5e0111 |
---|---|
307 bytecount_to_charcount (const Bufbyte *ptr, Bytecount len) | 307 bytecount_to_charcount (const Bufbyte *ptr, Bytecount len) |
308 { | 308 { |
309 Charcount count = 0; | 309 Charcount count = 0; |
310 const Bufbyte *end = ptr + len; | 310 const Bufbyte *end = ptr + len; |
311 | 311 |
312 #if (LONGBITS == 32 || LONGBITS == 64) | 312 #if SIZEOF_LONG == 8 |
313 | 313 # define STRIDE_TYPE long |
314 # if (LONGBITS == 32) | 314 # define HIGH_BIT_MASK 0x8080808080808080UL |
315 # define LONG_BYTES 4 | 315 #elif SIZEOF_LONG_LONG == 8 && !(defined (i386) || defined (__i386__)) |
316 # define ALIGN_MASK 0xFFFFFFFCU | 316 # define STRIDE_TYPE long long |
317 # define HIGH_BIT_MASK 0x80808080U | 317 # define HIGH_BIT_MASK 0x8080808080808080ULL |
318 # else | 318 #elif SIZEOF_LONG == 4 |
319 # define LONG_BYTES 8 | 319 # define STRIDE_TYPE long |
320 # define ALIGN_MASK 0xFFFFFFFFFFFFFFF8UL | 320 # define HIGH_BIT_MASK 0x80808080UL |
321 /* I had a dream, I was being overrun with early Intel processors ... */ | 321 #else |
322 # define HIGH_BIT_MASK 0x8080808080808080UL | 322 # error Add support for 128-bit systems here |
323 # endif | 323 #endif |
324 | 324 |
325 /* When we have a large number of bytes to scan, we can be trickier | 325 #define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1)) |
326 and significantly faster by scanning them in chunks of the CPU word | 326 #define ALIGN_MASK (~ ALIGN_BITS) |
327 size (assuming that they're all ASCII -- we cut out as soon as | 327 #define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0) |
328 we find something non-ASCII). */ | 328 #define STRIDE sizeof (STRIDE_TYPE) |
329 if (len >= 12) | 329 |
330 { | |
331 /* Determine the section in the middle of the string that's | |
332 amenable to this treatment. Everything has to be aligned | |
333 on CPU word boundaries. */ | |
334 const Bufbyte *aligned_ptr = | |
335 (const Bufbyte *) (((unsigned long) (ptr + LONG_BYTES - 1)) & | |
336 ALIGN_MASK); | |
337 const Bufbyte *aligned_end = | |
338 (const Bufbyte *) (((unsigned long) end) & ALIGN_MASK); | |
339 | |
340 /* Handle unaligned stuff at the beginning. */ | |
341 while (ptr < aligned_ptr) | |
342 { | |
343 if (!BYTE_ASCII_P (*ptr)) | |
344 goto bail; | |
345 count++, ptr++; | |
346 } | |
347 /* Now do it. */ | |
348 while (ptr < aligned_end) | |
349 { | |
350 | |
351 if ((* (unsigned long *) ptr) & HIGH_BIT_MASK) | |
352 goto bail; | |
353 ptr += LONG_BYTES; | |
354 count += LONG_BYTES; | |
355 } | |
356 } | |
357 | |
358 #endif /* LONGBITS == 32 || LONGBITS == 64 */ | |
359 | |
360 bail: | |
361 while (ptr < end) | 330 while (ptr < end) |
362 { | 331 { |
363 count++; | 332 if (BYTE_ASCII_P (*ptr)) |
364 INC_CHARPTR (ptr); | 333 { |
365 } | 334 /* optimize for long stretches of ASCII */ |
335 if (! ALIGNED (ptr)) | |
336 ptr++, count++; | |
337 else | |
338 { | |
339 const unsigned STRIDE_TYPE *ascii_end = | |
340 (const unsigned STRIDE_TYPE *) ptr; | |
341 /* This loop screams, because we can typically | |
342 detect ASCII characters 8 at a time. */ | |
343 while ((const Bufbyte *) ascii_end + STRIDE <= end | |
344 && !(*ascii_end & HIGH_BIT_MASK)) | |
345 ascii_end++; | |
346 if ((Bufbyte *) ascii_end == ptr) | |
347 ptr++, count++; | |
348 else | |
349 { | |
350 count += (Bufbyte *) ascii_end - ptr; | |
351 ptr = (Bufbyte *) ascii_end; | |
352 } | |
353 } | |
354 } | |
355 else | |
356 { | |
357 /* optimize for successive characters from the same charset */ | |
358 Bufbyte leading_byte = *ptr; | |
359 size_t bytes = REP_BYTES_BY_FIRST_BYTE (leading_byte); | |
360 while ((ptr < end) && (*ptr == leading_byte)) | |
361 ptr += bytes, count++; | |
362 } | |
363 } | |
364 | |
366 #ifdef ERROR_CHECK_BUFPOS | 365 #ifdef ERROR_CHECK_BUFPOS |
367 /* Bomb out if the specified substring ends in the middle | 366 /* Bomb out if the specified substring ends in the middle |
368 of a character. Note that we might have already gotten | 367 of a character. Note that we might have already gotten |
369 a core dump above from an invalid reference, but at least | 368 a core dump above from an invalid reference, but at least |
370 we will get no farther than here. */ | 369 we will get no farther than here. */ |