comparison src/insdel.c @ 442:abe6d1db359e r21-2-36

Import from CVS: tag r21-2-36
author cvs
date Mon, 13 Aug 2007 11:35:02 +0200
parents 8de8e3f6228a
children 223736d75acb
comparison
equal deleted inserted replaced
441:72a7cfa4a488 442:abe6d1db359e
198 Working with the various representations: 198 Working with the various representations:
199 ========================================= */ 199 ========================================= */
200 200
201 #include <config.h> 201 #include <config.h>
202 #include "lisp.h" 202 #include "lisp.h"
203 #include <limits.h>
204 203
205 #include "buffer.h" 204 #include "buffer.h"
206 #include "device.h" 205 #include "device.h"
207 #include "frame.h" 206 #include "frame.h"
208 #include "extents.h" 207 #include "extents.h"
302 301
303 /* Given a pointer to a text string and a length in bytes, return 302 /* Given a pointer to a text string and a length in bytes, return
304 the equivalent length in characters. */ 303 the equivalent length in characters. */
305 304
306 Charcount 305 Charcount
307 bytecount_to_charcount (CONST Bufbyte *ptr, Bytecount len) 306 bytecount_to_charcount (const Bufbyte *ptr, Bytecount len)
308 { 307 {
309 Charcount count = 0; 308 Charcount count = 0;
310 CONST Bufbyte *end = ptr + len; 309 const Bufbyte *end = ptr + len;
311 310
312 #if (LONGBITS == 32 || LONGBITS == 64) 311 #if SIZEOF_LONG == 8
313 312 # define STRIDE_TYPE long
314 # if (LONGBITS == 32) 313 # define HIGH_BIT_MASK 0x8080808080808080UL
315 # define LONG_BYTES 4 314 #elif SIZEOF_LONG_LONG == 8 && !(defined (i386) || defined (__i386__))
316 # define ALIGN_MASK 0xFFFFFFFCU 315 # define STRIDE_TYPE long long
317 # define HIGH_BIT_MASK 0x80808080U 316 # define HIGH_BIT_MASK 0x8080808080808080ULL
318 # else 317 #elif SIZEOF_LONG == 4
319 # define LONG_BYTES 8 318 # define STRIDE_TYPE long
320 # define ALIGN_MASK 0xFFFFFFFFFFFFFFF8UL 319 # define HIGH_BIT_MASK 0x80808080UL
321 /* I had a dream, I was being overrun with early Intel processors ... */ 320 #else
322 # define HIGH_BIT_MASK 0x8080808080808080UL 321 # error Add support for 128-bit systems here
323 # endif 322 #endif
324 323
325 /* When we have a large number of bytes to scan, we can be trickier 324 #define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1))
326 and significantly faster by scanning them in chunks of the CPU word 325 #define ALIGN_MASK (~ ALIGN_BITS)
327 size (assuming that they're all ASCII -- we cut out as soon as 326 #define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0)
328 we find something non-ASCII). */ 327 #define STRIDE sizeof (STRIDE_TYPE)
329 if (len >= 12) 328
330 {
331 /* Determine the section in the middle of the string that's
332 amenable to this treatment. Everything has to be aligned
333 on CPU word boundaries. */
334 CONST Bufbyte *aligned_ptr =
335 (CONST Bufbyte *) (((unsigned long) (ptr + LONG_BYTES - 1)) &
336 ALIGN_MASK);
337 CONST Bufbyte *aligned_end =
338 (CONST Bufbyte *) (((unsigned long) end) & ALIGN_MASK);
339
340 /* Handle unaligned stuff at the beginning. */
341 while (ptr < aligned_ptr)
342 {
343 if (!BYTE_ASCII_P (*ptr))
344 goto bail;
345 count++, ptr++;
346 }
347 /* Now do it. */
348 while (ptr < aligned_end)
349 {
350
351 if ((* (unsigned long *) ptr) & HIGH_BIT_MASK)
352 goto bail;
353 ptr += LONG_BYTES;
354 count += LONG_BYTES;
355 }
356 }
357
358 #endif /* LONGBITS == 32 || LONGBITS == 64 */
359
360 bail:
361 while (ptr < end) 329 while (ptr < end)
362 { 330 {
363 count++; 331 if (BYTE_ASCII_P (*ptr))
364 INC_CHARPTR (ptr); 332 {
365 } 333 /* optimize for long stretches of ASCII */
334 if (! ALIGNED (ptr))
335 ptr++, count++;
336 else
337 {
338 const unsigned STRIDE_TYPE *ascii_end =
339 (const unsigned STRIDE_TYPE *) ptr;
340 /* This loop screams, because we can typically
341 detect ASCII characters 8 at a time. */
342 while ((const Bufbyte *) ascii_end + STRIDE <= end
343 && !(*ascii_end & HIGH_BIT_MASK))
344 ascii_end++;
345 if ((Bufbyte *) ascii_end == ptr)
346 ptr++, count++;
347 else
348 {
349 count += (Bufbyte *) ascii_end - ptr;
350 ptr = (Bufbyte *) ascii_end;
351 }
352 }
353 }
354 else
355 {
356 /* optimize for successive characters from the same charset */
357 Bufbyte leading_byte = *ptr;
358 size_t bytes = REP_BYTES_BY_FIRST_BYTE (leading_byte);
359 while ((ptr < end) && (*ptr == leading_byte))
360 ptr += bytes, count++;
361 }
362 }
363
366 #ifdef ERROR_CHECK_BUFPOS 364 #ifdef ERROR_CHECK_BUFPOS
367 /* Bomb out if the specified substring ends in the middle 365 /* Bomb out if the specified substring ends in the middle
368 of a character. Note that we might have already gotten 366 of a character. Note that we might have already gotten
369 a core dump above from an invalid reference, but at least 367 a core dump above from an invalid reference, but at least
370 we will get no farther than here. */ 368 we will get no farther than here. */
376 374
377 /* Given a pointer to a text string and a length in characters, return 375 /* Given a pointer to a text string and a length in characters, return
378 the equivalent length in bytes. */ 376 the equivalent length in bytes. */
379 377
380 Bytecount 378 Bytecount
381 charcount_to_bytecount (CONST Bufbyte *ptr, Charcount len) 379 charcount_to_bytecount (const Bufbyte *ptr, Charcount len)
382 { 380 {
383 CONST Bufbyte *newptr = ptr; 381 const Bufbyte *newptr = ptr;
384 382
385 while (len > 0) 383 while (len > 0)
386 { 384 {
387 INC_CHARPTR (newptr); 385 INC_CHARPTR (newptr);
388 len--; 386 len--;
1194 */ 1192 */
1195 1193
1196 Bufpos 1194 Bufpos
1197 get_buffer_pos_char (struct buffer *b, Lisp_Object pos, unsigned int flags) 1195 get_buffer_pos_char (struct buffer *b, Lisp_Object pos, unsigned int flags)
1198 { 1196 {
1197 /* Does not GC */
1199 Bufpos ind; 1198 Bufpos ind;
1200 Bufpos min_allowed, max_allowed; 1199 Bufpos min_allowed, max_allowed;
1201 1200
1202 CHECK_INT_COERCE_MARKER (pos); 1201 CHECK_INT_COERCE_MARKER (pos);
1203 ind = XINT (pos); 1202 ind = XINT (pos);
1243 1242
1244 void 1243 void
1245 get_buffer_range_char (struct buffer *b, Lisp_Object from, Lisp_Object to, 1244 get_buffer_range_char (struct buffer *b, Lisp_Object from, Lisp_Object to,
1246 Bufpos *from_out, Bufpos *to_out, unsigned int flags) 1245 Bufpos *from_out, Bufpos *to_out, unsigned int flags)
1247 { 1246 {
1247 /* Does not GC */
1248 Bufpos min_allowed, max_allowed; 1248 Bufpos min_allowed, max_allowed;
1249 1249
1250 min_allowed = (flags & GB_ALLOW_PAST_ACCESSIBLE) ? 1250 min_allowed = (flags & GB_ALLOW_PAST_ACCESSIBLE) ?
1251 BUF_BEG (b) : BUF_BEGV (b); 1251 BUF_BEG (b) : BUF_BEGV (b);
1252 max_allowed = (flags & GB_ALLOW_PAST_ACCESSIBLE) ? 1252 max_allowed = (flags & GB_ALLOW_PAST_ACCESSIBLE) ?
2369 /************************************************************************/ 2369 /************************************************************************/
2370 /* Insertion of strings */ 2370 /* Insertion of strings */
2371 /************************************************************************/ 2371 /************************************************************************/
2372 2372
2373 void 2373 void
2374 fixup_internal_substring (CONST Bufbyte *nonreloc, Lisp_Object reloc, 2374 fixup_internal_substring (const Bufbyte *nonreloc, Lisp_Object reloc,
2375 Bytecount offset, Bytecount *len) 2375 Bytecount offset, Bytecount *len)
2376 { 2376 {
2377 assert ((nonreloc && NILP (reloc)) || (!nonreloc && STRINGP (reloc))); 2377 assert ((nonreloc && NILP (reloc)) || (!nonreloc && STRINGP (reloc)));
2378 2378
2379 if (*len < 0) 2379 if (*len < 0)
2380 { 2380 {
2381 if (nonreloc) 2381 if (nonreloc)
2382 *len = strlen ((CONST char *) nonreloc) - offset; 2382 *len = strlen ((const char *) nonreloc) - offset;
2383 else 2383 else
2384 *len = XSTRING_LENGTH (reloc) - offset; 2384 *len = XSTRING_LENGTH (reloc) - offset;
2385 } 2385 }
2386 #ifdef ERROR_CHECK_BUFPOS 2386 #ifdef ERROR_CHECK_BUFPOS
2387 assert (*len >= 0); 2387 assert (*len >= 0);
2411 before modifying a buffer. Similar checks were already done 2411 before modifying a buffer. Similar checks were already done
2412 in the higher-level Lisp functions calling insert-file-contents. */ 2412 in the higher-level Lisp functions calling insert-file-contents. */
2413 2413
2414 Charcount 2414 Charcount
2415 buffer_insert_string_1 (struct buffer *buf, Bufpos pos, 2415 buffer_insert_string_1 (struct buffer *buf, Bufpos pos,
2416 CONST Bufbyte *nonreloc, Lisp_Object reloc, 2416 const Bufbyte *nonreloc, Lisp_Object reloc,
2417 Bytecount offset, Bytecount length, 2417 Bytecount offset, Bytecount length,
2418 int flags) 2418 int flags)
2419 { 2419 {
2420 /* This function can GC */ 2420 /* This function can GC */
2421 struct gcpro gcpro1; 2421 struct gcpro gcpro1;
2576 to take place. (If POS is -1, text is inserted at point and point 2576 to take place. (If POS is -1, text is inserted at point and point
2577 moves forward past the text.) FLAGS is as above. */ 2577 moves forward past the text.) FLAGS is as above. */
2578 2578
2579 Charcount 2579 Charcount
2580 buffer_insert_raw_string_1 (struct buffer *buf, Bufpos pos, 2580 buffer_insert_raw_string_1 (struct buffer *buf, Bufpos pos,
2581 CONST Bufbyte *nonreloc, Bytecount length, 2581 const Bufbyte *nonreloc, Bytecount length,
2582 int flags) 2582 int flags)
2583 { 2583 {
2584 /* This function can GC */ 2584 /* This function can GC */
2585 return buffer_insert_string_1 (buf, pos, nonreloc, Qnil, 0, length, 2585 return buffer_insert_string_1 (buf, pos, nonreloc, Qnil, 0, length,
2586 flags); 2586 flags);
2600 } 2600 }
2601 2601
2602 /* Insert the null-terminated string S (in external format). */ 2602 /* Insert the null-terminated string S (in external format). */
2603 2603
2604 Charcount 2604 Charcount
2605 buffer_insert_c_string_1 (struct buffer *buf, Bufpos pos, CONST char *s, 2605 buffer_insert_c_string_1 (struct buffer *buf, Bufpos pos, const char *s,
2606 int flags) 2606 int flags)
2607 { 2607 {
2608 /* This function can GC */ 2608 /* This function can GC */
2609 CONST char *translated = GETTEXT (s); 2609 const char *translated = GETTEXT (s);
2610 return buffer_insert_string_1 (buf, pos, (CONST Bufbyte *) translated, Qnil, 2610 return buffer_insert_string_1 (buf, pos, (const Bufbyte *) translated, Qnil,
2611 0, strlen (translated), flags); 2611 0, strlen (translated), flags);
2612 } 2612 }
2613 2613
2614 Charcount 2614 Charcount
2615 buffer_insert_emacs_char_1 (struct buffer *buf, Bufpos pos, Emchar ch, 2615 buffer_insert_emacs_char_1 (struct buffer *buf, Bufpos pos, Emchar ch,
3071 iro); 3071 iro);
3072 } 3072 }
3073 } 3073 }
3074 3074
3075 void 3075 void
3076 find_charsets_in_bufbyte_string (unsigned char *charsets, CONST Bufbyte *str, 3076 find_charsets_in_bufbyte_string (unsigned char *charsets, const Bufbyte *str,
3077 Bytecount len) 3077 Bytecount len)
3078 { 3078 {
3079 #ifndef MULE 3079 #ifndef MULE
3080 /* Telescope this. */ 3080 /* Telescope this. */
3081 charsets[0] = 1; 3081 charsets[0] = 1;
3082 #else 3082 #else
3083 CONST Bufbyte *strend = str + len; 3083 const Bufbyte *strend = str + len;
3084 memset (charsets, 0, NUM_LEADING_BYTES); 3084 memset (charsets, 0, NUM_LEADING_BYTES);
3085
3086 /* #### SJT doesn't like this. */
3087 if (len == 0)
3088 {
3089 charsets[XCHARSET_LEADING_BYTE (Vcharset_ascii) - 128] = 1;
3090 return;
3091 }
3085 3092
3086 while (str < strend) 3093 while (str < strend)
3087 { 3094 {
3088 charsets[CHAR_LEADING_BYTE (charptr_emchar (str)) - 128] = 1; 3095 charsets[CHAR_LEADING_BYTE (charptr_emchar (str)) - 128] = 1;
3089 INC_CHARPTR (str); 3096 INC_CHARPTR (str);
3090 } 3097 }
3091 #endif 3098 #endif
3092 } 3099 }
3093 3100
3094 void 3101 void
3095 find_charsets_in_emchar_string (unsigned char *charsets, CONST Emchar *str, 3102 find_charsets_in_emchar_string (unsigned char *charsets, const Emchar *str,
3096 Charcount len) 3103 Charcount len)
3097 { 3104 {
3098 #ifndef MULE 3105 #ifndef MULE
3099 /* Telescope this. */ 3106 /* Telescope this. */
3100 charsets[0] = 1; 3107 charsets[0] = 1;
3101 #else 3108 #else
3102 int i; 3109 int i;
3103 3110
3104 memset (charsets, 0, NUM_LEADING_BYTES); 3111 memset (charsets, 0, NUM_LEADING_BYTES);
3112
3113 /* #### SJT doesn't like this. */
3114 if (len == 0)
3115 {
3116 charsets[XCHARSET_LEADING_BYTE (Vcharset_ascii) - 128] = 1;
3117 return;
3118 }
3119
3105 for (i = 0; i < len; i++) 3120 for (i = 0; i < len; i++)
3106 { 3121 {
3107 charsets[CHAR_LEADING_BYTE (str[i]) - 128] = 1; 3122 charsets[CHAR_LEADING_BYTE (str[i]) - 128] = 1;
3108 } 3123 }
3109 #endif 3124 #endif
3110 } 3125 }
3111 3126
3112 int 3127 int
3113 bufbyte_string_displayed_columns (CONST Bufbyte *str, Bytecount len) 3128 bufbyte_string_displayed_columns (const Bufbyte *str, Bytecount len)
3114 { 3129 {
3115 int cols = 0; 3130 int cols = 0;
3116 CONST Bufbyte *end = str + len; 3131 const Bufbyte *end = str + len;
3117 3132
3118 while (str < end) 3133 while (str < end)
3119 { 3134 {
3120 #ifdef MULE 3135 #ifdef MULE
3121 Emchar ch = charptr_emchar (str); 3136 Emchar ch = charptr_emchar (str);
3128 3143
3129 return cols; 3144 return cols;
3130 } 3145 }
3131 3146
3132 int 3147 int
3133 emchar_string_displayed_columns (CONST Emchar *str, Charcount len) 3148 emchar_string_displayed_columns (const Emchar *str, Charcount len)
3134 { 3149 {
3135 #ifdef MULE 3150 #ifdef MULE
3136 int cols = 0; 3151 int cols = 0;
3137 int i; 3152 int i;
3138 3153
3146 } 3161 }
3147 3162
3148 /* NOTE: Does not reset the Dynarr. */ 3163 /* NOTE: Does not reset the Dynarr. */
3149 3164
3150 void 3165 void
3151 convert_bufbyte_string_into_emchar_dynarr (CONST Bufbyte *str, Bytecount len, 3166 convert_bufbyte_string_into_emchar_dynarr (const Bufbyte *str, Bytecount len,
3152 Emchar_dynarr *dyn) 3167 Emchar_dynarr *dyn)
3153 { 3168 {
3154 CONST Bufbyte *strend = str + len; 3169 const Bufbyte *strend = str + len;
3155 3170
3156 while (str < strend) 3171 while (str < strend)
3157 { 3172 {
3158 Emchar ch = charptr_emchar (str); 3173 Emchar ch = charptr_emchar (str);
3159 Dynarr_add (dyn, ch); 3174 Dynarr_add (dyn, ch);
3160 INC_CHARPTR (str); 3175 INC_CHARPTR (str);
3161 } 3176 }
3162 } 3177 }
3163 3178
3164 Charcount 3179 Charcount
3165 convert_bufbyte_string_into_emchar_string (CONST Bufbyte *str, Bytecount len, 3180 convert_bufbyte_string_into_emchar_string (const Bufbyte *str, Bytecount len,
3166 Emchar *arr) 3181 Emchar *arr)
3167 { 3182 {
3168 CONST Bufbyte *strend = str + len; 3183 const Bufbyte *strend = str + len;
3169 Charcount newlen = 0; 3184 Charcount newlen = 0;
3170 while (str < strend) 3185 while (str < strend)
3171 { 3186 {
3172 Emchar ch = charptr_emchar (str); 3187 Emchar ch = charptr_emchar (str);
3173 arr[newlen++] = ch; 3188 arr[newlen++] = ch;