Mercurial > hg > xemacs-beta
comparison src/insdel.c @ 442:abe6d1db359e r21-2-36
Import from CVS: tag r21-2-36
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:35:02 +0200 |
parents | 8de8e3f6228a |
children | 223736d75acb |
comparison
equal
deleted
inserted
replaced
441:72a7cfa4a488 | 442:abe6d1db359e |
---|---|
198 Working with the various representations: | 198 Working with the various representations: |
199 ========================================= */ | 199 ========================================= */ |
200 | 200 |
201 #include <config.h> | 201 #include <config.h> |
202 #include "lisp.h" | 202 #include "lisp.h" |
203 #include <limits.h> | |
204 | 203 |
205 #include "buffer.h" | 204 #include "buffer.h" |
206 #include "device.h" | 205 #include "device.h" |
207 #include "frame.h" | 206 #include "frame.h" |
208 #include "extents.h" | 207 #include "extents.h" |
302 | 301 |
303 /* Given a pointer to a text string and a length in bytes, return | 302 /* Given a pointer to a text string and a length in bytes, return |
304 the equivalent length in characters. */ | 303 the equivalent length in characters. */ |
305 | 304 |
306 Charcount | 305 Charcount |
307 bytecount_to_charcount (CONST Bufbyte *ptr, Bytecount len) | 306 bytecount_to_charcount (const Bufbyte *ptr, Bytecount len) |
308 { | 307 { |
309 Charcount count = 0; | 308 Charcount count = 0; |
310 CONST Bufbyte *end = ptr + len; | 309 const Bufbyte *end = ptr + len; |
311 | 310 |
312 #if (LONGBITS == 32 || LONGBITS == 64) | 311 #if SIZEOF_LONG == 8 |
313 | 312 # define STRIDE_TYPE long |
314 # if (LONGBITS == 32) | 313 # define HIGH_BIT_MASK 0x8080808080808080UL |
315 # define LONG_BYTES 4 | 314 #elif SIZEOF_LONG_LONG == 8 && !(defined (i386) || defined (__i386__)) |
316 # define ALIGN_MASK 0xFFFFFFFCU | 315 # define STRIDE_TYPE long long |
317 # define HIGH_BIT_MASK 0x80808080U | 316 # define HIGH_BIT_MASK 0x8080808080808080ULL |
318 # else | 317 #elif SIZEOF_LONG == 4 |
319 # define LONG_BYTES 8 | 318 # define STRIDE_TYPE long |
320 # define ALIGN_MASK 0xFFFFFFFFFFFFFFF8UL | 319 # define HIGH_BIT_MASK 0x80808080UL |
321 /* I had a dream, I was being overrun with early Intel processors ... */ | 320 #else |
322 # define HIGH_BIT_MASK 0x8080808080808080UL | 321 # error Add support for 128-bit systems here |
323 # endif | 322 #endif |
324 | 323 |
325 /* When we have a large number of bytes to scan, we can be trickier | 324 #define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1)) |
326 and significantly faster by scanning them in chunks of the CPU word | 325 #define ALIGN_MASK (~ ALIGN_BITS) |
327 size (assuming that they're all ASCII -- we cut out as soon as | 326 #define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0) |
328 we find something non-ASCII). */ | 327 #define STRIDE sizeof (STRIDE_TYPE) |
329 if (len >= 12) | 328 |
330 { | |
331 /* Determine the section in the middle of the string that's | |
332 amenable to this treatment. Everything has to be aligned | |
333 on CPU word boundaries. */ | |
334 CONST Bufbyte *aligned_ptr = | |
335 (CONST Bufbyte *) (((unsigned long) (ptr + LONG_BYTES - 1)) & | |
336 ALIGN_MASK); | |
337 CONST Bufbyte *aligned_end = | |
338 (CONST Bufbyte *) (((unsigned long) end) & ALIGN_MASK); | |
339 | |
340 /* Handle unaligned stuff at the beginning. */ | |
341 while (ptr < aligned_ptr) | |
342 { | |
343 if (!BYTE_ASCII_P (*ptr)) | |
344 goto bail; | |
345 count++, ptr++; | |
346 } | |
347 /* Now do it. */ | |
348 while (ptr < aligned_end) | |
349 { | |
350 | |
351 if ((* (unsigned long *) ptr) & HIGH_BIT_MASK) | |
352 goto bail; | |
353 ptr += LONG_BYTES; | |
354 count += LONG_BYTES; | |
355 } | |
356 } | |
357 | |
358 #endif /* LONGBITS == 32 || LONGBITS == 64 */ | |
359 | |
360 bail: | |
361 while (ptr < end) | 329 while (ptr < end) |
362 { | 330 { |
363 count++; | 331 if (BYTE_ASCII_P (*ptr)) |
364 INC_CHARPTR (ptr); | 332 { |
365 } | 333 /* optimize for long stretches of ASCII */ |
334 if (! ALIGNED (ptr)) | |
335 ptr++, count++; | |
336 else | |
337 { | |
338 const unsigned STRIDE_TYPE *ascii_end = | |
339 (const unsigned STRIDE_TYPE *) ptr; | |
340 /* This loop screams, because we can typically | |
341 detect ASCII characters 8 at a time. */ | |
342 while ((const Bufbyte *) ascii_end + STRIDE <= end | |
343 && !(*ascii_end & HIGH_BIT_MASK)) | |
344 ascii_end++; | |
345 if ((Bufbyte *) ascii_end == ptr) | |
346 ptr++, count++; | |
347 else | |
348 { | |
349 count += (Bufbyte *) ascii_end - ptr; | |
350 ptr = (Bufbyte *) ascii_end; | |
351 } | |
352 } | |
353 } | |
354 else | |
355 { | |
356 /* optimize for successive characters from the same charset */ | |
357 Bufbyte leading_byte = *ptr; | |
358 size_t bytes = REP_BYTES_BY_FIRST_BYTE (leading_byte); | |
359 while ((ptr < end) && (*ptr == leading_byte)) | |
360 ptr += bytes, count++; | |
361 } | |
362 } | |
363 | |
366 #ifdef ERROR_CHECK_BUFPOS | 364 #ifdef ERROR_CHECK_BUFPOS |
367 /* Bomb out if the specified substring ends in the middle | 365 /* Bomb out if the specified substring ends in the middle |
368 of a character. Note that we might have already gotten | 366 of a character. Note that we might have already gotten |
369 a core dump above from an invalid reference, but at least | 367 a core dump above from an invalid reference, but at least |
370 we will get no farther than here. */ | 368 we will get no farther than here. */ |
376 | 374 |
377 /* Given a pointer to a text string and a length in characters, return | 375 /* Given a pointer to a text string and a length in characters, return |
378 the equivalent length in bytes. */ | 376 the equivalent length in bytes. */ |
379 | 377 |
380 Bytecount | 378 Bytecount |
381 charcount_to_bytecount (CONST Bufbyte *ptr, Charcount len) | 379 charcount_to_bytecount (const Bufbyte *ptr, Charcount len) |
382 { | 380 { |
383 CONST Bufbyte *newptr = ptr; | 381 const Bufbyte *newptr = ptr; |
384 | 382 |
385 while (len > 0) | 383 while (len > 0) |
386 { | 384 { |
387 INC_CHARPTR (newptr); | 385 INC_CHARPTR (newptr); |
388 len--; | 386 len--; |
1194 */ | 1192 */ |
1195 | 1193 |
1196 Bufpos | 1194 Bufpos |
1197 get_buffer_pos_char (struct buffer *b, Lisp_Object pos, unsigned int flags) | 1195 get_buffer_pos_char (struct buffer *b, Lisp_Object pos, unsigned int flags) |
1198 { | 1196 { |
1197 /* Does not GC */ | |
1199 Bufpos ind; | 1198 Bufpos ind; |
1200 Bufpos min_allowed, max_allowed; | 1199 Bufpos min_allowed, max_allowed; |
1201 | 1200 |
1202 CHECK_INT_COERCE_MARKER (pos); | 1201 CHECK_INT_COERCE_MARKER (pos); |
1203 ind = XINT (pos); | 1202 ind = XINT (pos); |
1243 | 1242 |
1244 void | 1243 void |
1245 get_buffer_range_char (struct buffer *b, Lisp_Object from, Lisp_Object to, | 1244 get_buffer_range_char (struct buffer *b, Lisp_Object from, Lisp_Object to, |
1246 Bufpos *from_out, Bufpos *to_out, unsigned int flags) | 1245 Bufpos *from_out, Bufpos *to_out, unsigned int flags) |
1247 { | 1246 { |
1247 /* Does not GC */ | |
1248 Bufpos min_allowed, max_allowed; | 1248 Bufpos min_allowed, max_allowed; |
1249 | 1249 |
1250 min_allowed = (flags & GB_ALLOW_PAST_ACCESSIBLE) ? | 1250 min_allowed = (flags & GB_ALLOW_PAST_ACCESSIBLE) ? |
1251 BUF_BEG (b) : BUF_BEGV (b); | 1251 BUF_BEG (b) : BUF_BEGV (b); |
1252 max_allowed = (flags & GB_ALLOW_PAST_ACCESSIBLE) ? | 1252 max_allowed = (flags & GB_ALLOW_PAST_ACCESSIBLE) ? |
2369 /************************************************************************/ | 2369 /************************************************************************/ |
2370 /* Insertion of strings */ | 2370 /* Insertion of strings */ |
2371 /************************************************************************/ | 2371 /************************************************************************/ |
2372 | 2372 |
2373 void | 2373 void |
2374 fixup_internal_substring (CONST Bufbyte *nonreloc, Lisp_Object reloc, | 2374 fixup_internal_substring (const Bufbyte *nonreloc, Lisp_Object reloc, |
2375 Bytecount offset, Bytecount *len) | 2375 Bytecount offset, Bytecount *len) |
2376 { | 2376 { |
2377 assert ((nonreloc && NILP (reloc)) || (!nonreloc && STRINGP (reloc))); | 2377 assert ((nonreloc && NILP (reloc)) || (!nonreloc && STRINGP (reloc))); |
2378 | 2378 |
2379 if (*len < 0) | 2379 if (*len < 0) |
2380 { | 2380 { |
2381 if (nonreloc) | 2381 if (nonreloc) |
2382 *len = strlen ((CONST char *) nonreloc) - offset; | 2382 *len = strlen ((const char *) nonreloc) - offset; |
2383 else | 2383 else |
2384 *len = XSTRING_LENGTH (reloc) - offset; | 2384 *len = XSTRING_LENGTH (reloc) - offset; |
2385 } | 2385 } |
2386 #ifdef ERROR_CHECK_BUFPOS | 2386 #ifdef ERROR_CHECK_BUFPOS |
2387 assert (*len >= 0); | 2387 assert (*len >= 0); |
2411 before modifying a buffer. Similar checks were already done | 2411 before modifying a buffer. Similar checks were already done |
2412 in the higher-level Lisp functions calling insert-file-contents. */ | 2412 in the higher-level Lisp functions calling insert-file-contents. */ |
2413 | 2413 |
2414 Charcount | 2414 Charcount |
2415 buffer_insert_string_1 (struct buffer *buf, Bufpos pos, | 2415 buffer_insert_string_1 (struct buffer *buf, Bufpos pos, |
2416 CONST Bufbyte *nonreloc, Lisp_Object reloc, | 2416 const Bufbyte *nonreloc, Lisp_Object reloc, |
2417 Bytecount offset, Bytecount length, | 2417 Bytecount offset, Bytecount length, |
2418 int flags) | 2418 int flags) |
2419 { | 2419 { |
2420 /* This function can GC */ | 2420 /* This function can GC */ |
2421 struct gcpro gcpro1; | 2421 struct gcpro gcpro1; |
2576 to take place. (If POS is -1, text is inserted at point and point | 2576 to take place. (If POS is -1, text is inserted at point and point |
2577 moves forward past the text.) FLAGS is as above. */ | 2577 moves forward past the text.) FLAGS is as above. */ |
2578 | 2578 |
2579 Charcount | 2579 Charcount |
2580 buffer_insert_raw_string_1 (struct buffer *buf, Bufpos pos, | 2580 buffer_insert_raw_string_1 (struct buffer *buf, Bufpos pos, |
2581 CONST Bufbyte *nonreloc, Bytecount length, | 2581 const Bufbyte *nonreloc, Bytecount length, |
2582 int flags) | 2582 int flags) |
2583 { | 2583 { |
2584 /* This function can GC */ | 2584 /* This function can GC */ |
2585 return buffer_insert_string_1 (buf, pos, nonreloc, Qnil, 0, length, | 2585 return buffer_insert_string_1 (buf, pos, nonreloc, Qnil, 0, length, |
2586 flags); | 2586 flags); |
2600 } | 2600 } |
2601 | 2601 |
2602 /* Insert the null-terminated string S (in external format). */ | 2602 /* Insert the null-terminated string S (in external format). */ |
2603 | 2603 |
2604 Charcount | 2604 Charcount |
2605 buffer_insert_c_string_1 (struct buffer *buf, Bufpos pos, CONST char *s, | 2605 buffer_insert_c_string_1 (struct buffer *buf, Bufpos pos, const char *s, |
2606 int flags) | 2606 int flags) |
2607 { | 2607 { |
2608 /* This function can GC */ | 2608 /* This function can GC */ |
2609 CONST char *translated = GETTEXT (s); | 2609 const char *translated = GETTEXT (s); |
2610 return buffer_insert_string_1 (buf, pos, (CONST Bufbyte *) translated, Qnil, | 2610 return buffer_insert_string_1 (buf, pos, (const Bufbyte *) translated, Qnil, |
2611 0, strlen (translated), flags); | 2611 0, strlen (translated), flags); |
2612 } | 2612 } |
2613 | 2613 |
2614 Charcount | 2614 Charcount |
2615 buffer_insert_emacs_char_1 (struct buffer *buf, Bufpos pos, Emchar ch, | 2615 buffer_insert_emacs_char_1 (struct buffer *buf, Bufpos pos, Emchar ch, |
3071 iro); | 3071 iro); |
3072 } | 3072 } |
3073 } | 3073 } |
3074 | 3074 |
3075 void | 3075 void |
3076 find_charsets_in_bufbyte_string (unsigned char *charsets, CONST Bufbyte *str, | 3076 find_charsets_in_bufbyte_string (unsigned char *charsets, const Bufbyte *str, |
3077 Bytecount len) | 3077 Bytecount len) |
3078 { | 3078 { |
3079 #ifndef MULE | 3079 #ifndef MULE |
3080 /* Telescope this. */ | 3080 /* Telescope this. */ |
3081 charsets[0] = 1; | 3081 charsets[0] = 1; |
3082 #else | 3082 #else |
3083 CONST Bufbyte *strend = str + len; | 3083 const Bufbyte *strend = str + len; |
3084 memset (charsets, 0, NUM_LEADING_BYTES); | 3084 memset (charsets, 0, NUM_LEADING_BYTES); |
3085 | |
3086 /* #### SJT doesn't like this. */ | |
3087 if (len == 0) | |
3088 { | |
3089 charsets[XCHARSET_LEADING_BYTE (Vcharset_ascii) - 128] = 1; | |
3090 return; | |
3091 } | |
3085 | 3092 |
3086 while (str < strend) | 3093 while (str < strend) |
3087 { | 3094 { |
3088 charsets[CHAR_LEADING_BYTE (charptr_emchar (str)) - 128] = 1; | 3095 charsets[CHAR_LEADING_BYTE (charptr_emchar (str)) - 128] = 1; |
3089 INC_CHARPTR (str); | 3096 INC_CHARPTR (str); |
3090 } | 3097 } |
3091 #endif | 3098 #endif |
3092 } | 3099 } |
3093 | 3100 |
3094 void | 3101 void |
3095 find_charsets_in_emchar_string (unsigned char *charsets, CONST Emchar *str, | 3102 find_charsets_in_emchar_string (unsigned char *charsets, const Emchar *str, |
3096 Charcount len) | 3103 Charcount len) |
3097 { | 3104 { |
3098 #ifndef MULE | 3105 #ifndef MULE |
3099 /* Telescope this. */ | 3106 /* Telescope this. */ |
3100 charsets[0] = 1; | 3107 charsets[0] = 1; |
3101 #else | 3108 #else |
3102 int i; | 3109 int i; |
3103 | 3110 |
3104 memset (charsets, 0, NUM_LEADING_BYTES); | 3111 memset (charsets, 0, NUM_LEADING_BYTES); |
3112 | |
3113 /* #### SJT doesn't like this. */ | |
3114 if (len == 0) | |
3115 { | |
3116 charsets[XCHARSET_LEADING_BYTE (Vcharset_ascii) - 128] = 1; | |
3117 return; | |
3118 } | |
3119 | |
3105 for (i = 0; i < len; i++) | 3120 for (i = 0; i < len; i++) |
3106 { | 3121 { |
3107 charsets[CHAR_LEADING_BYTE (str[i]) - 128] = 1; | 3122 charsets[CHAR_LEADING_BYTE (str[i]) - 128] = 1; |
3108 } | 3123 } |
3109 #endif | 3124 #endif |
3110 } | 3125 } |
3111 | 3126 |
3112 int | 3127 int |
3113 bufbyte_string_displayed_columns (CONST Bufbyte *str, Bytecount len) | 3128 bufbyte_string_displayed_columns (const Bufbyte *str, Bytecount len) |
3114 { | 3129 { |
3115 int cols = 0; | 3130 int cols = 0; |
3116 CONST Bufbyte *end = str + len; | 3131 const Bufbyte *end = str + len; |
3117 | 3132 |
3118 while (str < end) | 3133 while (str < end) |
3119 { | 3134 { |
3120 #ifdef MULE | 3135 #ifdef MULE |
3121 Emchar ch = charptr_emchar (str); | 3136 Emchar ch = charptr_emchar (str); |
3128 | 3143 |
3129 return cols; | 3144 return cols; |
3130 } | 3145 } |
3131 | 3146 |
3132 int | 3147 int |
3133 emchar_string_displayed_columns (CONST Emchar *str, Charcount len) | 3148 emchar_string_displayed_columns (const Emchar *str, Charcount len) |
3134 { | 3149 { |
3135 #ifdef MULE | 3150 #ifdef MULE |
3136 int cols = 0; | 3151 int cols = 0; |
3137 int i; | 3152 int i; |
3138 | 3153 |
3146 } | 3161 } |
3147 | 3162 |
3148 /* NOTE: Does not reset the Dynarr. */ | 3163 /* NOTE: Does not reset the Dynarr. */ |
3149 | 3164 |
3150 void | 3165 void |
3151 convert_bufbyte_string_into_emchar_dynarr (CONST Bufbyte *str, Bytecount len, | 3166 convert_bufbyte_string_into_emchar_dynarr (const Bufbyte *str, Bytecount len, |
3152 Emchar_dynarr *dyn) | 3167 Emchar_dynarr *dyn) |
3153 { | 3168 { |
3154 CONST Bufbyte *strend = str + len; | 3169 const Bufbyte *strend = str + len; |
3155 | 3170 |
3156 while (str < strend) | 3171 while (str < strend) |
3157 { | 3172 { |
3158 Emchar ch = charptr_emchar (str); | 3173 Emchar ch = charptr_emchar (str); |
3159 Dynarr_add (dyn, ch); | 3174 Dynarr_add (dyn, ch); |
3160 INC_CHARPTR (str); | 3175 INC_CHARPTR (str); |
3161 } | 3176 } |
3162 } | 3177 } |
3163 | 3178 |
3164 Charcount | 3179 Charcount |
3165 convert_bufbyte_string_into_emchar_string (CONST Bufbyte *str, Bytecount len, | 3180 convert_bufbyte_string_into_emchar_string (const Bufbyte *str, Bytecount len, |
3166 Emchar *arr) | 3181 Emchar *arr) |
3167 { | 3182 { |
3168 CONST Bufbyte *strend = str + len; | 3183 const Bufbyte *strend = str + len; |
3169 Charcount newlen = 0; | 3184 Charcount newlen = 0; |
3170 while (str < strend) | 3185 while (str < strend) |
3171 { | 3186 { |
3172 Emchar ch = charptr_emchar (str); | 3187 Emchar ch = charptr_emchar (str); |
3173 arr[newlen++] = ch; | 3188 arr[newlen++] = ch; |