comparison src/fileio.c @ 5776:65d65b52d608

Pass character count from coding systems to buffer insertion code. src/ChangeLog addition: 2014-01-16 Aidan Kehoe <kehoea@parhasard.net> Pass character count information from the no-conversion and unicode coding systems to the buffer insertion code, making #'find-file on large buffers a little snappier (if ERROR_CHECK_TEXT is not defined). * file-coding.c: * file-coding.c (coding_character_tell): New. * file-coding.c (conversion_coding_stream_description): New. * file-coding.c (no_conversion_convert): Update characters_seen when decoding. * file-coding.c (no_conversion_character_tell): New. * file-coding.c (lstream_type_create_file_coding): Create the no_conversion type with data. * file-coding.c (coding_system_type_create): Make the character_tell method available here. * file-coding.h: * file-coding.h (struct coding_system_methods): Add a new character_tell() method, passing charcount information from the coding systems to the buffer code, avoiding duplicate bytecount-to-charcount work especially with large buffers. * fileio.c (Finsert_file_contents_internal): Update this to pass charcount information to buffer_insert_string_1(), if that is available from the lstream code. * insdel.c: * insdel.c (buffer_insert_string_1): Add a new CCLEN argument, giving the character count of the string to insert. It can be -1 to indicate that the function should work it out itself using bytecount_to_charcount(), as it used to. * insdel.c (buffer_insert_raw_string_1): * insdel.c (buffer_insert_lisp_string_1): * insdel.c (buffer_insert_ascstring_1): * insdel.c (buffer_insert_emacs_char_1): * insdel.c (buffer_insert_from_buffer_1): * insdel.c (buffer_replace_char): Update these functions to use the new calling convention. * insdel.h: * insdel.h (buffer_insert_string): Update this header to reflect the new buffer_insert_string_1() argument. * lstream.c (Lstream_character_tell): New. 
Return the number of characters *read* and seen by the consumer so far, taking into account the unget buffer, and buffered reading. * lstream.c (Lstream_unread): Update unget_character_count here as appropriate. * lstream.c (Lstream_rewind): Reset unget_character_count here too. * lstream.h: * lstream.h (struct lstream): Provide the character_tell method, add a new field, unget_character_count, giving the number of characters ever passed to Lstream_unread(). Declare Lstream_character_tell(). Make Lstream_ungetc(), which happens to be unused, an inline function rather than a macro, in the course of updating it to modify unget_character_count. * print.c (output_string): Use the new argument to buffer_insert_string_1(). * tests.c: * tests.c (Ftest_character_tell): New test function. * tests.c (syms_of_tests): Make it available. * unicode.c: * unicode.c (struct unicode_coding_stream): * unicode.c (unicode_character_tell): New method. * unicode.c (unicode_convert): Update the character counter as appropriate. * unicode.c (coding_system_type_create_unicode): Make the character_tell method available.
author Aidan Kehoe <kehoea@parhasard.net>
date Thu, 16 Jan 2014 16:27:52 +0000
parents 56144c8593a8
children 319e18d08654
comparison
equal deleted inserted replaced
5775:4004c3266c09 5776:65d65b52d608
3178 { 3178 {
3179 Charbpos cur_point = BUF_PT (buf); 3179 Charbpos cur_point = BUF_PT (buf);
3180 struct gcpro ngcpro1; 3180 struct gcpro ngcpro1;
3181 Lisp_Object stream = make_filedesc_input_stream (fd, 0, total, 3181 Lisp_Object stream = make_filedesc_input_stream (fd, 0, total,
3182 LSTR_ALLOW_QUIT); 3182 LSTR_ALLOW_QUIT);
3183 Charcount last_tell = -1;
3183 3184
3184 NGCPRO1 (stream); 3185 NGCPRO1 (stream);
3185 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536); 3186 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536);
3186 stream = make_coding_input_stream 3187 stream = make_coding_input_stream
3187 (XLSTREAM (stream), get_coding_system_for_text_file (codesys, 1), 3188 (XLSTREAM (stream), get_coding_system_for_text_file (codesys, 1),
3188 CODING_DECODE, 0); 3189 CODING_DECODE, 0);
3189 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536); 3190 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536);
3191 last_tell = Lstream_character_tell (XLSTREAM (stream));
3190 3192
3191 record_unwind_protect (delete_stream_unwind, stream); 3193 record_unwind_protect (delete_stream_unwind, stream);
3192 3194
3193 /* No need to limit the amount of stuff we attempt to read. (It would 3195 /* No need to limit the amount of stuff we attempt to read. (It would
3194 be incorrect, anyway, when Mule is enabled.) Instead, the limiting 3196 be incorrect, anyway, when Mule is enabled.) Instead, the limiting
3195 occurs inside of the filedesc stream. */ 3197 occurs inside of the filedesc stream. */
3196 while (1) 3198 while (1)
3197 { 3199 {
3198 Bytecount this_len; 3200 Bytecount this_len;
3199 Charcount cc_inserted; 3201 Charcount cc_inserted, this_tell = last_tell;
3200 3202
3201 QUIT; 3203 QUIT;
3202 this_len = Lstream_read (XLSTREAM (stream), read_buf, 3204 this_len = Lstream_read (XLSTREAM (stream), read_buf,
3203 sizeof (read_buf)); 3205 sizeof (read_buf));
3204 3206
3207 if (this_len < 0) 3209 if (this_len < 0)
3208 saverrno = errno; 3210 saverrno = errno;
3209 break; 3211 break;
3210 } 3212 }
3211 3213
3212 cc_inserted = buffer_insert_raw_string_1 (buf, cur_point, read_buf, 3214 cc_inserted
3213 this_len, 3215 = buffer_insert_string_1 (buf, cur_point, read_buf, Qnil,
3214 !NILP (visit) 3216 0, this_len, last_tell >= 0
3215 ? INSDEL_NO_LOCKING : 0); 3217 ? (this_tell
3218 = Lstream_character_tell (XLSTREAM
3219 (stream)))
3220 - last_tell : -1,
3221 !NILP (visit) ? INSDEL_NO_LOCKING : 0);
3216 inserted += cc_inserted; 3222 inserted += cc_inserted;
3217 cur_point += cc_inserted; 3223 cur_point += cc_inserted;
3224 last_tell = this_tell;
3218 } 3225 }
3219 if (!NILP (used_codesys)) 3226 if (!NILP (used_codesys))
3220 { 3227 {
3221 Fset (used_codesys, 3228 Fset (used_codesys,
3222 XCODING_SYSTEM_NAME 3229 XCODING_SYSTEM_NAME