diff src/file-coding.c @ 5776:65d65b52d608

Pass character count from coding systems to buffer insertion code. src/ChangeLog addition: 2014-01-16 Aidan Kehoe <kehoea@parhasard.net> Pass character count information from the no-conversion and unicode coding systems to the buffer insertion code, making #'find-file on large buffers a little snappier (if ERROR_CHECK_TEXT is not defined). * file-coding.c: * file-coding.c (coding_character_tell): New. * file-coding.c (no_conversion_coding_stream_description): New. * file-coding.c (no_conversion_convert): Update characters_seen when decoding. * file-coding.c (no_conversion_character_tell): New. * file-coding.c (lstream_type_create_file_coding): Create the no_conversion type with data. * file-coding.c (coding_system_type_create): Make the character_tell method available here. * file-coding.h: * file-coding.h (struct coding_system_methods): Add a new character_tell() method, passing charcount information from the coding systems to the buffer code, avoiding duplicate bytecount-to-charcount work especially with large buffers. * fileio.c (Finsert_file_contents_internal): Update this to pass charcount information to buffer_insert_string_1(), if that is available from the lstream code. * insdel.c: * insdel.c (buffer_insert_string_1): Add a new CCLEN argument, giving the character count of the string to insert. It can be -1 to indicate that the function should work it out itself using bytecount_to_charcount(), as it used to. * insdel.c (buffer_insert_raw_string_1): * insdel.c (buffer_insert_lisp_string_1): * insdel.c (buffer_insert_ascstring_1): * insdel.c (buffer_insert_emacs_char_1): * insdel.c (buffer_insert_from_buffer_1): * insdel.c (buffer_replace_char): Update these functions to use the new calling convention. * insdel.h: * insdel.h (buffer_insert_string): Update this header to reflect the new buffer_insert_string_1() argument. * lstream.c (Lstream_character_tell): New. 
Return the number of characters *read* and seen by the consumer so far, taking into account the unget buffer, and buffered reading. * lstream.c (Lstream_unread): Update unget_character_count here as appropriate. * lstream.c (Lstream_rewind): Reset unget_character_count here too. * lstream.h: * lstream.h (struct lstream): Provide the character_tell method, add a new field, unget_character_count, giving the number of characters ever passed to Lstream_unread(). Declare Lstream_character_tell(). Make Lstream_ungetc(), which happens to be unused, an inline function rather than a macro, in the course of updating it to modify unget_character_count. * print.c (output_string): Use the new argument to buffer_insert_string_1(). * tests.c: * tests.c (Ftest_character_tell): New test function. * tests.c (syms_of_tests): Make it available. * unicode.c: * unicode.c (struct unicode_coding_stream): * unicode.c (unicode_character_tell): New method. * unicode.c (unicode_convert): Update the character counter as appropriate. * unicode.c (coding_system_type_create_unicode): Make the character_tell method available.
author Aidan Kehoe <kehoea@parhasard.net>
date Thu, 16 Jan 2014 16:27:52 +0000
parents 7a538e1a4676
children ccaa851ae712
line wrap: on
line diff
--- a/src/file-coding.c	Sun Dec 22 10:36:33 2013 +0000
+++ b/src/file-coding.c	Thu Jan 16 16:27:52 2014 +0000
@@ -1990,6 +1990,14 @@
   return Lstream_seekable_p (str->other_end);
 }
 
+static Charcount
+coding_character_tell (Lstream *stream)
+{ /* character_tell method of the coding lstream; STREAM is the coding stream being queried. */
+  struct coding_stream *str = CODING_STREAM_DATA (stream);
+  /* Ask the stream's coding system; -1 is the supplied fallback (presumably used when it has no character_tell method -- confirm against XCODESYSMETH_OR_GIVEN). */
+  return XCODESYSMETH_OR_GIVEN (str->codesys, character_tell, (str), -1);
+}
+
 static int
 coding_flusher (Lstream *stream)
 {
@@ -2823,7 +2831,32 @@
 
    #### Shouldn't we _call_ it that, then?  And while we're at it,
    separate it into "to_internal" and "to_external"? */
-DEFINE_CODING_SYSTEM_TYPE (no_conversion);
+/* Per-coding-system data for the no-conversion type; it has no members.
+   NOTE(review): a memberless struct is a GNU C extension, not ISO C -- confirm every supported compiler accepts it. */
+struct no_conversion_coding_system
+{
+};
+/* Per-stream data for the no-conversion type, reached via CODING_STREAM_TYPE_DATA (str, no_conversion). */
+struct no_conversion_coding_stream
+{
+  /* Number of characters seen when decoding. */
+  Charcount characters_seen;
+};
+/* Memory description for struct no_conversion_coding_system: no fields are described. */
+static const struct memory_description no_conversion_coding_system_description[] = {
+  { XD_END }
+};
+/* Field list for struct no_conversion_coding_stream: the single integer field characters_seen. */
+static const struct memory_description no_conversion_coding_stream_description_1 [] = {
+  { XD_INT, offsetof (struct no_conversion_coding_stream, characters_seen) },
+  { XD_END }
+};
+/* Pairs the size of the stream data with its field list. */
+const struct sized_memory_description no_conversion_coding_stream_description = {
+  sizeof (struct no_conversion_coding_stream), no_conversion_coding_stream_description_1
+};
+/* Define the type with data; presumably picks up the structs and descriptions above by name -- confirm in file-coding.h. */
+DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (no_conversion);
 
 /* This is used when reading in "binary" files -- i.e. files that may
    contain all 256 possible byte values and that are not to be
@@ -2846,6 +2879,9 @@
 	  DECODE_ADD_BINARY_CHAR (c, dst);
 	}
 
+      CODING_STREAM_TYPE_DATA (str, no_conversion)->characters_seen
+        += orign;
+
       if (str->eof)
 	DECODE_OUTPUT_PARTIAL_CHAR (ch, dst);
     }
@@ -2904,6 +2940,12 @@
   return orign;
 }
 
+static Charcount
+no_conversion_character_tell (struct coding_stream *str)
+{ /* character_tell method for no-conversion: return the characters_seen count kept in STR's stream data. */
+  return CODING_STREAM_TYPE_DATA (str, no_conversion)->characters_seen;
+}
+
 DEFINE_DETECTOR (no_conversion);
 DEFINE_DETECTOR_CATEGORY (no_conversion, no_conversion);
 
@@ -4656,6 +4698,7 @@
   LSTREAM_HAS_METHOD (coding, writer);
   LSTREAM_HAS_METHOD (coding, rewinder);
   LSTREAM_HAS_METHOD (coding, seekable_p);
+  LSTREAM_HAS_METHOD (coding, character_tell);
   LSTREAM_HAS_METHOD (coding, marker);
   LSTREAM_HAS_METHOD (coding, flusher);
   LSTREAM_HAS_METHOD (coding, closer);
@@ -4697,9 +4740,10 @@
   dump_add_opaque_int (&coding_detector_count);
   dump_add_opaque_int (&coding_detector_category_count);
 
-  INITIALIZE_CODING_SYSTEM_TYPE (no_conversion,
-				 "no-conversion-coding-system-p");
+  INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (no_conversion,
+                                           "no-conversion-coding-system-p");
   CODING_SYSTEM_HAS_METHOD (no_conversion, convert);
+  CODING_SYSTEM_HAS_METHOD (no_conversion, character_tell);
 
   INITIALIZE_DETECTOR (no_conversion);
   DETECTOR_HAS_METHOD (no_conversion, detect);