diff src/lstream.h @ 5776:65d65b52d608

Pass character count from coding systems to buffer insertion code. src/ChangeLog addition: 2014-01-16 Aidan Kehoe <kehoea@parhasard.net> Pass character count information from the no-conversion and unicode coding systems to the buffer insertion code, making #'find-file on large buffers a little snappier (if ERROR_CHECK_TEXT is not defined). * file-coding.c: * file-coding.c (coding_character_tell): New. * file-coding.c (conversion_coding_stream_description): New. * file-coding.c (no_conversion_convert): Update characters_seen when decoding. * file-coding.c (no_conversion_character_tell): New. * file-coding.c (lstream_type_create_file_coding): Create the no_conversion type with data. * file-coding.c (coding_system_type_create): Make the character_tell method available here. * file-coding.h: * file-coding.h (struct coding_system_methods): Add a new character_tell() method, passing charcount information from the coding systems to the buffer code, avoiding duplicate bytecount-to-charcount work especially with large buffers. * fileio.c (Finsert_file_contents_internal): Update this to pass charcount information to buffer_insert_string_1(), if that is available from the lstream code. * insdel.c: * insdel.c (buffer_insert_string_1): Add a new CCLEN argument, giving the character count of the string to insert. It can be -1 to indicate that the function should work it out itself using bytecount_to_charcount(), as it used to. * insdel.c (buffer_insert_raw_string_1): * insdel.c (buffer_insert_lisp_string_1): * insdel.c (buffer_insert_ascstring_1): * insdel.c (buffer_insert_emacs_char_1): * insdel.c (buffer_insert_from_buffer_1): * insdel.c (buffer_replace_char): Update these functions to use the new calling convention. * insdel.h: * insdel.h (buffer_insert_string): Update this header to reflect the new buffer_insert_string_1() argument. * lstream.c (Lstream_character_tell): New. 
Return the number of characters *read* and seen by the consumer so far, taking into account the unget buffer, and buffered reading. * lstream.c (Lstream_unread): Update unget_character_count here as appropriate. * lstream.c (Lstream_rewind): Reset unget_character_count here too. * lstream.h: * lstream.h (struct lstream): Provide the character_tell method, add a new field, unget_character_count, giving the number of characters ever passed to Lstream_unread(). Declare Lstream_character_tell(). Make Lstream_ungetc(), which happens to be unused, an inline function rather than a macro, in the course of updating it to modify unget_character_count. * print.c (output_string): Use the new argument to buffer_insert_string_1(). * tests.c: * tests.c (Ftest_character_tell): New test function. * tests.c (syms_of_tests): Make it available. * unicode.c: * unicode.c (struct unicode_coding_stream): * unicode.c (unicode_character_tell): New method. * unicode.c (unicode_convert): Update the character counter as appropriate. * unicode.c (coding_system_type_create_unicode): Make the character_tell method available.
author Aidan Kehoe <kehoea@parhasard.net>
date Thu, 16 Jan 2014 16:27:52 +0000
parents 2dbefd79b3d3
children d2c0ff38ad5c
line wrap: on
line diff
--- a/src/lstream.h	Sun Dec 22 10:36:33 2013 +0000
+++ b/src/lstream.h	Thu Jan 16 16:27:52 2014 +0000
@@ -181,6 +181,10 @@
      method.  If this method is not present, the result is determined
      by whether a rewind method is present. */
   int (*seekable_p) (Lstream *stream);
+
+  /* Return the number of complete characters read so far. Respects
+     buffering and unget. Returns -1 if unknown or not implemented. */
+  Charcount (*character_tell) (Lstream *stream);
   /* Perform any additional operations necessary to flush the
      data in this stream. */
   int (*flusher) (Lstream *stream);
@@ -250,8 +254,9 @@
      similarly has to push the data on backwards. */
   unsigned char *unget_buffer; /* holds characters pushed back onto input */
   Bytecount unget_buffer_size; /* allocated size of buffer */
-  Bytecount unget_buffer_ind; /* pointer to next buffer spot
-					  to write a character */
+  Bytecount unget_buffer_ind; /* Next buffer spot to write a character */
+
+  Charcount unget_character_count; /* Count of complete characters ever ungot. */
 
   Bytecount byte_count;
   int flags;
@@ -297,8 +302,8 @@
 int Lstream_fputc (Lstream *lstr, int c);
 int Lstream_fgetc (Lstream *lstr);
 void Lstream_fungetc (Lstream *lstr, int c);
-Bytecount Lstream_read (Lstream *lstr, void *data,
-				 Bytecount size);
+Bytecount Lstream_read (Lstream *lstr, void *data, Bytecount size);
+Charcount Lstream_character_tell (Lstream *);
 int Lstream_write (Lstream *lstr, const void *data,
 		   Bytecount size);
 int Lstream_was_blocked_p (Lstream *lstr);
@@ -353,19 +358,28 @@
    reverse order they were pushed back -- most recent first. (This is
    necessary for consistency -- if there are a number of bytes that
    have been unread and I read and unread a byte, it needs to be the
-   first to be read again.) This is a macro and so it is very
-   efficient.  The C argument is only evaluated once but the STREAM
-   argument is evaluated more than once.
- */
+   first to be read again.) */
 
-#define Lstream_ungetc(stream, c)					\
-/* Add to the end if it won't overflow buffer; otherwise call the	\
-   function equivalent */						\
-  ((stream)->unget_buffer_ind >= (stream)->unget_buffer_size ?		\
-   Lstream_fungetc (stream, c) :					\
-   (void) ((stream)->byte_count--,					\
-   ((stream)->unget_buffer[(stream)->unget_buffer_ind++] =		\
-    (unsigned char) (c))))
+DECLARE_INLINE_HEADER (
+void
+Lstream_ungetc (Lstream *lstr, int c)
+)
+{
+  /* Push the single byte C back onto LSTR's unget buffer.
+     Add to the end if it won't overflow buffer; otherwise call the
+     function equivalent */
+  if (lstr->unget_buffer_ind >= lstr->unget_buffer_size)
+    {
+      /* No free slot left at the write index; let the out-of-line
+         version handle it. */
+      Lstream_fungetc (lstr, c);
+    }
+  else
+    {
+      /* One byte is being handed back, so the byte count goes down. */
+      lstr->byte_count--;
+      /* Only the low byte of C is stored. */
+      lstr->unget_buffer[lstr->unget_buffer_ind] = (unsigned char) (c);
+      /* Add the result of valid_ibyteptr_p() on the byte just stored.
+         NOTE(review): presumably that is 1 only for a byte that starts
+         a character, so each ungot character is counted once -- confirm
+         against the definition of valid_ibyteptr_p(). */
+      lstr->unget_character_count
+        += valid_ibyteptr_p (lstr->unget_buffer + lstr->unget_buffer_ind);
+      /* Advance to the next free slot only after the check above, which
+         reads the slot at the old index. */
+      lstr->unget_buffer_ind++;
+    }
+}
 
 #define Lstream_data(stream) ((void *) ((stream)->data))
 #define Lstream_byte_count(stream) ((stream)->byte_count)