changeset 5785:7343a186a475

Correct some partial character accounting, buffered_bytecount_to_charcount(). src/ChangeLog addition: 2014-01-23 Aidan Kehoe <kehoea@parhasard.net> * lstream.c (Lstream_read_1): Don't include the unread partial character in unget_character_count, since our consumers will never be aware of it. * text.c: * text.c (buffered_bytecount_to_charcount): A buffer consisting entirely of a partial character needs to be treated as a partial last character, not a partial first character, to avoid double-counting.
author Aidan Kehoe <kehoea@parhasard.net>
date Thu, 23 Jan 2014 13:49:40 +0000
parents 0cb4f494a548
children 6355de501637
files src/ChangeLog src/lstream.c src/text.c
diffstat 3 files changed, 25 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Wed Jan 22 17:52:00 2014 +0000
+++ b/src/ChangeLog	Thu Jan 23 13:49:40 2014 +0000
@@ -1,3 +1,14 @@
+2014-01-23  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* lstream.c (Lstream_read_1):
+	Don't include the unread partial character in
+	unget_character_count, since our consumers will never be aware of it.
+	* text.c:
+	* text.c (buffered_bytecount_to_charcount):
+	A buffer consisting entirely of a partial character needs to be
+	treated as a partial last character, not a partial first
+	character, to avoid double-counting.
+
 2014-01-22  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* text.c (buffered_bytecount_to_charcount): New.
--- a/src/lstream.c	Wed Jan 22 17:52:00 2014 +0000
+++ b/src/lstream.c	Thu Jan 23 13:49:40 2014 +0000
@@ -721,8 +721,15 @@
       Bytecount newoff = validate_ibyte_string_backward (p, off);
       if (newoff < off)
 	{
+          Charcount before = lstr->unget_character_count;
 	  Lstream_unread (lstr, p + newoff, off - newoff);
 	  off = newoff;
+
+          /* This unread is done by Lstream_read itself, not by a consumer,
+             so conceptually it does not change how many characters have
+             been read from the stream; restore unget_character_count so
+             that it does not include the incomplete character. */
+          lstr->unget_character_count = before;
 	}
     }
 
--- a/src/text.c	Wed Jan 22 17:52:00 2014 +0000
+++ b/src/text.c	Thu Jan 23 13:49:40 2014 +0000
@@ -2244,9 +2244,9 @@
 /* Return the character count of an lstream or coding buffer of
    internal-format text, counting partial characters at the beginning of the
    buffer as whole characters, and *not* counting partial characters at the
-   end of the buffer. This is because the result of this function is
-   subtracted from the character count given by the coding system character
-   tell methods, which include the former but not the latter. */
+   end of the buffer. The result of this function is subtracted from the
+   character count given by the coding system character tell methods, and we
+   need to treat each buffer in the same way to avoid double-counting. */
 
 Charcount
 buffered_bytecount_to_charcount (const Ibyte *bufptr, Bytecount len)
@@ -2258,10 +2258,10 @@
     {
       if (rep_bytes_by_first_byte (*bufptr) > len)
         {
-          /* This is a partial first character, include it. Return
-             immediately so validate_ibyte_string_backward doesn't run off
-             the beginning of the string. */
-          return (Charcount) 1;
+          /* The whole buffer is a single partial character. Treat it as a
+             partial last character and return 0; treating it as a partial
+             first character would lead to it being counted twice. */
+          return (Charcount) 0;
         }
     }
   else