changeset 5784:0cb4f494a548

Have the result of coding_character_tell() reflect str->convert_to, too. src/ChangeLog addition: 2014-01-22 Aidan Kehoe <kehoea@parhasard.net> * text.c (buffered_bytecount_to_charcount): New. Return the number of characters in a coding or lstream buffer, including partial characters at the beginning of the buffer, but not including those at the end. Refactored from Lstream_character_tell(). * text.h: * text.h (buffered_bytecount_to_charcount): Declare it. * lstream.c (Lstream_character_tell): Use the refactored buffered_bytecount_to_charcount () here, both for the unget buffer and in_buffer. * file-coding.c (coding_character_tell): Check the character count of the lstream buffer too, when passing back the character count from the coding stream.
author Aidan Kehoe <kehoea@parhasard.net>
date Wed, 22 Jan 2014 17:52:00 +0000
parents cfc6a8c144f1
children 7343a186a475
files src/ChangeLog src/file-coding.c src/lstream.c src/text.c src/text.h
diffstat 5 files changed, 100 insertions(+), 84 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Tue Jan 21 00:27:16 2014 +0000
+++ b/src/ChangeLog	Wed Jan 22 17:52:00 2014 +0000
@@ -1,3 +1,19 @@
+2014-01-22  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* text.c (buffered_bytecount_to_charcount): New.
+	Return the number of characters in a coding or lstream buffer,
+	including partial characters at the beginning of the buffer, but
+	not including those at the end. Refactored from
+	Lstream_character_tell().
+	* text.h:
+	* text.h (buffered_bytecount_to_charcount): Declare it.
+	* lstream.c (Lstream_character_tell):
+	Use the refactored buffered_bytecount_to_charcount () here, both
+	for the unget buffer and in_buffer.
+	* file-coding.c (coding_character_tell):
+	Check the character count of the lstream buffer too, when passing
+	back the character count from the coding stream.
+
 2014-01-21  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* redisplay.c (start_with_line_at_pixpos):
--- a/src/file-coding.c	Tue Jan 21 00:27:16 2014 +0000
+++ b/src/file-coding.c	Wed Jan 22 17:52:00 2014 +0000
@@ -1994,8 +1994,19 @@
 coding_character_tell (Lstream *stream)
 {
   struct coding_stream *str = CODING_STREAM_DATA (stream);
-
-  return XCODESYSMETH_OR_GIVEN (str->codesys, character_tell, (str), -1);
+  Charcount ctell
+    = XCODESYSMETH_OR_GIVEN (str->codesys, character_tell, (str), -1);
+  
+  if (ctell > 0 && Dynarr_length (str->convert_to) > 0)
+    {
+      ctell
+        -= buffered_bytecount_to_charcount ((const Ibyte *)
+                                            (Dynarr_begin (str->convert_to)),
+                                            Dynarr_length (str->convert_to));
+      text_checking_assert (ctell >= 0);
+    }
+
+  return ctell;
 }
 
 static int
--- a/src/lstream.c	Tue Jan 21 00:27:16 2014 +0000
+++ b/src/lstream.c	Wed Jan 22 17:52:00 2014 +0000
@@ -752,15 +752,10 @@
           /* The character count should not include those characters
              currently *in* the unget buffer, subtract that count.  */
           Ibyte *ungot, *ungot_ptr;
-          Bytecount ii = lstr->unget_buffer_ind, impartial, sevenflen;
+          Bytecount ii = lstr->unget_buffer_ind;
 
           ungot_ptr = ungot
-            = alloca_ibytes (lstr->unget_buffer_ind) + MAX_ICHAR_LEN;
-
-          /* Make sure the string starts with a valid ibyteptr, otherwise
-             validate_ibyte_string_backward could run off the beginning. */
-          sevenflen = set_itext_ichar (ungot, (Ichar) 0x7f);
-          ungot_ptr += sevenflen;
+            = alloca_ibytes (lstr->unget_buffer_ind);
 
           /* Internal format data, but in reverse order. There's not
              actually a need to alloca here, we could work out the character
@@ -772,90 +767,23 @@
               *ungot_ptr++ = lstr->unget_buffer[--ii];
             }
 
-          impartial
-            = validate_ibyte_string_backward (ungot, ungot_ptr - ungot);
-
-          /* Move past the character we added. */
-          impartial -= sevenflen;
-          INC_IBYTEPTR (ungot);
-
-          if (impartial > 0 && !valid_ibyteptr_p (ungot))
-            {
-              Ibyte *newstart = ungot, *limit = ungot + impartial;
-              /* Our consumer has the start of a partial character, we
-                 have the rest. */
-
-              while (!valid_ibyteptr_p (newstart) && newstart < limit)
-                {
-                  newstart++, impartial--;
-                }
-                  
-              /* Remove this character from the count, since the
-                 end-consumer hasn't seen the full character. */
-              ctell--;
-              ungot = newstart;
-            }
-          else if (valid_ibyteptr_p (ungot)
-                   && rep_bytes_by_first_byte (*ungot) > impartial)
-            {
-              /* Rest of a partial character has yet to be read, its first
-                 octet has probably been unread by Lstream_read_1(). We
-                 included it in the accounting in Lstream_unread(), adjust
-                 the figure here appropriately. */
-              ctell--;
-            }
-
-          /* bytecount_to_charcount will throw an assertion failure if we're
-             not at the start of a character. */
-          text_checking_assert (impartial == 0 || valid_ibyteptr_p (ungot));
-
           /* The character length of this text is included in
              unget_character_count; if the bytes are still in the unget
              buffer, then our consumers haven't seen them, and so the
              character tell figure shouldn't reflect them. Subtract it from
              the total.  */
-          ctell -= bytecount_to_charcount (ungot, impartial);
+          ctell
+            -= buffered_bytecount_to_charcount (ungot, ungot_ptr - ungot);
         }
 
       if (lstr->in_buffer_ind < lstr->in_buffer_current)
         {
-          Ibyte *inbuf = lstr->in_buffer + lstr->in_buffer_ind;
-          Bytecount partial = lstr->in_buffer_current - lstr->in_buffer_ind,
-            impartial;
-
-          if (!valid_ibyteptr_p (inbuf))
-            {
-              Ibyte *newstart = inbuf;
-              Ibyte *limit = lstr->in_buffer + lstr->in_buffer_current;
-              /* Our consumer has the start of a partial character, we
-                 have the rest. */
-
-              while (newstart < limit && !valid_ibyteptr_p (newstart))
-                {
-                  newstart++;
-                }
-                  
-              /* Remove this character from the count, since the
-                 end-consumer hasn't seen the full character. */
-              ctell--;
-              inbuf = newstart;
-              partial = limit - newstart;
-            }
-
-          if (valid_ibyteptr_p (inbuf)) 
-            {
-              /* There's at least one valid starting char in the string,
-                 validate_ibyte_string_backward won't run off the
-                 begining. */
-              impartial = 
-                validate_ibyte_string_backward (inbuf, partial);
-            }
-          else
-            {
-              impartial = 0;
-            }
-
-          ctell -= bytecount_to_charcount (inbuf, impartial);
+          ctell
+            -= buffered_bytecount_to_charcount ((const Ibyte *)
+                                                (lstr->in_buffer
+                                                 + lstr->in_buffer_ind),
+                                                lstr->in_buffer_current
+                                                - lstr->in_buffer_ind);
         }
 
       text_checking_assert (ctell >= 0);
--- a/src/text.c	Tue Jan 21 00:27:16 2014 +0000
+++ b/src/text.c	Wed Jan 22 17:52:00 2014 +0000
@@ -2241,6 +2241,60 @@
   return count;
 }
 
+/* Return the character count of the LEN bytes of internal-format text at
+   BUFPTR (an lstream or coding buffer), counting a partial character at the
+   beginning as whole and *not* counting one at the end; the result is
+   subtracted from the count given by the coding system character tell
+   methods, which include the former but not the latter. Zero if LEN <= 0. */
+
+Charcount
+buffered_bytecount_to_charcount (const Ibyte *bufptr, Bytecount len)
+{
+  Boolint partial_first = 0;
+  Bytecount impartial = 0;
+
+  if (len <= 0)
+    {
+      return (Charcount) 0; /* Nothing buffered; don't read *bufptr. */
+    }
+
+  if (valid_ibyteptr_p (bufptr))
+    {
+      if (rep_bytes_by_first_byte (*bufptr) > len)
+        {
+          /* This is a partial first character, include it. Return
+             immediately so validate_ibyte_string_backward doesn't run off
+             the beginning of the string. */
+          return (Charcount) 1;
+        }
+    }
+  else
+    {
+      const Ibyte *newstart = bufptr, *limit = newstart + len;
+
+      /* Our consumer has the start of a partial character, we have the
+         rest. */
+      while (newstart < limit && !valid_ibyteptr_p (newstart))
+        {
+          newstart++;
+        }
+
+      partial_first = 1;
+      bufptr = newstart;
+      len = limit - newstart;
+    }
+
+  if (len && valid_ibyteptr_p (bufptr))
+    {
+      /* There's at least one valid starting char in the string,
+         validate_ibyte_string_backward won't run off the beginning. */
+      impartial = validate_ibyte_string_backward (bufptr, len);
+    }
+
+  return (Charcount) partial_first + bytecount_to_charcount (bufptr,
+                                                             impartial);
+}
+
 Bytecount
 charcount_to_bytecount_fun (const Ibyte *ptr, Charcount len)
 {
--- a/src/text.h	Tue Jan 21 00:27:16 2014 +0000
+++ b/src/text.h	Wed Jan 22 17:52:00 2014 +0000
@@ -908,6 +908,12 @@
   return ptr;
 }
 
+/* Return the character count of an lstream or coding buffer of internal
+   format text, counting partial characters at the beginning of the buffer
+   as whole characters, and *not* counting partial characters at the end of
+   the buffer. */
+Charcount buffered_bytecount_to_charcount (const Ibyte *, Bytecount len);
+
 #else
 
 #define bytecount_to_charcount(ptr, len) ((Charcount) (len))
@@ -916,6 +922,7 @@
 #define charcount_to_bytecount_fmt(ptr, len, fmt) ((Bytecount) (len))
 #define skip_ascii(ptr, end) end
 #define skip_ascii_down(ptr, end) end
+#define buffered_bytecount_to_charcount(ptr, len) ((Charcount) (len))
 
 #endif /* MULE */