# HG changeset patch # User Aidan Kehoe # Date 1390484980 0 # Node ID 7343a186a4757d2053e550dcb9299fd446970487 # Parent 0cb4f494a54809a054e242ee16c00c0e9c34d39d Correct some partial character accounting, buffered_bytecount_to_charcount(). src/ChangeLog addition: 2014-01-23 Aidan Kehoe * lstream.c (Lstream_read_1): Don't include the unread partial character in unget_character_count, since our consumers will never be aware of it. * text.c: * text.c (buffered_bytecount_to_charcount): A buffer consisting entirely of a partial character needs to be treated as a partial last character, not a partial first character, to avoid double-counting. diff -r 0cb4f494a548 -r 7343a186a475 src/ChangeLog --- a/src/ChangeLog Wed Jan 22 17:52:00 2014 +0000 +++ b/src/ChangeLog Thu Jan 23 13:49:40 2014 +0000 @@ -1,3 +1,14 @@ +2014-01-23 Aidan Kehoe + + * lstream.c (Lstream_read_1): + Don't include the unread partial character in + unget_character_count, since our consumers will never be aware of it. + * text.c: + * text.c (buffered_bytecount_to_charcount): + A buffer consisting entirely of a partial character needs to be + treated as a partial last character, not a partial first + character, to avoid double-counting. + 2014-01-22 Aidan Kehoe * text.c (buffered_bytecount_to_charcount): New. diff -r 0cb4f494a548 -r 7343a186a475 src/lstream.c --- a/src/lstream.c Wed Jan 22 17:52:00 2014 +0000 +++ b/src/lstream.c Thu Jan 23 13:49:40 2014 +0000 @@ -721,8 +721,15 @@ Bytecount newoff = validate_ibyte_string_backward (p, off); if (newoff < off) { + Charcount before = lstr->unget_character_count; Lstream_unread (lstr, p + newoff, off - newoff); off = newoff; + + /* Since it's Lstream_read rather than our consumers unreading the + incomplete character (conceptually, not affecting the number of + characters ever Lstream_read() from the stream), + unget_character_count shouldn't include it. */ + lstr->unget_character_count = before; } } diff -r 0cb4f494a548 -r 7343a186a475 src/text.c --- a/src/text.c Wed Jan 22 17:52:00 2014 +0000 +++ b/src/text.c Thu Jan 23 13:49:40 2014 +0000 @@ -2244,9 +2244,9 @@ /* Return the character count of an lstream or coding buffer of internal-format text, counting partial characters at the beginning of the buffer as whole characters, and *not* counting partial characters at the - end of the buffer. This is because the result of this function is - subtracted from the character count given by the coding system character - tell methods, which include the former but not the latter. */ + end of the buffer. The result of this function is subtracted from the + character count given by the coding system character tell methods, and we + need to treat each buffer in the same way to avoid double-counting. */ Charcount buffered_bytecount_to_charcount (const Ibyte *bufptr, Bytecount len) @@ -2258,10 +2258,10 @@ { if (rep_bytes_by_first_byte (*bufptr) > len) { - /* This is a partial first character, include it. Return - immediately so validate_ibyte_string_backward doesn't run off - the beginning of the string. */ - return (Charcount) 1; + /* This is a partial last character. Return 0, avoid treating it + as a partial first character, since that would lead to it being + counted twice. */ + return (Charcount) 0; } } else