Mercurial > hg > xemacs-beta
changeset 5784:0cb4f494a548
Have the result of coding_character_tell() reflect str->convert_to, too.
src/ChangeLog addition:
2014-01-22 Aidan Kehoe <kehoea@parhasard.net>
* text.c (buffered_bytecount_to_charcount): New.
Return the number of characters in a coding or lstream buffer,
including partial characters at the beginning of the buffer, but
not including those at the end. Refactored from
Lstream_character_tell().
* text.h:
* text.h (buffered_bytecount_to_charcount): Declare it.
* lstream.c (Lstream_character_tell):
Use the refactored buffered_bytecount_to_charcount () here, both
for the unget buffer and in_buffer.
* file-coding.c (coding_character_tell):
Check the character count of the lstream buffer too, when passing
back the character count from the coding stream.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Wed, 22 Jan 2014 17:52:00 +0000 |
parents | cfc6a8c144f1 |
children | 7343a186a475 |
files | src/ChangeLog src/file-coding.c src/lstream.c src/text.c src/text.h |
diffstat | 5 files changed, 100 insertions(+), 84 deletions(-) [+] |
line wrap: on
line diff
--- a/src/ChangeLog Tue Jan 21 00:27:16 2014 +0000 +++ b/src/ChangeLog Wed Jan 22 17:52:00 2014 +0000 @@ -1,3 +1,19 @@ +2014-01-22 Aidan Kehoe <kehoea@parhasard.net> + + * text.c (buffered_bytecount_to_charcount): New. + Return the number of characters in a coding or lstream buffer, + including partial characters at the beginning of the buffer, but + not including those at the end. Refactored from + Lstream_character_tell(). + * text.h: + * text.h (buffered_bytecount_to_charcount): Declare it. + * lstream.c (Lstream_character_tell): + Use the refactored buffered_bytecount_to_charcount () here, both + for the unget buffer and in_buffer. + * file-coding.c (coding_character_tell): + Check the character count of the lstream buffer too, when passing + back the character count from the coding stream. + 2014-01-21 Aidan Kehoe <kehoea@parhasard.net> * redisplay.c (start_with_line_at_pixpos):
--- a/src/file-coding.c Tue Jan 21 00:27:16 2014 +0000 +++ b/src/file-coding.c Wed Jan 22 17:52:00 2014 +0000 @@ -1994,8 +1994,19 @@ coding_character_tell (Lstream *stream) { struct coding_stream *str = CODING_STREAM_DATA (stream); - - return XCODESYSMETH_OR_GIVEN (str->codesys, character_tell, (str), -1); + Charcount ctell + = XCODESYSMETH_OR_GIVEN (str->codesys, character_tell, (str), -1); + + if (ctell > 0 && Dynarr_length (str->convert_to) > 0) + { + ctell + -= buffered_bytecount_to_charcount ((const Ibyte *) + (Dynarr_begin (str->convert_to)), + Dynarr_length (str->convert_to)); + text_checking_assert (ctell >= 0); + } + + return ctell; } static int
--- a/src/lstream.c Tue Jan 21 00:27:16 2014 +0000 +++ b/src/lstream.c Wed Jan 22 17:52:00 2014 +0000 @@ -752,15 +752,10 @@ /* The character count should not include those characters currently *in* the unget buffer, subtract that count. */ Ibyte *ungot, *ungot_ptr; - Bytecount ii = lstr->unget_buffer_ind, impartial, sevenflen; + Bytecount ii = lstr->unget_buffer_ind; ungot_ptr = ungot - = alloca_ibytes (lstr->unget_buffer_ind) + MAX_ICHAR_LEN; - - /* Make sure the string starts with a valid ibyteptr, otherwise - validate_ibyte_string_backward could run off the beginning. */ - sevenflen = set_itext_ichar (ungot, (Ichar) 0x7f); - ungot_ptr += sevenflen; + = alloca_ibytes (lstr->unget_buffer_ind); /* Internal format data, but in reverse order. There's not actually a need to alloca here, we could work out the character @@ -772,90 +767,23 @@ *ungot_ptr++ = lstr->unget_buffer[--ii]; } - impartial - = validate_ibyte_string_backward (ungot, ungot_ptr - ungot); - - /* Move past the character we added. */ - impartial -= sevenflen; - INC_IBYTEPTR (ungot); - - if (impartial > 0 && !valid_ibyteptr_p (ungot)) - { - Ibyte *newstart = ungot, *limit = ungot + impartial; - /* Our consumer has the start of a partial character, we - have the rest. */ - - while (!valid_ibyteptr_p (newstart) && newstart < limit) - { - newstart++, impartial--; - } - - /* Remove this character from the count, since the - end-consumer hasn't seen the full character. */ - ctell--; - ungot = newstart; - } - else if (valid_ibyteptr_p (ungot) - && rep_bytes_by_first_byte (*ungot) > impartial) - { - /* Rest of a partial character has yet to be read, its first - octet has probably been unread by Lstream_read_1(). We - included it in the accounting in Lstream_unread(), adjust - the figure here appropriately. */ - ctell--; - } - - /* bytecount_to_charcount will throw an assertion failure if we're - not at the start of a character. 
*/ - text_checking_assert (impartial == 0 || valid_ibyteptr_p (ungot)); - /* The character length of this text is included in unget_character_count; if the bytes are still in the unget buffer, then our consumers haven't seen them, and so the character tell figure shouldn't reflect them. Subtract it from the total. */ - ctell -= bytecount_to_charcount (ungot, impartial); + ctell + -= buffered_bytecount_to_charcount (ungot, ungot_ptr - ungot); } if (lstr->in_buffer_ind < lstr->in_buffer_current) { - Ibyte *inbuf = lstr->in_buffer + lstr->in_buffer_ind; - Bytecount partial = lstr->in_buffer_current - lstr->in_buffer_ind, - impartial; - - if (!valid_ibyteptr_p (inbuf)) - { - Ibyte *newstart = inbuf; - Ibyte *limit = lstr->in_buffer + lstr->in_buffer_current; - /* Our consumer has the start of a partial character, we - have the rest. */ - - while (newstart < limit && !valid_ibyteptr_p (newstart)) - { - newstart++; - } - - /* Remove this character from the count, since the - end-consumer hasn't seen the full character. */ - ctell--; - inbuf = newstart; - partial = limit - newstart; - } - - if (valid_ibyteptr_p (inbuf)) - { - /* There's at least one valid starting char in the string, - validate_ibyte_string_backward won't run off the - begining. */ - impartial = - validate_ibyte_string_backward (inbuf, partial); - } - else - { - impartial = 0; - } - - ctell -= bytecount_to_charcount (inbuf, impartial); + ctell + -= buffered_bytecount_to_charcount ((const Ibyte *) + (lstr->in_buffer + + lstr->in_buffer_ind), + lstr->in_buffer_current + - lstr->in_buffer_ind); } text_checking_assert (ctell >= 0);
--- a/src/text.c Tue Jan 21 00:27:16 2014 +0000 +++ b/src/text.c Wed Jan 22 17:52:00 2014 +0000 @@ -2241,6 +2241,60 @@ return count; } +/* Return the character count of an lstream or coding buffer of + internal-format text, counting partial characters at the beginning of the + buffer as whole characters, and *not* counting partial characters at the + end of the buffer. This is because the result of this function is + subtracted from the character count given by the coding system character + tell methods, which include the former but not the latter. */ + +Charcount +buffered_bytecount_to_charcount (const Ibyte *bufptr, Bytecount len) +{ + Boolint partial_first = 0; + Bytecount impartial; + + if (valid_ibyteptr_p (bufptr)) + { + if (rep_bytes_by_first_byte (*bufptr) > len) + { + /* This is a partial first character, include it. Return + immediately so validate_ibyte_string_backward doesn't run off + the beginning of the string. */ + return (Charcount) 1; + } + } + else + { + const Ibyte *newstart = bufptr, *limit = newstart + len; + + /* Our consumer has the start of a partial character, we have the + rest. */ + while (newstart < limit && !valid_ibyteptr_p (newstart)) + { + newstart++; + } + + partial_first = 1; + bufptr = newstart; + len = limit - newstart; + } + + if (len && valid_ibyteptr_p (bufptr)) + { + /* There's at least one valid starting char in the string, + validate_ibyte_string_backward won't run off the beginning. */ + impartial = validate_ibyte_string_backward (bufptr, len); + } + else + { + impartial = 0; + } + + return (Charcount) partial_first + bytecount_to_charcount (bufptr, + impartial); +} + Bytecount charcount_to_bytecount_fun (const Ibyte *ptr, Charcount len) {
--- a/src/text.h Tue Jan 21 00:27:16 2014 +0000 +++ b/src/text.h Wed Jan 22 17:52:00 2014 +0000 @@ -908,6 +908,12 @@ return ptr; } +/* Return the character count of an lstream or coding buffer of internal + format text, counting partial characters at the beginning of the buffer + as whole characters, and *not* counting partial characters at the end of + the buffer. */ +Charcount buffered_bytecount_to_charcount (const Ibyte *bufptr, Bytecount len); + #else #define bytecount_to_charcount(ptr, len) ((Charcount) (len)) @@ -916,6 +922,7 @@ #define charcount_to_bytecount_fmt(ptr, len, fmt) ((Bytecount) (len)) #define skip_ascii(ptr, end) end #define skip_ascii_down(ptr, end) end +#define buffered_bytecount_to_charcount(ptr, len) ((Charcount) (len)) /* function-like: no space before the parameter list */ #endif /* MULE */