Mercurial > hg > xemacs-beta
comparison src/lstream.c @ 5784:0cb4f494a548
Have the result of coding_character_tell() reflect str->convert_to, too.
src/ChangeLog addition:
2014-01-22 Aidan Kehoe <kehoea@parhasard.net>
* text.c (buffered_bytecount_to_charcount): New.
Return the number of characters in a coding or lstream buffer,
including partial characters at the beginning of the buffer, but
not including those at the end. Refactored from
Lstream_character_tell().
* text.h (buffered_bytecount_to_charcount): Declare it.
* lstream.c (Lstream_character_tell):
Use the refactored buffered_bytecount_to_charcount () here, both
for the unget buffer and in_buffer.
* file-coding.c (coding_character_tell):
Check the character count of the lstream buffer too, when passing
back the character count from the coding stream.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Wed, 22 Jan 2014 17:52:00 +0000 |
parents | 65d65b52d608 |
children | 7343a186a475 |
comparison
equal
deleted
inserted
replaced
5783:cfc6a8c144f1 | 5784:0cb4f494a548 |
---|---|
750 if (lstr->unget_buffer_ind > 0) | 750 if (lstr->unget_buffer_ind > 0) |
751 { | 751 { |
752 /* The character count should not include those characters | 752 /* The character count should not include those characters |
753 currently *in* the unget buffer, subtract that count. */ | 753 currently *in* the unget buffer, subtract that count. */ |
754 Ibyte *ungot, *ungot_ptr; | 754 Ibyte *ungot, *ungot_ptr; |
755 Bytecount ii = lstr->unget_buffer_ind, impartial, sevenflen; | 755 Bytecount ii = lstr->unget_buffer_ind; |
756 | 756 |
757 ungot_ptr = ungot | 757 ungot_ptr = ungot |
758 = alloca_ibytes (lstr->unget_buffer_ind) + MAX_ICHAR_LEN; | 758 = alloca_ibytes (lstr->unget_buffer_ind); |
759 | |
760 /* Make sure the string starts with a valid ibyteptr, otherwise | |
761 validate_ibyte_string_backward could run off the beginning. */ | |
762 sevenflen = set_itext_ichar (ungot, (Ichar) 0x7f); | |
763 ungot_ptr += sevenflen; | |
764 | 759 |
765 /* Internal format data, but in reverse order. There's not | 760 /* Internal format data, but in reverse order. There's not |
766 actually a need to alloca here, we could work out the character | 761 actually a need to alloca here, we could work out the character |
767 count directly from the reversed bytes, but the alloca approach | 762 count directly from the reversed bytes, but the alloca approach |
768 is more robust to changes in our internal format, and the unget | 763 is more robust to changes in our internal format, and the unget |
770 while (ii > 0) | 765 while (ii > 0) |
771 { | 766 { |
772 *ungot_ptr++ = lstr->unget_buffer[--ii]; | 767 *ungot_ptr++ = lstr->unget_buffer[--ii]; |
773 } | 768 } |
774 | 769 |
775 impartial | |
776 = validate_ibyte_string_backward (ungot, ungot_ptr - ungot); | |
777 | |
778 /* Move past the character we added. */ | |
779 impartial -= sevenflen; | |
780 INC_IBYTEPTR (ungot); | |
781 | |
782 if (impartial > 0 && !valid_ibyteptr_p (ungot)) | |
783 { | |
784 Ibyte *newstart = ungot, *limit = ungot + impartial; | |
785 /* Our consumer has the start of a partial character, we | |
786 have the rest. */ | |
787 | |
788 while (!valid_ibyteptr_p (newstart) && newstart < limit) | |
789 { | |
790 newstart++, impartial--; | |
791 } | |
792 | |
793 /* Remove this character from the count, since the | |
794 end-consumer hasn't seen the full character. */ | |
795 ctell--; | |
796 ungot = newstart; | |
797 } | |
798 else if (valid_ibyteptr_p (ungot) | |
799 && rep_bytes_by_first_byte (*ungot) > impartial) | |
800 { | |
801 /* Rest of a partial character has yet to be read, its first | |
802 octet has probably been unread by Lstream_read_1(). We | |
803 included it in the accounting in Lstream_unread(), adjust | |
804 the figure here appropriately. */ | |
805 ctell--; | |
806 } | |
807 | |
808 /* bytecount_to_charcount will throw an assertion failure if we're | |
809 not at the start of a character. */ | |
810 text_checking_assert (impartial == 0 || valid_ibyteptr_p (ungot)); | |
811 | |
812 /* The character length of this text is included in | 770 /* The character length of this text is included in |
813 unget_character_count; if the bytes are still in the unget | 771 unget_character_count; if the bytes are still in the unget |
814 buffer, then our consumers haven't seen them, and so the | 772 buffer, then our consumers haven't seen them, and so the |
815 character tell figure shouldn't reflect them. Subtract it from | 773 character tell figure shouldn't reflect them. Subtract it from |
816 the total. */ | 774 the total. */ |
817 ctell -= bytecount_to_charcount (ungot, impartial); | 775 ctell |
776 -= buffered_bytecount_to_charcount (ungot, ungot_ptr - ungot); | |
818 } | 777 } |
819 | 778 |
820 if (lstr->in_buffer_ind < lstr->in_buffer_current) | 779 if (lstr->in_buffer_ind < lstr->in_buffer_current) |
821 { | 780 { |
822 Ibyte *inbuf = lstr->in_buffer + lstr->in_buffer_ind; | 781 ctell |
823 Bytecount partial = lstr->in_buffer_current - lstr->in_buffer_ind, | 782 -= buffered_bytecount_to_charcount ((const Ibyte *) |
824 impartial; | 783 (lstr->in_buffer |
825 | 784 + lstr->in_buffer_ind), |
826 if (!valid_ibyteptr_p (inbuf)) | 785 lstr->in_buffer_current |
827 { | 786 - lstr->in_buffer_ind); |
828 Ibyte *newstart = inbuf; | |
829 Ibyte *limit = lstr->in_buffer + lstr->in_buffer_current; | |
830 /* Our consumer has the start of a partial character, we | |
831 have the rest. */ | |
832 | |
833 while (newstart < limit && !valid_ibyteptr_p (newstart)) | |
834 { | |
835 newstart++; | |
836 } | |
837 | |
838 /* Remove this character from the count, since the | |
839 end-consumer hasn't seen the full character. */ | |
840 ctell--; | |
841 inbuf = newstart; | |
842 partial = limit - newstart; | |
843 } | |
844 | |
845 if (valid_ibyteptr_p (inbuf)) | |
846 { | |
847 /* There's at least one valid starting char in the string, | |
848 validate_ibyte_string_backward won't run off the | |
849 beginning. */ | |
850 impartial = | |
851 validate_ibyte_string_backward (inbuf, partial); | |
852 } | |
853 else | |
854 { | |
855 impartial = 0; | |
856 } | |
857 | |
858 ctell -= bytecount_to_charcount (inbuf, impartial); | |
859 } | 787 } |
860 | 788 |
861 text_checking_assert (ctell >= 0); | 789 text_checking_assert (ctell >= 0); |
862 } | 790 } |
863 | 791 |