comparison src/lstream.c @ 5784:0cb4f494a548

Have the result of coding_character_tell() reflect str->convert_to, too.

src/ChangeLog addition:

2014-01-22  Aidan Kehoe  <kehoea@parhasard.net>

        * text.c (buffered_bytecount_to_charcount): New.
        Return the number of characters in a coding or lstream buffer,
        including partial characters at the beginning of the buffer, but
        not including those at the end. Refactored from
        Lstream_character_tell().
        * text.h:
        * text.h (buffered_bytecount_to_charcount):
        Declare it.
        * lstream.c (Lstream_character_tell):
        Use the refactored buffered_bytecount_to_charcount () here, both
        for the unget buffer and in_buffer.
        * file-coding.c (coding_character_tell):
        Check the character count of the lstream buffer too, when passing
        back the character count from the coding stream.
author Aidan Kehoe <kehoea@parhasard.net>
date Wed, 22 Jan 2014 17:52:00 +0000
parents 65d65b52d608
children 7343a186a475
comparison
equal deleted inserted replaced
5783:cfc6a8c144f1 5784:0cb4f494a548
750 if (lstr->unget_buffer_ind > 0) 750 if (lstr->unget_buffer_ind > 0)
751 { 751 {
752 /* The character count should not include those characters 752 /* The character count should not include those characters
753 currently *in* the unget buffer, subtract that count. */ 753 currently *in* the unget buffer, subtract that count. */
754 Ibyte *ungot, *ungot_ptr; 754 Ibyte *ungot, *ungot_ptr;
755 Bytecount ii = lstr->unget_buffer_ind, impartial, sevenflen; 755 Bytecount ii = lstr->unget_buffer_ind;
756 756
757 ungot_ptr = ungot 757 ungot_ptr = ungot
758 = alloca_ibytes (lstr->unget_buffer_ind) + MAX_ICHAR_LEN; 758 = alloca_ibytes (lstr->unget_buffer_ind);
759
760 /* Make sure the string starts with a valid ibyteptr, otherwise
761 validate_ibyte_string_backward could run off the beginning. */
762 sevenflen = set_itext_ichar (ungot, (Ichar) 0x7f);
763 ungot_ptr += sevenflen;
764 759
765 /* Internal format data, but in reverse order. There's not 760 /* Internal format data, but in reverse order. There's not
766 actually a need to alloca here, we could work out the character 761 actually a need to alloca here, we could work out the character
767 count directly from the reversed bytes, but the alloca approach 762 count directly from the reversed bytes, but the alloca approach
768 is more robust to changes in our internal format, and the unget 763 is more robust to changes in our internal format, and the unget
770 while (ii > 0) 765 while (ii > 0)
771 { 766 {
772 *ungot_ptr++ = lstr->unget_buffer[--ii]; 767 *ungot_ptr++ = lstr->unget_buffer[--ii];
773 } 768 }
774 769
775 impartial
776 = validate_ibyte_string_backward (ungot, ungot_ptr - ungot);
777
778 /* Move past the character we added. */
779 impartial -= sevenflen;
780 INC_IBYTEPTR (ungot);
781
782 if (impartial > 0 && !valid_ibyteptr_p (ungot))
783 {
784 Ibyte *newstart = ungot, *limit = ungot + impartial;
785 /* Our consumer has the start of a partial character, we
786 have the rest. */
787
788 while (!valid_ibyteptr_p (newstart) && newstart < limit)
789 {
790 newstart++, impartial--;
791 }
792
793 /* Remove this character from the count, since the
794 end-consumer hasn't seen the full character. */
795 ctell--;
796 ungot = newstart;
797 }
798 else if (valid_ibyteptr_p (ungot)
799 && rep_bytes_by_first_byte (*ungot) > impartial)
800 {
801 /* Rest of a partial character has yet to be read, its first
802 octet has probably been unread by Lstream_read_1(). We
803 included it in the accounting in Lstream_unread(), adjust
804 the figure here appropriately. */
805 ctell--;
806 }
807
808 /* bytecount_to_charcount will throw an assertion failure if we're
809 not at the start of a character. */
810 text_checking_assert (impartial == 0 || valid_ibyteptr_p (ungot));
811
812 /* The character length of this text is included in 770 /* The character length of this text is included in
813 unget_character_count; if the bytes are still in the unget 771 unget_character_count; if the bytes are still in the unget
814 buffer, then our consumers haven't seen them, and so the 772 buffer, then our consumers haven't seen them, and so the
815 character tell figure shouldn't reflect them. Subtract it from 773 character tell figure shouldn't reflect them. Subtract it from
816 the total. */ 774 the total. */
817 ctell -= bytecount_to_charcount (ungot, impartial); 775 ctell
776 -= buffered_bytecount_to_charcount (ungot, ungot_ptr - ungot);
818 } 777 }
819 778
820 if (lstr->in_buffer_ind < lstr->in_buffer_current) 779 if (lstr->in_buffer_ind < lstr->in_buffer_current)
821 { 780 {
822 Ibyte *inbuf = lstr->in_buffer + lstr->in_buffer_ind; 781 ctell
823 Bytecount partial = lstr->in_buffer_current - lstr->in_buffer_ind, 782 -= buffered_bytecount_to_charcount ((const Ibyte *)
824 impartial; 783 (lstr->in_buffer
825 784 + lstr->in_buffer_ind),
826 if (!valid_ibyteptr_p (inbuf)) 785 lstr->in_buffer_current
827 { 786 - lstr->in_buffer_ind);
828 Ibyte *newstart = inbuf;
829 Ibyte *limit = lstr->in_buffer + lstr->in_buffer_current;
830 /* Our consumer has the start of a partial character, we
831 have the rest. */
832
833 while (newstart < limit && !valid_ibyteptr_p (newstart))
834 {
835 newstart++;
836 }
837
838 /* Remove this character from the count, since the
839 end-consumer hasn't seen the full character. */
840 ctell--;
841 inbuf = newstart;
842 partial = limit - newstart;
843 }
844
845 if (valid_ibyteptr_p (inbuf))
846 {
847 /* There's at least one valid starting char in the string,
848 validate_ibyte_string_backward won't run off the
849 beginning. */
850 impartial =
851 validate_ibyte_string_backward (inbuf, partial);
852 }
853 else
854 {
855 impartial = 0;
856 }
857
858 ctell -= bytecount_to_charcount (inbuf, impartial);
859 } 787 }
860 788
861 text_checking_assert (ctell >= 0); 789 text_checking_assert (ctell >= 0);
862 } 790 }
863 791