Mercurial > hg > xemacs-beta
comparison src/lstream.c @ 5776:65d65b52d608
Pass character count from coding systems to buffer insertion code.
src/ChangeLog addition:
2014-01-16 Aidan Kehoe <kehoea@parhasard.net>
Pass character count information from the no-conversion and
unicode coding systems to the buffer insertion code, making
#'find-file on large buffers a little snappier (if
ERROR_CHECK_TEXT is not defined).
* file-coding.c:
* file-coding.c (coding_character_tell): New.
* file-coding.c (conversion_coding_stream_description): New.
* file-coding.c (no_conversion_convert):
Update characters_seen when decoding.
* file-coding.c (no_conversion_character_tell): New.
* file-coding.c (lstream_type_create_file_coding): Create the
no_conversion type with data.
* file-coding.c (coding_system_type_create):
Make the character_tell method available here.
* file-coding.h:
* file-coding.h (struct coding_system_methods):
Add a new character_tell() method, passing charcount information
from the coding systems to the buffer code, avoiding duplicate
bytecount-to-charcount work especially with large buffers.
* fileio.c (Finsert_file_contents_internal):
Update this to pass charcount information to
buffer_insert_string_1(), if that is available from the lstream code.
* insdel.c:
* insdel.c (buffer_insert_string_1):
Add a new CCLEN argument, giving the character count of the string
to insert. It can be -1 to indicate that the function should work
it out itself using bytecount_to_charcount(), as it used to.
* insdel.c (buffer_insert_raw_string_1):
* insdel.c (buffer_insert_lisp_string_1):
* insdel.c (buffer_insert_ascstring_1):
* insdel.c (buffer_insert_emacs_char_1):
* insdel.c (buffer_insert_from_buffer_1):
* insdel.c (buffer_replace_char):
Update these functions to use the new calling convention.
* insdel.h:
* insdel.h (buffer_insert_string):
Update this header to reflect the new buffer_insert_string_1()
argument.
* lstream.c (Lstream_character_tell): New.
Return the number of characters *read* and seen by the consumer so
far, taking into account the unget buffer, and buffered reading.
* lstream.c (Lstream_unread):
Update unget_character_count here as appropriate.
* lstream.c (Lstream_rewind):
Reset unget_character_count here too.
* lstream.h:
* lstream.h (struct lstream):
Provide the character_tell method, add a new field,
unget_character_count, giving the number of characters ever passed
to Lstream_unread().
Declare Lstream_character_tell().
Make Lstream_ungetc(), which happens to be unused, an inline
function rather than a macro, in the course of updating it to
modify unget_character_count.
* print.c (output_string):
Use the new argument to buffer_insert_string_1().
* tests.c:
* tests.c (Ftest_character_tell):
New test function.
* tests.c (syms_of_tests):
Make it available.
* unicode.c:
* unicode.c (struct unicode_coding_stream):
* unicode.c (unicode_character_tell):
New method.
* unicode.c (unicode_convert):
Update the character counter as appropriate.
* unicode.c (coding_system_type_create_unicode):
Make the character_tell method available.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Thu, 16 Jan 2014 16:27:52 +0000 |
parents | 2dbefd79b3d3 |
children | 0cb4f494a548 |
comparison
equal
deleted
inserted
replaced
5775:4004c3266c09 | 5776:65d65b52d608 |
---|---|
733 Lstream_read (Lstream *lstr, void *data, Bytecount size) | 733 Lstream_read (Lstream *lstr, void *data, Bytecount size) |
734 { | 734 { |
735 return Lstream_read_1 (lstr, data, size, 0); | 735 return Lstream_read_1 (lstr, data, size, 0); |
736 } | 736 } |
737 | 737 |
738 Charcount | |
739 Lstream_character_tell (Lstream *lstr) | |
740 { | |
741 Charcount ctell = lstr->imp->character_tell ? | |
742 lstr->imp->character_tell (lstr) : -1; | |
743 | |
744 if (ctell >= 0) | |
745 { | |
746 /* Our implementation's character tell code doesn't know about the | |
747 unget buffer, update its figure to reflect it. */ | |
748 ctell += lstr->unget_character_count; | |
749 | |
750 if (lstr->unget_buffer_ind > 0) | |
751 { | |
752 /* The character count should not include those characters | |
753 currently *in* the unget buffer, subtract that count. */ | |
754 Ibyte *ungot, *ungot_ptr; | |
755 Bytecount ii = lstr->unget_buffer_ind, impartial, sevenflen; | |
756 | |
757 ungot_ptr = ungot | |
758 = alloca_ibytes (lstr->unget_buffer_ind) + MAX_ICHAR_LEN; | |
759 | |
760 /* Make sure the string starts with a valid ibyteptr, otherwise | |
761 validate_ibyte_string_backward could run off the beginning. */ | |
762 sevenflen = set_itext_ichar (ungot, (Ichar) 0x7f); | |
763 ungot_ptr += sevenflen; | |
764 | |
765 /* Internal format data, but in reverse order. There's not | |
766 actually a need to alloca here, we could work out the character | |
767 count directly from the reversed bytes, but the alloca approach | |
768 is more robust to changes in our internal format, and the unget | |
769 buffer is not going to blow the stack. */ | |
770 while (ii > 0) | |
771 { | |
772 *ungot_ptr++ = lstr->unget_buffer[--ii]; | |
773 } | |
774 | |
775 impartial | |
776 = validate_ibyte_string_backward (ungot, ungot_ptr - ungot); | |
777 | |
778 /* Move past the character we added. */ | |
779 impartial -= sevenflen; | |
780 INC_IBYTEPTR (ungot); | |
781 | |
782 if (impartial > 0 && !valid_ibyteptr_p (ungot)) | |
783 { | |
784 Ibyte *newstart = ungot, *limit = ungot + impartial; | |
785 /* Our consumer has the start of a partial character, we | |
786 have the rest. */ | |
787 | |
788 while (!valid_ibyteptr_p (newstart) && newstart < limit) | |
789 { | |
790 newstart++, impartial--; | |
791 } | |
792 | |
793 /* Remove this character from the count, since the | |
794 end-consumer hasn't seen the full character. */ | |
795 ctell--; | |
796 ungot = newstart; | |
797 } | |
798 else if (valid_ibyteptr_p (ungot) | |
799 && rep_bytes_by_first_byte (*ungot) > impartial) | |
800 { | |
801 /* Rest of a partial character has yet to be read, its first | |
802 octet has probably been unread by Lstream_read_1(). We | |
803 included it in the accounting in Lstream_unread(), adjust | |
804 the figure here appropriately. */ | |
805 ctell--; | |
806 } | |
807 | |
808 /* bytecount_to_charcount will throw an assertion failure if we're | |
809 not at the start of a character. */ | |
810 text_checking_assert (impartial == 0 || valid_ibyteptr_p (ungot)); | |
811 | |
812 /* The character length of this text is included in | |
813 unget_character_count; if the bytes are still in the unget | |
814 buffer, then our consumers haven't seen them, and so the | |
815 character tell figure shouldn't reflect them. Subtract it from | |
816 the total. */ | |
817 ctell -= bytecount_to_charcount (ungot, impartial); | |
818 } | |
819 | |
820 if (lstr->in_buffer_ind < lstr->in_buffer_current) | |
821 { | |
822 Ibyte *inbuf = lstr->in_buffer + lstr->in_buffer_ind; | |
823 Bytecount partial = lstr->in_buffer_current - lstr->in_buffer_ind, | |
824 impartial; | |
825 | |
826 if (!valid_ibyteptr_p (inbuf)) | |
827 { | |
828 Ibyte *newstart = inbuf; | |
829 Ibyte *limit = lstr->in_buffer + lstr->in_buffer_current; | |
830 /* Our consumer has the start of a partial character, we | |
831 have the rest. */ | |
832 | |
833 while (newstart < limit && !valid_ibyteptr_p (newstart)) | |
834 { | |
835 newstart++; | |
836 } | |
837 | |
838 /* Remove this character from the count, since the | |
839 end-consumer hasn't seen the full character. */ | |
840 ctell--; | |
841 inbuf = newstart; | |
842 partial = limit - newstart; | |
843 } | |
844 | |
845 if (valid_ibyteptr_p (inbuf)) | |
846 { | |
847 /* There's at least one valid starting char in the string, | |
848 validate_ibyte_string_backward won't run off the | |
849 beginning. */ | |
850 impartial = | |
851 validate_ibyte_string_backward (inbuf, partial); | |
852 } | |
853 else | |
854 { | |
855 impartial = 0; | |
856 } | |
857 | |
858 ctell -= bytecount_to_charcount (inbuf, impartial); | |
859 } | |
860 | |
861 text_checking_assert (ctell >= 0); | |
862 } | |
863 | |
864 return ctell; | |
865 } | |
738 | 866 |
739 /* Push back SIZE bytes of DATA onto the input queue. The next call | 867 /* Push back SIZE bytes of DATA onto the input queue. The next call |
740 to Lstream_read() with the same size will read the same bytes back. | 868 to Lstream_read() with the same size will read the same bytes back. |
741 Note that this will be the case even if there is other pending | 869 Note that this will be the case even if there is other pending |
742 unread data. */ | 870 unread data. */ |
753 lstr->byte_count -= size; | 881 lstr->byte_count -= size; |
754 | 882 |
755 /* Bytes have to go on in reverse order -- they are reversed | 883 /* Bytes have to go on in reverse order -- they are reversed |
756 again when read back. */ | 884 again when read back. */ |
757 while (size--) | 885 while (size--) |
758 lstr->unget_buffer[lstr->unget_buffer_ind++] = p[size]; | 886 { |
887 lstr->unget_buffer[lstr->unget_buffer_ind++] = p[size]; | |
888 /* If we see a valid first byte, that is the last octet in a | |
889 character, so increase the count of ungot characters. */ | |
890 lstr->unget_character_count += valid_ibyteptr_p (p + size); | |
891 } | |
759 } | 892 } |
760 | 893 |
761 /* Rewind the stream to the beginning. */ | 894 /* Rewind the stream to the beginning. */ |
762 | 895 |
763 int | 896 int |
766 if (!lstr->imp->rewinder) | 899 if (!lstr->imp->rewinder) |
767 Lstream_internal_error ("lstream has no rewinder", lstr); | 900 Lstream_internal_error ("lstream has no rewinder", lstr); |
768 if (Lstream_flush (lstr) < 0) | 901 if (Lstream_flush (lstr) < 0) |
769 return -1; | 902 return -1; |
770 lstr->byte_count = 0; | 903 lstr->byte_count = 0; |
904 lstr->unget_character_count = 0; | |
771 return (lstr->imp->rewinder) (lstr); | 905 return (lstr->imp->rewinder) (lstr); |
772 } | 906 } |
773 | 907 |
774 int | 908 int |
775 Lstream_seekable_p (Lstream *lstr) | 909 Lstream_seekable_p (Lstream *lstr) |