comparison src/file-coding.c @ 5776:65d65b52d608

Pass character count from coding systems to buffer insertion code. src/ChangeLog addition: 2014-01-16 Aidan Kehoe <kehoea@parhasard.net> Pass character count information from the no-conversion and unicode coding systems to the buffer insertion code, making #'find-file on large buffers a little snappier (if ERROR_CHECK_TEXT is not defined). * file-coding.c: * file-coding.c (coding_character_tell): New. * file-coding.c (conversion_coding_stream_description): New. * file-coding.c (no_conversion_convert): Update characters_seen when decoding. * file-coding.c (no_conversion_character_tell): New. * file-coding.c (lstream_type_create_file_coding): Create the no_conversion type with data. * file-coding.c (coding_system_type_create): Make the character_tell method available here. * file-coding.h: * file-coding.h (struct coding_system_methods): Add a new character_tell() method, passing charcount information from the coding systems to the buffer code, avoiding duplicate bytecount-to-charcount work especially with large buffers. * fileio.c (Finsert_file_contents_internal): Update this to pass charcount information to buffer_insert_string_1(), if that is available from the lstream code. * insdel.c: * insdel.c (buffer_insert_string_1): Add a new CCLEN argument, giving the character count of the string to insert. It can be -1 to indicate that the function should work it out itself using bytecount_to_charcount(), as it used to. * insdel.c (buffer_insert_raw_string_1): * insdel.c (buffer_insert_lisp_string_1): * insdel.c (buffer_insert_ascstring_1): * insdel.c (buffer_insert_emacs_char_1): * insdel.c (buffer_insert_from_buffer_1): * insdel.c (buffer_replace_char): Update these functions to use the new calling convention. * insdel.h: * insdel.h (buffer_insert_string): Update this header to reflect the new buffer_insert_string_1() argument. * lstream.c (Lstream_character_tell): New. 
Return the number of characters *read* and seen by the consumer so far, taking into account the unget buffer, and buffered reading. * lstream.c (Lstream_unread): Update unget_character_count here as appropriate. * lstream.c (Lstream_rewind): Reset unget_character_count here too. * lstream.h: * lstream.h (struct lstream): Provide the character_tell method, add a new field, unget_character_count, giving the number of characters ever passed to Lstream_unread(). Declare Lstream_character_tell(). Make Lstream_ungetc(), which happens to be unused, an inline function rather than a macro, in the course of updating it to modify unget_character_count. * print.c (output_string): Use the new argument to buffer_insert_string_1(). * tests.c: * tests.c (Ftest_character_tell): New test function. * tests.c (syms_of_tests): Make it available. * unicode.c: * unicode.c (struct unicode_coding_stream): * unicode.c (unicode_character_tell): New method. * unicode.c (unicode_convert): Update the character counter as appropriate. * unicode.c (coding_system_type_create_unicode): Make the character_tell method available.
author Aidan Kehoe <kehoea@parhasard.net>
date Thu, 16 Jan 2014 16:27:52 +0000
parents 7a538e1a4676
children ccaa851ae712
comparison
equal deleted inserted replaced
5775:4004c3266c09 5776:65d65b52d608
1988 { 1988 {
1989 struct coding_stream *str = CODING_STREAM_DATA (stream); 1989 struct coding_stream *str = CODING_STREAM_DATA (stream);
1990 return Lstream_seekable_p (str->other_end); 1990 return Lstream_seekable_p (str->other_end);
1991 } 1991 }
1992 1992
1993 static Charcount
1994 coding_character_tell (Lstream *stream)
1995 {
1996 struct coding_stream *str = CODING_STREAM_DATA (stream);
1997
1998 return XCODESYSMETH_OR_GIVEN (str->codesys, character_tell, (str), -1);
1999 }
2000
1993 static int 2001 static int
1994 coding_flusher (Lstream *stream) 2002 coding_flusher (Lstream *stream)
1995 { 2003 {
1996 struct coding_stream *str = CODING_STREAM_DATA (stream); 2004 struct coding_stream *str = CODING_STREAM_DATA (stream);
1997 return Lstream_flush (str->other_end); 2005 return Lstream_flush (str->other_end);
2821 conversion), but it appears to the user as if the text is read in 2829 conversion), but it appears to the user as if the text is read in
2822 without conversion. 2830 without conversion.
2823 2831
2824 #### Shouldn't we _call_ it that, then? And while we're at it, 2832 #### Shouldn't we _call_ it that, then? And while we're at it,
2825 separate it into "to_internal" and "to_external"? */ 2833 separate it into "to_internal" and "to_external"? */
2826 DEFINE_CODING_SYSTEM_TYPE (no_conversion); 2834
2835
2836 struct no_conversion_coding_system
2837 {
2838 };
2839
2840 struct no_conversion_coding_stream
2841 {
2842 /* Number of characters seen when decoding. */
2843 Charcount characters_seen;
2844 };
2845
2846 static const struct memory_description no_conversion_coding_system_description[] = {
2847 { XD_END }
2848 };
2849
2850 static const struct memory_description no_conversion_coding_stream_description_1 [] = {
2851 { XD_INT, offsetof (struct no_conversion_coding_stream, characters_seen) },
2852 { XD_END }
2853 };
2854
2855 const struct sized_memory_description no_conversion_coding_stream_description = {
2856 sizeof (struct no_conversion_coding_stream), no_conversion_coding_stream_description_1
2857 };
2858
2859 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (no_conversion);
2827 2860
2828 /* This is used when reading in "binary" files -- i.e. files that may 2861 /* This is used when reading in "binary" files -- i.e. files that may
2829 contain all 256 possible byte values and that are not to be 2862 contain all 256 possible byte values and that are not to be
2830 interpreted as being in any particular encoding. */ 2863 interpreted as being in any particular encoding. */
2831 static Bytecount 2864 static Bytecount
2843 { 2876 {
2844 c = *src++; 2877 c = *src++;
2845 2878
2846 DECODE_ADD_BINARY_CHAR (c, dst); 2879 DECODE_ADD_BINARY_CHAR (c, dst);
2847 } 2880 }
2881
2882 CODING_STREAM_TYPE_DATA (str, no_conversion)->characters_seen
2883 += orign;
2848 2884
2849 if (str->eof) 2885 if (str->eof)
2850 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); 2886 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst);
2851 } 2887 }
2852 else 2888 else
2900 } 2936 }
2901 } 2937 }
2902 2938
2903 str->ch = ch; 2939 str->ch = ch;
2904 return orign; 2940 return orign;
2941 }
2942
2943 static Charcount
2944 no_conversion_character_tell (struct coding_stream *str)
2945 {
2946 return CODING_STREAM_TYPE_DATA (str, no_conversion)->characters_seen;
2905 } 2947 }
2906 2948
2907 DEFINE_DETECTOR (no_conversion); 2949 DEFINE_DETECTOR (no_conversion);
2908 DEFINE_DETECTOR_CATEGORY (no_conversion, no_conversion); 2950 DEFINE_DETECTOR_CATEGORY (no_conversion, no_conversion);
2909 2951
4654 { 4696 {
4655 LSTREAM_HAS_METHOD (coding, reader); 4697 LSTREAM_HAS_METHOD (coding, reader);
4656 LSTREAM_HAS_METHOD (coding, writer); 4698 LSTREAM_HAS_METHOD (coding, writer);
4657 LSTREAM_HAS_METHOD (coding, rewinder); 4699 LSTREAM_HAS_METHOD (coding, rewinder);
4658 LSTREAM_HAS_METHOD (coding, seekable_p); 4700 LSTREAM_HAS_METHOD (coding, seekable_p);
4701 LSTREAM_HAS_METHOD (coding, character_tell);
4659 LSTREAM_HAS_METHOD (coding, marker); 4702 LSTREAM_HAS_METHOD (coding, marker);
4660 LSTREAM_HAS_METHOD (coding, flusher); 4703 LSTREAM_HAS_METHOD (coding, flusher);
4661 LSTREAM_HAS_METHOD (coding, closer); 4704 LSTREAM_HAS_METHOD (coding, closer);
4662 LSTREAM_HAS_METHOD (coding, finalizer); 4705 LSTREAM_HAS_METHOD (coding, finalizer);
4663 } 4706 }
4695 4738
4696 dump_add_opaque_int (&coding_system_tick); 4739 dump_add_opaque_int (&coding_system_tick);
4697 dump_add_opaque_int (&coding_detector_count); 4740 dump_add_opaque_int (&coding_detector_count);
4698 dump_add_opaque_int (&coding_detector_category_count); 4741 dump_add_opaque_int (&coding_detector_category_count);
4699 4742
4700 INITIALIZE_CODING_SYSTEM_TYPE (no_conversion, 4743 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (no_conversion,
4701 "no-conversion-coding-system-p"); 4744 "no-conversion-coding-system-p");
4702 CODING_SYSTEM_HAS_METHOD (no_conversion, convert); 4745 CODING_SYSTEM_HAS_METHOD (no_conversion, convert);
4746 CODING_SYSTEM_HAS_METHOD (no_conversion, character_tell);
4703 4747
4704 INITIALIZE_DETECTOR (no_conversion); 4748 INITIALIZE_DETECTOR (no_conversion);
4705 DETECTOR_HAS_METHOD (no_conversion, detect); 4749 DETECTOR_HAS_METHOD (no_conversion, detect);
4706 INITIALIZE_DETECTOR_CATEGORY (no_conversion, no_conversion); 4750 INITIALIZE_DETECTOR_CATEGORY (no_conversion, no_conversion);
4707 4751