Mercurial > hg > xemacs-beta
comparison src/file-coding.c @ 5776:65d65b52d608
Pass character count from coding systems to buffer insertion code.
src/ChangeLog addition:
2014-01-16 Aidan Kehoe <kehoea@parhasard.net>
Pass character count information from the no-conversion and
unicode coding systems to the buffer insertion code, making
#'find-file on large buffers a little snappier (if
ERROR_CHECK_TEXT is not defined).
* file-coding.c:
* file-coding.c (coding_character_tell): New.
* file-coding.c (no_conversion_coding_stream_description): New.
* file-coding.c (no_conversion_convert):
Update characters_seen when decoding.
* file-coding.c (no_conversion_character_tell): New.
* file-coding.c (lstream_type_create_file_coding): Create the
no_conversion type with data.
* file-coding.c (coding_system_type_create):
Make the character_tell method available here.
* file-coding.h:
* file-coding.h (struct coding_system_methods):
Add a new character_tell() method, passing charcount information
from the coding systems to the buffer code, avoiding duplicate
bytecount-to-charcount work especially with large buffers.
* fileio.c (Finsert_file_contents_internal):
Update this to pass charcount information to
buffer_insert_string_1(), if that is available from the lstream code.
* insdel.c:
* insdel.c (buffer_insert_string_1):
Add a new CCLEN argument, giving the character count of the string
to insert. It can be -1 to indicate that the function should work
it out itself using bytecount_to_charcount(), as it used to.
* insdel.c (buffer_insert_raw_string_1):
* insdel.c (buffer_insert_lisp_string_1):
* insdel.c (buffer_insert_ascstring_1):
* insdel.c (buffer_insert_emacs_char_1):
* insdel.c (buffer_insert_from_buffer_1):
* insdel.c (buffer_replace_char):
Update these functions to use the new calling convention.
* insdel.h:
* insdel.h (buffer_insert_string):
Update this header to reflect the new buffer_insert_string_1()
argument.
* lstream.c (Lstream_character_tell): New.
Return the number of characters *read* and seen by the consumer so
far, taking into account the unget buffer, and buffered reading.
* lstream.c (Lstream_unread):
Update unget_character_count here as appropriate.
* lstream.c (Lstream_rewind):
Reset unget_character_count here too.
* lstream.h:
* lstream.h (struct lstream):
Provide the character_tell method, add a new field,
unget_character_count, giving the number of characters ever passed
to Lstream_unread().
Declare Lstream_character_tell().
Make Lstream_ungetc(), which happens to be unused, an inline
function rather than a macro, in the course of updating it to
modify unget_character_count.
* print.c (output_string):
Use the new argument to buffer_insert_string_1().
* tests.c:
* tests.c (Ftest_character_tell):
New test function.
* tests.c (syms_of_tests):
Make it available.
* unicode.c:
* unicode.c (struct unicode_coding_stream):
* unicode.c (unicode_character_tell):
New method.
* unicode.c (unicode_convert):
Update the character counter as appropriate.
* unicode.c (coding_system_type_create_unicode):
Make the character_tell method available.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Thu, 16 Jan 2014 16:27:52 +0000 |
parents | 7a538e1a4676 |
children | ccaa851ae712 |
comparison
equal
deleted
inserted
replaced
5775:4004c3266c09 | 5776:65d65b52d608 |
---|---|
1988 { | 1988 { |
1989 struct coding_stream *str = CODING_STREAM_DATA (stream); | 1989 struct coding_stream *str = CODING_STREAM_DATA (stream); |
1990 return Lstream_seekable_p (str->other_end); | 1990 return Lstream_seekable_p (str->other_end); |
1991 } | 1991 } |
1992 | 1992 |
1993 static Charcount | |
1994 coding_character_tell (Lstream *stream) | |
1995 { | |
1996 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1997 | |
1998 return XCODESYSMETH_OR_GIVEN (str->codesys, character_tell, (str), -1); | |
1999 } | |
2000 | |
1993 static int | 2001 static int |
1994 coding_flusher (Lstream *stream) | 2002 coding_flusher (Lstream *stream) |
1995 { | 2003 { |
1996 struct coding_stream *str = CODING_STREAM_DATA (stream); | 2004 struct coding_stream *str = CODING_STREAM_DATA (stream); |
1997 return Lstream_flush (str->other_end); | 2005 return Lstream_flush (str->other_end); |
2821 conversion), but it appears to the user as if the text is read in | 2829 conversion), but it appears to the user as if the text is read in |
2822 without conversion. | 2830 without conversion. |
2823 | 2831 |
2824 #### Shouldn't we _call_ it that, then? And while we're at it, | 2832 #### Shouldn't we _call_ it that, then? And while we're at it, |
2825 separate it into "to_internal" and "to_external"? */ | 2833 separate it into "to_internal" and "to_external"? */ |
2826 DEFINE_CODING_SYSTEM_TYPE (no_conversion); | 2834 |
2835 | |
2836 struct no_conversion_coding_system | |
2837 { | |
2838 }; | |
2839 | |
2840 struct no_conversion_coding_stream | |
2841 { | |
2842 /* Number of characters seen when decoding. */ | |
2843 Charcount characters_seen; | |
2844 }; | |
2845 | |
2846 static const struct memory_description no_conversion_coding_system_description[] = { | |
2847 { XD_END } | |
2848 }; | |
2849 | |
2850 static const struct memory_description no_conversion_coding_stream_description_1 [] = { | |
2851 { XD_INT, offsetof (struct no_conversion_coding_stream, characters_seen) }, | |
2852 { XD_END } | |
2853 }; | |
2854 | |
2855 const struct sized_memory_description no_conversion_coding_stream_description = { | |
2856 sizeof (struct no_conversion_coding_stream), no_conversion_coding_stream_description_1 | |
2857 }; | |
2858 | |
2859 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (no_conversion); | |
2827 | 2860 |
2828 /* This is used when reading in "binary" files -- i.e. files that may | 2861 /* This is used when reading in "binary" files -- i.e. files that may |
2829 contain all 256 possible byte values and that are not to be | 2862 contain all 256 possible byte values and that are not to be |
2830 interpreted as being in any particular encoding. */ | 2863 interpreted as being in any particular encoding. */ |
2831 static Bytecount | 2864 static Bytecount |
2843 { | 2876 { |
2844 c = *src++; | 2877 c = *src++; |
2845 | 2878 |
2846 DECODE_ADD_BINARY_CHAR (c, dst); | 2879 DECODE_ADD_BINARY_CHAR (c, dst); |
2847 } | 2880 } |
2881 | |
2882 CODING_STREAM_TYPE_DATA (str, no_conversion)->characters_seen | |
2883 += orign; | |
2848 | 2884 |
2849 if (str->eof) | 2885 if (str->eof) |
2850 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | 2886 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); |
2851 } | 2887 } |
2852 else | 2888 else |
2900 } | 2936 } |
2901 } | 2937 } |
2902 | 2938 |
2903 str->ch = ch; | 2939 str->ch = ch; |
2904 return orign; | 2940 return orign; |
2941 } | |
2942 | |
2943 static Charcount | |
2944 no_conversion_character_tell (struct coding_stream *str) | |
2945 { | |
2946 return CODING_STREAM_TYPE_DATA (str, no_conversion)->characters_seen; | |
2905 } | 2947 } |
2906 | 2948 |
2907 DEFINE_DETECTOR (no_conversion); | 2949 DEFINE_DETECTOR (no_conversion); |
2908 DEFINE_DETECTOR_CATEGORY (no_conversion, no_conversion); | 2950 DEFINE_DETECTOR_CATEGORY (no_conversion, no_conversion); |
2909 | 2951 |
4654 { | 4696 { |
4655 LSTREAM_HAS_METHOD (coding, reader); | 4697 LSTREAM_HAS_METHOD (coding, reader); |
4656 LSTREAM_HAS_METHOD (coding, writer); | 4698 LSTREAM_HAS_METHOD (coding, writer); |
4657 LSTREAM_HAS_METHOD (coding, rewinder); | 4699 LSTREAM_HAS_METHOD (coding, rewinder); |
4658 LSTREAM_HAS_METHOD (coding, seekable_p); | 4700 LSTREAM_HAS_METHOD (coding, seekable_p); |
4701 LSTREAM_HAS_METHOD (coding, character_tell); | |
4659 LSTREAM_HAS_METHOD (coding, marker); | 4702 LSTREAM_HAS_METHOD (coding, marker); |
4660 LSTREAM_HAS_METHOD (coding, flusher); | 4703 LSTREAM_HAS_METHOD (coding, flusher); |
4661 LSTREAM_HAS_METHOD (coding, closer); | 4704 LSTREAM_HAS_METHOD (coding, closer); |
4662 LSTREAM_HAS_METHOD (coding, finalizer); | 4705 LSTREAM_HAS_METHOD (coding, finalizer); |
4663 } | 4706 } |
4695 | 4738 |
4696 dump_add_opaque_int (&coding_system_tick); | 4739 dump_add_opaque_int (&coding_system_tick); |
4697 dump_add_opaque_int (&coding_detector_count); | 4740 dump_add_opaque_int (&coding_detector_count); |
4698 dump_add_opaque_int (&coding_detector_category_count); | 4741 dump_add_opaque_int (&coding_detector_category_count); |
4699 | 4742 |
4700 INITIALIZE_CODING_SYSTEM_TYPE (no_conversion, | 4743 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (no_conversion, |
4701 "no-conversion-coding-system-p"); | 4744 "no-conversion-coding-system-p"); |
4702 CODING_SYSTEM_HAS_METHOD (no_conversion, convert); | 4745 CODING_SYSTEM_HAS_METHOD (no_conversion, convert); |
4746 CODING_SYSTEM_HAS_METHOD (no_conversion, character_tell); | |
4703 | 4747 |
4704 INITIALIZE_DETECTOR (no_conversion); | 4748 INITIALIZE_DETECTOR (no_conversion); |
4705 DETECTOR_HAS_METHOD (no_conversion, detect); | 4749 DETECTOR_HAS_METHOD (no_conversion, detect); |
4706 INITIALIZE_DETECTOR_CATEGORY (no_conversion, no_conversion); | 4750 INITIALIZE_DETECTOR_CATEGORY (no_conversion, no_conversion); |
4707 | 4751 |