Mercurial > hg > xemacs-beta
comparison src/tests.c @ 5776:65d65b52d608
Pass character count from coding systems to buffer insertion code.
src/ChangeLog addition:
2014-01-16 Aidan Kehoe <kehoea@parhasard.net>
Pass character count information from the no-conversion and
unicode coding systems to the buffer insertion code, making
#'find-file on large buffers a little snappier (if
ERROR_CHECK_TEXT is not defined).
* file-coding.c:
* file-coding.c (coding_character_tell): New.
* file-coding.c (conversion_coding_stream_description): New.
* file-coding.c (no_conversion_convert):
Update characters_seen when decoding.
* file-coding.c (no_conversion_character_tell): New.
* file-coding.c (lstream_type_create_file_coding): Create the
no_conversion type with data.
* file-coding.c (coding_system_type_create):
Make the character_tell method available here.
* file-coding.h:
* file-coding.h (struct coding_system_methods):
Add a new character_tell() method, passing charcount information
from the coding systems to the buffer code, avoiding duplicate
bytecount-to-charcount work especially with large buffers.
* fileio.c (Finsert_file_contents_internal):
Update this to pass charcount information to
buffer_insert_string_1(), if that is available from the lstream code.
* insdel.c:
* insdel.c (buffer_insert_string_1):
Add a new CCLEN argument, giving the character count of the string
to insert. It can be -1 to indicate that the function should work
it out itself using bytecount_to_charcount(), as it used to.
* insdel.c (buffer_insert_raw_string_1):
* insdel.c (buffer_insert_lisp_string_1):
* insdel.c (buffer_insert_ascstring_1):
* insdel.c (buffer_insert_emacs_char_1):
* insdel.c (buffer_insert_from_buffer_1):
* insdel.c (buffer_replace_char):
Update these functions to use the new calling convention.
* insdel.h:
* insdel.h (buffer_insert_string):
Update this header to reflect the new buffer_insert_string_1()
argument.
* lstream.c (Lstream_character_tell): New.
Return the number of characters *read* and seen by the consumer so
far, taking into account the unget buffer, and buffered reading.
* lstream.c (Lstream_unread):
Update unget_character_count here as appropriate.
* lstream.c (Lstream_rewind):
Reset unget_character_count here too.
* lstream.h:
* lstream.h (struct lstream):
Provide the character_tell method, add a new field,
unget_character_count, giving the number of characters ever passed
to Lstream_unread().
Declare Lstream_character_tell().
Make Lstream_ungetc(), which happens to be unused, an inline
function rather than a macro, in the course of updating it to
modify unget_character_count.
* print.c (output_string):
Use the new argument to buffer_insert_string_1().
* tests.c:
* tests.c (Ftest_character_tell):
New test function.
* tests.c (syms_of_tests):
Make it available.
* unicode.c:
* unicode.c (struct unicode_coding_stream):
* unicode.c (unicode_character_tell):
New method.
* unicode.c (unicode_convert):
Update the character counter as appropriate.
* unicode.c (coding_system_type_create_unicode):
Make the character_tell method available.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Thu, 16 Jan 2014 16:27:52 +0000 |
parents | 56144c8593a8 |
children | e2fae7783046 |
comparison
equal
deleted
inserted
replaced
5775:4004c3266c09 | 5776:65d65b52d608 |
---|---|
554 DFC_CHECK_DATA (ptr, len, ext_dos, "DOS Lisp opaque, ALLOCA, binary"); | 554 DFC_CHECK_DATA (ptr, len, ext_dos, "DOS Lisp opaque, ALLOCA, binary"); |
555 | 555 |
556 NUNGCPRO; | 556 NUNGCPRO; |
557 UNGCPRO; | 557 UNGCPRO; |
558 return conversion_result; | 558 return conversion_result; |
559 } | |
560 | |
561 DEFUN ("test-character-tell", Ftest_character_tell, 0, 0, "", /* | |
562 Return list of results of tests of the stream character offset code. | |
563 For use by the automated test suite. See tests/automated/c-tests. | |
564 | |
565 Each element is a list (DESCRIPTION, STATUS, REASON). | |
566 DESCRIPTION is a string describing the test. | |
567 STATUS is a symbol, either t (pass) or nil (fail). | |
568 REASON is nil or a string describing the failure (not required). | |
569 */ | |
570 ()) | |
571 { | |
572 Extbyte ext_unix[]= "\n\nfoo\nbar\n\nf\372b\343\340\nfoo\nbar\n"; | |
573 /* Previous string in UTF-8. */ | |
574 Extbyte ext_utf_8_unix[] | |
575 = "\n\nfoo\nbar\n\nf\303\272b\303\243\303\240\nfoo\nbar\n"; | |
576 Charcount ext_utf_8_unix_char_len = 25; | |
577 Ibyte shortbuf[13], longbuf[512]; | |
578 Lisp_Object stream = | |
579 make_fixed_buffer_input_stream (ext_unix, sizeof (ext_unix) - 1); | |
580 Lisp_Object result = Qnil, string = Qnil; | |
581 Charcount count; | |
582 Bytecount bytecount; | |
583 struct gcpro gcpro1, gcpro2, gcpro3; | |
584 | |
585 #define CHARACTER_TELL_ASSERT(assertion, description, failing_case) \ | |
586 do \ | |
587 { \ | |
588 if (assertion) \ | |
589 result = Fcons (list3 (build_cistring (description), \ | |
590 Qt, Qnil), result); \ | |
591 else \ | |
592 result = Fcons (list3 (build_cistring (description), \ | |
593 Qnil, build_ascstring (failing_case)), \ | |
594 result); \ | |
595 } \ | |
596 while (0) | |
597 | |
598 GCPRO3 (stream, result, string); | |
599 | |
600 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536); | |
601 stream = make_coding_input_stream | |
602 (XLSTREAM (stream), Ffind_coding_system (intern ("no-conversion-unix")), | |
603 CODING_DECODE, 0); | |
604 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536); | |
605 | |
606 bytecount = Lstream_read (XLSTREAM (stream), longbuf, sizeof (longbuf)); | |
607 | |
608 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream)) | |
609 == sizeof (ext_unix) -1, | |
610 "basic character tell, no-conversion-unix", | |
611 "basic character tell failed"); | |
612 | |
613 string = build_extstring (ext_unix, | |
614 Ffind_coding_system (intern | |
615 ("no-conversion-unix"))); | |
616 | |
617 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream)) | |
618 == string_char_length (string), | |
619 "repeat basic character tell, no-conversion-unix", | |
620 "repeat basic character tell failed with string"); | |
621 | |
622 count = Lstream_character_tell (XLSTREAM (stream)); | |
623 | |
624 Lstream_unread (XLSTREAM (stream), "r\n", 2); | |
625 | |
626 /* This should give the same result as before the unread. */ | |
627 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream)) | |
628 == count, "checking post-unread character tell", | |
629 "post-unread character tell failed"); | |
630 bytecount += Lstream_read (XLSTREAM (stream), longbuf + bytecount, | |
631 sizeof (longbuf) - bytecount); | |
632 | |
633 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream)) | |
634 == count + 2, | |
635 "checking post-unread+read character tell", | |
636 "post-unread+read character tell failed"); | |
637 | |
638 /* This seems to be buggy for my purposes. */ | |
639 /* Lstream_rewind (XLSTREAM (stream)); */ | |
640 Lstream_close (XLSTREAM (stream)); | |
641 Lstream_delete (XLSTREAM (stream)); | |
642 | |
643 stream = make_fixed_buffer_input_stream (ext_unix, sizeof (ext_unix) - 1); | |
644 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536); | |
645 Lstream_unset_character_mode (XLSTREAM (stream)); | |
646 stream = make_coding_input_stream | |
647 (XLSTREAM (stream), Ffind_coding_system (intern ("no-conversion-unix")), | |
648 CODING_DECODE, 0); | |
649 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536); | |
650 Lstream_unset_character_mode (XLSTREAM (stream)); | |
651 | |
652 bytecount = Lstream_read (XLSTREAM (stream), shortbuf, sizeof (shortbuf)); | |
653 | |
654 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream)) | |
655 /* This should be equal to sizeof (shortbuf) on | |
656 non-mule. */ | |
657 == sizeof (shortbuf) - !(byte_ascii_p (0xff)), | |
658 "character tell with short read, no-conversion-unix", | |
659 "short read character tell failed"); | |
660 | |
661 Lstream_close (XLSTREAM (stream)); | |
662 Lstream_delete (XLSTREAM (stream)); | |
663 | |
664 stream | |
665 = make_fixed_buffer_input_stream (ext_utf_8_unix, | |
666 sizeof (ext_utf_8_unix) - 1); | |
667 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536); | |
668 stream = make_coding_input_stream | |
669 (XLSTREAM (stream), Ffind_coding_system (intern ("utf-8-unix")), | |
670 CODING_DECODE, 0); | |
671 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536); | |
672 | |
673 bytecount = Lstream_read (XLSTREAM (stream), longbuf, sizeof (longbuf)); | |
674 | |
675 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream)) | |
676 == ext_utf_8_unix_char_len, | |
677 "utf-8 character tell, utf-8-unix", | |
678 "utf-8 character tell failed"); | |
679 | |
680 string = build_extstring (ext_utf_8_unix, | |
681 Ffind_coding_system (intern | |
682 ("utf-8-unix"))); | |
683 | |
684 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream)) | |
685 == string_char_length (string), | |
686 "repeat utf-8 character tell, utf-8-unix", | |
687 "repeat utf-8 character tell failed with string"); | |
688 | |
689 count = Lstream_character_tell (XLSTREAM (stream)); | |
690 | |
691 Lstream_unread (XLSTREAM (stream), "r\n", 2); | |
692 | |
693 /* This should give the same result as before the unread. */ | |
694 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream)) | |
695 == count, "checking post-unread utf-8 tell", | |
696 "post-unread utf-8 tell failed"); | |
697 bytecount += Lstream_read (XLSTREAM (stream), longbuf + bytecount, | |
698 sizeof (longbuf) - bytecount); | |
699 | |
700 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream)) | |
701 == count + 2, | |
702 "checking post-unread+read utf-8 tell", | |
703 "post-unread+read utf-8 tell failed"); | |
704 | |
705 /* This seems to be buggy for my purposes. */ | |
706 /* Lstream_rewind (XLSTREAM (stream)); */ | |
707 Lstream_close (XLSTREAM (stream)); | |
708 Lstream_delete (XLSTREAM (stream)); | |
709 | |
710 stream = make_fixed_buffer_input_stream (ext_utf_8_unix, sizeof (ext_utf_8_unix) - 1); | |
711 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536); | |
712 Lstream_set_character_mode (XLSTREAM (stream)); | |
713 | |
714 stream = make_coding_input_stream | |
715 (XLSTREAM (stream), Ffind_coding_system (intern ("utf-8-unix")), | |
716 CODING_DECODE, 0); | |
717 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536); | |
718 Lstream_set_character_mode (XLSTREAM (stream)); | |
719 | |
720 bytecount = Lstream_read (XLSTREAM (stream), shortbuf, sizeof (shortbuf)); | |
721 | |
722 CHARACTER_TELL_ASSERT | |
723 (bytecount == (sizeof (shortbuf) - 1), | |
724 "utf-8 Lstream_read, character mode, checking partial char not read", | |
725 "partial char appars to have been read when it shouldn't"); | |
726 | |
727 CHARACTER_TELL_ASSERT | |
728 (Lstream_character_tell (XLSTREAM (stream)) | |
729 /* This is shorter, because it's in the middle of a character. */ | |
730 == sizeof (shortbuf) - 1, | |
731 "utf-8 tell with short read, character mode, utf-8-unix", | |
732 "utf-8 read character tell, character mode failed"); | |
733 | |
734 Lstream_close (XLSTREAM (stream)); | |
735 Lstream_delete (XLSTREAM (stream)); | |
736 | |
737 UNGCPRO; | |
738 return result; | |
559 } | 739 } |
560 | 740 |
561 | 741 |
562 /* Hash Table testing */ | 742 /* Hash Table testing */ |
563 | 743 |
722 syms_of_tests (void) | 902 syms_of_tests (void) |
723 { | 903 { |
724 Vtest_function_list = Qnil; | 904 Vtest_function_list = Qnil; |
725 | 905 |
726 TESTS_DEFSUBR (Ftest_data_format_conversion); | 906 TESTS_DEFSUBR (Ftest_data_format_conversion); |
907 TESTS_DEFSUBR (Ftest_character_tell); | |
727 TESTS_DEFSUBR (Ftest_hash_tables); | 908 TESTS_DEFSUBR (Ftest_hash_tables); |
728 TESTS_DEFSUBR (Ftest_store_void_in_lisp); | 909 TESTS_DEFSUBR (Ftest_store_void_in_lisp); |
729 /* Add other test functions here with TESTS_DEFSUBR */ | 910 /* Add other test functions here with TESTS_DEFSUBR */ |
730 } | 911 } |
731 | 912 |