comparison src/tests.c @ 5776:65d65b52d608

Pass character count from coding systems to buffer insertion code. src/ChangeLog addition: 2014-01-16 Aidan Kehoe <kehoea@parhasard.net> Pass character count information from the no-conversion and unicode coding systems to the buffer insertion code, making #'find-file on large buffers a little snappier (if ERROR_CHECK_TEXT is not defined). * file-coding.c: * file-coding.c (coding_character_tell): New. * file-coding.c (conversion_coding_stream_description): New. * file-coding.c (no_conversion_convert): Update characters_seen when decoding. * file-coding.c (no_conversion_character_tell): New. * file-coding.c (lstream_type_create_file_coding): Create the no_conversion type with data. * file-coding.c (coding_system_type_create): Make the character_tell method available here. * file-coding.h: * file-coding.h (struct coding_system_methods): Add a new character_tell() method, passing charcount information from the coding systems to the buffer code, avoiding duplicate bytecount-to-charcount work especially with large buffers. * fileio.c (Finsert_file_contents_internal): Update this to pass charcount information to buffer_insert_string_1(), if that is available from the lstream code. * insdel.c: * insdel.c (buffer_insert_string_1): Add a new CCLEN argument, giving the character count of the string to insert. It can be -1 to indicate that the function should work it out itself using bytecount_to_charcount(), as it used to. * insdel.c (buffer_insert_raw_string_1): * insdel.c (buffer_insert_lisp_string_1): * insdel.c (buffer_insert_ascstring_1): * insdel.c (buffer_insert_emacs_char_1): * insdel.c (buffer_insert_from_buffer_1): * insdel.c (buffer_replace_char): Update these functions to use the new calling convention. * insdel.h: * insdel.h (buffer_insert_string): Update this header to reflect the new buffer_insert_string_1() argument. * lstream.c (Lstream_character_tell): New. 
Return the number of characters *read* and seen by the consumer so far, taking into account the unget buffer, and buffered reading. * lstream.c (Lstream_unread): Update unget_character_count here as appropriate. * lstream.c (Lstream_rewind): Reset unget_character_count here too. * lstream.h: * lstream.h (struct lstream): Provide the character_tell method, add a new field, unget_character_count, giving the number of characters ever passed to Lstream_unread(). Declare Lstream_character_tell(). Make Lstream_ungetc(), which happens to be unused, an inline function rather than a macro, in the course of updating it to modify unget_character_count. * print.c (output_string): Use the new argument to buffer_insert_string_1(). * tests.c: * tests.c (Ftest_character_tell): New test function. * tests.c (syms_of_tests): Make it available. * unicode.c: * unicode.c (struct unicode_coding_stream): * unicode.c (unicode_character_tell): New method. * unicode.c (unicode_convert): Update the character counter as appropriate. * unicode.c (coding_system_type_create_unicode): Make the character_tell method available.
author Aidan Kehoe <kehoea@parhasard.net>
date Thu, 16 Jan 2014 16:27:52 +0000
parents 56144c8593a8
children e2fae7783046
comparison
equal deleted inserted replaced
5775:4004c3266c09 5776:65d65b52d608
554 DFC_CHECK_DATA (ptr, len, ext_dos, "DOS Lisp opaque, ALLOCA, binary"); 554 DFC_CHECK_DATA (ptr, len, ext_dos, "DOS Lisp opaque, ALLOCA, binary");
555 555
556 NUNGCPRO; 556 NUNGCPRO;
557 UNGCPRO; 557 UNGCPRO;
558 return conversion_result; 558 return conversion_result;
559 }
560
561 DEFUN ("test-character-tell", Ftest_character_tell, 0, 0, "", /*
562 Return list of results of tests of the stream character offset code.
563 For use by the automated test suite. See tests/automated/c-tests.
564
565 Each element is a list (DESCRIPTION, STATUS, REASON).
566 DESCRIPTION is a string describing the test.
567 STATUS is a symbol, either t (pass) or nil (fail).
568 REASON is nil or a string describing the failure (not required).
569 */
570 ())
571 {
572 Extbyte ext_unix[]= "\n\nfoo\nbar\n\nf\372b\343\340\nfoo\nbar\n";
573 /* Previous string in UTF-8. */
574 Extbyte ext_utf_8_unix[]
575 = "\n\nfoo\nbar\n\nf\303\272b\303\243\303\240\nfoo\nbar\n";
576 Charcount ext_utf_8_unix_char_len = 25;
577 Ibyte shortbuf[13], longbuf[512];
578 Lisp_Object stream =
579 make_fixed_buffer_input_stream (ext_unix, sizeof (ext_unix) - 1);
580 Lisp_Object result = Qnil, string = Qnil;
581 Charcount count;
582 Bytecount bytecount;
583 struct gcpro gcpro1, gcpro2, gcpro3;
584 /* Record one outcome on `result': (DESCRIPTION t nil) when ASSERTION holds, otherwise (DESCRIPTION nil FAILING_CASE). */
585 #define CHARACTER_TELL_ASSERT(assertion, description, failing_case) \
586 do \
587 { \
588 if (assertion) \
589 result = Fcons (list3 (build_cistring (description), \
590 Qt, Qnil), result); \
591 else \
592 result = Fcons (list3 (build_cistring (description), \
593 Qnil, build_ascstring (failing_case)), \
594 result); \
595 } \
596 while (0)
597
598 GCPRO3 (stream, result, string);
599 /* Part 1: wrap the ext_unix data in a no-conversion-unix decoding stream and read it into longbuf. */
600 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536);
601 stream = make_coding_input_stream
602 (XLSTREAM (stream), Ffind_coding_system (intern ("no-conversion-unix")),
603 CODING_DECODE, 0);
604 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536);
605
606 bytecount = Lstream_read (XLSTREAM (stream), longbuf, sizeof (longbuf));
607 /* The expected count here is the length of ext_unix without its terminating NUL. */
608 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream))
609 == sizeof (ext_unix) -1,
610 "basic character tell, no-conversion-unix",
611 "basic character tell failed");
612 /* Cross-check against the character length of a Lisp string built from the same data and coding system. */
613 string = build_extstring (ext_unix,
614 Ffind_coding_system (intern
615 ("no-conversion-unix")));
616
617 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream))
618 == string_char_length (string),
619 "repeat basic character tell, no-conversion-unix",
620 "repeat basic character tell failed with string");
621 /* Save the current count; the two unread checks below are expressed relative to it. */
622 count = Lstream_character_tell (XLSTREAM (stream));
623
624 Lstream_unread (XLSTREAM (stream), "r\n", 2);
625
626 /* This should give the same result as before the unread. */
627 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream))
628 == count, "checking post-unread character tell",
629 "post-unread character tell failed");
630 bytecount += Lstream_read (XLSTREAM (stream), longbuf + bytecount,
631 sizeof (longbuf) - bytecount);
632 /* After reading again, the expected count is two more than the saved one (two bytes were unread above). */
633 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream))
634 == count + 2,
635 "checking post-unread+read character tell",
636 "post-unread+read character tell failed");
637
638 /* This seems to be buggy for my purposes. */
639 /* Lstream_rewind (XLSTREAM (stream)); */
640 Lstream_close (XLSTREAM (stream));
641 Lstream_delete (XLSTREAM (stream));
642 /* Part 2: same data and coding system, but character mode is unset on both streams and the read goes into the 13-byte shortbuf. */
643 stream = make_fixed_buffer_input_stream (ext_unix, sizeof (ext_unix) - 1);
644 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536);
645 Lstream_unset_character_mode (XLSTREAM (stream));
646 stream = make_coding_input_stream
647 (XLSTREAM (stream), Ffind_coding_system (intern ("no-conversion-unix")),
648 CODING_DECODE, 0);
649 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536);
650 Lstream_unset_character_mode (XLSTREAM (stream));
651
652 bytecount = Lstream_read (XLSTREAM (stream), shortbuf, sizeof (shortbuf));
653
654 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream))
655 /* This should be equal to sizeof (shortbuf) on
656 non-mule. */
657 == sizeof (shortbuf) - !(byte_ascii_p (0xff)),
658 "character tell with short read, no-conversion-unix",
659 "short read character tell failed");
660
661 Lstream_close (XLSTREAM (stream));
662 Lstream_delete (XLSTREAM (stream));
663 /* Part 3: repeat the Part 1 sequence with the UTF-8 form of the data and the utf-8-unix coding system. */
664 stream
665 = make_fixed_buffer_input_stream (ext_utf_8_unix,
666 sizeof (ext_utf_8_unix) - 1);
667 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536);
668 stream = make_coding_input_stream
669 (XLSTREAM (stream), Ffind_coding_system (intern ("utf-8-unix")),
670 CODING_DECODE, 0);
671 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536);
672
673 bytecount = Lstream_read (XLSTREAM (stream), longbuf, sizeof (longbuf));
674 /* Here the expected count is the hard-coded ext_utf_8_unix_char_len rather than a byte length. */
675 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream))
676 == ext_utf_8_unix_char_len,
677 "utf-8 character tell, utf-8-unix",
678 "utf-8 character tell failed");
679
680 string = build_extstring (ext_utf_8_unix,
681 Ffind_coding_system (intern
682 ("utf-8-unix")));
683
684 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream))
685 == string_char_length (string),
686 "repeat utf-8 character tell, utf-8-unix",
687 "repeat utf-8 character tell failed with string");
688
689 count = Lstream_character_tell (XLSTREAM (stream));
690
691 Lstream_unread (XLSTREAM (stream), "r\n", 2);
692
693 /* This should give the same result as before the unread. */
694 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream))
695 == count, "checking post-unread utf-8 tell",
696 "post-unread utf-8 tell failed");
697 bytecount += Lstream_read (XLSTREAM (stream), longbuf + bytecount,
698 sizeof (longbuf) - bytecount);
699
700 CHARACTER_TELL_ASSERT (Lstream_character_tell (XLSTREAM (stream))
701 == count + 2,
702 "checking post-unread+read utf-8 tell",
703 "post-unread+read utf-8 tell failed");
704
705 /* This seems to be buggy for my purposes. */
706 /* Lstream_rewind (XLSTREAM (stream)); */
707 Lstream_close (XLSTREAM (stream));
708 Lstream_delete (XLSTREAM (stream));
709 /* Part 4: UTF-8 data again, this time with character mode set on both streams and a read into the 13-byte shortbuf. */
710 stream = make_fixed_buffer_input_stream (ext_utf_8_unix, sizeof (ext_utf_8_unix) - 1);
711 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536);
712 Lstream_set_character_mode (XLSTREAM (stream));
713
714 stream = make_coding_input_stream
715 (XLSTREAM (stream), Ffind_coding_system (intern ("utf-8-unix")),
716 CODING_DECODE, 0);
717 Lstream_set_buffering (XLSTREAM (stream), LSTREAM_BLOCKN_BUFFERED, 65536);
718 Lstream_set_character_mode (XLSTREAM (stream));
719
720 bytecount = Lstream_read (XLSTREAM (stream), shortbuf, sizeof (shortbuf));
721 /* The read is expected to return one byte fewer than requested. NOTE(review): "appars" in the failure string below is a typo for "appears". */
722 CHARACTER_TELL_ASSERT
723 (bytecount == (sizeof (shortbuf) - 1),
724 "utf-8 Lstream_read, character mode, checking partial char not read",
725 "partial char appars to have been read when it shouldn't");
726
727 CHARACTER_TELL_ASSERT
728 (Lstream_character_tell (XLSTREAM (stream))
729 /* This is shorter, because it's in the middle of a character. */
730 == sizeof (shortbuf) - 1,
731 "utf-8 tell with short read, character mode, utf-8-unix",
732 "utf-8 read character tell, character mode failed");
733
734 Lstream_close (XLSTREAM (stream));
735 Lstream_delete (XLSTREAM (stream));
736 /* Each outcome was consed onto the front of `result', so the returned list is in reverse order of execution. */
737 UNGCPRO;
738 return result;
559 } 739 }
560 740
561 741
562 /* Hash Table testing */ 742 /* Hash Table testing */
563 743
722 syms_of_tests (void) 902 syms_of_tests (void)
723 { 903 {
724 Vtest_function_list = Qnil; 904 Vtest_function_list = Qnil;
725 905
726 TESTS_DEFSUBR (Ftest_data_format_conversion); 906 TESTS_DEFSUBR (Ftest_data_format_conversion);
907 TESTS_DEFSUBR (Ftest_character_tell);
727 TESTS_DEFSUBR (Ftest_hash_tables); 908 TESTS_DEFSUBR (Ftest_hash_tables);
728 TESTS_DEFSUBR (Ftest_store_void_in_lisp); 909 TESTS_DEFSUBR (Ftest_store_void_in_lisp);
729 /* Add other test functions here with TESTS_DEFSUBR */ 910 /* Add other test functions here with TESTS_DEFSUBR */
730 } 911 }
731 912