comparison src/text.c @ 867:804517e16990

[xemacs-hg @ 2002-06-05 09:54:39 by ben] Textual renaming: text/char names abbrev.c, alloc.c, buffer.c, buffer.h, bytecode.c, callint.c, casefiddle.c, casetab.c, charset.h, chartab.c, chartab.h, cmds.c, console-gtk.h, console-msw.c, console-msw.h, console-stream.c, console-tty.c, console-x.c, console-x.h, console.h, data.c, device-msw.c, device-x.c, dialog-msw.c, dired-msw.c, dired.c, doc.c, doprnt.c, editfns.c, eldap.c, emodules.c, eval.c, event-Xt.c, event-gtk.c, event-msw.c, event-stream.c, event-unixoid.c, events.c, events.h, file-coding.c, file-coding.h, fileio.c, filelock.c, fns.c, font-lock.c, frame-gtk.c, frame-msw.c, frame-x.c, frame.c, glyphs-eimage.c, glyphs-msw.c, glyphs-x.c, glyphs.c, glyphs.h, gpmevent.c, gui-x.c, gui-x.h, gui.c, gui.h, hpplay.c, indent.c, insdel.c, insdel.h, intl-win32.c, keymap.c, line-number.c, line-number.h, lisp-disunion.h, lisp-union.h, lisp.h, lread.c, lrecord.h, lstream.c, lstream.h, md5.c, menubar-msw.c, menubar-x.c, menubar.c, minibuf.c, mule-ccl.c, mule-charset.c, mule-coding.c, mule-wnnfns.c, ndir.h, nt.c, objects-gtk.c, objects-gtk.h, objects-msw.c, objects-tty.c, objects-x.c, objects.c, objects.h, postgresql.c, print.c, process-nt.c, process-unix.c, process.c, procimpl.h, realpath.c, redisplay-gtk.c, redisplay-msw.c, redisplay-output.c, redisplay-tty.c, redisplay-x.c, redisplay.c, redisplay.h, regex.c, search.c, select-common.h, select-gtk.c, select-x.c, sound.h, symbols.c, syntax.c, syntax.h, sysdep.c, sysdep.h, sysdir.h, sysfile.h, sysproc.h, syspwd.h, systime.h, syswindows.h, termcap.c, tests.c, text.c, text.h, toolbar-common.c, tooltalk.c, ui-gtk.c, unexnt.c, unicode.c, win32.c: Text/char naming rationalization. [a] distinguish between "charptr" when it refers to operations on the pointer itself and when it refers to operations on text; and [b] use consistent naming for everything referring to internal format, i.e. 
Itext == text in internal format; Ibyte == a byte in such text; Ichar == a char as represented in internal character format. Thus, e.g., set_charptr_emchar -> set_itext_ichar. The pre and post tags on either side of this change are: pre-internal-format-textual-renaming and post-internal-format-textual-renaming. See the Internals Manual for details of exactly how this was done, how to handle the change in your workspace, etc.
author ben
date Wed, 05 Jun 2002 09:58:45 +0000
parents 2b6fa2618f76
children e22b0213b713
comparison
equal deleted inserted replaced
866:613552a02607 867:804517e16990
380 380
381 except in the case of characters at the gap position. 381 except in the case of characters at the gap position.
382 382
383 B. Other Typedefs 383 B. Other Typedefs
384 384
385 Emchar: 385 Ichar:
386 ------- 386 -------
387 This typedef represents a single Emacs character, which can be 387 This typedef represents a single Emacs character, which can be
388 ASCII, ISO-8859, or some extended character, as would typically 388 ASCII, ISO-8859, or some extended character, as would typically
389 be used for Kanji. Note that the representation of a character 389 be used for Kanji. Note that the representation of a character
390 as an Emchar is *not* the same as the representation of that 390 as an Ichar is *not* the same as the representation of that
391 same character in a string; thus, you cannot do the standard 391 same character in a string; thus, you cannot do the standard
392 C trick of passing a pointer to a character to a function that 392 C trick of passing a pointer to a character to a function that
393 expects a string. 393 expects a string.
394 394
395 An Emchar takes up 19 bits of representation and (for code 395 An Ichar takes up 19 bits of representation and (for code
396 compatibility and such) is compatible with an int. This 396 compatibility and such) is compatible with an int. This
397 representation is visible on the Lisp level. The important 397 representation is visible on the Lisp level. The important
398 characteristics of the Emchar representation are 398 characteristics of the Ichar representation are
399 399
400 -- values 0x00 - 0x7f represent ASCII. 400 -- values 0x00 - 0x7f represent ASCII.
401 -- values 0x80 - 0xff represent the right half of ISO-8859-1. 401 -- values 0x80 - 0xff represent the right half of ISO-8859-1.
402 -- values 0x100 and up represent all other characters. 402 -- values 0x100 and up represent all other characters.
403 403
404 This means that Emchar values are upwardly compatible with 404 This means that Ichar values are upwardly compatible with
405 the standard 8-bit representation of ASCII/ISO-8859-1. 405 the standard 8-bit representation of ASCII/ISO-8859-1.
406 406
407 Intbyte: 407 Ibyte:
408 -------- 408 --------
409 The data in a buffer or string is logically made up of Intbyte 409 The data in a buffer or string is logically made up of Ibyte
410 objects, where an Intbyte takes up the same amount of space as a 410 objects, where an Ibyte takes up the same amount of space as a
411 char. (It is declared differently, though, to catch invalid 411 char. (It is declared differently, though, to catch invalid
412 usages.) Strings stored using Intbytes are said to be in 412 usages.) Strings stored using Ibytes are said to be in
413 "internal format". The important characteristics of internal 413 "internal format". The important characteristics of internal
414 format are 414 format are
415 415
416 -- ASCII characters are represented as a single Intbyte, 416 -- ASCII characters are represented as a single Ibyte,
417 in the range 0 - 0x7f. 417 in the range 0 - 0x7f.
418 -- All other characters are represented as an Intbyte in 418 -- All other characters are represented as an Ibyte in
419 the range 0x80 - 0x9f followed by one or more Intbytes 419 the range 0x80 - 0x9f followed by one or more Ibytes
420 in the range 0xa0 to 0xff. 420 in the range 0xa0 to 0xff.
421 421
422 This leads to a number of desirable properties: 422 This leads to a number of desirable properties:
423 423
424 -- Given the position of the beginning of a character, 424 -- Given the position of the beginning of a character,
630 /************************************************************************/ 630 /************************************************************************/
631 631
632 /* Most are inline functions in lisp.h */ 632 /* Most are inline functions in lisp.h */
633 633
634 int 634 int
635 qxesprintf (Intbyte *buffer, const CIntbyte *format, ...) 635 qxesprintf (Ibyte *buffer, const CIbyte *format, ...)
636 { 636 {
637 va_list args; 637 va_list args;
638 int retval; 638 int retval;
639 639
640 va_start (args, format); 640 va_start (args, format);
643 643
644 return retval; 644 return retval;
645 } 645 }
646 646
647 /* strcasecmp() implementation from BSD */ 647 /* strcasecmp() implementation from BSD */
648 static Intbyte strcasecmp_charmap[] = { 648 static Ibyte strcasecmp_charmap[] = {
649 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', 649 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
650 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 650 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
651 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 651 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
652 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 652 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
653 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 653 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
689 we use symmetrical algorithms that may sacrifice a few machine 689 we use symmetrical algorithms that may sacrifice a few machine
690 cycles but are MUCH MUCH clearer, which counts a lot more. 690 cycles but are MUCH MUCH clearer, which counts a lot more.
691 */ 691 */
692 692
693 int 693 int
694 qxestrcasecmp (const Intbyte *s1, const Intbyte *s2) 694 qxestrcasecmp (const Ibyte *s1, const Ibyte *s2)
695 { 695 {
696 Intbyte *cm = strcasecmp_charmap; 696 Ibyte *cm = strcasecmp_charmap;
697 697
698 while (cm[*s1] == cm[*s2++]) 698 while (cm[*s1] == cm[*s2++])
699 if (*s1++ == '\0') 699 if (*s1++ == '\0')
700 return (0); 700 return (0);
701 701
703 } 703 }
704 704
705 int 705 int
706 ascii_strcasecmp (const Char_ASCII *s1, const Char_ASCII *s2) 706 ascii_strcasecmp (const Char_ASCII *s1, const Char_ASCII *s2)
707 { 707 {
708 return qxestrcasecmp ((const Intbyte *) s1, (const Intbyte *) s2); 708 return qxestrcasecmp ((const Ibyte *) s1, (const Ibyte *) s2);
709 } 709 }
710 710
711 int 711 int
712 qxestrcasecmp_c (const Intbyte *s1, const Char_ASCII *s2) 712 qxestrcasecmp_c (const Ibyte *s1, const Char_ASCII *s2)
713 { 713 {
714 return qxestrcasecmp (s1, (const Intbyte *) s2); 714 return qxestrcasecmp (s1, (const Ibyte *) s2);
715 } 715 }
716 716
717 /* An internationalized version that collapses case in a general fashion. 717 /* An internationalized version that collapses case in a general fashion.
718 */ 718 */
719 719
720 int 720 int
721 qxestrcasecmp_i18n (const Intbyte *s1, const Intbyte *s2) 721 qxestrcasecmp_i18n (const Ibyte *s1, const Ibyte *s2)
722 { 722 {
723 while (*s1 && *s2) 723 while (*s1 && *s2)
724 { 724 {
725 if (DOWNCASE (0, charptr_emchar (s1)) != 725 if (DOWNCASE (0, itext_ichar (s1)) !=
726 DOWNCASE (0, charptr_emchar (s2))) 726 DOWNCASE (0, itext_ichar (s2)))
727 break; 727 break;
728 INC_CHARPTR (s1); 728 INC_IBYTEPTR (s1);
729 INC_CHARPTR (s2); 729 INC_IBYTEPTR (s2);
730 } 730 }
731 731
732 return (DOWNCASE (0, charptr_emchar (s1)) - 732 return (DOWNCASE (0, itext_ichar (s1)) -
733 DOWNCASE (0, charptr_emchar (s2))); 733 DOWNCASE (0, itext_ichar (s2)));
734 } 734 }
735 735
736 /* The only difference between these next two and 736 /* The only difference between these next two and
737 qxememcasecmp()/qxememcasecmp_i18n() is that these two will stop if 737 qxememcasecmp()/qxememcasecmp_i18n() is that these two will stop if
738 both strings are equal and less than LEN in length, while 738 both strings are equal and less than LEN in length, while
739 the mem...() versions would run off the end. */ 739 the mem...() versions would run off the end. */
740 740
741 int 741 int
742 qxestrncasecmp (const Intbyte *s1, const Intbyte *s2, Bytecount len) 742 qxestrncasecmp (const Ibyte *s1, const Ibyte *s2, Bytecount len)
743 { 743 {
744 Intbyte *cm = strcasecmp_charmap; 744 Ibyte *cm = strcasecmp_charmap;
745 745
746 while (len--) 746 while (len--)
747 { 747 {
748 int diff = cm[*s1] - cm[*s2]; 748 int diff = cm[*s1] - cm[*s2];
749 if (diff != 0) 749 if (diff != 0)
757 } 757 }
758 758
759 int 759 int
760 ascii_strncasecmp (const Char_ASCII *s1, const Char_ASCII *s2, Bytecount len) 760 ascii_strncasecmp (const Char_ASCII *s1, const Char_ASCII *s2, Bytecount len)
761 { 761 {
762 return qxestrncasecmp ((const Intbyte *) s1, (const Intbyte *) s2, len); 762 return qxestrncasecmp ((const Ibyte *) s1, (const Ibyte *) s2, len);
763 } 763 }
764 764
765 int 765 int
766 qxestrncasecmp_c (const Intbyte *s1, const Char_ASCII *s2, Bytecount len) 766 qxestrncasecmp_c (const Ibyte *s1, const Char_ASCII *s2, Bytecount len)
767 { 767 {
768 return qxestrncasecmp (s1, (const Intbyte *) s2, len); 768 return qxestrncasecmp (s1, (const Ibyte *) s2, len);
769 } 769 }
770 770
771 /* Compare LEN_FROM_S1 worth of characters from S1 with the same number of 771 /* Compare LEN_FROM_S1 worth of characters from S1 with the same number of
772 characters from S2, case insensitive. NOTE: Downcasing can convert 772 characters from S2, case insensitive. NOTE: Downcasing can convert
773 characters from one length in bytes to another, so reversing S1 and S2 773 characters from one length in bytes to another, so reversing S1 and S2
774 is *NOT* a symmetric operation! You must choose a length that agrees 774 is *NOT* a symmetric operation! You must choose a length that agrees
775 with S1. */ 775 with S1. */
776 776
777 int 777 int
778 qxestrncasecmp_i18n (const Intbyte *s1, const Intbyte *s2, 778 qxestrncasecmp_i18n (const Ibyte *s1, const Ibyte *s2,
779 Bytecount len_from_s1) 779 Bytecount len_from_s1)
780 { 780 {
781 while (len_from_s1 > 0) 781 while (len_from_s1 > 0)
782 { 782 {
783 const Intbyte *old_s1 = s1; 783 const Ibyte *old_s1 = s1;
784 int diff = (DOWNCASE (0, charptr_emchar (s1)) - 784 int diff = (DOWNCASE (0, itext_ichar (s1)) -
785 DOWNCASE (0, charptr_emchar (s2))); 785 DOWNCASE (0, itext_ichar (s2)));
786 if (diff != 0) 786 if (diff != 0)
787 return diff; 787 return diff;
788 if (!*s1) 788 if (!*s1)
789 return 0; 789 return 0;
790 INC_CHARPTR (s1); 790 INC_IBYTEPTR (s1);
791 INC_CHARPTR (s2); 791 INC_IBYTEPTR (s2);
792 len_from_s1 -= s1 - old_s1; 792 len_from_s1 -= s1 - old_s1;
793 } 793 }
794 794
795 return 0; 795 return 0;
796 } 796 }
797 797
798 int 798 int
799 qxememcmp (const Intbyte *s1, const Intbyte *s2, Bytecount len) 799 qxememcmp (const Ibyte *s1, const Ibyte *s2, Bytecount len)
800 { 800 {
801 return memcmp (s1, s2, len); 801 return memcmp (s1, s2, len);
802 } 802 }
803 803
804 int 804 int
805 qxememcmp4 (const Intbyte *s1, Bytecount len1, 805 qxememcmp4 (const Ibyte *s1, Bytecount len1,
806 const Intbyte *s2, Bytecount len2) 806 const Ibyte *s2, Bytecount len2)
807 { 807 {
808 int retval = qxememcmp (s1, s2, min (len1, len2)); 808 int retval = qxememcmp (s1, s2, min (len1, len2));
809 if (retval) 809 if (retval)
810 return retval; 810 return retval;
811 return len1 - len2; 811 return len1 - len2;
812 } 812 }
813 813
814 int 814 int
815 qxememcasecmp (const Intbyte *s1, const Intbyte *s2, Bytecount len) 815 qxememcasecmp (const Ibyte *s1, const Ibyte *s2, Bytecount len)
816 { 816 {
817 Intbyte *cm = strcasecmp_charmap; 817 Ibyte *cm = strcasecmp_charmap;
818 818
819 while (len--) 819 while (len--)
820 { 820 {
821 int diff = cm[*s1] - cm[*s2]; 821 int diff = cm[*s1] - cm[*s2];
822 if (diff != 0) 822 if (diff != 0)
826 826
827 return 0; 827 return 0;
828 } 828 }
829 829
830 int 830 int
831 qxememcasecmp4 (const Intbyte *s1, Bytecount len1, 831 qxememcasecmp4 (const Ibyte *s1, Bytecount len1,
832 const Intbyte *s2, Bytecount len2) 832 const Ibyte *s2, Bytecount len2)
833 { 833 {
834 int retval = qxememcasecmp (s1, s2, min (len1, len2)); 834 int retval = qxememcasecmp (s1, s2, min (len1, len2));
835 if (retval) 835 if (retval)
836 return retval; 836 return retval;
837 return len1 - len2; 837 return len1 - len2;
838 } 838 }
839 839
840 /* Do a character-by-character comparison, returning "which is greater" by 840 /* Do a character-by-character comparison, returning "which is greater" by
841 comparing the Emchar values. (#### Should have option to compare Unicode 841 comparing the Ichar values. (#### Should have option to compare Unicode
842 points) */ 842 points) */
843 843
844 int 844 int
845 qxetextcmp (const Intbyte *s1, Bytecount len1, 845 qxetextcmp (const Ibyte *s1, Bytecount len1,
846 const Intbyte *s2, Bytecount len2) 846 const Ibyte *s2, Bytecount len2)
847 { 847 {
848 while (len1 > 0 && len2 > 0) 848 while (len1 > 0 && len2 > 0)
849 { 849 {
850 const Intbyte *old_s1 = s1; 850 const Ibyte *old_s1 = s1;
851 const Intbyte *old_s2 = s2; 851 const Ibyte *old_s2 = s2;
852 int diff = charptr_emchar (s1) - charptr_emchar (s2); 852 int diff = itext_ichar (s1) - itext_ichar (s2);
853 if (diff != 0) 853 if (diff != 0)
854 return diff; 854 return diff;
855 INC_CHARPTR (s1); 855 INC_IBYTEPTR (s1);
856 INC_CHARPTR (s2); 856 INC_IBYTEPTR (s2);
857 len1 -= s1 - old_s1; 857 len1 -= s1 - old_s1;
858 len2 -= s2 - old_s2; 858 len2 -= s2 - old_s2;
859 } 859 }
860 860
861 assert (len1 >= 0 && len2 >= 0); 861 assert (len1 >= 0 && len2 >= 0);
862 return len1 - len2; 862 return len1 - len2;
863 } 863 }
864 864
865 int 865 int
866 qxetextcmp_matching (const Intbyte *s1, Bytecount len1, 866 qxetextcmp_matching (const Ibyte *s1, Bytecount len1,
867 const Intbyte *s2, Bytecount len2, 867 const Ibyte *s2, Bytecount len2,
868 Charcount *matching) 868 Charcount *matching)
869 { 869 {
870 *matching = 0; 870 *matching = 0;
871 while (len1 > 0 && len2 > 0) 871 while (len1 > 0 && len2 > 0)
872 { 872 {
873 const Intbyte *old_s1 = s1; 873 const Ibyte *old_s1 = s1;
874 const Intbyte *old_s2 = s2; 874 const Ibyte *old_s2 = s2;
875 int diff = charptr_emchar (s1) - charptr_emchar (s2); 875 int diff = itext_ichar (s1) - itext_ichar (s2);
876 if (diff != 0) 876 if (diff != 0)
877 return diff; 877 return diff;
878 INC_CHARPTR (s1); 878 INC_IBYTEPTR (s1);
879 INC_CHARPTR (s2); 879 INC_IBYTEPTR (s2);
880 len1 -= s1 - old_s1; 880 len1 -= s1 - old_s1;
881 len2 -= s2 - old_s2; 881 len2 -= s2 - old_s2;
882 (*matching)++; 882 (*matching)++;
883 } 883 }
884 884
885 assert (len1 >= 0 && len2 >= 0); 885 assert (len1 >= 0 && len2 >= 0);
886 return len1 - len2; 886 return len1 - len2;
887 } 887 }
888 888
889 /* Do a character-by-character comparison, returning "which is greater" by 889 /* Do a character-by-character comparison, returning "which is greater" by
890 comparing the Emchar values, case insensitively (by downcasing both 890 comparing the Ichar values, case insensitively (by downcasing both
891 first). (#### Should have option to compare Unicode points) 891 first). (#### Should have option to compare Unicode points)
892 892
893 In this case, both lengths must be specified because downcasing can 893 In this case, both lengths must be specified because downcasing can
894 convert characters from one length in bytes to another; therefore, two 894 convert characters from one length in bytes to another; therefore, two
895 blocks of text of different length might be equal. If both compare 895 blocks of text of different length might be equal. If both compare
896 equal up to the limit in length of one but not the other, the longer one 896 equal up to the limit in length of one but not the other, the longer one
897 is "greater". */ 897 is "greater". */
898 898
899 int 899 int
900 qxetextcasecmp (const Intbyte *s1, Bytecount len1, 900 qxetextcasecmp (const Ibyte *s1, Bytecount len1,
901 const Intbyte *s2, Bytecount len2) 901 const Ibyte *s2, Bytecount len2)
902 { 902 {
903 while (len1 > 0 && len2 > 0) 903 while (len1 > 0 && len2 > 0)
904 { 904 {
905 const Intbyte *old_s1 = s1; 905 const Ibyte *old_s1 = s1;
906 const Intbyte *old_s2 = s2; 906 const Ibyte *old_s2 = s2;
907 int diff = (DOWNCASE (0, charptr_emchar (s1)) - 907 int diff = (DOWNCASE (0, itext_ichar (s1)) -
908 DOWNCASE (0, charptr_emchar (s2))); 908 DOWNCASE (0, itext_ichar (s2)));
909 if (diff != 0) 909 if (diff != 0)
910 return diff; 910 return diff;
911 INC_CHARPTR (s1); 911 INC_IBYTEPTR (s1);
912 INC_CHARPTR (s2); 912 INC_IBYTEPTR (s2);
913 len1 -= s1 - old_s1; 913 len1 -= s1 - old_s1;
914 len2 -= s2 - old_s2; 914 len2 -= s2 - old_s2;
915 } 915 }
916 916
917 assert (len1 >= 0 && len2 >= 0); 917 assert (len1 >= 0 && len2 >= 0);
920 920
921 /* Like qxetextcasecmp() but also return number of characters at 921 /* Like qxetextcasecmp() but also return number of characters at
922 beginning that match. */ 922 beginning that match. */
923 923
924 int 924 int
925 qxetextcasecmp_matching (const Intbyte *s1, Bytecount len1, 925 qxetextcasecmp_matching (const Ibyte *s1, Bytecount len1,
926 const Intbyte *s2, Bytecount len2, 926 const Ibyte *s2, Bytecount len2,
927 Charcount *matching) 927 Charcount *matching)
928 { 928 {
929 *matching = 0; 929 *matching = 0;
930 while (len1 > 0 && len2 > 0) 930 while (len1 > 0 && len2 > 0)
931 { 931 {
932 const Intbyte *old_s1 = s1; 932 const Ibyte *old_s1 = s1;
933 const Intbyte *old_s2 = s2; 933 const Ibyte *old_s2 = s2;
934 int diff = (DOWNCASE (0, charptr_emchar (s1)) - 934 int diff = (DOWNCASE (0, itext_ichar (s1)) -
935 DOWNCASE (0, charptr_emchar (s2))); 935 DOWNCASE (0, itext_ichar (s2)));
936 if (diff != 0) 936 if (diff != 0)
937 return diff; 937 return diff;
938 INC_CHARPTR (s1); 938 INC_IBYTEPTR (s1);
939 INC_CHARPTR (s2); 939 INC_IBYTEPTR (s2);
940 len1 -= s1 - old_s1; 940 len1 -= s1 - old_s1;
941 len2 -= s2 - old_s2; 941 len2 -= s2 - old_s2;
942 (*matching)++; 942 (*matching)++;
943 } 943 }
944 944
947 } 947 }
948 948
949 int 949 int
950 lisp_strcasecmp (Lisp_Object s1, Lisp_Object s2) 950 lisp_strcasecmp (Lisp_Object s1, Lisp_Object s2)
951 { 951 {
952 Intbyte *cm = strcasecmp_charmap; 952 Ibyte *cm = strcasecmp_charmap;
953 Intbyte *p1 = XSTRING_DATA (s1); 953 Ibyte *p1 = XSTRING_DATA (s1);
954 Intbyte *p2 = XSTRING_DATA (s2); 954 Ibyte *p2 = XSTRING_DATA (s2);
955 Intbyte *e1 = p1 + XSTRING_LENGTH (s1); 955 Ibyte *e1 = p1 + XSTRING_LENGTH (s1);
956 Intbyte *e2 = p2 + XSTRING_LENGTH (s2); 956 Ibyte *e2 = p2 + XSTRING_LENGTH (s2);
957 957
958 /* again, we use a symmetric algorithm and favor clarity over 958 /* again, we use a symmetric algorithm and favor clarity over
959 nanosecond improvements. */ 959 nanosecond improvements. */
960 while (1) 960 while (1)
961 { 961 {
983 /************************************************************************/ 983 /************************************************************************/
984 984
985 /* NOTE: Does not reset the Dynarr. */ 985 /* NOTE: Does not reset the Dynarr. */
986 986
987 void 987 void
988 convert_intbyte_string_into_emchar_dynarr (const Intbyte *str, Bytecount len, 988 convert_ibyte_string_into_ichar_dynarr (const Ibyte *str, Bytecount len,
989 Emchar_dynarr *dyn) 989 Ichar_dynarr *dyn)
990 { 990 {
991 const Intbyte *strend = str + len; 991 const Ibyte *strend = str + len;
992 992
993 while (str < strend) 993 while (str < strend)
994 { 994 {
995 Emchar ch = charptr_emchar (str); 995 Ichar ch = itext_ichar (str);
996 Dynarr_add (dyn, ch); 996 Dynarr_add (dyn, ch);
997 INC_CHARPTR (str); 997 INC_IBYTEPTR (str);
998 } 998 }
999 } 999 }
1000 1000
1001 Charcount 1001 Charcount
1002 convert_intbyte_string_into_emchar_string (const Intbyte *str, Bytecount len, 1002 convert_ibyte_string_into_ichar_string (const Ibyte *str, Bytecount len,
1003 Emchar *arr) 1003 Ichar *arr)
1004 { 1004 {
1005 const Intbyte *strend = str + len; 1005 const Ibyte *strend = str + len;
1006 Charcount newlen = 0; 1006 Charcount newlen = 0;
1007 while (str < strend) 1007 while (str < strend)
1008 { 1008 {
1009 Emchar ch = charptr_emchar (str); 1009 Ichar ch = itext_ichar (str);
1010 arr[newlen++] = ch; 1010 arr[newlen++] = ch;
1011 INC_CHARPTR (str); 1011 INC_IBYTEPTR (str);
1012 } 1012 }
1013 return newlen; 1013 return newlen;
1014 } 1014 }
1015 1015
1016 /* Convert an array of Emchars into the equivalent string representation. 1016 /* Convert an array of Ichars into the equivalent string representation.
1017 Store into the given Intbyte dynarr. Does not reset the dynarr. 1017 Store into the given Ibyte dynarr. Does not reset the dynarr.
1018 Does not add a terminating zero. */ 1018 Does not add a terminating zero. */
1019 1019
1020 void 1020 void
1021 convert_emchar_string_into_intbyte_dynarr (Emchar *arr, int nels, 1021 convert_ichar_string_into_ibyte_dynarr (Ichar *arr, int nels,
1022 Intbyte_dynarr *dyn) 1022 Ibyte_dynarr *dyn)
1023 { 1023 {
1024 Intbyte str[MAX_EMCHAR_LEN]; 1024 Ibyte str[MAX_ICHAR_LEN];
1025 int i; 1025 int i;
1026 1026
1027 for (i = 0; i < nels; i++) 1027 for (i = 0; i < nels; i++)
1028 { 1028 {
1029 Bytecount len = set_charptr_emchar (str, arr[i]); 1029 Bytecount len = set_itext_ichar (str, arr[i]);
1030 Dynarr_add_many (dyn, str, len); 1030 Dynarr_add_many (dyn, str, len);
1031 } 1031 }
1032 } 1032 }
1033 1033
1034 /* Convert an array of Emchars into the equivalent string representation. 1034 /* Convert an array of Ichars into the equivalent string representation.
1035 Malloc the space needed for this and return it. If LEN_OUT is not a 1035 Malloc the space needed for this and return it. If LEN_OUT is not a
1036 NULL pointer, store into LEN_OUT the number of Intbytes in the 1036 NULL pointer, store into LEN_OUT the number of Ibytes in the
1037 malloc()ed string. Note that the actual number of Intbytes allocated 1037 malloc()ed string. Note that the actual number of Ibytes allocated
1038 is one more than this: the returned string is zero-terminated. */ 1038 is one more than this: the returned string is zero-terminated. */
1039 1039
1040 Intbyte * 1040 Ibyte *
1041 convert_emchar_string_into_malloced_string (Emchar *arr, int nels, 1041 convert_ichar_string_into_malloced_string (Ichar *arr, int nels,
1042 Bytecount *len_out) 1042 Bytecount *len_out)
1043 { 1043 {
1044 /* Damn zero-termination. */ 1044 /* Damn zero-termination. */
1045 Intbyte *str = (Intbyte *) ALLOCA (nels * MAX_EMCHAR_LEN + 1); 1045 Ibyte *str = (Ibyte *) ALLOCA (nels * MAX_ICHAR_LEN + 1);
1046 Intbyte *strorig = str; 1046 Ibyte *strorig = str;
1047 Bytecount len; 1047 Bytecount len;
1048 1048
1049 int i; 1049 int i;
1050 1050
1051 for (i = 0; i < nels; i++) 1051 for (i = 0; i < nels; i++)
1052 str += set_charptr_emchar (str, arr[i]); 1052 str += set_itext_ichar (str, arr[i]);
1053 *str = '\0'; 1053 *str = '\0';
1054 len = str - strorig; 1054 len = str - strorig;
1055 str = (Intbyte *) xmalloc (1 + len); 1055 str = (Ibyte *) xmalloc (1 + len);
1056 memcpy (str, strorig, 1 + len); 1056 memcpy (str, strorig, 1 + len);
1057 if (len_out) 1057 if (len_out)
1058 *len_out = len; 1058 *len_out = len;
1059 return str; 1059 return str;
1060 } 1060 }
1062 #define COPY_TEXT_BETWEEN_FORMATS(srcfmt, dstfmt) \ 1062 #define COPY_TEXT_BETWEEN_FORMATS(srcfmt, dstfmt) \
1063 do \ 1063 do \
1064 { \ 1064 { \
1065 if (dst) \ 1065 if (dst) \
1066 { \ 1066 { \
1067 Intbyte *dstend = dst + dstlen; \ 1067 Ibyte *dstend = dst + dstlen; \
1068 Intbyte *dstp = dst; \ 1068 Ibyte *dstp = dst; \
1069 const Intbyte *srcend = src + srclen; \ 1069 const Ibyte *srcend = src + srclen; \
1070 const Intbyte *srcp = src; \ 1070 const Ibyte *srcp = src; \
1071 \ 1071 \
1072 while (srcp < srcend) \ 1072 while (srcp < srcend) \
1073 { \ 1073 { \
1074 Emchar ch = charptr_emchar_fmt (srcp, srcfmt, srcobj); \ 1074 Ichar ch = itext_ichar_fmt (srcp, srcfmt, srcobj); \
1075 Bytecount len = emchar_len_fmt (ch, dstfmt); \ 1075 Bytecount len = ichar_len_fmt (ch, dstfmt); \
1076 \ 1076 \
1077 if (dstp + len <= dstend) \ 1077 if (dstp + len <= dstend) \
1078 { \ 1078 { \
1079 set_charptr_emchar_fmt (dstp, ch, dstfmt, dstobj); \ 1079 set_itext_ichar_fmt (dstp, ch, dstfmt, dstobj); \
1080 dstp += len; \ 1080 dstp += len; \
1081 } \ 1081 } \
1082 else \ 1082 else \
1083 break; \ 1083 break; \
1084 INC_CHARPTR_FMT (srcp, srcfmt); \ 1084 INC_IBYTEPTR_FMT (srcp, srcfmt); \
1085 } \ 1085 } \
1086 text_checking_assert (srcp <= srcend); \ 1086 text_checking_assert (srcp <= srcend); \
1087 if (src_used) \ 1087 if (src_used) \
1088 *src_used = srcp - src; \ 1088 *src_used = srcp - src; \
1089 return dstp - dst; \ 1089 return dstp - dst; \
1090 } \ 1090 } \
1091 else \ 1091 else \
1092 { \ 1092 { \
1093 const Intbyte *srcend = src + srclen; \ 1093 const Ibyte *srcend = src + srclen; \
1094 const Intbyte *srcp = src; \ 1094 const Ibyte *srcp = src; \
1095 Bytecount total = 0; \ 1095 Bytecount total = 0; \
1096 \ 1096 \
1097 while (srcp < srcend) \ 1097 while (srcp < srcend) \
1098 { \ 1098 { \
1099 total += emchar_len_fmt (charptr_emchar_fmt (srcp, srcfmt, \ 1099 total += ichar_len_fmt (itext_ichar_fmt (srcp, srcfmt, \
1100 srcobj), dstfmt); \ 1100 srcobj), dstfmt); \
1101 INC_CHARPTR_FMT (srcp, srcfmt); \ 1101 INC_IBYTEPTR_FMT (srcp, srcfmt); \
1102 } \ 1102 } \
1103 text_checking_assert (srcp == srcend); \ 1103 text_checking_assert (srcp == srcend); \
1104 if (src_used) \ 1104 if (src_used) \
1105 *src_used = srcp - src; \ 1105 *src_used = srcp - src; \
1106 return total; \ 1106 return total; \
1114 SRC_USED (if not NULL). If DST is NULL, don't actually store anything 1114 SRC_USED (if not NULL). If DST is NULL, don't actually store anything
1115 and just return the size needed to store all the text. Will not copy 1115 and just return the size needed to store all the text. Will not copy
1116 partial characters into DST. */ 1116 partial characters into DST. */
1117 1117
1118 Bytecount 1118 Bytecount
1119 copy_text_between_formats (const Intbyte *src, Bytecount srclen, 1119 copy_text_between_formats (const Ibyte *src, Bytecount srclen,
1120 Internal_Format srcfmt, 1120 Internal_Format srcfmt,
1121 Lisp_Object srcobj, 1121 Lisp_Object srcobj,
1122 Intbyte *dst, Bytecount dstlen, 1122 Ibyte *dst, Bytecount dstlen,
1123 Internal_Format dstfmt, 1123 Internal_Format dstfmt,
1124 Lisp_Object dstobj, 1124 Lisp_Object dstobj,
1125 Bytecount *src_used) 1125 Bytecount *src_used)
1126 { 1126 {
1127 if (srcfmt == dstfmt && 1127 if (srcfmt == dstfmt &&
1128 objects_have_same_internal_representation (srcobj, dstobj)) 1128 objects_have_same_internal_representation (srcobj, dstobj))
1129 { 1129 {
1130 if (dst) 1130 if (dst)
1131 { 1131 {
1132 srclen = min (srclen, dstlen); 1132 srclen = min (srclen, dstlen);
1133 srclen = validate_intbyte_string_backward (src, srclen); 1133 srclen = validate_ibyte_string_backward (src, srclen);
1134 memcpy (dst, src, srclen); 1134 memcpy (dst, src, srclen);
1135 if (src_used) 1135 if (src_used)
1136 *src_used = srclen; 1136 *src_used = srclen;
1137 return srclen; 1137 return srclen;
1138 } 1138 }
1162 through SRC_USED (if not NULL). If DST is NULL, don't actually store 1162 through SRC_USED (if not NULL). If DST is NULL, don't actually store
1163 anything and just return the size needed to store all the text. */ 1163 anything and just return the size needed to store all the text. */
1164 1164
1165 Bytecount 1165 Bytecount
1166 copy_buffer_text_out (struct buffer *buf, Bytebpos pos, 1166 copy_buffer_text_out (struct buffer *buf, Bytebpos pos,
1167 Bytecount len, Intbyte *dst, Bytecount dstlen, 1167 Bytecount len, Ibyte *dst, Bytecount dstlen,
1168 Internal_Format dstfmt, Lisp_Object dstobj, 1168 Internal_Format dstfmt, Lisp_Object dstobj,
1169 Bytecount *src_used) 1169 Bytecount *src_used)
1170 { 1170 {
1171 Bytecount dst_used = 0; 1171 Bytecount dst_used = 0;
1172 if (src_used) 1172 if (src_used)
1211 /************************************************************************/ 1211 /************************************************************************/
1212 /* charset properties of strings */ 1212 /* charset properties of strings */
1213 /************************************************************************/ 1213 /************************************************************************/
1214 1214
1215 void 1215 void
1216 find_charsets_in_intbyte_string (unsigned char *charsets, const Intbyte *str, 1216 find_charsets_in_ibyte_string (unsigned char *charsets, const Ibyte *str,
1217 Bytecount len) 1217 Bytecount len)
1218 { 1218 {
1219 #ifndef MULE 1219 #ifndef MULE
1220 /* Telescope this. */ 1220 /* Telescope this. */
1221 charsets[0] = 1; 1221 charsets[0] = 1;
1222 #else 1222 #else
1223 const Intbyte *strend = str + len; 1223 const Ibyte *strend = str + len;
1224 memset (charsets, 0, NUM_LEADING_BYTES); 1224 memset (charsets, 0, NUM_LEADING_BYTES);
1225 1225
1226 /* #### SJT doesn't like this. */ 1226 /* #### SJT doesn't like this. */
1227 if (len == 0) 1227 if (len == 0)
1228 { 1228 {
1230 return; 1230 return;
1231 } 1231 }
1232 1232
1233 while (str < strend) 1233 while (str < strend)
1234 { 1234 {
1235 charsets[emchar_leading_byte (charptr_emchar (str)) - MIN_LEADING_BYTE] = 1235 charsets[ichar_leading_byte (itext_ichar (str)) - MIN_LEADING_BYTE] =
1236 1; 1236 1;
1237 INC_CHARPTR (str); 1237 INC_IBYTEPTR (str);
1238 } 1238 }
1239 #endif 1239 #endif
1240 } 1240 }
1241 1241
1242 void 1242 void
1243 find_charsets_in_emchar_string (unsigned char *charsets, const Emchar *str, 1243 find_charsets_in_ichar_string (unsigned char *charsets, const Ichar *str,
1244 Charcount len) 1244 Charcount len)
1245 { 1245 {
1246 #ifndef MULE 1246 #ifndef MULE
1247 /* Telescope this. */ 1247 /* Telescope this. */
1248 charsets[0] = 1; 1248 charsets[0] = 1;
1258 return; 1258 return;
1259 } 1259 }
1260 1260
1261 for (i = 0; i < len; i++) 1261 for (i = 0; i < len; i++)
1262 { 1262 {
1263 charsets[emchar_leading_byte (str[i]) - MIN_LEADING_BYTE] = 1; 1263 charsets[ichar_leading_byte (str[i]) - MIN_LEADING_BYTE] = 1;
1264 } 1264 }
1265 #endif 1265 #endif
1266 } 1266 }
1267 1267
1268 int 1268 int
1269 intbyte_string_displayed_columns (const Intbyte *str, Bytecount len) 1269 ibyte_string_displayed_columns (const Ibyte *str, Bytecount len)
1270 { 1270 {
1271 int cols = 0; 1271 int cols = 0;
1272 const Intbyte *end = str + len; 1272 const Ibyte *end = str + len;
1273 1273
1274 while (str < end) 1274 while (str < end)
1275 { 1275 {
1276 #ifdef MULE 1276 #ifdef MULE
1277 Emchar ch = charptr_emchar (str); 1277 Ichar ch = itext_ichar (str);
1278 cols += XCHARSET_COLUMNS (emchar_charset (ch)); 1278 cols += XCHARSET_COLUMNS (ichar_charset (ch));
1279 #else 1279 #else
1280 cols++; 1280 cols++;
1281 #endif 1281 #endif
1282 INC_CHARPTR (str); 1282 INC_IBYTEPTR (str);
1283 } 1283 }
1284 1284
1285 return cols; 1285 return cols;
1286 } 1286 }
1287 1287
1288 int 1288 int
1289 emchar_string_displayed_columns (const Emchar *str, Charcount len) 1289 ichar_string_displayed_columns (const Ichar *str, Charcount len)
1290 { 1290 {
1291 #ifdef MULE 1291 #ifdef MULE
1292 int cols = 0; 1292 int cols = 0;
1293 int i; 1293 int i;
1294 1294
1295 for (i = 0; i < len; i++) 1295 for (i = 0; i < len; i++)
1296 cols += XCHARSET_COLUMNS (emchar_charset (str[i])); 1296 cols += XCHARSET_COLUMNS (ichar_charset (str[i]));
1297 1297
1298 return cols; 1298 return cols;
1299 #else /* not MULE */ 1299 #else /* not MULE */
1300 return len; 1300 return len;
1301 #endif 1301 #endif
1302 } 1302 }
1303 1303
1304 Charcount 1304 Charcount
1305 intbyte_string_nonascii_chars (const Intbyte *str, Bytecount len) 1305 ibyte_string_nonascii_chars (const Ibyte *str, Bytecount len)
1306 { 1306 {
1307 #ifdef MULE 1307 #ifdef MULE
1308 const Intbyte *end = str + len; 1308 const Ibyte *end = str + len;
1309 Charcount retval = 0; 1309 Charcount retval = 0;
1310 1310
1311 while (str < end) 1311 while (str < end)
1312 { 1312 {
1313 if (!byte_ascii_p (*str)) 1313 if (!byte_ascii_p (*str))
1314 retval++; 1314 retval++;
1315 INC_CHARPTR (str); 1315 INC_IBYTEPTR (str);
1316 } 1316 }
1317 1317
1318 return retval; 1318 return retval;
1319 #else 1319 #else
1320 return 0; 1320 return 0;
1325 /***************************************************************************/ 1325 /***************************************************************************/
1326 /* Eistring helper functions */ 1326 /* Eistring helper functions */
1327 /***************************************************************************/ 1327 /***************************************************************************/
1328 1328
1329 int 1329 int
1330 eistr_casefiddle_1 (Intbyte *olddata, Bytecount len, Intbyte *newdata, 1330 eistr_casefiddle_1 (Ibyte *olddata, Bytecount len, Ibyte *newdata,
1331 int downp) 1331 int downp)
1332 { 1332 {
1333 Intbyte *endp = olddata + len; 1333 Ibyte *endp = olddata + len;
1334 Intbyte *newp = newdata; 1334 Ibyte *newp = newdata;
1335 int changedp = 0; 1335 int changedp = 0;
1336 1336
1337 while (olddata < endp) 1337 while (olddata < endp)
1338 { 1338 {
1339 Emchar c = charptr_emchar (olddata); 1339 Ichar c = itext_ichar (olddata);
1340 Emchar newc; 1340 Ichar newc;
1341 1341
1342 if (downp) 1342 if (downp)
1343 newc = DOWNCASE (0, c); 1343 newc = DOWNCASE (0, c);
1344 else 1344 else
1345 newc = UPCASE (0, c); 1345 newc = UPCASE (0, c);
1346 1346
1347 if (c != newc) 1347 if (c != newc)
1348 changedp = 1; 1348 changedp = 1;
1349 1349
1350 newp += set_charptr_emchar (newp, newc); 1350 newp += set_itext_ichar (newp, newc);
1351 INC_CHARPTR (olddata); 1351 INC_IBYTEPTR (olddata);
1352 } 1352 }
1353 1353
1354 *newp = '\0'; 1354 *newp = '\0';
1355 1355
1356 return changedp ? newp - newdata : 0; 1356 return changedp ? newp - newdata : 0;
1374 if (ei->mallocp_) 1374 if (ei->mallocp_)
1375 return; 1375 return;
1376 ei->mallocp_ = 1; 1376 ei->mallocp_ = 1;
1377 if (ei->data_) 1377 if (ei->data_)
1378 { 1378 {
1379 Intbyte *newdata; 1379 Ibyte *newdata;
1380 1380
1381 ei->max_size_allocated_ = 1381 ei->max_size_allocated_ =
1382 eifind_large_enough_buffer (0, ei->bytelen_ + 1); 1382 eifind_large_enough_buffer (0, ei->bytelen_ + 1);
1383 newdata = (Intbyte *) xmalloc (ei->max_size_allocated_); 1383 newdata = (Ibyte *) xmalloc (ei->max_size_allocated_);
1384 memcpy (newdata, ei->data_, ei->bytelen_ + 1); 1384 memcpy (newdata, ei->data_, ei->bytelen_ + 1);
1385 ei->data_ = newdata; 1385 ei->data_ = newdata;
1386 } 1386 }
1387 1387
1388 if (ei->extdata_) 1388 if (ei->extdata_)
1397 } 1397 }
1398 } 1398 }
1399 1399
1400 int 1400 int
1401 eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff, 1401 eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff,
1402 Bytecount len, Charcount charlen, const Intbyte *data, 1402 Bytecount len, Charcount charlen, const Ibyte *data,
1403 const Eistring *ei2, int is_c, int fold_case) 1403 const Eistring *ei2, int is_c, int fold_case)
1404 { 1404 {
1405 assert ((off < 0) != (charoff < 0)); 1405 assert ((off < 0) != (charoff < 0));
1406 if (off < 0) 1406 if (off < 0)
1407 { 1407 {
1420 assert ((is_c != 0) == (data != 0)); 1420 assert ((is_c != 0) == (data != 0));
1421 assert (fold_case >= 0 && fold_case <= 2); 1421 assert (fold_case >= 0 && fold_case <= 2);
1422 1422
1423 { 1423 {
1424 Bytecount dstlen; 1424 Bytecount dstlen;
1425 const Intbyte *src = ei->data_, *dst; 1425 const Ibyte *src = ei->data_, *dst;
1426 1426
1427 if (data) 1427 if (data)
1428 { 1428 {
1429 dst = data; 1429 dst = data;
1430 dstlen = qxestrlen (data); 1430 dstlen = qxestrlen (data);
1442 fold_case == 1 ? qxememcasecmp4 (src, len, dst, dstlen) : 1442 fold_case == 1 ? qxememcasecmp4 (src, len, dst, dstlen) :
1443 qxetextcasecmp (src, len, dst, dstlen)); 1443 qxetextcasecmp (src, len, dst, dstlen));
1444 } 1444 }
1445 } 1445 }
1446 1446
1447 Intbyte * 1447 Ibyte *
1448 eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out, Internal_Format fmt, 1448 eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out, Internal_Format fmt,
1449 Lisp_Object object) 1449 Lisp_Object object)
1450 { 1450 {
1451 Intbyte *ptr; 1451 Ibyte *ptr;
1452 1452
1453 assert (fmt == FORMAT_DEFAULT); 1453 assert (fmt == FORMAT_DEFAULT);
1454 ptr = xnew_array (Intbyte, eistr->bytelen_ + 1); 1454 ptr = xnew_array (Ibyte, eistr->bytelen_ + 1);
1455 if (len_out) 1455 if (len_out)
1456 *len_out = eistr->bytelen_; 1456 *len_out = eistr->bytelen_;
1457 memcpy (ptr, eistr->data_, eistr->bytelen_ + 1); 1457 memcpy (ptr, eistr->data_, eistr->bytelen_ + 1);
1458 return ptr; 1458 return ptr;
1459 } 1459 }
1468 #ifdef MULE 1468 #ifdef MULE
1469 1469
1470 /* Skip as many ASCII bytes as possible in the memory block [PTR, END). 1470 /* Skip as many ASCII bytes as possible in the memory block [PTR, END).
1471 Return pointer to the first non-ASCII byte. optimized for long 1471 Return pointer to the first non-ASCII byte. optimized for long
1472 stretches of ASCII. */ 1472 stretches of ASCII. */
1473 inline static const Intbyte * 1473 inline static const Ibyte *
1474 skip_ascii (const Intbyte *ptr, const Intbyte *end) 1474 skip_ascii (const Ibyte *ptr, const Ibyte *end)
1475 { 1475 {
1476 #ifdef EFFICIENT_INT_128_BIT 1476 #ifdef EFFICIENT_INT_128_BIT
1477 # define STRIDE_TYPE INT_128_BIT 1477 # define STRIDE_TYPE INT_128_BIT
1478 # define HIGH_BIT_MASK \ 1478 # define HIGH_BIT_MASK \
1479 MAKE_128_BIT_UNSIGNED_CONSTANT (0x80808080808080808080808080808080) 1479 MAKE_128_BIT_UNSIGNED_CONSTANT (0x80808080808080808080808080808080)
1501 ptr++; 1501 ptr++;
1502 } 1502 }
1503 ascii_end = (const unsigned STRIDE_TYPE *) ptr; 1503 ascii_end = (const unsigned STRIDE_TYPE *) ptr;
1504 /* This loop screams, because we can detect ASCII 1504 /* This loop screams, because we can detect ASCII
1505 characters 4 or 8 at a time. */ 1505 characters 4 or 8 at a time. */
1506 while ((const Intbyte *) ascii_end + STRIDE <= end 1506 while ((const Ibyte *) ascii_end + STRIDE <= end
1507 && !(*ascii_end & HIGH_BIT_MASK)) 1507 && !(*ascii_end & HIGH_BIT_MASK))
1508 ascii_end++; 1508 ascii_end++;
1509 ptr = (Intbyte *) ascii_end; 1509 ptr = (Ibyte *) ascii_end;
1510 while (ptr < end && byte_ascii_p (*ptr)) 1510 while (ptr < end && byte_ascii_p (*ptr))
1511 ptr++; 1511 ptr++;
1512 return ptr; 1512 return ptr;
1513 } 1513 }
1514 1514
1516 These work on strings of all sizes but are more efficient than a simple 1516 These work on strings of all sizes but are more efficient than a simple
1517 loop on large strings and probably less efficient on sufficiently small 1517 loop on large strings and probably less efficient on sufficiently small
1518 strings. */ 1518 strings. */
1519 1519
1520 Charcount 1520 Charcount
1521 bytecount_to_charcount_fun (const Intbyte *ptr, Bytecount len) 1521 bytecount_to_charcount_fun (const Ibyte *ptr, Bytecount len)
1522 { 1522 {
1523 Charcount count = 0; 1523 Charcount count = 0;
1524 const Intbyte *end = ptr + len; 1524 const Ibyte *end = ptr + len;
1525 while (1) 1525 while (1)
1526 { 1526 {
1527 const Intbyte *newptr = skip_ascii (ptr, end); 1527 const Ibyte *newptr = skip_ascii (ptr, end);
1528 count += newptr - ptr; 1528 count += newptr - ptr;
1529 ptr = newptr; 1529 ptr = newptr;
1530 if (ptr == end) 1530 if (ptr == end)
1531 break; 1531 break;
1532 { 1532 {
1533 /* Optimize for successive characters from the same charset */ 1533 /* Optimize for successive characters from the same charset */
1534 Intbyte leading_byte = *ptr; 1534 Ibyte leading_byte = *ptr;
1535 int bytes = rep_bytes_by_first_byte (leading_byte); 1535 int bytes = rep_bytes_by_first_byte (leading_byte);
1536 while (ptr < end && *ptr == leading_byte) 1536 while (ptr < end && *ptr == leading_byte)
1537 ptr += bytes, count++; 1537 ptr += bytes, count++;
1538 } 1538 }
1539 } 1539 }
1548 1548
1549 return count; 1549 return count;
1550 } 1550 }
1551 1551
1552 Bytecount 1552 Bytecount
1553 charcount_to_bytecount_fun (const Intbyte *ptr, Charcount len) 1553 charcount_to_bytecount_fun (const Ibyte *ptr, Charcount len)
1554 { 1554 {
1555 const Intbyte *newptr = ptr; 1555 const Ibyte *newptr = ptr;
1556 while (1) 1556 while (1)
1557 { 1557 {
1558 const Intbyte *newnewptr = skip_ascii (newptr, newptr + len); 1558 const Ibyte *newnewptr = skip_ascii (newptr, newptr + len);
1559 len -= newnewptr - newptr; 1559 len -= newnewptr - newptr;
1560 newptr = newnewptr; 1560 newptr = newnewptr;
1561 if (!len) 1561 if (!len)
1562 break; 1562 break;
1563 { 1563 {
1564 /* Optimize for successive characters from the same charset */ 1564 /* Optimize for successive characters from the same charset */
1565 Intbyte leading_byte = *newptr; 1565 Ibyte leading_byte = *newptr;
1566 int bytes = rep_bytes_by_first_byte (leading_byte); 1566 int bytes = rep_bytes_by_first_byte (leading_byte);
1567 while (len > 0 && *newptr == leading_byte) 1567 while (len > 0 && *newptr == leading_byte)
1568 newptr += bytes, len--; 1568 newptr += bytes, len--;
1569 } 1569 }
1570 } 1570 }
2733 /* Implement TO_EXTERNAL_FORMAT, TO_INTERNAL_FORMAT */ 2733 /* Implement TO_EXTERNAL_FORMAT, TO_INTERNAL_FORMAT */
2734 /************************************************************************/ 2734 /************************************************************************/
2735 2735
2736 typedef struct 2736 typedef struct
2737 { 2737 {
2738 Dynarr_declare (Intbyte_dynarr *); 2738 Dynarr_declare (Ibyte_dynarr *);
2739 } Intbyte_dynarr_dynarr; 2739 } Ibyte_dynarr_dynarr;
2740 2740
2741 typedef struct 2741 typedef struct
2742 { 2742 {
2743 Dynarr_declare (Extbyte_dynarr *); 2743 Dynarr_declare (Extbyte_dynarr *);
2744 } Extbyte_dynarr_dynarr; 2744 } Extbyte_dynarr_dynarr;
2745 2745
2746 static Extbyte_dynarr_dynarr *conversion_out_dynarr_list; 2746 static Extbyte_dynarr_dynarr *conversion_out_dynarr_list;
2747 static Intbyte_dynarr_dynarr *conversion_in_dynarr_list; 2747 static Ibyte_dynarr_dynarr *conversion_in_dynarr_list;
2748 2748
2749 static int dfc_convert_to_external_format_in_use; 2749 static int dfc_convert_to_external_format_in_use;
2750 static int dfc_convert_to_internal_format_in_use; 2750 static int dfc_convert_to_internal_format_in_use;
2751 2751
2752 void 2752 void
2789 is a lisp string. */ 2789 is a lisp string. */
2790 if (source_type != DFC_TYPE_LISP_LSTREAM && 2790 if (source_type != DFC_TYPE_LISP_LSTREAM &&
2791 sink_type != DFC_TYPE_LISP_LSTREAM && 2791 sink_type != DFC_TYPE_LISP_LSTREAM &&
2792 coding_system_is_binary (coding_system)) 2792 coding_system_is_binary (coding_system))
2793 { 2793 {
2794 const Intbyte *ptr; 2794 const Ibyte *ptr;
2795 Bytecount len; 2795 Bytecount len;
2796 2796
2797 if (source_type == DFC_TYPE_LISP_STRING) 2797 if (source_type == DFC_TYPE_LISP_STRING)
2798 { 2798 {
2799 ptr = XSTRING_DATA (source->lisp_object); 2799 ptr = XSTRING_DATA (source->lisp_object);
2800 len = XSTRING_LENGTH (source->lisp_object); 2800 len = XSTRING_LENGTH (source->lisp_object);
2801 } 2801 }
2802 else 2802 else
2803 { 2803 {
2804 ptr = (Intbyte *) source->data.ptr; 2804 ptr = (Ibyte *) source->data.ptr;
2805 len = source->data.len; 2805 len = source->data.len;
2806 } 2806 }
2807 2807
2808 #ifdef MULE 2808 #ifdef MULE
2809 { 2809 {
2810 const Intbyte *end; 2810 const Ibyte *end;
2811 for (end = ptr + len; ptr < end;) 2811 for (end = ptr + len; ptr < end;)
2812 { 2812 {
2813 Intbyte c = 2813 Ibyte c =
2814 (byte_ascii_p (*ptr)) ? *ptr : 2814 (byte_ascii_p (*ptr)) ? *ptr :
2815 (*ptr == LEADING_BYTE_CONTROL_1) ? (*(ptr+1) - 0x20) : 2815 (*ptr == LEADING_BYTE_CONTROL_1) ? (*(ptr+1) - 0x20) :
2816 (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) : 2816 (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) :
2817 '~'; 2817 '~';
2818 2818
2819 Dynarr_add (conversion_out_dynarr, (Extbyte) c); 2819 Dynarr_add (conversion_out_dynarr, (Extbyte) c);
2820 INC_CHARPTR (ptr); 2820 INC_IBYTEPTR (ptr);
2821 } 2821 }
2822 text_checking_assert (ptr == end); 2822 text_checking_assert (ptr == end);
2823 } 2823 }
2824 #else 2824 #else
2825 Dynarr_add_many (conversion_out_dynarr, ptr, len); 2825 Dynarr_add_many (conversion_out_dynarr, ptr, len);
2830 /* Optimize the common case involving Unicode where only ASCII is involved */ 2830 /* Optimize the common case involving Unicode where only ASCII is involved */
2831 else if (source_type != DFC_TYPE_LISP_LSTREAM && 2831 else if (source_type != DFC_TYPE_LISP_LSTREAM &&
2832 sink_type != DFC_TYPE_LISP_LSTREAM && 2832 sink_type != DFC_TYPE_LISP_LSTREAM &&
2833 dfc_coding_system_is_unicode (coding_system)) 2833 dfc_coding_system_is_unicode (coding_system))
2834 { 2834 {
2835 const Intbyte *ptr, *p; 2835 const Ibyte *ptr, *p;
2836 Bytecount len; 2836 Bytecount len;
2837 const Intbyte *end; 2837 const Ibyte *end;
2838 2838
2839 if (source_type == DFC_TYPE_LISP_STRING) 2839 if (source_type == DFC_TYPE_LISP_STRING)
2840 { 2840 {
2841 ptr = XSTRING_DATA (source->lisp_object); 2841 ptr = XSTRING_DATA (source->lisp_object);
2842 len = XSTRING_LENGTH (source->lisp_object); 2842 len = XSTRING_LENGTH (source->lisp_object);
2843 } 2843 }
2844 else 2844 else
2845 { 2845 {
2846 ptr = (Intbyte *) source->data.ptr; 2846 ptr = (Ibyte *) source->data.ptr;
2847 len = source->data.len; 2847 len = source->data.len;
2848 } 2848 }
2849 end = ptr + len; 2849 end = ptr + len;
2850 2850
2851 for (p = ptr; p < end; p++) 2851 for (p = ptr; p < end; p++)
2954 { 2954 {
2955 /* It's guaranteed that many callers are not prepared for GC here, 2955 /* It's guaranteed that many callers are not prepared for GC here,
2956 esp. given that this code conversion occurs in many very hidden 2956 esp. given that this code conversion occurs in many very hidden
2957 places. */ 2957 places. */
2958 int count = begin_gc_forbidden (); 2958 int count = begin_gc_forbidden ();
2959 Intbyte_dynarr *conversion_in_dynarr; 2959 Ibyte_dynarr *conversion_in_dynarr;
2960 2960
2961 type_checking_assert 2961 type_checking_assert
2962 ((source_type == DFC_TYPE_DATA || 2962 ((source_type == DFC_TYPE_DATA ||
2963 source_type == DFC_TYPE_LISP_LSTREAM) 2963 source_type == DFC_TYPE_LISP_LSTREAM)
2964 && 2964 &&
2965 (sink_type == DFC_TYPE_DATA || 2965 (sink_type == DFC_TYPE_DATA ||
2966 sink_type == DFC_TYPE_LISP_LSTREAM)); 2966 sink_type == DFC_TYPE_LISP_LSTREAM));
2967 2967
2968 if (Dynarr_length (conversion_in_dynarr_list) <= 2968 if (Dynarr_length (conversion_in_dynarr_list) <=
2969 dfc_convert_to_internal_format_in_use) 2969 dfc_convert_to_internal_format_in_use)
2970 Dynarr_add (conversion_in_dynarr_list, Dynarr_new (Intbyte)); 2970 Dynarr_add (conversion_in_dynarr_list, Dynarr_new (Ibyte));
2971 conversion_in_dynarr = Dynarr_at (conversion_in_dynarr_list, 2971 conversion_in_dynarr = Dynarr_at (conversion_in_dynarr_list,
2972 dfc_convert_to_internal_format_in_use); 2972 dfc_convert_to_internal_format_in_use);
2973 Dynarr_reset (conversion_in_dynarr); 2973 Dynarr_reset (conversion_in_dynarr);
2974 2974
2975 internal_bind_int (&dfc_convert_to_internal_format_in_use, 2975 internal_bind_int (&dfc_convert_to_internal_format_in_use,
2980 if (source_type != DFC_TYPE_LISP_LSTREAM && 2980 if (source_type != DFC_TYPE_LISP_LSTREAM &&
2981 sink_type != DFC_TYPE_LISP_LSTREAM && 2981 sink_type != DFC_TYPE_LISP_LSTREAM &&
2982 coding_system_is_binary (coding_system)) 2982 coding_system_is_binary (coding_system))
2983 { 2983 {
2984 #ifdef MULE 2984 #ifdef MULE
2985 const Intbyte *ptr = (const Intbyte *) source->data.ptr; 2985 const Ibyte *ptr = (const Ibyte *) source->data.ptr;
2986 Bytecount len = source->data.len; 2986 Bytecount len = source->data.len;
2987 const Intbyte *end = ptr + len; 2987 const Ibyte *end = ptr + len;
2988 2988
2989 for (; ptr < end; ptr++) 2989 for (; ptr < end; ptr++)
2990 { 2990 {
2991 Intbyte c = *ptr; 2991 Ibyte c = *ptr;
2992 2992
2993 if (byte_ascii_p (c)) 2993 if (byte_ascii_p (c))
2994 Dynarr_add (conversion_in_dynarr, c); 2994 Dynarr_add (conversion_in_dynarr, c);
2995 else if (byte_c1_p (c)) 2995 else if (byte_c1_p (c))
2996 { 2996 {
3011 /* Optimize the common case involving Unicode where only ASCII/Latin-1 is involved */ 3011 /* Optimize the common case involving Unicode where only ASCII/Latin-1 is involved */
3012 else if (source_type != DFC_TYPE_LISP_LSTREAM && 3012 else if (source_type != DFC_TYPE_LISP_LSTREAM &&
3013 sink_type != DFC_TYPE_LISP_LSTREAM && 3013 sink_type != DFC_TYPE_LISP_LSTREAM &&
3014 dfc_coding_system_is_unicode (coding_system)) 3014 dfc_coding_system_is_unicode (coding_system))
3015 { 3015 {
3016 const Intbyte *ptr = (const Intbyte *) source->data.ptr + 1; 3016 const Ibyte *ptr = (const Ibyte *) source->data.ptr + 1;
3017 Bytecount len = source->data.len; 3017 Bytecount len = source->data.len;
3018 const Intbyte *end = ptr + len; 3018 const Ibyte *end = ptr + len;
3019 3019
3020 if (len & 1) 3020 if (len & 1)
3021 goto the_hard_way; 3021 goto the_hard_way;
3022 3022
3023 for (; ptr < end; ptr += 2) 3023 for (; ptr < end; ptr += 2)
3024 { 3024 {
3025 if (*ptr) 3025 if (*ptr)
3026 goto the_hard_way; 3026 goto the_hard_way;
3027 } 3027 }
3028 3028
3029 ptr = (const Intbyte *) source->data.ptr; 3029 ptr = (const Ibyte *) source->data.ptr;
3030 end = ptr + len; 3030 end = ptr + len;
3031 3031
3032 for (; ptr < end; ptr += 2) 3032 for (; ptr < end; ptr += 2)
3033 { 3033 {
3034 Intbyte c = *ptr; 3034 Ibyte c = *ptr;
3035 3035
3036 if (byte_ascii_p (c)) 3036 if (byte_ascii_p (c))
3037 Dynarr_add (conversion_in_dynarr, c); 3037 Dynarr_add (conversion_in_dynarr, c);
3038 #ifdef MULE 3038 #ifdef MULE
3039 else if (byte_c1_p (c)) 3039 else if (byte_c1_p (c))
3133 } 3133 }
3134 } 3134 }
3135 3135
3136 3136
3137 /************************************************************************/ 3137 /************************************************************************/
3138 /* Basic Emchar functions */ 3138 /* Basic Ichar functions */
3139 /************************************************************************/ 3139 /************************************************************************/
3140 3140
3141 #ifdef MULE 3141 #ifdef MULE
3142 3142
3143 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded 3143 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
3144 string in STR. Returns the number of bytes stored. 3144 string in STR. Returns the number of bytes stored.
3145 Do not call this directly. Use the macro set_charptr_emchar() instead. 3145 Do not call this directly. Use the macro set_itext_ichar() instead.
3146 */ 3146 */
3147 3147
3148 Bytecount 3148 Bytecount
3149 non_ascii_set_charptr_emchar (Intbyte *str, Emchar c) 3149 non_ascii_set_itext_ichar (Ibyte *str, Ichar c)
3150 { 3150 {
3151 Intbyte *p; 3151 Ibyte *p;
3152 Intbyte lb; 3152 Ibyte lb;
3153 int c1, c2; 3153 int c1, c2;
3154 Lisp_Object charset; 3154 Lisp_Object charset;
3155 3155
3156 p = str; 3156 p = str;
3157 BREAKUP_EMCHAR (c, charset, c1, c2); 3157 BREAKUP_ICHAR (c, charset, c1, c2);
3158 lb = emchar_leading_byte (c); 3158 lb = ichar_leading_byte (c);
3159 if (leading_byte_private_p (lb)) 3159 if (leading_byte_private_p (lb))
3160 *p++ = private_leading_byte_prefix (lb); 3160 *p++ = private_leading_byte_prefix (lb);
3161 *p++ = lb; 3161 *p++ = lb;
3162 if (EQ (charset, Vcharset_control_1)) 3162 if (EQ (charset, Vcharset_control_1))
3163 c1 += 0x20; 3163 c1 += 0x20;
3168 return (p - str); 3168 return (p - str);
3169 } 3169 }
3170 3170
3171 /* Return the first character from a Mule-encoded string in STR, 3171 /* Return the first character from a Mule-encoded string in STR,
3172 assuming it's non-ASCII. Do not call this directly. 3172 assuming it's non-ASCII. Do not call this directly.
3173 Use the macro charptr_emchar() instead. */ 3173 Use the macro itext_ichar() instead. */
3174 3174
3175 Emchar 3175 Ichar
3176 non_ascii_charptr_emchar (const Intbyte *str) 3176 non_ascii_itext_ichar (const Ibyte *str)
3177 { 3177 {
3178 Intbyte i0 = *str, i1, i2 = 0; 3178 Ibyte i0 = *str, i1, i2 = 0;
3179 Lisp_Object charset; 3179 Lisp_Object charset;
3180 3180
3181 if (i0 == LEADING_BYTE_CONTROL_1) 3181 if (i0 == LEADING_BYTE_CONTROL_1)
3182 return (Emchar) (*++str - 0x20); 3182 return (Ichar) (*++str - 0x20);
3183 3183
3184 if (leading_byte_prefix_p (i0)) 3184 if (leading_byte_prefix_p (i0))
3185 i0 = *++str; 3185 i0 = *++str;
3186 3186
3187 i1 = *++str & 0x7F; 3187 i1 = *++str & 0x7F;
3188 3188
3189 charset = charset_by_leading_byte (i0); 3189 charset = charset_by_leading_byte (i0);
3190 if (XCHARSET_DIMENSION (charset) == 2) 3190 if (XCHARSET_DIMENSION (charset) == 2)
3191 i2 = *++str & 0x7F; 3191 i2 = *++str & 0x7F;
3192 3192
3193 return make_emchar (charset, i1, i2); 3193 return make_ichar (charset, i1, i2);
3194 } 3194 }
3195 3195
3196 /* Return whether CH is a valid Emchar, assuming it's non-ASCII. 3196 /* Return whether CH is a valid Ichar, assuming it's non-ASCII.
3197 Do not call this directly. Use the macro valid_emchar_p() instead. */ 3197 Do not call this directly. Use the macro valid_ichar_p() instead. */
3198 3198
3199 int 3199 int
3200 non_ascii_valid_emchar_p (Emchar ch) 3200 non_ascii_valid_ichar_p (Ichar ch)
3201 { 3201 {
3202 int f1, f2, f3; 3202 int f1, f2, f3;
3203 3203
3204 /* Must have only lowest 19 bits set */ 3204 /* Must have only lowest 19 bits set */
3205 if (ch & ~0x7FFFF) 3205 if (ch & ~0x7FFFF)
3206 return 0; 3206 return 0;
3207 3207
3208 f1 = emchar_field1 (ch); 3208 f1 = ichar_field1 (ch);
3209 f2 = emchar_field2 (ch); 3209 f2 = ichar_field2 (ch);
3210 f3 = emchar_field3 (ch); 3210 f3 = ichar_field3 (ch);
3211 3211
3212 if (f1 == 0) 3212 if (f1 == 0)
3213 { 3213 {
3214 /* dimension-1 char */ 3214 /* dimension-1 char */
3215 Lisp_Object charset; 3215 Lisp_Object charset;
3216 3216
3217 /* leading byte must be correct */ 3217 /* leading byte must be correct */
3218 if (f2 < MIN_EMCHAR_FIELD2_OFFICIAL || 3218 if (f2 < MIN_ICHAR_FIELD2_OFFICIAL ||
3219 (f2 > MAX_EMCHAR_FIELD2_OFFICIAL && f2 < MIN_EMCHAR_FIELD2_PRIVATE) || 3219 (f2 > MAX_ICHAR_FIELD2_OFFICIAL && f2 < MIN_ICHAR_FIELD2_PRIVATE) ||
3220 f2 > MAX_EMCHAR_FIELD2_PRIVATE) 3220 f2 > MAX_ICHAR_FIELD2_PRIVATE)
3221 return 0; 3221 return 0;
3222 /* octet not out of range */ 3222 /* octet not out of range */
3223 if (f3 < 0x20) 3223 if (f3 < 0x20)
3224 return 0; 3224 return 0;
3225 /* charset exists */ 3225 /* charset exists */
3238 { 3238 {
3239 /* dimension-2 char */ 3239 /* dimension-2 char */
3240 Lisp_Object charset; 3240 Lisp_Object charset;
3241 3241
3242 /* leading byte must be correct */ 3242 /* leading byte must be correct */
3243 if (f1 < MIN_EMCHAR_FIELD1_OFFICIAL || 3243 if (f1 < MIN_ICHAR_FIELD1_OFFICIAL ||
3244 (f1 > MAX_EMCHAR_FIELD1_OFFICIAL && f1 < MIN_EMCHAR_FIELD1_PRIVATE) || 3244 (f1 > MAX_ICHAR_FIELD1_OFFICIAL && f1 < MIN_ICHAR_FIELD1_PRIVATE) ||
3245 f1 > MAX_EMCHAR_FIELD1_PRIVATE) 3245 f1 > MAX_ICHAR_FIELD1_PRIVATE)
3246 return 0; 3246 return 0;
3247 /* octets not out of range */ 3247 /* octets not out of range */
3248 if (f2 < 0x20 || f3 < 0x20) 3248 if (f2 < 0x20 || f3 < 0x20)
3249 return 0; 3249 return 0;
3250 3250
3258 return 1; 3258 return 1;
3259 } 3259 }
3260 #endif /* ENABLE_COMPOSITE_CHARS */ 3260 #endif /* ENABLE_COMPOSITE_CHARS */
3261 3261
3262 /* charset exists */ 3262 /* charset exists */
3263 if (f1 <= MAX_EMCHAR_FIELD1_OFFICIAL) 3263 if (f1 <= MAX_ICHAR_FIELD1_OFFICIAL)
3264 charset = 3264 charset =
3265 charset_by_leading_byte (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE); 3265 charset_by_leading_byte (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
3266 else 3266 else
3267 charset = 3267 charset =
3268 charset_by_leading_byte (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE); 3268 charset_by_leading_byte (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
3274 XCHARSET_CHARS (charset) == 96); 3274 XCHARSET_CHARS (charset) == 96);
3275 } 3275 }
3276 } 3276 }
3277 3277
3278 /* Copy the character pointed to by SRC into DST. Do not call this 3278 /* Copy the character pointed to by SRC into DST. Do not call this
3279 directly. Use the macro charptr_copy_emchar() instead. 3279 directly. Use the macro itext_copy_ichar() instead.
3280 Return the number of bytes copied. */ 3280 Return the number of bytes copied. */
3281 3281
3282 Bytecount 3282 Bytecount
3283 non_ascii_charptr_copy_emchar (const Intbyte *src, Intbyte *dst) 3283 non_ascii_itext_copy_ichar (const Ibyte *src, Ibyte *dst)
3284 { 3284 {
3285 Bytecount bytes = rep_bytes_by_first_byte (*src); 3285 Bytecount bytes = rep_bytes_by_first_byte (*src);
3286 Bytecount i; 3286 Bytecount i;
3287 for (i = bytes; i; i--, dst++, src++) 3287 for (i = bytes; i; i--, dst++, src++)
3288 *dst = *src; 3288 *dst = *src;
3291 3291
3292 #endif /* MULE */ 3292 #endif /* MULE */
3293 3293
3294 3294
3295 /************************************************************************/ 3295 /************************************************************************/
3296 /* streams of Emchars */ 3296 /* streams of Ichars */
3297 /************************************************************************/ 3297 /************************************************************************/
3298 3298
3299 #ifdef MULE 3299 #ifdef MULE
3300 3300
3301 /* Treat a stream as a stream of Emchar's rather than a stream of bytes. 3301 /* Treat a stream as a stream of Ichar's rather than a stream of bytes.
3302 The functions below are not meant to be called directly; use 3302 The functions below are not meant to be called directly; use
3303 the macros in insdel.h. */ 3303 the macros in insdel.h. */
3304 3304
3305 Emchar 3305 Ichar
3306 Lstream_get_emchar_1 (Lstream *stream, int ch) 3306 Lstream_get_ichar_1 (Lstream *stream, int ch)
3307 { 3307 {
3308 Intbyte str[MAX_EMCHAR_LEN]; 3308 Ibyte str[MAX_ICHAR_LEN];
3309 Intbyte *strptr = str; 3309 Ibyte *strptr = str;
3310 Bytecount bytes; 3310 Bytecount bytes;
3311 3311
3312 str[0] = (Intbyte) ch; 3312 str[0] = (Ibyte) ch;
3313 3313
3314 for (bytes = rep_bytes_by_first_byte (ch) - 1; bytes; bytes--) 3314 for (bytes = rep_bytes_by_first_byte (ch) - 1; bytes; bytes--)
3315 { 3315 {
3316 int c = Lstream_getc (stream); 3316 int c = Lstream_getc (stream);
3317 text_checking_assert (c >= 0); 3317 text_checking_assert (c >= 0);
3318 *++strptr = (Intbyte) c; 3318 *++strptr = (Ibyte) c;
3319 } 3319 }
3320 return charptr_emchar (str); 3320 return itext_ichar (str);
3321 } 3321 }
3322 3322
3323 int 3323 int
3324 Lstream_fput_emchar (Lstream *stream, Emchar ch) 3324 Lstream_fput_ichar (Lstream *stream, Ichar ch)
3325 { 3325 {
3326 Intbyte str[MAX_EMCHAR_LEN]; 3326 Ibyte str[MAX_ICHAR_LEN];
3327 Bytecount len = set_charptr_emchar (str, ch); 3327 Bytecount len = set_itext_ichar (str, ch);
3328 return Lstream_write (stream, str, len); 3328 return Lstream_write (stream, str, len);
3329 } 3329 }
3330 3330
3331 void 3331 void
3332 Lstream_funget_emchar (Lstream *stream, Emchar ch) 3332 Lstream_funget_ichar (Lstream *stream, Ichar ch)
3333 { 3333 {
3334 Intbyte str[MAX_EMCHAR_LEN]; 3334 Ibyte str[MAX_ICHAR_LEN];
3335 Bytecount len = set_charptr_emchar (str, ch); 3335 Bytecount len = set_itext_ichar (str, ch);
3336 Lstream_unread (stream, str, len); 3336 Lstream_unread (stream, str, len);
3337 } 3337 }
3338 3338
3339 #endif /* MULE */ 3339 #endif /* MULE */
3340 3340
3436 if (CHARSET_DIMENSION (cs) == 1) 3436 if (CHARSET_DIMENSION (cs) == 1)
3437 { 3437 {
3438 if (!NILP (arg2)) 3438 if (!NILP (arg2))
3439 invalid_argument 3439 invalid_argument
3440 ("Charset is of dimension one; second octet must be nil", arg2); 3440 ("Charset is of dimension one; second octet must be nil", arg2);
3441 return make_char (make_emchar (charset, a1, 0)); 3441 return make_char (make_ichar (charset, a1, 0));
3442 } 3442 }
3443 3443
3444 CHECK_INT (arg2); 3444 CHECK_INT (arg2);
3445 a2 = XINT (arg2) & 0x7f; 3445 a2 = XINT (arg2) & 0x7f;
3446 if (a2 < lowlim || a2 > highlim) 3446 if (a2 < lowlim || a2 > highlim)
3447 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim)); 3447 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
3448 3448
3449 return make_char (make_emchar (charset, a1, a2)); 3449 return make_char (make_ichar (charset, a1, a2));
3450 #else 3450 #else
3451 int a1; 3451 int a1;
3452 int lowlim, highlim; 3452 int lowlim, highlim;
3453 3453
3454 if (EQ (charset, Qascii)) lowlim = 0, highlim = 127; 3454 if (EQ (charset, Qascii)) lowlim = 0, highlim = 127;
3478 (ch)) 3478 (ch))
3479 { 3479 {
3480 CHECK_CHAR_COERCE_INT (ch); 3480 CHECK_CHAR_COERCE_INT (ch);
3481 3481
3482 return XCHARSET_NAME (charset_by_leading_byte 3482 return XCHARSET_NAME (charset_by_leading_byte
3483 (emchar_leading_byte (XCHAR (ch)))); 3483 (ichar_leading_byte (XCHAR (ch))));
3484 } 3484 }
3485 3485
3486 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /* 3486 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
3487 Return the octet numbered N (should be 0 or 1) of char CH. 3487 Return the octet numbered N (should be 0 or 1) of char CH.
3488 N defaults to 0 if omitted. 3488 N defaults to 0 if omitted.
3492 Lisp_Object charset; 3492 Lisp_Object charset;
3493 int octet0, octet1; 3493 int octet0, octet1;
3494 3494
3495 CHECK_CHAR_COERCE_INT (ch); 3495 CHECK_CHAR_COERCE_INT (ch);
3496 3496
3497 BREAKUP_EMCHAR (XCHAR (ch), charset, octet0, octet1); 3497 BREAKUP_ICHAR (XCHAR (ch), charset, octet0, octet1);
3498 3498
3499 if (NILP (n) || EQ (n, Qzero)) 3499 if (NILP (n) || EQ (n, Qzero))
3500 return make_int (octet0); 3500 return make_int (octet0);
3501 else if (EQ (n, make_int (1))) 3501 else if (EQ (n, make_int (1)))
3502 return make_int (octet1); 3502 return make_int (octet1);
3516 int c1, c2; 3516 int c1, c2;
3517 3517
3518 GCPRO2 (charset, rc); 3518 GCPRO2 (charset, rc);
3519 CHECK_CHAR_COERCE_INT (character); 3519 CHECK_CHAR_COERCE_INT (character);
3520 3520
3521 BREAKUP_EMCHAR (XCHAR (character), charset, c1, c2); 3521 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2);
3522 3522
3523 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2) 3523 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
3524 { 3524 {
3525 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2)); 3525 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
3526 } 3526 }
3540 /* composite character functions */ 3540 /* composite character functions */
3541 /************************************************************************/ 3541 /************************************************************************/
3542 3542
3543 #ifdef ENABLE_COMPOSITE_CHARS 3543 #ifdef ENABLE_COMPOSITE_CHARS
3544 3544
3545 Emchar 3545 Ichar
3546 lookup_composite_char (Intbyte *str, int len) 3546 lookup_composite_char (Ibyte *str, int len)
3547 { 3547 {
3548 Lisp_Object lispstr = make_string (str, len); 3548 Lisp_Object lispstr = make_string (str, len);
3549 Lisp_Object ch = Fgethash (lispstr, 3549 Lisp_Object ch = Fgethash (lispstr,
3550 Vcomposite_char_string2char_hash_table, 3550 Vcomposite_char_string2char_hash_table,
3551 Qunbound); 3551 Qunbound);
3552 Emchar emch; 3552 Ichar emch;
3553 3553
3554 if (UNBOUNDP (ch)) 3554 if (UNBOUNDP (ch))
3555 { 3555 {
3556 if (composite_char_row_next >= 128) 3556 if (composite_char_row_next >= 128)
3557 invalid_operation ("No more composite chars available", lispstr); 3557 invalid_operation ("No more composite chars available", lispstr);
3558 emch = make_emchar (Vcharset_composite, composite_char_row_next, 3558 emch = make_ichar (Vcharset_composite, composite_char_row_next,
3559 composite_char_col_next); 3559 composite_char_col_next);
3560 Fputhash (make_char (emch), lispstr, 3560 Fputhash (make_char (emch), lispstr,
3561 Vcomposite_char_char2string_hash_table); 3561 Vcomposite_char_char2string_hash_table);
3562 Fputhash (lispstr, make_char (emch), 3562 Fputhash (lispstr, make_char (emch),
3563 Vcomposite_char_string2char_hash_table); 3563 Vcomposite_char_string2char_hash_table);
3572 emch = XCHAR (ch); 3572 emch = XCHAR (ch);
3573 return emch; 3573 return emch;
3574 } 3574 }
3575 3575
3576 Lisp_Object 3576 Lisp_Object
3577 composite_char_string (Emchar ch) 3577 composite_char_string (Ichar ch)
3578 { 3578 {
3579 Lisp_Object str = Fgethash (make_char (ch), 3579 Lisp_Object str = Fgethash (make_char (ch),
3580 Vcomposite_char_char2string_hash_table, 3580 Vcomposite_char_char2string_hash_table,
3581 Qunbound); 3581 Qunbound);
3582 assert (!UNBOUNDP (str)); 3582 assert (!UNBOUNDP (str));
3598 DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /* 3598 DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
3599 Return a string of the characters comprising a composite character. 3599 Return a string of the characters comprising a composite character.
3600 */ 3600 */
3601 (ch)) 3601 (ch))
3602 { 3602 {
3603 Emchar emch; 3603 Ichar emch;
3604 3604
3605 CHECK_CHAR (ch); 3605 CHECK_CHAR (ch);
3606 emch = XCHAR (ch); 3606 emch = XCHAR (ch);
3607 if (emchar_leading_byte (emch) != LEADING_BYTE_COMPOSITE) 3607 if (ichar_leading_byte (emch) != LEADING_BYTE_COMPOSITE)
3608 invalid_argument ("Must be composite char", ch); 3608 invalid_argument ("Must be composite char", ch);
3609 return composite_char_string (emch); 3609 return composite_char_string (emch);
3610 } 3610 }
3611 #endif /* ENABLE_COMPOSITE_CHARS */ 3611 #endif /* ENABLE_COMPOSITE_CHARS */
3612 3612
3648 void 3648 void
3649 reinit_vars_of_text (void) 3649 reinit_vars_of_text (void)
3650 { 3650 {
3651 int i; 3651 int i;
3652 3652
3653 conversion_in_dynarr_list = Dynarr_new2 (Intbyte_dynarr_dynarr, 3653 conversion_in_dynarr_list = Dynarr_new2 (Ibyte_dynarr_dynarr,
3654 Intbyte_dynarr *); 3654 Ibyte_dynarr *);
3655 conversion_out_dynarr_list = Dynarr_new2 (Extbyte_dynarr_dynarr, 3655 conversion_out_dynarr_list = Dynarr_new2 (Extbyte_dynarr_dynarr,
3656 Extbyte_dynarr *); 3656 Extbyte_dynarr *);
3657 3657
3658 /* #### Olivier, why does this need to be reinitted? */ 3658 /* #### Olivier, why does this need to be reinitted? */
3659 for (i = 0; i <= MAX_BYTEBPOS_GAP_SIZE_3; i++) 3659 for (i = 0; i <= MAX_BYTEBPOS_GAP_SIZE_3; i++)