Mercurial > hg > xemacs-beta
comparison src/text.c @ 867:804517e16990
[xemacs-hg @ 2002-06-05 09:54:39 by ben]
Textual renaming: text/char names
abbrev.c, alloc.c, buffer.c, buffer.h, bytecode.c, callint.c, casefiddle.c, casetab.c, charset.h, chartab.c, chartab.h, cmds.c, console-gtk.h, console-msw.c, console-msw.h, console-stream.c, console-tty.c, console-x.c, console-x.h, console.h, data.c, device-msw.c, device-x.c, dialog-msw.c, dired-msw.c, dired.c, doc.c, doprnt.c, editfns.c, eldap.c, emodules.c, eval.c, event-Xt.c, event-gtk.c, event-msw.c, event-stream.c, event-unixoid.c, events.c, events.h, file-coding.c, file-coding.h, fileio.c, filelock.c, fns.c, font-lock.c, frame-gtk.c, frame-msw.c, frame-x.c, frame.c, glyphs-eimage.c, glyphs-msw.c, glyphs-x.c, glyphs.c, glyphs.h, gpmevent.c, gui-x.c, gui-x.h, gui.c, gui.h, hpplay.c, indent.c, insdel.c, insdel.h, intl-win32.c, keymap.c, line-number.c, line-number.h, lisp-disunion.h, lisp-union.h, lisp.h, lread.c, lrecord.h, lstream.c, lstream.h, md5.c, menubar-msw.c, menubar-x.c, menubar.c, minibuf.c, mule-ccl.c, mule-charset.c, mule-coding.c, mule-wnnfns.c, ndir.h, nt.c, objects-gtk.c, objects-gtk.h, objects-msw.c, objects-tty.c, objects-x.c, objects.c, objects.h, postgresql.c, print.c, process-nt.c, process-unix.c, process.c, procimpl.h, realpath.c, redisplay-gtk.c, redisplay-msw.c, redisplay-output.c, redisplay-tty.c, redisplay-x.c, redisplay.c, redisplay.h, regex.c, search.c, select-common.h, select-gtk.c, select-x.c, sound.h, symbols.c, syntax.c, syntax.h, sysdep.c, sysdep.h, sysdir.h, sysfile.h, sysproc.h, syspwd.h, systime.h, syswindows.h, termcap.c, tests.c, text.c, text.h, toolbar-common.c, tooltalk.c, ui-gtk.c, unexnt.c, unicode.c, win32.c: Text/char naming rationalization.
[a] distinguish between "charptr" when it refers to operations on
the pointer itself and when it refers to operations on text; and
[b] use consistent naming for everything referring to internal
format, i.e.
Itext == text in internal format
Ibyte == a byte in such text
Ichar == a char as represented in internal character format
thus e.g.
set_charptr_emchar -> set_itext_ichar
The pre and post tags on either side of this change are:
pre-internal-format-textual-renaming
post-internal-format-textual-renaming
See the Internals Manual for details of exactly how this was done,
how to handle the change in your workspace, etc.
author | ben |
---|---|
date | Wed, 05 Jun 2002 09:58:45 +0000 |
parents | 2b6fa2618f76 |
children | e22b0213b713 |
comparison
equal
deleted
inserted
replaced
866:613552a02607 | 867:804517e16990 |
---|---|
380 | 380 |
381 except in the case of characters at the gap position. | 381 except in the case of characters at the gap position. |
382 | 382 |
383 B. Other Typedefs | 383 B. Other Typedefs |
384 | 384 |
385 Emchar: | 385 Ichar: |
386 ------- | 386 ------- |
387 This typedef represents a single Emacs character, which can be | 387 This typedef represents a single Emacs character, which can be |
388 ASCII, ISO-8859, or some extended character, as would typically | 388 ASCII, ISO-8859, or some extended character, as would typically |
389 be used for Kanji. Note that the representation of a character | 389 be used for Kanji. Note that the representation of a character |
390 as an Emchar is *not* the same as the representation of that | 390 as an Ichar is *not* the same as the representation of that |
391 same character in a string; thus, you cannot do the standard | 391 same character in a string; thus, you cannot do the standard |
392 C trick of passing a pointer to a character to a function that | 392 C trick of passing a pointer to a character to a function that |
393 expects a string. | 393 expects a string. |
394 | 394 |
395 An Emchar takes up 19 bits of representation and (for code | 395 An Ichar takes up 19 bits of representation and (for code |
396 compatibility and such) is compatible with an int. This | 396 compatibility and such) is compatible with an int. This |
397 representation is visible on the Lisp level. The important | 397 representation is visible on the Lisp level. The important |
398 characteristics of the Emchar representation are | 398 characteristics of the Ichar representation are |
399 | 399 |
400 -- values 0x00 - 0x7f represent ASCII. | 400 -- values 0x00 - 0x7f represent ASCII. |
401 -- values 0x80 - 0xff represent the right half of ISO-8859-1. | 401 -- values 0x80 - 0xff represent the right half of ISO-8859-1. |
402 -- values 0x100 and up represent all other characters. | 402 -- values 0x100 and up represent all other characters. |
403 | 403 |
404 This means that Emchar values are upwardly compatible with | 404 This means that Ichar values are upwardly compatible with |
405 the standard 8-bit representation of ASCII/ISO-8859-1. | 405 the standard 8-bit representation of ASCII/ISO-8859-1. |
406 | 406 |
407 Intbyte: | 407 Ibyte: |
408 -------- | 408 -------- |
409 The data in a buffer or string is logically made up of Intbyte | 409 The data in a buffer or string is logically made up of Ibyte |
410 objects, where an Intbyte takes up the same amount of space as a | 410 objects, where an Ibyte takes up the same amount of space as a |
411 char. (It is declared differently, though, to catch invalid | 411 char. (It is declared differently, though, to catch invalid |
412 usages.) Strings stored using Intbytes are said to be in | 412 usages.) Strings stored using Ibytes are said to be in |
413 "internal format". The important characteristics of internal | 413 "internal format". The important characteristics of internal |
414 format are | 414 format are |
415 | 415 |
416 -- ASCII characters are represented as a single Intbyte, | 416 -- ASCII characters are represented as a single Ibyte, |
417 in the range 0 - 0x7f. | 417 in the range 0 - 0x7f. |
418 -- All other characters are represented as an Intbyte in | 418 -- All other characters are represented as an Ibyte in |
419 the range 0x80 - 0x9f followed by one or more Intbytes | 419 the range 0x80 - 0x9f followed by one or more Ibytes |
420 in the range 0xa0 to 0xff. | 420 in the range 0xa0 to 0xff. |
421 | 421 |
422 This leads to a number of desirable properties: | 422 This leads to a number of desirable properties: |
423 | 423 |
424 -- Given the position of the beginning of a character, | 424 -- Given the position of the beginning of a character, |
630 /************************************************************************/ | 630 /************************************************************************/ |
631 | 631 |
632 /* Most are inline functions in lisp.h */ | 632 /* Most are inline functions in lisp.h */ |
633 | 633 |
634 int | 634 int |
635 qxesprintf (Intbyte *buffer, const CIntbyte *format, ...) | 635 qxesprintf (Ibyte *buffer, const CIbyte *format, ...) |
636 { | 636 { |
637 va_list args; | 637 va_list args; |
638 int retval; | 638 int retval; |
639 | 639 |
640 va_start (args, format); | 640 va_start (args, format); |
643 | 643 |
644 return retval; | 644 return retval; |
645 } | 645 } |
646 | 646 |
647 /* strcasecmp() implementation from BSD */ | 647 /* strcasecmp() implementation from BSD */ |
648 static Intbyte strcasecmp_charmap[] = { | 648 static Ibyte strcasecmp_charmap[] = { |
649 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', | 649 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', |
650 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', | 650 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', |
651 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', | 651 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', |
652 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', | 652 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', |
653 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', | 653 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', |
689 we use symmetrical algorithms that may sacrifice a few machine | 689 we use symmetrical algorithms that may sacrifice a few machine |
690 cycles but are MUCH MUCH clearer, which counts a lot more. | 690 cycles but are MUCH MUCH clearer, which counts a lot more. |
691 */ | 691 */ |
692 | 692 |
693 int | 693 int |
694 qxestrcasecmp (const Intbyte *s1, const Intbyte *s2) | 694 qxestrcasecmp (const Ibyte *s1, const Ibyte *s2) |
695 { | 695 { |
696 Intbyte *cm = strcasecmp_charmap; | 696 Ibyte *cm = strcasecmp_charmap; |
697 | 697 |
698 while (cm[*s1] == cm[*s2++]) | 698 while (cm[*s1] == cm[*s2++]) |
699 if (*s1++ == '\0') | 699 if (*s1++ == '\0') |
700 return (0); | 700 return (0); |
701 | 701 |
703 } | 703 } |
704 | 704 |
705 int | 705 int |
706 ascii_strcasecmp (const Char_ASCII *s1, const Char_ASCII *s2) | 706 ascii_strcasecmp (const Char_ASCII *s1, const Char_ASCII *s2) |
707 { | 707 { |
708 return qxestrcasecmp ((const Intbyte *) s1, (const Intbyte *) s2); | 708 return qxestrcasecmp ((const Ibyte *) s1, (const Ibyte *) s2); |
709 } | 709 } |
710 | 710 |
711 int | 711 int |
712 qxestrcasecmp_c (const Intbyte *s1, const Char_ASCII *s2) | 712 qxestrcasecmp_c (const Ibyte *s1, const Char_ASCII *s2) |
713 { | 713 { |
714 return qxestrcasecmp (s1, (const Intbyte *) s2); | 714 return qxestrcasecmp (s1, (const Ibyte *) s2); |
715 } | 715 } |
716 | 716 |
717 /* An internationalized version that collapses case in a general fashion. | 717 /* An internationalized version that collapses case in a general fashion. |
718 */ | 718 */ |
719 | 719 |
720 int | 720 int |
721 qxestrcasecmp_i18n (const Intbyte *s1, const Intbyte *s2) | 721 qxestrcasecmp_i18n (const Ibyte *s1, const Ibyte *s2) |
722 { | 722 { |
723 while (*s1 && *s2) | 723 while (*s1 && *s2) |
724 { | 724 { |
725 if (DOWNCASE (0, charptr_emchar (s1)) != | 725 if (DOWNCASE (0, itext_ichar (s1)) != |
726 DOWNCASE (0, charptr_emchar (s2))) | 726 DOWNCASE (0, itext_ichar (s2))) |
727 break; | 727 break; |
728 INC_CHARPTR (s1); | 728 INC_IBYTEPTR (s1); |
729 INC_CHARPTR (s2); | 729 INC_IBYTEPTR (s2); |
730 } | 730 } |
731 | 731 |
732 return (DOWNCASE (0, charptr_emchar (s1)) - | 732 return (DOWNCASE (0, itext_ichar (s1)) - |
733 DOWNCASE (0, charptr_emchar (s2))); | 733 DOWNCASE (0, itext_ichar (s2))); |
734 } | 734 } |
735 | 735 |
736 /* The only difference between these next two and | 736 /* The only difference between these next two and |
737 qxememcasecmp()/qxememcasecmp_i18n() is that these two will stop if | 737 qxememcasecmp()/qxememcasecmp_i18n() is that these two will stop if |
738 both strings are equal and less than LEN in length, while | 738 both strings are equal and less than LEN in length, while |
739 the mem...() versions would run off the end. */ | 739 the mem...() versions would run off the end. */ |
740 | 740 |
741 int | 741 int |
742 qxestrncasecmp (const Intbyte *s1, const Intbyte *s2, Bytecount len) | 742 qxestrncasecmp (const Ibyte *s1, const Ibyte *s2, Bytecount len) |
743 { | 743 { |
744 Intbyte *cm = strcasecmp_charmap; | 744 Ibyte *cm = strcasecmp_charmap; |
745 | 745 |
746 while (len--) | 746 while (len--) |
747 { | 747 { |
748 int diff = cm[*s1] - cm[*s2]; | 748 int diff = cm[*s1] - cm[*s2]; |
749 if (diff != 0) | 749 if (diff != 0) |
757 } | 757 } |
758 | 758 |
759 int | 759 int |
760 ascii_strncasecmp (const Char_ASCII *s1, const Char_ASCII *s2, Bytecount len) | 760 ascii_strncasecmp (const Char_ASCII *s1, const Char_ASCII *s2, Bytecount len) |
761 { | 761 { |
762 return qxestrncasecmp ((const Intbyte *) s1, (const Intbyte *) s2, len); | 762 return qxestrncasecmp ((const Ibyte *) s1, (const Ibyte *) s2, len); |
763 } | 763 } |
764 | 764 |
765 int | 765 int |
766 qxestrncasecmp_c (const Intbyte *s1, const Char_ASCII *s2, Bytecount len) | 766 qxestrncasecmp_c (const Ibyte *s1, const Char_ASCII *s2, Bytecount len) |
767 { | 767 { |
768 return qxestrncasecmp (s1, (const Intbyte *) s2, len); | 768 return qxestrncasecmp (s1, (const Ibyte *) s2, len); |
769 } | 769 } |
770 | 770 |
771 /* Compare LEN_FROM_S1 worth of characters from S1 with the same number of | 771 /* Compare LEN_FROM_S1 worth of characters from S1 with the same number of |
772 characters from S2, case insensitive. NOTE: Downcasing can convert | 772 characters from S2, case insensitive. NOTE: Downcasing can convert |
773 characters from one length in bytes to another, so reversing S1 and S2 | 773 characters from one length in bytes to another, so reversing S1 and S2 |
774 is *NOT* a symmetric operation! You must choose a length that agrees | 774 is *NOT* a symmetric operation! You must choose a length that agrees |
775 with S1. */ | 775 with S1. */ |
776 | 776 |
777 int | 777 int |
778 qxestrncasecmp_i18n (const Intbyte *s1, const Intbyte *s2, | 778 qxestrncasecmp_i18n (const Ibyte *s1, const Ibyte *s2, |
779 Bytecount len_from_s1) | 779 Bytecount len_from_s1) |
780 { | 780 { |
781 while (len_from_s1 > 0) | 781 while (len_from_s1 > 0) |
782 { | 782 { |
783 const Intbyte *old_s1 = s1; | 783 const Ibyte *old_s1 = s1; |
784 int diff = (DOWNCASE (0, charptr_emchar (s1)) - | 784 int diff = (DOWNCASE (0, itext_ichar (s1)) - |
785 DOWNCASE (0, charptr_emchar (s2))); | 785 DOWNCASE (0, itext_ichar (s2))); |
786 if (diff != 0) | 786 if (diff != 0) |
787 return diff; | 787 return diff; |
788 if (!*s1) | 788 if (!*s1) |
789 return 0; | 789 return 0; |
790 INC_CHARPTR (s1); | 790 INC_IBYTEPTR (s1); |
791 INC_CHARPTR (s2); | 791 INC_IBYTEPTR (s2); |
792 len_from_s1 -= s1 - old_s1; | 792 len_from_s1 -= s1 - old_s1; |
793 } | 793 } |
794 | 794 |
795 return 0; | 795 return 0; |
796 } | 796 } |
797 | 797 |
798 int | 798 int |
799 qxememcmp (const Intbyte *s1, const Intbyte *s2, Bytecount len) | 799 qxememcmp (const Ibyte *s1, const Ibyte *s2, Bytecount len) |
800 { | 800 { |
801 return memcmp (s1, s2, len); | 801 return memcmp (s1, s2, len); |
802 } | 802 } |
803 | 803 |
804 int | 804 int |
805 qxememcmp4 (const Intbyte *s1, Bytecount len1, | 805 qxememcmp4 (const Ibyte *s1, Bytecount len1, |
806 const Intbyte *s2, Bytecount len2) | 806 const Ibyte *s2, Bytecount len2) |
807 { | 807 { |
808 int retval = qxememcmp (s1, s2, min (len1, len2)); | 808 int retval = qxememcmp (s1, s2, min (len1, len2)); |
809 if (retval) | 809 if (retval) |
810 return retval; | 810 return retval; |
811 return len1 - len2; | 811 return len1 - len2; |
812 } | 812 } |
813 | 813 |
814 int | 814 int |
815 qxememcasecmp (const Intbyte *s1, const Intbyte *s2, Bytecount len) | 815 qxememcasecmp (const Ibyte *s1, const Ibyte *s2, Bytecount len) |
816 { | 816 { |
817 Intbyte *cm = strcasecmp_charmap; | 817 Ibyte *cm = strcasecmp_charmap; |
818 | 818 |
819 while (len--) | 819 while (len--) |
820 { | 820 { |
821 int diff = cm[*s1] - cm[*s2]; | 821 int diff = cm[*s1] - cm[*s2]; |
822 if (diff != 0) | 822 if (diff != 0) |
826 | 826 |
827 return 0; | 827 return 0; |
828 } | 828 } |
829 | 829 |
830 int | 830 int |
831 qxememcasecmp4 (const Intbyte *s1, Bytecount len1, | 831 qxememcasecmp4 (const Ibyte *s1, Bytecount len1, |
832 const Intbyte *s2, Bytecount len2) | 832 const Ibyte *s2, Bytecount len2) |
833 { | 833 { |
834 int retval = qxememcasecmp (s1, s2, min (len1, len2)); | 834 int retval = qxememcasecmp (s1, s2, min (len1, len2)); |
835 if (retval) | 835 if (retval) |
836 return retval; | 836 return retval; |
837 return len1 - len2; | 837 return len1 - len2; |
838 } | 838 } |
839 | 839 |
840 /* Do a character-by-character comparison, returning "which is greater" by | 840 /* Do a character-by-character comparison, returning "which is greater" by |
841 comparing the Emchar values. (#### Should have option to compare Unicode | 841 comparing the Ichar values. (#### Should have option to compare Unicode |
842 points) */ | 842 points) */ |
843 | 843 |
844 int | 844 int |
845 qxetextcmp (const Intbyte *s1, Bytecount len1, | 845 qxetextcmp (const Ibyte *s1, Bytecount len1, |
846 const Intbyte *s2, Bytecount len2) | 846 const Ibyte *s2, Bytecount len2) |
847 { | 847 { |
848 while (len1 > 0 && len2 > 0) | 848 while (len1 > 0 && len2 > 0) |
849 { | 849 { |
850 const Intbyte *old_s1 = s1; | 850 const Ibyte *old_s1 = s1; |
851 const Intbyte *old_s2 = s2; | 851 const Ibyte *old_s2 = s2; |
852 int diff = charptr_emchar (s1) - charptr_emchar (s2); | 852 int diff = itext_ichar (s1) - itext_ichar (s2); |
853 if (diff != 0) | 853 if (diff != 0) |
854 return diff; | 854 return diff; |
855 INC_CHARPTR (s1); | 855 INC_IBYTEPTR (s1); |
856 INC_CHARPTR (s2); | 856 INC_IBYTEPTR (s2); |
857 len1 -= s1 - old_s1; | 857 len1 -= s1 - old_s1; |
858 len2 -= s2 - old_s2; | 858 len2 -= s2 - old_s2; |
859 } | 859 } |
860 | 860 |
861 assert (len1 >= 0 && len2 >= 0); | 861 assert (len1 >= 0 && len2 >= 0); |
862 return len1 - len2; | 862 return len1 - len2; |
863 } | 863 } |
864 | 864 |
865 int | 865 int |
866 qxetextcmp_matching (const Intbyte *s1, Bytecount len1, | 866 qxetextcmp_matching (const Ibyte *s1, Bytecount len1, |
867 const Intbyte *s2, Bytecount len2, | 867 const Ibyte *s2, Bytecount len2, |
868 Charcount *matching) | 868 Charcount *matching) |
869 { | 869 { |
870 *matching = 0; | 870 *matching = 0; |
871 while (len1 > 0 && len2 > 0) | 871 while (len1 > 0 && len2 > 0) |
872 { | 872 { |
873 const Intbyte *old_s1 = s1; | 873 const Ibyte *old_s1 = s1; |
874 const Intbyte *old_s2 = s2; | 874 const Ibyte *old_s2 = s2; |
875 int diff = charptr_emchar (s1) - charptr_emchar (s2); | 875 int diff = itext_ichar (s1) - itext_ichar (s2); |
876 if (diff != 0) | 876 if (diff != 0) |
877 return diff; | 877 return diff; |
878 INC_CHARPTR (s1); | 878 INC_IBYTEPTR (s1); |
879 INC_CHARPTR (s2); | 879 INC_IBYTEPTR (s2); |
880 len1 -= s1 - old_s1; | 880 len1 -= s1 - old_s1; |
881 len2 -= s2 - old_s2; | 881 len2 -= s2 - old_s2; |
882 (*matching)++; | 882 (*matching)++; |
883 } | 883 } |
884 | 884 |
885 assert (len1 >= 0 && len2 >= 0); | 885 assert (len1 >= 0 && len2 >= 0); |
886 return len1 - len2; | 886 return len1 - len2; |
887 } | 887 } |
888 | 888 |
889 /* Do a character-by-character comparison, returning "which is greater" by | 889 /* Do a character-by-character comparison, returning "which is greater" by |
890 comparing the Emchar values, case insensitively (by downcasing both | 890 comparing the Ichar values, case insensitively (by downcasing both |
891 first). (#### Should have option to compare Unicode points) | 891 first). (#### Should have option to compare Unicode points) |
892 | 892 |
893 In this case, both lengths must be specified because downcasing can | 893 In this case, both lengths must be specified because downcasing can |
894 convert characters from one length in bytes to another; therefore, two | 894 convert characters from one length in bytes to another; therefore, two |
895 blocks of text of different length might be equal. If both compare | 895 blocks of text of different length might be equal. If both compare |
896 equal up to the limit in length of one but not the other, the longer one | 896 equal up to the limit in length of one but not the other, the longer one |
897 is "greater". */ | 897 is "greater". */ |
898 | 898 |
899 int | 899 int |
900 qxetextcasecmp (const Intbyte *s1, Bytecount len1, | 900 qxetextcasecmp (const Ibyte *s1, Bytecount len1, |
901 const Intbyte *s2, Bytecount len2) | 901 const Ibyte *s2, Bytecount len2) |
902 { | 902 { |
903 while (len1 > 0 && len2 > 0) | 903 while (len1 > 0 && len2 > 0) |
904 { | 904 { |
905 const Intbyte *old_s1 = s1; | 905 const Ibyte *old_s1 = s1; |
906 const Intbyte *old_s2 = s2; | 906 const Ibyte *old_s2 = s2; |
907 int diff = (DOWNCASE (0, charptr_emchar (s1)) - | 907 int diff = (DOWNCASE (0, itext_ichar (s1)) - |
908 DOWNCASE (0, charptr_emchar (s2))); | 908 DOWNCASE (0, itext_ichar (s2))); |
909 if (diff != 0) | 909 if (diff != 0) |
910 return diff; | 910 return diff; |
911 INC_CHARPTR (s1); | 911 INC_IBYTEPTR (s1); |
912 INC_CHARPTR (s2); | 912 INC_IBYTEPTR (s2); |
913 len1 -= s1 - old_s1; | 913 len1 -= s1 - old_s1; |
914 len2 -= s2 - old_s2; | 914 len2 -= s2 - old_s2; |
915 } | 915 } |
916 | 916 |
917 assert (len1 >= 0 && len2 >= 0); | 917 assert (len1 >= 0 && len2 >= 0); |
920 | 920 |
921 /* Like qxetextcasecmp() but also return number of characters at | 921 /* Like qxetextcasecmp() but also return number of characters at |
922 beginning that match. */ | 922 beginning that match. */ |
923 | 923 |
924 int | 924 int |
925 qxetextcasecmp_matching (const Intbyte *s1, Bytecount len1, | 925 qxetextcasecmp_matching (const Ibyte *s1, Bytecount len1, |
926 const Intbyte *s2, Bytecount len2, | 926 const Ibyte *s2, Bytecount len2, |
927 Charcount *matching) | 927 Charcount *matching) |
928 { | 928 { |
929 *matching = 0; | 929 *matching = 0; |
930 while (len1 > 0 && len2 > 0) | 930 while (len1 > 0 && len2 > 0) |
931 { | 931 { |
932 const Intbyte *old_s1 = s1; | 932 const Ibyte *old_s1 = s1; |
933 const Intbyte *old_s2 = s2; | 933 const Ibyte *old_s2 = s2; |
934 int diff = (DOWNCASE (0, charptr_emchar (s1)) - | 934 int diff = (DOWNCASE (0, itext_ichar (s1)) - |
935 DOWNCASE (0, charptr_emchar (s2))); | 935 DOWNCASE (0, itext_ichar (s2))); |
936 if (diff != 0) | 936 if (diff != 0) |
937 return diff; | 937 return diff; |
938 INC_CHARPTR (s1); | 938 INC_IBYTEPTR (s1); |
939 INC_CHARPTR (s2); | 939 INC_IBYTEPTR (s2); |
940 len1 -= s1 - old_s1; | 940 len1 -= s1 - old_s1; |
941 len2 -= s2 - old_s2; | 941 len2 -= s2 - old_s2; |
942 (*matching)++; | 942 (*matching)++; |
943 } | 943 } |
944 | 944 |
947 } | 947 } |
948 | 948 |
949 int | 949 int |
950 lisp_strcasecmp (Lisp_Object s1, Lisp_Object s2) | 950 lisp_strcasecmp (Lisp_Object s1, Lisp_Object s2) |
951 { | 951 { |
952 Intbyte *cm = strcasecmp_charmap; | 952 Ibyte *cm = strcasecmp_charmap; |
953 Intbyte *p1 = XSTRING_DATA (s1); | 953 Ibyte *p1 = XSTRING_DATA (s1); |
954 Intbyte *p2 = XSTRING_DATA (s2); | 954 Ibyte *p2 = XSTRING_DATA (s2); |
955 Intbyte *e1 = p1 + XSTRING_LENGTH (s1); | 955 Ibyte *e1 = p1 + XSTRING_LENGTH (s1); |
956 Intbyte *e2 = p2 + XSTRING_LENGTH (s2); | 956 Ibyte *e2 = p2 + XSTRING_LENGTH (s2); |
957 | 957 |
958 /* again, we use a symmetric algorithm and favor clarity over | 958 /* again, we use a symmetric algorithm and favor clarity over |
959 nanosecond improvements. */ | 959 nanosecond improvements. */ |
960 while (1) | 960 while (1) |
961 { | 961 { |
983 /************************************************************************/ | 983 /************************************************************************/ |
984 | 984 |
985 /* NOTE: Does not reset the Dynarr. */ | 985 /* NOTE: Does not reset the Dynarr. */ |
986 | 986 |
987 void | 987 void |
988 convert_intbyte_string_into_emchar_dynarr (const Intbyte *str, Bytecount len, | 988 convert_ibyte_string_into_ichar_dynarr (const Ibyte *str, Bytecount len, |
989 Emchar_dynarr *dyn) | 989 Ichar_dynarr *dyn) |
990 { | 990 { |
991 const Intbyte *strend = str + len; | 991 const Ibyte *strend = str + len; |
992 | 992 |
993 while (str < strend) | 993 while (str < strend) |
994 { | 994 { |
995 Emchar ch = charptr_emchar (str); | 995 Ichar ch = itext_ichar (str); |
996 Dynarr_add (dyn, ch); | 996 Dynarr_add (dyn, ch); |
997 INC_CHARPTR (str); | 997 INC_IBYTEPTR (str); |
998 } | 998 } |
999 } | 999 } |
1000 | 1000 |
1001 Charcount | 1001 Charcount |
1002 convert_intbyte_string_into_emchar_string (const Intbyte *str, Bytecount len, | 1002 convert_ibyte_string_into_ichar_string (const Ibyte *str, Bytecount len, |
1003 Emchar *arr) | 1003 Ichar *arr) |
1004 { | 1004 { |
1005 const Intbyte *strend = str + len; | 1005 const Ibyte *strend = str + len; |
1006 Charcount newlen = 0; | 1006 Charcount newlen = 0; |
1007 while (str < strend) | 1007 while (str < strend) |
1008 { | 1008 { |
1009 Emchar ch = charptr_emchar (str); | 1009 Ichar ch = itext_ichar (str); |
1010 arr[newlen++] = ch; | 1010 arr[newlen++] = ch; |
1011 INC_CHARPTR (str); | 1011 INC_IBYTEPTR (str); |
1012 } | 1012 } |
1013 return newlen; | 1013 return newlen; |
1014 } | 1014 } |
1015 | 1015 |
1016 /* Convert an array of Emchars into the equivalent string representation. | 1016 /* Convert an array of Ichars into the equivalent string representation. |
1017 Store into the given Intbyte dynarr. Does not reset the dynarr. | 1017 Store into the given Ibyte dynarr. Does not reset the dynarr. |
1018 Does not add a terminating zero. */ | 1018 Does not add a terminating zero. */ |
1019 | 1019 |
1020 void | 1020 void |
1021 convert_emchar_string_into_intbyte_dynarr (Emchar *arr, int nels, | 1021 convert_ichar_string_into_ibyte_dynarr (Ichar *arr, int nels, |
1022 Intbyte_dynarr *dyn) | 1022 Ibyte_dynarr *dyn) |
1023 { | 1023 { |
1024 Intbyte str[MAX_EMCHAR_LEN]; | 1024 Ibyte str[MAX_ICHAR_LEN]; |
1025 int i; | 1025 int i; |
1026 | 1026 |
1027 for (i = 0; i < nels; i++) | 1027 for (i = 0; i < nels; i++) |
1028 { | 1028 { |
1029 Bytecount len = set_charptr_emchar (str, arr[i]); | 1029 Bytecount len = set_itext_ichar (str, arr[i]); |
1030 Dynarr_add_many (dyn, str, len); | 1030 Dynarr_add_many (dyn, str, len); |
1031 } | 1031 } |
1032 } | 1032 } |
1033 | 1033 |
1034 /* Convert an array of Emchars into the equivalent string representation. | 1034 /* Convert an array of Ichars into the equivalent string representation. |
1035 Malloc the space needed for this and return it. If LEN_OUT is not a | 1035 Malloc the space needed for this and return it. If LEN_OUT is not a |
1036 NULL pointer, store into LEN_OUT the number of Intbytes in the | 1036 NULL pointer, store into LEN_OUT the number of Ibytes in the |
1037 malloc()ed string. Note that the actual number of Intbytes allocated | 1037 malloc()ed string. Note that the actual number of Ibytes allocated |
1038 is one more than this: the returned string is zero-terminated. */ | 1038 is one more than this: the returned string is zero-terminated. */ |
1039 | 1039 |
1040 Intbyte * | 1040 Ibyte * |
1041 convert_emchar_string_into_malloced_string (Emchar *arr, int nels, | 1041 convert_ichar_string_into_malloced_string (Ichar *arr, int nels, |
1042 Bytecount *len_out) | 1042 Bytecount *len_out) |
1043 { | 1043 { |
1044 /* Damn zero-termination. */ | 1044 /* Damn zero-termination. */ |
1045 Intbyte *str = (Intbyte *) ALLOCA (nels * MAX_EMCHAR_LEN + 1); | 1045 Ibyte *str = (Ibyte *) ALLOCA (nels * MAX_ICHAR_LEN + 1); |
1046 Intbyte *strorig = str; | 1046 Ibyte *strorig = str; |
1047 Bytecount len; | 1047 Bytecount len; |
1048 | 1048 |
1049 int i; | 1049 int i; |
1050 | 1050 |
1051 for (i = 0; i < nels; i++) | 1051 for (i = 0; i < nels; i++) |
1052 str += set_charptr_emchar (str, arr[i]); | 1052 str += set_itext_ichar (str, arr[i]); |
1053 *str = '\0'; | 1053 *str = '\0'; |
1054 len = str - strorig; | 1054 len = str - strorig; |
1055 str = (Intbyte *) xmalloc (1 + len); | 1055 str = (Ibyte *) xmalloc (1 + len); |
1056 memcpy (str, strorig, 1 + len); | 1056 memcpy (str, strorig, 1 + len); |
1057 if (len_out) | 1057 if (len_out) |
1058 *len_out = len; | 1058 *len_out = len; |
1059 return str; | 1059 return str; |
1060 } | 1060 } |
1062 #define COPY_TEXT_BETWEEN_FORMATS(srcfmt, dstfmt) \ | 1062 #define COPY_TEXT_BETWEEN_FORMATS(srcfmt, dstfmt) \ |
1063 do \ | 1063 do \ |
1064 { \ | 1064 { \ |
1065 if (dst) \ | 1065 if (dst) \ |
1066 { \ | 1066 { \ |
1067 Intbyte *dstend = dst + dstlen; \ | 1067 Ibyte *dstend = dst + dstlen; \ |
1068 Intbyte *dstp = dst; \ | 1068 Ibyte *dstp = dst; \ |
1069 const Intbyte *srcend = src + srclen; \ | 1069 const Ibyte *srcend = src + srclen; \ |
1070 const Intbyte *srcp = src; \ | 1070 const Ibyte *srcp = src; \ |
1071 \ | 1071 \ |
1072 while (srcp < srcend) \ | 1072 while (srcp < srcend) \ |
1073 { \ | 1073 { \ |
1074 Emchar ch = charptr_emchar_fmt (srcp, srcfmt, srcobj); \ | 1074 Ichar ch = itext_ichar_fmt (srcp, srcfmt, srcobj); \ |
1075 Bytecount len = emchar_len_fmt (ch, dstfmt); \ | 1075 Bytecount len = ichar_len_fmt (ch, dstfmt); \ |
1076 \ | 1076 \ |
1077 if (dstp + len <= dstend) \ | 1077 if (dstp + len <= dstend) \ |
1078 { \ | 1078 { \ |
1079 set_charptr_emchar_fmt (dstp, ch, dstfmt, dstobj); \ | 1079 set_itext_ichar_fmt (dstp, ch, dstfmt, dstobj); \ |
1080 dstp += len; \ | 1080 dstp += len; \ |
1081 } \ | 1081 } \ |
1082 else \ | 1082 else \ |
1083 break; \ | 1083 break; \ |
1084 INC_CHARPTR_FMT (srcp, srcfmt); \ | 1084 INC_IBYTEPTR_FMT (srcp, srcfmt); \ |
1085 } \ | 1085 } \ |
1086 text_checking_assert (srcp <= srcend); \ | 1086 text_checking_assert (srcp <= srcend); \ |
1087 if (src_used) \ | 1087 if (src_used) \ |
1088 *src_used = srcp - src; \ | 1088 *src_used = srcp - src; \ |
1089 return dstp - dst; \ | 1089 return dstp - dst; \ |
1090 } \ | 1090 } \ |
1091 else \ | 1091 else \ |
1092 { \ | 1092 { \ |
1093 const Intbyte *srcend = src + srclen; \ | 1093 const Ibyte *srcend = src + srclen; \ |
1094 const Intbyte *srcp = src; \ | 1094 const Ibyte *srcp = src; \ |
1095 Bytecount total = 0; \ | 1095 Bytecount total = 0; \ |
1096 \ | 1096 \ |
1097 while (srcp < srcend) \ | 1097 while (srcp < srcend) \ |
1098 { \ | 1098 { \ |
1099 total += emchar_len_fmt (charptr_emchar_fmt (srcp, srcfmt, \ | 1099 total += ichar_len_fmt (itext_ichar_fmt (srcp, srcfmt, \ |
1100 srcobj), dstfmt); \ | 1100 srcobj), dstfmt); \ |
1101 INC_CHARPTR_FMT (srcp, srcfmt); \ | 1101 INC_IBYTEPTR_FMT (srcp, srcfmt); \ |
1102 } \ | 1102 } \ |
1103 text_checking_assert (srcp == srcend); \ | 1103 text_checking_assert (srcp == srcend); \ |
1104 if (src_used) \ | 1104 if (src_used) \ |
1105 *src_used = srcp - src; \ | 1105 *src_used = srcp - src; \ |
1106 return total; \ | 1106 return total; \ |
1114 SRC_USED (if not NULL). If DST is NULL, don't actually store anything | 1114 SRC_USED (if not NULL). If DST is NULL, don't actually store anything |
1115 and just return the size needed to store all the text. Will not copy | 1115 and just return the size needed to store all the text. Will not copy |
1116 partial characters into DST. */ | 1116 partial characters into DST. */ |
1117 | 1117 |
1118 Bytecount | 1118 Bytecount |
1119 copy_text_between_formats (const Intbyte *src, Bytecount srclen, | 1119 copy_text_between_formats (const Ibyte *src, Bytecount srclen, |
1120 Internal_Format srcfmt, | 1120 Internal_Format srcfmt, |
1121 Lisp_Object srcobj, | 1121 Lisp_Object srcobj, |
1122 Intbyte *dst, Bytecount dstlen, | 1122 Ibyte *dst, Bytecount dstlen, |
1123 Internal_Format dstfmt, | 1123 Internal_Format dstfmt, |
1124 Lisp_Object dstobj, | 1124 Lisp_Object dstobj, |
1125 Bytecount *src_used) | 1125 Bytecount *src_used) |
1126 { | 1126 { |
1127 if (srcfmt == dstfmt && | 1127 if (srcfmt == dstfmt && |
1128 objects_have_same_internal_representation (srcobj, dstobj)) | 1128 objects_have_same_internal_representation (srcobj, dstobj)) |
1129 { | 1129 { |
1130 if (dst) | 1130 if (dst) |
1131 { | 1131 { |
1132 srclen = min (srclen, dstlen); | 1132 srclen = min (srclen, dstlen); |
1133 srclen = validate_intbyte_string_backward (src, srclen); | 1133 srclen = validate_ibyte_string_backward (src, srclen); |
1134 memcpy (dst, src, srclen); | 1134 memcpy (dst, src, srclen); |
1135 if (src_used) | 1135 if (src_used) |
1136 *src_used = srclen; | 1136 *src_used = srclen; |
1137 return srclen; | 1137 return srclen; |
1138 } | 1138 } |
1162 through SRC_USED (if not NULL). If DST is NULL, don't actually store | 1162 through SRC_USED (if not NULL). If DST is NULL, don't actually store |
1163 anything and just return the size needed to store all the text. */ | 1163 anything and just return the size needed to store all the text. */ |
1164 | 1164 |
1165 Bytecount | 1165 Bytecount |
1166 copy_buffer_text_out (struct buffer *buf, Bytebpos pos, | 1166 copy_buffer_text_out (struct buffer *buf, Bytebpos pos, |
1167 Bytecount len, Intbyte *dst, Bytecount dstlen, | 1167 Bytecount len, Ibyte *dst, Bytecount dstlen, |
1168 Internal_Format dstfmt, Lisp_Object dstobj, | 1168 Internal_Format dstfmt, Lisp_Object dstobj, |
1169 Bytecount *src_used) | 1169 Bytecount *src_used) |
1170 { | 1170 { |
1171 Bytecount dst_used = 0; | 1171 Bytecount dst_used = 0; |
1172 if (src_used) | 1172 if (src_used) |
1211 /************************************************************************/ | 1211 /************************************************************************/ |
1212 /* charset properties of strings */ | 1212 /* charset properties of strings */ |
1213 /************************************************************************/ | 1213 /************************************************************************/ |
1214 | 1214 |
1215 void | 1215 void |
1216 find_charsets_in_intbyte_string (unsigned char *charsets, const Intbyte *str, | 1216 find_charsets_in_ibyte_string (unsigned char *charsets, const Ibyte *str, |
1217 Bytecount len) | 1217 Bytecount len) |
1218 { | 1218 { |
1219 #ifndef MULE | 1219 #ifndef MULE |
1220 /* Telescope this. */ | 1220 /* Telescope this. */ |
1221 charsets[0] = 1; | 1221 charsets[0] = 1; |
1222 #else | 1222 #else |
1223 const Intbyte *strend = str + len; | 1223 const Ibyte *strend = str + len; |
1224 memset (charsets, 0, NUM_LEADING_BYTES); | 1224 memset (charsets, 0, NUM_LEADING_BYTES); |
1225 | 1225 |
1226 /* #### SJT doesn't like this. */ | 1226 /* #### SJT doesn't like this. */ |
1227 if (len == 0) | 1227 if (len == 0) |
1228 { | 1228 { |
1230 return; | 1230 return; |
1231 } | 1231 } |
1232 | 1232 |
1233 while (str < strend) | 1233 while (str < strend) |
1234 { | 1234 { |
1235 charsets[emchar_leading_byte (charptr_emchar (str)) - MIN_LEADING_BYTE] = | 1235 charsets[ichar_leading_byte (itext_ichar (str)) - MIN_LEADING_BYTE] = |
1236 1; | 1236 1; |
1237 INC_CHARPTR (str); | 1237 INC_IBYTEPTR (str); |
1238 } | 1238 } |
1239 #endif | 1239 #endif |
1240 } | 1240 } |
1241 | 1241 |
1242 void | 1242 void |
1243 find_charsets_in_emchar_string (unsigned char *charsets, const Emchar *str, | 1243 find_charsets_in_ichar_string (unsigned char *charsets, const Ichar *str, |
1244 Charcount len) | 1244 Charcount len) |
1245 { | 1245 { |
1246 #ifndef MULE | 1246 #ifndef MULE |
1247 /* Telescope this. */ | 1247 /* Telescope this. */ |
1248 charsets[0] = 1; | 1248 charsets[0] = 1; |
1258 return; | 1258 return; |
1259 } | 1259 } |
1260 | 1260 |
1261 for (i = 0; i < len; i++) | 1261 for (i = 0; i < len; i++) |
1262 { | 1262 { |
1263 charsets[emchar_leading_byte (str[i]) - MIN_LEADING_BYTE] = 1; | 1263 charsets[ichar_leading_byte (str[i]) - MIN_LEADING_BYTE] = 1; |
1264 } | 1264 } |
1265 #endif | 1265 #endif |
1266 } | 1266 } |
1267 | 1267 |
1268 int | 1268 int |
1269 intbyte_string_displayed_columns (const Intbyte *str, Bytecount len) | 1269 ibyte_string_displayed_columns (const Ibyte *str, Bytecount len) |
1270 { | 1270 { |
1271 int cols = 0; | 1271 int cols = 0; |
1272 const Intbyte *end = str + len; | 1272 const Ibyte *end = str + len; |
1273 | 1273 |
1274 while (str < end) | 1274 while (str < end) |
1275 { | 1275 { |
1276 #ifdef MULE | 1276 #ifdef MULE |
1277 Emchar ch = charptr_emchar (str); | 1277 Ichar ch = itext_ichar (str); |
1278 cols += XCHARSET_COLUMNS (emchar_charset (ch)); | 1278 cols += XCHARSET_COLUMNS (ichar_charset (ch)); |
1279 #else | 1279 #else |
1280 cols++; | 1280 cols++; |
1281 #endif | 1281 #endif |
1282 INC_CHARPTR (str); | 1282 INC_IBYTEPTR (str); |
1283 } | 1283 } |
1284 | 1284 |
1285 return cols; | 1285 return cols; |
1286 } | 1286 } |
1287 | 1287 |
1288 int | 1288 int |
1289 emchar_string_displayed_columns (const Emchar *str, Charcount len) | 1289 ichar_string_displayed_columns (const Ichar *str, Charcount len) |
1290 { | 1290 { |
1291 #ifdef MULE | 1291 #ifdef MULE |
1292 int cols = 0; | 1292 int cols = 0; |
1293 int i; | 1293 int i; |
1294 | 1294 |
1295 for (i = 0; i < len; i++) | 1295 for (i = 0; i < len; i++) |
1296 cols += XCHARSET_COLUMNS (emchar_charset (str[i])); | 1296 cols += XCHARSET_COLUMNS (ichar_charset (str[i])); |
1297 | 1297 |
1298 return cols; | 1298 return cols; |
1299 #else /* not MULE */ | 1299 #else /* not MULE */ |
1300 return len; | 1300 return len; |
1301 #endif | 1301 #endif |
1302 } | 1302 } |
1303 | 1303 |
1304 Charcount | 1304 Charcount |
1305 intbyte_string_nonascii_chars (const Intbyte *str, Bytecount len) | 1305 ibyte_string_nonascii_chars (const Ibyte *str, Bytecount len) |
1306 { | 1306 { |
1307 #ifdef MULE | 1307 #ifdef MULE |
1308 const Intbyte *end = str + len; | 1308 const Ibyte *end = str + len; |
1309 Charcount retval = 0; | 1309 Charcount retval = 0; |
1310 | 1310 |
1311 while (str < end) | 1311 while (str < end) |
1312 { | 1312 { |
1313 if (!byte_ascii_p (*str)) | 1313 if (!byte_ascii_p (*str)) |
1314 retval++; | 1314 retval++; |
1315 INC_CHARPTR (str); | 1315 INC_IBYTEPTR (str); |
1316 } | 1316 } |
1317 | 1317 |
1318 return retval; | 1318 return retval; |
1319 #else | 1319 #else |
1320 return 0; | 1320 return 0; |
1325 /***************************************************************************/ | 1325 /***************************************************************************/ |
1326 /* Eistring helper functions */ | 1326 /* Eistring helper functions */ |
1327 /***************************************************************************/ | 1327 /***************************************************************************/ |
1328 | 1328 |
1329 int | 1329 int |
1330 eistr_casefiddle_1 (Intbyte *olddata, Bytecount len, Intbyte *newdata, | 1330 eistr_casefiddle_1 (Ibyte *olddata, Bytecount len, Ibyte *newdata, |
1331 int downp) | 1331 int downp) |
1332 { | 1332 { |
1333 Intbyte *endp = olddata + len; | 1333 Ibyte *endp = olddata + len; |
1334 Intbyte *newp = newdata; | 1334 Ibyte *newp = newdata; |
1335 int changedp = 0; | 1335 int changedp = 0; |
1336 | 1336 |
1337 while (olddata < endp) | 1337 while (olddata < endp) |
1338 { | 1338 { |
1339 Emchar c = charptr_emchar (olddata); | 1339 Ichar c = itext_ichar (olddata); |
1340 Emchar newc; | 1340 Ichar newc; |
1341 | 1341 |
1342 if (downp) | 1342 if (downp) |
1343 newc = DOWNCASE (0, c); | 1343 newc = DOWNCASE (0, c); |
1344 else | 1344 else |
1345 newc = UPCASE (0, c); | 1345 newc = UPCASE (0, c); |
1346 | 1346 |
1347 if (c != newc) | 1347 if (c != newc) |
1348 changedp = 1; | 1348 changedp = 1; |
1349 | 1349 |
1350 newp += set_charptr_emchar (newp, newc); | 1350 newp += set_itext_ichar (newp, newc); |
1351 INC_CHARPTR (olddata); | 1351 INC_IBYTEPTR (olddata); |
1352 } | 1352 } |
1353 | 1353 |
1354 *newp = '\0'; | 1354 *newp = '\0'; |
1355 | 1355 |
1356 return changedp ? newp - newdata : 0; | 1356 return changedp ? newp - newdata : 0; |
1374 if (ei->mallocp_) | 1374 if (ei->mallocp_) |
1375 return; | 1375 return; |
1376 ei->mallocp_ = 1; | 1376 ei->mallocp_ = 1; |
1377 if (ei->data_) | 1377 if (ei->data_) |
1378 { | 1378 { |
1379 Intbyte *newdata; | 1379 Ibyte *newdata; |
1380 | 1380 |
1381 ei->max_size_allocated_ = | 1381 ei->max_size_allocated_ = |
1382 eifind_large_enough_buffer (0, ei->bytelen_ + 1); | 1382 eifind_large_enough_buffer (0, ei->bytelen_ + 1); |
1383 newdata = (Intbyte *) xmalloc (ei->max_size_allocated_); | 1383 newdata = (Ibyte *) xmalloc (ei->max_size_allocated_); |
1384 memcpy (newdata, ei->data_, ei->bytelen_ + 1); | 1384 memcpy (newdata, ei->data_, ei->bytelen_ + 1); |
1385 ei->data_ = newdata; | 1385 ei->data_ = newdata; |
1386 } | 1386 } |
1387 | 1387 |
1388 if (ei->extdata_) | 1388 if (ei->extdata_) |
1397 } | 1397 } |
1398 } | 1398 } |
1399 | 1399 |
1400 int | 1400 int |
1401 eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff, | 1401 eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff, |
1402 Bytecount len, Charcount charlen, const Intbyte *data, | 1402 Bytecount len, Charcount charlen, const Ibyte *data, |
1403 const Eistring *ei2, int is_c, int fold_case) | 1403 const Eistring *ei2, int is_c, int fold_case) |
1404 { | 1404 { |
1405 assert ((off < 0) != (charoff < 0)); | 1405 assert ((off < 0) != (charoff < 0)); |
1406 if (off < 0) | 1406 if (off < 0) |
1407 { | 1407 { |
1420 assert ((is_c != 0) == (data != 0)); | 1420 assert ((is_c != 0) == (data != 0)); |
1421 assert (fold_case >= 0 && fold_case <= 2); | 1421 assert (fold_case >= 0 && fold_case <= 2); |
1422 | 1422 |
1423 { | 1423 { |
1424 Bytecount dstlen; | 1424 Bytecount dstlen; |
1425 const Intbyte *src = ei->data_, *dst; | 1425 const Ibyte *src = ei->data_, *dst; |
1426 | 1426 |
1427 if (data) | 1427 if (data) |
1428 { | 1428 { |
1429 dst = data; | 1429 dst = data; |
1430 dstlen = qxestrlen (data); | 1430 dstlen = qxestrlen (data); |
1442 fold_case == 1 ? qxememcasecmp4 (src, len, dst, dstlen) : | 1442 fold_case == 1 ? qxememcasecmp4 (src, len, dst, dstlen) : |
1443 qxetextcasecmp (src, len, dst, dstlen)); | 1443 qxetextcasecmp (src, len, dst, dstlen)); |
1444 } | 1444 } |
1445 } | 1445 } |
1446 | 1446 |
1447 Intbyte * | 1447 Ibyte * |
1448 eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out, Internal_Format fmt, | 1448 eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out, Internal_Format fmt, |
1449 Lisp_Object object) | 1449 Lisp_Object object) |
1450 { | 1450 { |
1451 Intbyte *ptr; | 1451 Ibyte *ptr; |
1452 | 1452 |
1453 assert (fmt == FORMAT_DEFAULT); | 1453 assert (fmt == FORMAT_DEFAULT); |
1454 ptr = xnew_array (Intbyte, eistr->bytelen_ + 1); | 1454 ptr = xnew_array (Ibyte, eistr->bytelen_ + 1); |
1455 if (len_out) | 1455 if (len_out) |
1456 *len_out = eistr->bytelen_; | 1456 *len_out = eistr->bytelen_; |
1457 memcpy (ptr, eistr->data_, eistr->bytelen_ + 1); | 1457 memcpy (ptr, eistr->data_, eistr->bytelen_ + 1); |
1458 return ptr; | 1458 return ptr; |
1459 } | 1459 } |
1468 #ifdef MULE | 1468 #ifdef MULE |
1469 | 1469 |
1470 /* Skip as many ASCII bytes as possible in the memory block [PTR, END). | 1470 /* Skip as many ASCII bytes as possible in the memory block [PTR, END). |
1471 Return pointer to the first non-ASCII byte. optimized for long | 1471 Return pointer to the first non-ASCII byte. optimized for long |
1472 stretches of ASCII. */ | 1472 stretches of ASCII. */ |
1473 inline static const Intbyte * | 1473 inline static const Ibyte * |
1474 skip_ascii (const Intbyte *ptr, const Intbyte *end) | 1474 skip_ascii (const Ibyte *ptr, const Ibyte *end) |
1475 { | 1475 { |
1476 #ifdef EFFICIENT_INT_128_BIT | 1476 #ifdef EFFICIENT_INT_128_BIT |
1477 # define STRIDE_TYPE INT_128_BIT | 1477 # define STRIDE_TYPE INT_128_BIT |
1478 # define HIGH_BIT_MASK \ | 1478 # define HIGH_BIT_MASK \ |
1479 MAKE_128_BIT_UNSIGNED_CONSTANT (0x80808080808080808080808080808080) | 1479 MAKE_128_BIT_UNSIGNED_CONSTANT (0x80808080808080808080808080808080) |
1501 ptr++; | 1501 ptr++; |
1502 } | 1502 } |
1503 ascii_end = (const unsigned STRIDE_TYPE *) ptr; | 1503 ascii_end = (const unsigned STRIDE_TYPE *) ptr; |
1504 /* This loop screams, because we can detect ASCII | 1504 /* This loop screams, because we can detect ASCII |
1505 characters 4 or 8 at a time. */ | 1505 characters 4 or 8 at a time. */ |
1506 while ((const Intbyte *) ascii_end + STRIDE <= end | 1506 while ((const Ibyte *) ascii_end + STRIDE <= end |
1507 && !(*ascii_end & HIGH_BIT_MASK)) | 1507 && !(*ascii_end & HIGH_BIT_MASK)) |
1508 ascii_end++; | 1508 ascii_end++; |
1509 ptr = (Intbyte *) ascii_end; | 1509 ptr = (Ibyte *) ascii_end; |
1510 while (ptr < end && byte_ascii_p (*ptr)) | 1510 while (ptr < end && byte_ascii_p (*ptr)) |
1511 ptr++; | 1511 ptr++; |
1512 return ptr; | 1512 return ptr; |
1513 } | 1513 } |
1514 | 1514 |
1516 These work on strings of all sizes but are more efficient than a simple | 1516 These work on strings of all sizes but are more efficient than a simple |
1517 loop on large strings and probably less efficient on sufficiently small | 1517 loop on large strings and probably less efficient on sufficiently small |
1518 strings. */ | 1518 strings. */ |
1519 | 1519 |
1520 Charcount | 1520 Charcount |
1521 bytecount_to_charcount_fun (const Intbyte *ptr, Bytecount len) | 1521 bytecount_to_charcount_fun (const Ibyte *ptr, Bytecount len) |
1522 { | 1522 { |
1523 Charcount count = 0; | 1523 Charcount count = 0; |
1524 const Intbyte *end = ptr + len; | 1524 const Ibyte *end = ptr + len; |
1525 while (1) | 1525 while (1) |
1526 { | 1526 { |
1527 const Intbyte *newptr = skip_ascii (ptr, end); | 1527 const Ibyte *newptr = skip_ascii (ptr, end); |
1528 count += newptr - ptr; | 1528 count += newptr - ptr; |
1529 ptr = newptr; | 1529 ptr = newptr; |
1530 if (ptr == end) | 1530 if (ptr == end) |
1531 break; | 1531 break; |
1532 { | 1532 { |
1533 /* Optimize for successive characters from the same charset */ | 1533 /* Optimize for successive characters from the same charset */ |
1534 Intbyte leading_byte = *ptr; | 1534 Ibyte leading_byte = *ptr; |
1535 int bytes = rep_bytes_by_first_byte (leading_byte); | 1535 int bytes = rep_bytes_by_first_byte (leading_byte); |
1536 while (ptr < end && *ptr == leading_byte) | 1536 while (ptr < end && *ptr == leading_byte) |
1537 ptr += bytes, count++; | 1537 ptr += bytes, count++; |
1538 } | 1538 } |
1539 } | 1539 } |
1548 | 1548 |
1549 return count; | 1549 return count; |
1550 } | 1550 } |
1551 | 1551 |
1552 Bytecount | 1552 Bytecount |
1553 charcount_to_bytecount_fun (const Intbyte *ptr, Charcount len) | 1553 charcount_to_bytecount_fun (const Ibyte *ptr, Charcount len) |
1554 { | 1554 { |
1555 const Intbyte *newptr = ptr; | 1555 const Ibyte *newptr = ptr; |
1556 while (1) | 1556 while (1) |
1557 { | 1557 { |
1558 const Intbyte *newnewptr = skip_ascii (newptr, newptr + len); | 1558 const Ibyte *newnewptr = skip_ascii (newptr, newptr + len); |
1559 len -= newnewptr - newptr; | 1559 len -= newnewptr - newptr; |
1560 newptr = newnewptr; | 1560 newptr = newnewptr; |
1561 if (!len) | 1561 if (!len) |
1562 break; | 1562 break; |
1563 { | 1563 { |
1564 /* Optimize for successive characters from the same charset */ | 1564 /* Optimize for successive characters from the same charset */ |
1565 Intbyte leading_byte = *newptr; | 1565 Ibyte leading_byte = *newptr; |
1566 int bytes = rep_bytes_by_first_byte (leading_byte); | 1566 int bytes = rep_bytes_by_first_byte (leading_byte); |
1567 while (len > 0 && *newptr == leading_byte) | 1567 while (len > 0 && *newptr == leading_byte) |
1568 newptr += bytes, len--; | 1568 newptr += bytes, len--; |
1569 } | 1569 } |
1570 } | 1570 } |
2733 /* Implement TO_EXTERNAL_FORMAT, TO_INTERNAL_FORMAT */ | 2733 /* Implement TO_EXTERNAL_FORMAT, TO_INTERNAL_FORMAT */ |
2734 /************************************************************************/ | 2734 /************************************************************************/ |
2735 | 2735 |
2736 typedef struct | 2736 typedef struct |
2737 { | 2737 { |
2738 Dynarr_declare (Intbyte_dynarr *); | 2738 Dynarr_declare (Ibyte_dynarr *); |
2739 } Intbyte_dynarr_dynarr; | 2739 } Ibyte_dynarr_dynarr; |
2740 | 2740 |
2741 typedef struct | 2741 typedef struct |
2742 { | 2742 { |
2743 Dynarr_declare (Extbyte_dynarr *); | 2743 Dynarr_declare (Extbyte_dynarr *); |
2744 } Extbyte_dynarr_dynarr; | 2744 } Extbyte_dynarr_dynarr; |
2745 | 2745 |
2746 static Extbyte_dynarr_dynarr *conversion_out_dynarr_list; | 2746 static Extbyte_dynarr_dynarr *conversion_out_dynarr_list; |
2747 static Intbyte_dynarr_dynarr *conversion_in_dynarr_list; | 2747 static Ibyte_dynarr_dynarr *conversion_in_dynarr_list; |
2748 | 2748 |
2749 static int dfc_convert_to_external_format_in_use; | 2749 static int dfc_convert_to_external_format_in_use; |
2750 static int dfc_convert_to_internal_format_in_use; | 2750 static int dfc_convert_to_internal_format_in_use; |
2751 | 2751 |
2752 void | 2752 void |
2789 is a lisp string. */ | 2789 is a lisp string. */ |
2790 if (source_type != DFC_TYPE_LISP_LSTREAM && | 2790 if (source_type != DFC_TYPE_LISP_LSTREAM && |
2791 sink_type != DFC_TYPE_LISP_LSTREAM && | 2791 sink_type != DFC_TYPE_LISP_LSTREAM && |
2792 coding_system_is_binary (coding_system)) | 2792 coding_system_is_binary (coding_system)) |
2793 { | 2793 { |
2794 const Intbyte *ptr; | 2794 const Ibyte *ptr; |
2795 Bytecount len; | 2795 Bytecount len; |
2796 | 2796 |
2797 if (source_type == DFC_TYPE_LISP_STRING) | 2797 if (source_type == DFC_TYPE_LISP_STRING) |
2798 { | 2798 { |
2799 ptr = XSTRING_DATA (source->lisp_object); | 2799 ptr = XSTRING_DATA (source->lisp_object); |
2800 len = XSTRING_LENGTH (source->lisp_object); | 2800 len = XSTRING_LENGTH (source->lisp_object); |
2801 } | 2801 } |
2802 else | 2802 else |
2803 { | 2803 { |
2804 ptr = (Intbyte *) source->data.ptr; | 2804 ptr = (Ibyte *) source->data.ptr; |
2805 len = source->data.len; | 2805 len = source->data.len; |
2806 } | 2806 } |
2807 | 2807 |
2808 #ifdef MULE | 2808 #ifdef MULE |
2809 { | 2809 { |
2810 const Intbyte *end; | 2810 const Ibyte *end; |
2811 for (end = ptr + len; ptr < end;) | 2811 for (end = ptr + len; ptr < end;) |
2812 { | 2812 { |
2813 Intbyte c = | 2813 Ibyte c = |
2814 (byte_ascii_p (*ptr)) ? *ptr : | 2814 (byte_ascii_p (*ptr)) ? *ptr : |
2815 (*ptr == LEADING_BYTE_CONTROL_1) ? (*(ptr+1) - 0x20) : | 2815 (*ptr == LEADING_BYTE_CONTROL_1) ? (*(ptr+1) - 0x20) : |
2816 (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) : | 2816 (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) : |
2817 '~'; | 2817 '~'; |
2818 | 2818 |
2819 Dynarr_add (conversion_out_dynarr, (Extbyte) c); | 2819 Dynarr_add (conversion_out_dynarr, (Extbyte) c); |
2820 INC_CHARPTR (ptr); | 2820 INC_IBYTEPTR (ptr); |
2821 } | 2821 } |
2822 text_checking_assert (ptr == end); | 2822 text_checking_assert (ptr == end); |
2823 } | 2823 } |
2824 #else | 2824 #else |
2825 Dynarr_add_many (conversion_out_dynarr, ptr, len); | 2825 Dynarr_add_many (conversion_out_dynarr, ptr, len); |
2830 /* Optimize the common case involving Unicode where only ASCII is involved */ | 2830 /* Optimize the common case involving Unicode where only ASCII is involved */ |
2831 else if (source_type != DFC_TYPE_LISP_LSTREAM && | 2831 else if (source_type != DFC_TYPE_LISP_LSTREAM && |
2832 sink_type != DFC_TYPE_LISP_LSTREAM && | 2832 sink_type != DFC_TYPE_LISP_LSTREAM && |
2833 dfc_coding_system_is_unicode (coding_system)) | 2833 dfc_coding_system_is_unicode (coding_system)) |
2834 { | 2834 { |
2835 const Intbyte *ptr, *p; | 2835 const Ibyte *ptr, *p; |
2836 Bytecount len; | 2836 Bytecount len; |
2837 const Intbyte *end; | 2837 const Ibyte *end; |
2838 | 2838 |
2839 if (source_type == DFC_TYPE_LISP_STRING) | 2839 if (source_type == DFC_TYPE_LISP_STRING) |
2840 { | 2840 { |
2841 ptr = XSTRING_DATA (source->lisp_object); | 2841 ptr = XSTRING_DATA (source->lisp_object); |
2842 len = XSTRING_LENGTH (source->lisp_object); | 2842 len = XSTRING_LENGTH (source->lisp_object); |
2843 } | 2843 } |
2844 else | 2844 else |
2845 { | 2845 { |
2846 ptr = (Intbyte *) source->data.ptr; | 2846 ptr = (Ibyte *) source->data.ptr; |
2847 len = source->data.len; | 2847 len = source->data.len; |
2848 } | 2848 } |
2849 end = ptr + len; | 2849 end = ptr + len; |
2850 | 2850 |
2851 for (p = ptr; p < end; p++) | 2851 for (p = ptr; p < end; p++) |
2954 { | 2954 { |
2955 /* It's guaranteed that many callers are not prepared for GC here, | 2955 /* It's guaranteed that many callers are not prepared for GC here, |
2956 esp. given that this code conversion occurs in many very hidden | 2956 esp. given that this code conversion occurs in many very hidden |
2957 places. */ | 2957 places. */ |
2958 int count = begin_gc_forbidden (); | 2958 int count = begin_gc_forbidden (); |
2959 Intbyte_dynarr *conversion_in_dynarr; | 2959 Ibyte_dynarr *conversion_in_dynarr; |
2960 | 2960 |
2961 type_checking_assert | 2961 type_checking_assert |
2962 ((source_type == DFC_TYPE_DATA || | 2962 ((source_type == DFC_TYPE_DATA || |
2963 source_type == DFC_TYPE_LISP_LSTREAM) | 2963 source_type == DFC_TYPE_LISP_LSTREAM) |
2964 && | 2964 && |
2965 (sink_type == DFC_TYPE_DATA || | 2965 (sink_type == DFC_TYPE_DATA || |
2966 sink_type == DFC_TYPE_LISP_LSTREAM)); | 2966 sink_type == DFC_TYPE_LISP_LSTREAM)); |
2967 | 2967 |
2968 if (Dynarr_length (conversion_in_dynarr_list) <= | 2968 if (Dynarr_length (conversion_in_dynarr_list) <= |
2969 dfc_convert_to_internal_format_in_use) | 2969 dfc_convert_to_internal_format_in_use) |
2970 Dynarr_add (conversion_in_dynarr_list, Dynarr_new (Intbyte)); | 2970 Dynarr_add (conversion_in_dynarr_list, Dynarr_new (Ibyte)); |
2971 conversion_in_dynarr = Dynarr_at (conversion_in_dynarr_list, | 2971 conversion_in_dynarr = Dynarr_at (conversion_in_dynarr_list, |
2972 dfc_convert_to_internal_format_in_use); | 2972 dfc_convert_to_internal_format_in_use); |
2973 Dynarr_reset (conversion_in_dynarr); | 2973 Dynarr_reset (conversion_in_dynarr); |
2974 | 2974 |
2975 internal_bind_int (&dfc_convert_to_internal_format_in_use, | 2975 internal_bind_int (&dfc_convert_to_internal_format_in_use, |
2980 if (source_type != DFC_TYPE_LISP_LSTREAM && | 2980 if (source_type != DFC_TYPE_LISP_LSTREAM && |
2981 sink_type != DFC_TYPE_LISP_LSTREAM && | 2981 sink_type != DFC_TYPE_LISP_LSTREAM && |
2982 coding_system_is_binary (coding_system)) | 2982 coding_system_is_binary (coding_system)) |
2983 { | 2983 { |
2984 #ifdef MULE | 2984 #ifdef MULE |
2985 const Intbyte *ptr = (const Intbyte *) source->data.ptr; | 2985 const Ibyte *ptr = (const Ibyte *) source->data.ptr; |
2986 Bytecount len = source->data.len; | 2986 Bytecount len = source->data.len; |
2987 const Intbyte *end = ptr + len; | 2987 const Ibyte *end = ptr + len; |
2988 | 2988 |
2989 for (; ptr < end; ptr++) | 2989 for (; ptr < end; ptr++) |
2990 { | 2990 { |
2991 Intbyte c = *ptr; | 2991 Ibyte c = *ptr; |
2992 | 2992 |
2993 if (byte_ascii_p (c)) | 2993 if (byte_ascii_p (c)) |
2994 Dynarr_add (conversion_in_dynarr, c); | 2994 Dynarr_add (conversion_in_dynarr, c); |
2995 else if (byte_c1_p (c)) | 2995 else if (byte_c1_p (c)) |
2996 { | 2996 { |
3011 /* Optimize the common case involving Unicode where only ASCII/Latin-1 is involved */ | 3011 /* Optimize the common case involving Unicode where only ASCII/Latin-1 is involved */ |
3012 else if (source_type != DFC_TYPE_LISP_LSTREAM && | 3012 else if (source_type != DFC_TYPE_LISP_LSTREAM && |
3013 sink_type != DFC_TYPE_LISP_LSTREAM && | 3013 sink_type != DFC_TYPE_LISP_LSTREAM && |
3014 dfc_coding_system_is_unicode (coding_system)) | 3014 dfc_coding_system_is_unicode (coding_system)) |
3015 { | 3015 { |
3016 const Intbyte *ptr = (const Intbyte *) source->data.ptr + 1; | 3016 const Ibyte *ptr = (const Ibyte *) source->data.ptr + 1; |
3017 Bytecount len = source->data.len; | 3017 Bytecount len = source->data.len; |
3018 const Intbyte *end = ptr + len; | 3018 const Ibyte *end = ptr + len; |
3019 | 3019 |
3020 if (len & 1) | 3020 if (len & 1) |
3021 goto the_hard_way; | 3021 goto the_hard_way; |
3022 | 3022 |
3023 for (; ptr < end; ptr += 2) | 3023 for (; ptr < end; ptr += 2) |
3024 { | 3024 { |
3025 if (*ptr) | 3025 if (*ptr) |
3026 goto the_hard_way; | 3026 goto the_hard_way; |
3027 } | 3027 } |
3028 | 3028 |
3029 ptr = (const Intbyte *) source->data.ptr; | 3029 ptr = (const Ibyte *) source->data.ptr; |
3030 end = ptr + len; | 3030 end = ptr + len; |
3031 | 3031 |
3032 for (; ptr < end; ptr += 2) | 3032 for (; ptr < end; ptr += 2) |
3033 { | 3033 { |
3034 Intbyte c = *ptr; | 3034 Ibyte c = *ptr; |
3035 | 3035 |
3036 if (byte_ascii_p (c)) | 3036 if (byte_ascii_p (c)) |
3037 Dynarr_add (conversion_in_dynarr, c); | 3037 Dynarr_add (conversion_in_dynarr, c); |
3038 #ifdef MULE | 3038 #ifdef MULE |
3039 else if (byte_c1_p (c)) | 3039 else if (byte_c1_p (c)) |
3133 } | 3133 } |
3134 } | 3134 } |
3135 | 3135 |
3136 | 3136 |
3137 /************************************************************************/ | 3137 /************************************************************************/ |
3138 /* Basic Emchar functions */ | 3138 /* Basic Ichar functions */ |
3139 /************************************************************************/ | 3139 /************************************************************************/ |
3140 | 3140 |
3141 #ifdef MULE | 3141 #ifdef MULE |
3142 | 3142 |
3143 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded | 3143 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded |
3144 string in STR. Returns the number of bytes stored. | 3144 string in STR. Returns the number of bytes stored. |
3145 Do not call this directly. Use the macro set_charptr_emchar() instead. | 3145 Do not call this directly. Use the macro set_itext_ichar() instead. |
3146 */ | 3146 */ |
3147 | 3147 |
3148 Bytecount | 3148 Bytecount |
3149 non_ascii_set_charptr_emchar (Intbyte *str, Emchar c) | 3149 non_ascii_set_itext_ichar (Ibyte *str, Ichar c) |
3150 { | 3150 { |
3151 Intbyte *p; | 3151 Ibyte *p; |
3152 Intbyte lb; | 3152 Ibyte lb; |
3153 int c1, c2; | 3153 int c1, c2; |
3154 Lisp_Object charset; | 3154 Lisp_Object charset; |
3155 | 3155 |
3156 p = str; | 3156 p = str; |
3157 BREAKUP_EMCHAR (c, charset, c1, c2); | 3157 BREAKUP_ICHAR (c, charset, c1, c2); |
3158 lb = emchar_leading_byte (c); | 3158 lb = ichar_leading_byte (c); |
3159 if (leading_byte_private_p (lb)) | 3159 if (leading_byte_private_p (lb)) |
3160 *p++ = private_leading_byte_prefix (lb); | 3160 *p++ = private_leading_byte_prefix (lb); |
3161 *p++ = lb; | 3161 *p++ = lb; |
3162 if (EQ (charset, Vcharset_control_1)) | 3162 if (EQ (charset, Vcharset_control_1)) |
3163 c1 += 0x20; | 3163 c1 += 0x20; |
3168 return (p - str); | 3168 return (p - str); |
3169 } | 3169 } |
3170 | 3170 |
3171 /* Return the first character from a Mule-encoded string in STR, | 3171 /* Return the first character from a Mule-encoded string in STR, |
3172 assuming it's non-ASCII. Do not call this directly. | 3172 assuming it's non-ASCII. Do not call this directly. |
3173 Use the macro charptr_emchar() instead. */ | 3173 Use the macro itext_ichar() instead. */ |
3174 | 3174 |
3175 Emchar | 3175 Ichar |
3176 non_ascii_charptr_emchar (const Intbyte *str) | 3176 non_ascii_itext_ichar (const Ibyte *str) |
3177 { | 3177 { |
3178 Intbyte i0 = *str, i1, i2 = 0; | 3178 Ibyte i0 = *str, i1, i2 = 0; |
3179 Lisp_Object charset; | 3179 Lisp_Object charset; |
3180 | 3180 |
3181 if (i0 == LEADING_BYTE_CONTROL_1) | 3181 if (i0 == LEADING_BYTE_CONTROL_1) |
3182 return (Emchar) (*++str - 0x20); | 3182 return (Ichar) (*++str - 0x20); |
3183 | 3183 |
3184 if (leading_byte_prefix_p (i0)) | 3184 if (leading_byte_prefix_p (i0)) |
3185 i0 = *++str; | 3185 i0 = *++str; |
3186 | 3186 |
3187 i1 = *++str & 0x7F; | 3187 i1 = *++str & 0x7F; |
3188 | 3188 |
3189 charset = charset_by_leading_byte (i0); | 3189 charset = charset_by_leading_byte (i0); |
3190 if (XCHARSET_DIMENSION (charset) == 2) | 3190 if (XCHARSET_DIMENSION (charset) == 2) |
3191 i2 = *++str & 0x7F; | 3191 i2 = *++str & 0x7F; |
3192 | 3192 |
3193 return make_emchar (charset, i1, i2); | 3193 return make_ichar (charset, i1, i2); |
3194 } | 3194 } |
3195 | 3195 |
3196 /* Return whether CH is a valid Emchar, assuming it's non-ASCII. | 3196 /* Return whether CH is a valid Ichar, assuming it's non-ASCII. |
3197 Do not call this directly. Use the macro valid_emchar_p() instead. */ | 3197 Do not call this directly. Use the macro valid_ichar_p() instead. */ |
3198 | 3198 |
3199 int | 3199 int |
3200 non_ascii_valid_emchar_p (Emchar ch) | 3200 non_ascii_valid_ichar_p (Ichar ch) |
3201 { | 3201 { |
3202 int f1, f2, f3; | 3202 int f1, f2, f3; |
3203 | 3203 |
3204 /* Must have only lowest 19 bits set */ | 3204 /* Must have only lowest 19 bits set */ |
3205 if (ch & ~0x7FFFF) | 3205 if (ch & ~0x7FFFF) |
3206 return 0; | 3206 return 0; |
3207 | 3207 |
3208 f1 = emchar_field1 (ch); | 3208 f1 = ichar_field1 (ch); |
3209 f2 = emchar_field2 (ch); | 3209 f2 = ichar_field2 (ch); |
3210 f3 = emchar_field3 (ch); | 3210 f3 = ichar_field3 (ch); |
3211 | 3211 |
3212 if (f1 == 0) | 3212 if (f1 == 0) |
3213 { | 3213 { |
3214 /* dimension-1 char */ | 3214 /* dimension-1 char */ |
3215 Lisp_Object charset; | 3215 Lisp_Object charset; |
3216 | 3216 |
3217 /* leading byte must be correct */ | 3217 /* leading byte must be correct */ |
3218 if (f2 < MIN_EMCHAR_FIELD2_OFFICIAL || | 3218 if (f2 < MIN_ICHAR_FIELD2_OFFICIAL || |
3219 (f2 > MAX_EMCHAR_FIELD2_OFFICIAL && f2 < MIN_EMCHAR_FIELD2_PRIVATE) || | 3219 (f2 > MAX_ICHAR_FIELD2_OFFICIAL && f2 < MIN_ICHAR_FIELD2_PRIVATE) || |
3220 f2 > MAX_EMCHAR_FIELD2_PRIVATE) | 3220 f2 > MAX_ICHAR_FIELD2_PRIVATE) |
3221 return 0; | 3221 return 0; |
3222 /* octet not out of range */ | 3222 /* octet not out of range */ |
3223 if (f3 < 0x20) | 3223 if (f3 < 0x20) |
3224 return 0; | 3224 return 0; |
3225 /* charset exists */ | 3225 /* charset exists */ |
3238 { | 3238 { |
3239 /* dimension-2 char */ | 3239 /* dimension-2 char */ |
3240 Lisp_Object charset; | 3240 Lisp_Object charset; |
3241 | 3241 |
3242 /* leading byte must be correct */ | 3242 /* leading byte must be correct */ |
3243 if (f1 < MIN_EMCHAR_FIELD1_OFFICIAL || | 3243 if (f1 < MIN_ICHAR_FIELD1_OFFICIAL || |
3244 (f1 > MAX_EMCHAR_FIELD1_OFFICIAL && f1 < MIN_EMCHAR_FIELD1_PRIVATE) || | 3244 (f1 > MAX_ICHAR_FIELD1_OFFICIAL && f1 < MIN_ICHAR_FIELD1_PRIVATE) || |
3245 f1 > MAX_EMCHAR_FIELD1_PRIVATE) | 3245 f1 > MAX_ICHAR_FIELD1_PRIVATE) |
3246 return 0; | 3246 return 0; |
3247 /* octets not out of range */ | 3247 /* octets not out of range */ |
3248 if (f2 < 0x20 || f3 < 0x20) | 3248 if (f2 < 0x20 || f3 < 0x20) |
3249 return 0; | 3249 return 0; |
3250 | 3250 |
3258 return 1; | 3258 return 1; |
3259 } | 3259 } |
3260 #endif /* ENABLE_COMPOSITE_CHARS */ | 3260 #endif /* ENABLE_COMPOSITE_CHARS */ |
3261 | 3261 |
3262 /* charset exists */ | 3262 /* charset exists */ |
3263 if (f1 <= MAX_EMCHAR_FIELD1_OFFICIAL) | 3263 if (f1 <= MAX_ICHAR_FIELD1_OFFICIAL) |
3264 charset = | 3264 charset = |
3265 charset_by_leading_byte (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE); | 3265 charset_by_leading_byte (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE); |
3266 else | 3266 else |
3267 charset = | 3267 charset = |
3268 charset_by_leading_byte (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE); | 3268 charset_by_leading_byte (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE); |
3274 XCHARSET_CHARS (charset) == 96); | 3274 XCHARSET_CHARS (charset) == 96); |
3275 } | 3275 } |
3276 } | 3276 } |
3277 | 3277 |
3278 /* Copy the character pointed to by SRC into DST. Do not call this | 3278 /* Copy the character pointed to by SRC into DST. Do not call this |
3279 directly. Use the macro charptr_copy_emchar() instead. | 3279 directly. Use the macro itext_copy_ichar() instead. |
3280 Return the number of bytes copied. */ | 3280 Return the number of bytes copied. */ |
3281 | 3281 |
3282 Bytecount | 3282 Bytecount |
3283 non_ascii_charptr_copy_emchar (const Intbyte *src, Intbyte *dst) | 3283 non_ascii_itext_copy_ichar (const Ibyte *src, Ibyte *dst) |
3284 { | 3284 { |
3285 Bytecount bytes = rep_bytes_by_first_byte (*src); | 3285 Bytecount bytes = rep_bytes_by_first_byte (*src); |
3286 Bytecount i; | 3286 Bytecount i; |
3287 for (i = bytes; i; i--, dst++, src++) | 3287 for (i = bytes; i; i--, dst++, src++) |
3288 *dst = *src; | 3288 *dst = *src; |
3291 | 3291 |
3292 #endif /* MULE */ | 3292 #endif /* MULE */ |
3293 | 3293 |
3294 | 3294 |
3295 /************************************************************************/ | 3295 /************************************************************************/ |
3296 /* streams of Emchars */ | 3296 /* streams of Ichars */ |
3297 /************************************************************************/ | 3297 /************************************************************************/ |
3298 | 3298 |
3299 #ifdef MULE | 3299 #ifdef MULE |
3300 | 3300 |
3301 /* Treat a stream as a stream of Emchar's rather than a stream of bytes. | 3301 /* Treat a stream as a stream of Ichar's rather than a stream of bytes. |
3302 The functions below are not meant to be called directly; use | 3302 The functions below are not meant to be called directly; use |
3303 the macros in insdel.h. */ | 3303 the macros in insdel.h. */ |
3304 | 3304 |
3305 Emchar | 3305 Ichar |
3306 Lstream_get_emchar_1 (Lstream *stream, int ch) | 3306 Lstream_get_ichar_1 (Lstream *stream, int ch) |
3307 { | 3307 { |
3308 Intbyte str[MAX_EMCHAR_LEN]; | 3308 Ibyte str[MAX_ICHAR_LEN]; |
3309 Intbyte *strptr = str; | 3309 Ibyte *strptr = str; |
3310 Bytecount bytes; | 3310 Bytecount bytes; |
3311 | 3311 |
3312 str[0] = (Intbyte) ch; | 3312 str[0] = (Ibyte) ch; |
3313 | 3313 |
3314 for (bytes = rep_bytes_by_first_byte (ch) - 1; bytes; bytes--) | 3314 for (bytes = rep_bytes_by_first_byte (ch) - 1; bytes; bytes--) |
3315 { | 3315 { |
3316 int c = Lstream_getc (stream); | 3316 int c = Lstream_getc (stream); |
3317 text_checking_assert (c >= 0); | 3317 text_checking_assert (c >= 0); |
3318 *++strptr = (Intbyte) c; | 3318 *++strptr = (Ibyte) c; |
3319 } | 3319 } |
3320 return charptr_emchar (str); | 3320 return itext_ichar (str); |
3321 } | 3321 } |
3322 | 3322 |
3323 int | 3323 int |
3324 Lstream_fput_emchar (Lstream *stream, Emchar ch) | 3324 Lstream_fput_ichar (Lstream *stream, Ichar ch) |
3325 { | 3325 { |
3326 Intbyte str[MAX_EMCHAR_LEN]; | 3326 Ibyte str[MAX_ICHAR_LEN]; |
3327 Bytecount len = set_charptr_emchar (str, ch); | 3327 Bytecount len = set_itext_ichar (str, ch); |
3328 return Lstream_write (stream, str, len); | 3328 return Lstream_write (stream, str, len); |
3329 } | 3329 } |
3330 | 3330 |
3331 void | 3331 void |
3332 Lstream_funget_emchar (Lstream *stream, Emchar ch) | 3332 Lstream_funget_ichar (Lstream *stream, Ichar ch) |
3333 { | 3333 { |
3334 Intbyte str[MAX_EMCHAR_LEN]; | 3334 Ibyte str[MAX_ICHAR_LEN]; |
3335 Bytecount len = set_charptr_emchar (str, ch); | 3335 Bytecount len = set_itext_ichar (str, ch); |
3336 Lstream_unread (stream, str, len); | 3336 Lstream_unread (stream, str, len); |
3337 } | 3337 } |
3338 | 3338 |
3339 #endif /* MULE */ | 3339 #endif /* MULE */ |
3340 | 3340 |
3436 if (CHARSET_DIMENSION (cs) == 1) | 3436 if (CHARSET_DIMENSION (cs) == 1) |
3437 { | 3437 { |
3438 if (!NILP (arg2)) | 3438 if (!NILP (arg2)) |
3439 invalid_argument | 3439 invalid_argument |
3440 ("Charset is of dimension one; second octet must be nil", arg2); | 3440 ("Charset is of dimension one; second octet must be nil", arg2); |
3441 return make_char (make_emchar (charset, a1, 0)); | 3441 return make_char (make_ichar (charset, a1, 0)); |
3442 } | 3442 } |
3443 | 3443 |
3444 CHECK_INT (arg2); | 3444 CHECK_INT (arg2); |
3445 a2 = XINT (arg2) & 0x7f; | 3445 a2 = XINT (arg2) & 0x7f; |
3446 if (a2 < lowlim || a2 > highlim) | 3446 if (a2 < lowlim || a2 > highlim) |
3447 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim)); | 3447 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim)); |
3448 | 3448 |
3449 return make_char (make_emchar (charset, a1, a2)); | 3449 return make_char (make_ichar (charset, a1, a2)); |
3450 #else | 3450 #else |
3451 int a1; | 3451 int a1; |
3452 int lowlim, highlim; | 3452 int lowlim, highlim; |
3453 | 3453 |
3454 if (EQ (charset, Qascii)) lowlim = 0, highlim = 127; | 3454 if (EQ (charset, Qascii)) lowlim = 0, highlim = 127; |
3478 (ch)) | 3478 (ch)) |
3479 { | 3479 { |
3480 CHECK_CHAR_COERCE_INT (ch); | 3480 CHECK_CHAR_COERCE_INT (ch); |
3481 | 3481 |
3482 return XCHARSET_NAME (charset_by_leading_byte | 3482 return XCHARSET_NAME (charset_by_leading_byte |
3483 (emchar_leading_byte (XCHAR (ch)))); | 3483 (ichar_leading_byte (XCHAR (ch)))); |
3484 } | 3484 } |
3485 | 3485 |
3486 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /* | 3486 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /* |
3487 Return the octet numbered N (should be 0 or 1) of char CH. | 3487 Return the octet numbered N (should be 0 or 1) of char CH. |
3488 N defaults to 0 if omitted. | 3488 N defaults to 0 if omitted. |
3492 Lisp_Object charset; | 3492 Lisp_Object charset; |
3493 int octet0, octet1; | 3493 int octet0, octet1; |
3494 | 3494 |
3495 CHECK_CHAR_COERCE_INT (ch); | 3495 CHECK_CHAR_COERCE_INT (ch); |
3496 | 3496 |
3497 BREAKUP_EMCHAR (XCHAR (ch), charset, octet0, octet1); | 3497 BREAKUP_ICHAR (XCHAR (ch), charset, octet0, octet1); |
3498 | 3498 |
3499 if (NILP (n) || EQ (n, Qzero)) | 3499 if (NILP (n) || EQ (n, Qzero)) |
3500 return make_int (octet0); | 3500 return make_int (octet0); |
3501 else if (EQ (n, make_int (1))) | 3501 else if (EQ (n, make_int (1))) |
3502 return make_int (octet1); | 3502 return make_int (octet1); |
3516 int c1, c2; | 3516 int c1, c2; |
3517 | 3517 |
3518 GCPRO2 (charset, rc); | 3518 GCPRO2 (charset, rc); |
3519 CHECK_CHAR_COERCE_INT (character); | 3519 CHECK_CHAR_COERCE_INT (character); |
3520 | 3520 |
3521 BREAKUP_EMCHAR (XCHAR (character), charset, c1, c2); | 3521 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2); |
3522 | 3522 |
3523 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2) | 3523 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2) |
3524 { | 3524 { |
3525 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2)); | 3525 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2)); |
3526 } | 3526 } |
3540 /* composite character functions */ | 3540 /* composite character functions */ |
3541 /************************************************************************/ | 3541 /************************************************************************/ |
3542 | 3542 |
3543 #ifdef ENABLE_COMPOSITE_CHARS | 3543 #ifdef ENABLE_COMPOSITE_CHARS |
3544 | 3544 |
3545 Emchar | 3545 Ichar |
3546 lookup_composite_char (Intbyte *str, int len) | 3546 lookup_composite_char (Ibyte *str, int len) |
3547 { | 3547 { |
3548 Lisp_Object lispstr = make_string (str, len); | 3548 Lisp_Object lispstr = make_string (str, len); |
3549 Lisp_Object ch = Fgethash (lispstr, | 3549 Lisp_Object ch = Fgethash (lispstr, |
3550 Vcomposite_char_string2char_hash_table, | 3550 Vcomposite_char_string2char_hash_table, |
3551 Qunbound); | 3551 Qunbound); |
3552 Emchar emch; | 3552 Ichar emch; |
3553 | 3553 |
3554 if (UNBOUNDP (ch)) | 3554 if (UNBOUNDP (ch)) |
3555 { | 3555 { |
3556 if (composite_char_row_next >= 128) | 3556 if (composite_char_row_next >= 128) |
3557 invalid_operation ("No more composite chars available", lispstr); | 3557 invalid_operation ("No more composite chars available", lispstr); |
3558 emch = make_emchar (Vcharset_composite, composite_char_row_next, | 3558 emch = make_ichar (Vcharset_composite, composite_char_row_next, |
3559 composite_char_col_next); | 3559 composite_char_col_next); |
3560 Fputhash (make_char (emch), lispstr, | 3560 Fputhash (make_char (emch), lispstr, |
3561 Vcomposite_char_char2string_hash_table); | 3561 Vcomposite_char_char2string_hash_table); |
3562 Fputhash (lispstr, make_char (emch), | 3562 Fputhash (lispstr, make_char (emch), |
3563 Vcomposite_char_string2char_hash_table); | 3563 Vcomposite_char_string2char_hash_table); |
3572 emch = XCHAR (ch); | 3572 emch = XCHAR (ch); |
3573 return emch; | 3573 return emch; |
3574 } | 3574 } |
3575 | 3575 |
3576 Lisp_Object | 3576 Lisp_Object |
3577 composite_char_string (Emchar ch) | 3577 composite_char_string (Ichar ch) |
3578 { | 3578 { |
3579 Lisp_Object str = Fgethash (make_char (ch), | 3579 Lisp_Object str = Fgethash (make_char (ch), |
3580 Vcomposite_char_char2string_hash_table, | 3580 Vcomposite_char_char2string_hash_table, |
3581 Qunbound); | 3581 Qunbound); |
3582 assert (!UNBOUNDP (str)); | 3582 assert (!UNBOUNDP (str)); |
3598 DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /* | 3598 DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /* |
3599 Return a string of the characters comprising a composite character. | 3599 Return a string of the characters comprising a composite character. |
3600 */ | 3600 */ |
3601 (ch)) | 3601 (ch)) |
3602 { | 3602 { |
3603 Emchar emch; | 3603 Ichar emch; |
3604 | 3604 |
3605 CHECK_CHAR (ch); | 3605 CHECK_CHAR (ch); |
3606 emch = XCHAR (ch); | 3606 emch = XCHAR (ch); |
3607 if (emchar_leading_byte (emch) != LEADING_BYTE_COMPOSITE) | 3607 if (ichar_leading_byte (emch) != LEADING_BYTE_COMPOSITE) |
3608 invalid_argument ("Must be composite char", ch); | 3608 invalid_argument ("Must be composite char", ch); |
3609 return composite_char_string (emch); | 3609 return composite_char_string (emch); |
3610 } | 3610 } |
3611 #endif /* ENABLE_COMPOSITE_CHARS */ | 3611 #endif /* ENABLE_COMPOSITE_CHARS */ |
3612 | 3612 |
3648 void | 3648 void |
3649 reinit_vars_of_text (void) | 3649 reinit_vars_of_text (void) |
3650 { | 3650 { |
3651 int i; | 3651 int i; |
3652 | 3652 |
3653 conversion_in_dynarr_list = Dynarr_new2 (Intbyte_dynarr_dynarr, | 3653 conversion_in_dynarr_list = Dynarr_new2 (Ibyte_dynarr_dynarr, |
3654 Intbyte_dynarr *); | 3654 Ibyte_dynarr *); |
3655 conversion_out_dynarr_list = Dynarr_new2 (Extbyte_dynarr_dynarr, | 3655 conversion_out_dynarr_list = Dynarr_new2 (Extbyte_dynarr_dynarr, |
3656 Extbyte_dynarr *); | 3656 Extbyte_dynarr *); |
3657 | 3657 |
3658 /* #### Olivier, why does this need to be reinitted? */ | 3658 /* #### Olivier, why does this need to be reinitted? */ |
3659 for (i = 0; i <= MAX_BYTEBPOS_GAP_SIZE_3; i++) | 3659 for (i = 0; i <= MAX_BYTEBPOS_GAP_SIZE_3; i++) |