comparison src/unicode.c @ 867:804517e16990

[xemacs-hg @ 2002-06-05 09:54:39 by ben] Textual renaming: text/char names abbrev.c, alloc.c, buffer.c, buffer.h, bytecode.c, callint.c, casefiddle.c, casetab.c, charset.h, chartab.c, chartab.h, cmds.c, console-gtk.h, console-msw.c, console-msw.h, console-stream.c, console-tty.c, console-x.c, console-x.h, console.h, data.c, device-msw.c, device-x.c, dialog-msw.c, dired-msw.c, dired.c, doc.c, doprnt.c, editfns.c, eldap.c, emodules.c, eval.c, event-Xt.c, event-gtk.c, event-msw.c, event-stream.c, event-unixoid.c, events.c, events.h, file-coding.c, file-coding.h, fileio.c, filelock.c, fns.c, font-lock.c, frame-gtk.c, frame-msw.c, frame-x.c, frame.c, glyphs-eimage.c, glyphs-msw.c, glyphs-x.c, glyphs.c, glyphs.h, gpmevent.c, gui-x.c, gui-x.h, gui.c, gui.h, hpplay.c, indent.c, insdel.c, insdel.h, intl-win32.c, keymap.c, line-number.c, line-number.h, lisp-disunion.h, lisp-union.h, lisp.h, lread.c, lrecord.h, lstream.c, lstream.h, md5.c, menubar-msw.c, menubar-x.c, menubar.c, minibuf.c, mule-ccl.c, mule-charset.c, mule-coding.c, mule-wnnfns.c, ndir.h, nt.c, objects-gtk.c, objects-gtk.h, objects-msw.c, objects-tty.c, objects-x.c, objects.c, objects.h, postgresql.c, print.c, process-nt.c, process-unix.c, process.c, procimpl.h, realpath.c, redisplay-gtk.c, redisplay-msw.c, redisplay-output.c, redisplay-tty.c, redisplay-x.c, redisplay.c, redisplay.h, regex.c, search.c, select-common.h, select-gtk.c, select-x.c, sound.h, symbols.c, syntax.c, syntax.h, sysdep.c, sysdep.h, sysdir.h, sysfile.h, sysproc.h, syspwd.h, systime.h, syswindows.h, termcap.c, tests.c, text.c, text.h, toolbar-common.c, tooltalk.c, ui-gtk.c, unexnt.c, unicode.c, win32.c: Text/char naming rationalization. [a] distinguish between "charptr" when it refers to operations on the pointer itself and when it refers to operations on text; and [b] use consistent naming for everything referring to internal format, i.e. 
Itext == text in internal format; Ibyte == a byte in such text; Ichar == a char as represented in internal character format. Thus, e.g., set_charptr_emchar -> set_itext_ichar. The pre and post tags on either side of this change are pre-internal-format-textual-renaming and post-internal-format-textual-renaming. See the Internals Manual for details of exactly how this was done, how to handle the change in your workspace, etc.
author ben
date Wed, 05 Jun 2002 09:58:45 +0000
parents 6728e641994e
children 79c6ff3eef26
comparison
equal deleted inserted replaced
866:613552a02607 867:804517e16990
59 default empty table; that way, memory usage is more reasonable but 59 default empty table; that way, memory usage is more reasonable but
60 lookup still fast. 60 lookup still fast.
61 61
62 -- If from_unicode_levels == 1, from_unicode_table is a 256-element 62 -- If from_unicode_levels == 1, from_unicode_table is a 256-element
63 array of shorts (octet 1 in high byte, octet 2 in low byte; we don't 63 array of shorts (octet 1 in high byte, octet 2 in low byte; we don't
64 store Emchars directly to save space). 64 store Ichars directly to save space).
65 65
66 -- If from_unicode_levels == 2, from_unicode_table is a 66 -- If from_unicode_levels == 2, from_unicode_table is a
67 256-element array of short * pointers, each of which points to a 67 256-element array of short * pointers, each of which points to a
68 256-element array of shorts. 68 256-element array of shorts.
69 69
584 if (tab[i] != -1) 584 if (tab[i] != -1)
585 { 585 {
586 Lisp_Object char_charset; 586 Lisp_Object char_charset;
587 int c1, c2; 587 int c1, c2;
588 588
589 assert (valid_emchar_p (tab[i])); 589 assert (valid_ichar_p (tab[i]));
590 BREAKUP_EMCHAR (tab[i], char_charset, c1, c2); 590 BREAKUP_ICHAR (tab[i], char_charset, c1, c2);
591 assert (EQ (charset, char_charset)); 591 assert (EQ (charset, char_charset));
592 if (XCHARSET_DIMENSION (charset) == 1) 592 if (XCHARSET_DIMENSION (charset) == 1)
593 { 593 {
594 int *to_table = 594 int *to_table =
595 (int *) XCHARSET_TO_UNICODE_TABLE (charset); 595 (int *) XCHARSET_TO_UNICODE_TABLE (charset);
667 for (i = 0; i < 96; i++) 667 for (i = 0; i < 96; i++)
668 { 668 {
669 if (tab[i] != -1) 669 if (tab[i] != -1)
670 { 670 {
671 int u4, u3, u2, u1, levels; 671 int u4, u3, u2, u1, levels;
672 Emchar ch; 672 Ichar ch;
673 Emchar this_ch; 673 Ichar this_ch;
674 short val; 674 short val;
675 void *frtab = XCHARSET_FROM_UNICODE_TABLE (charset); 675 void *frtab = XCHARSET_FROM_UNICODE_TABLE (charset);
676 676
677 if (XCHARSET_DIMENSION (charset) == 1) 677 if (XCHARSET_DIMENSION (charset) == 1)
678 this_ch = make_emchar (charset, i + 32, 0); 678 this_ch = make_ichar (charset, i + 32, 0);
679 else 679 else
680 this_ch = make_emchar (charset, codetop + 32, i + 32); 680 this_ch = make_ichar (charset, codetop + 32, i + 32);
681 681
682 assert (tab[i] >= 0); 682 assert (tab[i] >= 0);
683 BREAKUP_UNICODE_CODE (tab[i], u4, u3, u2, u1, levels); 683 BREAKUP_UNICODE_CODE (tab[i], u4, u3, u2, u1, levels);
684 assert (levels <= XCHARSET_FROM_UNICODE_LEVELS (charset)); 684 assert (levels <= XCHARSET_FROM_UNICODE_LEVELS (charset));
685 685
690 case 3: val = ((short ***) frtab)[u3][u2][u1]; break; 690 case 3: val = ((short ***) frtab)[u3][u2][u1]; break;
691 case 4: val = ((short ****) frtab)[u4][u3][u2][u1]; break; 691 case 4: val = ((short ****) frtab)[u4][u3][u2][u1]; break;
692 default: abort (); 692 default: abort ();
693 } 693 }
694 694
695 ch = make_emchar (charset, val >> 8, val & 0xFF); 695 ch = make_ichar (charset, val >> 8, val & 0xFF);
696 assert (ch == this_ch); 696 assert (ch == this_ch);
697 697
698 switch (XCHARSET_FROM_UNICODE_LEVELS (charset)) 698 switch (XCHARSET_FROM_UNICODE_LEVELS (charset))
699 { 699 {
700 case 4: 700 case 4:
774 } 774 }
775 775
776 #endif /* SLEDGEHAMMER_CHECK_UNICODE */ 776 #endif /* SLEDGEHAMMER_CHECK_UNICODE */
777 777
778 static void 778 static void
779 set_unicode_conversion (Emchar chr, int code) 779 set_unicode_conversion (Ichar chr, int code)
780 { 780 {
781 Lisp_Object charset; 781 Lisp_Object charset;
782 int c1, c2; 782 int c1, c2;
783 783
784 BREAKUP_EMCHAR (chr, charset, c1, c2); 784 BREAKUP_ICHAR (chr, charset, c1, c2);
785 785
786 assert (!EQ (charset, Vcharset_ascii)); 786 assert (!EQ (charset, Vcharset_ascii));
787 assert (!EQ (charset, Vcharset_control_1)); 787 assert (!EQ (charset, Vcharset_control_1));
788 assert (!EQ (charset, Vcharset_composite)); 788 assert (!EQ (charset, Vcharset_composite));
789 789
911 sledgehammer_check_unicode_tables (charset); 911 sledgehammer_check_unicode_tables (charset);
912 #endif 912 #endif
913 } 913 }
914 914
915 int 915 int
916 emchar_to_unicode (Emchar chr) 916 ichar_to_unicode (Ichar chr)
917 { 917 {
918 Lisp_Object charset; 918 Lisp_Object charset;
919 int c1, c2; 919 int c1, c2;
920 920
921 type_checking_assert (valid_emchar_p (chr)); 921 type_checking_assert (valid_ichar_p (chr));
922 if (chr < 256) 922 if (chr < 256)
923 return (int) chr; 923 return (int) chr;
924 924
925 BREAKUP_EMCHAR (chr, charset, c1, c2); 925 BREAKUP_ICHAR (chr, charset, c1, c2);
926 if (EQ (charset, Vcharset_composite)) 926 if (EQ (charset, Vcharset_composite))
927 return -1; /* #### don't know how to handle */ 927 return -1; /* #### don't know how to handle */
928 else if (XCHARSET_DIMENSION (charset) == 1) 928 else if (XCHARSET_DIMENSION (charset) == 1)
929 return ((int *) XCHARSET_TO_UNICODE_TABLE (charset))[c1 - 32]; 929 return ((int *) XCHARSET_TO_UNICODE_TABLE (charset))[c1 - 32];
930 else 930 else
931 return ((int **) XCHARSET_TO_UNICODE_TABLE (charset))[c1 - 32][c2 - 32]; 931 return ((int **) XCHARSET_TO_UNICODE_TABLE (charset))[c1 - 32][c2 - 32];
932 } 932 }
933 933
934 static Emchar 934 static Ichar
935 unicode_to_char (int code, Lisp_Object_dynarr *charsets) 935 unicode_to_char (int code, Lisp_Object_dynarr *charsets)
936 { 936 {
937 int u1, u2, u3, u4; 937 int u1, u2, u3, u4;
938 int code_levels; 938 int code_levels;
939 int i; 939 int i;
940 int n = Dynarr_length (charsets); 940 int n = Dynarr_length (charsets);
941 941
942 type_checking_assert (code >= 0); 942 type_checking_assert (code >= 0);
943 if (code < 256) 943 if (code < 256)
944 return (Emchar) code; 944 return (Ichar) code;
945 945
946 BREAKUP_UNICODE_CODE (code, u4, u3, u2, u1, code_levels); 946 BREAKUP_UNICODE_CODE (code, u4, u3, u2, u1, code_levels);
947 947
948 for (i = 0; i < n; i++) 948 for (i = 0; i < n; i++)
949 { 949 {
962 case 4: retval = ((short ****) table)[u4][u3][u2][u1]; break; 962 case 4: retval = ((short ****) table)[u4][u3][u2][u1]; break;
963 default: abort (); retval = 0; 963 default: abort (); retval = 0;
964 } 964 }
965 965
966 if (retval != -1) 966 if (retval != -1)
967 return make_emchar (charset, retval >> 8, retval & 0xFF); 967 return make_ichar (charset, retval >> 8, retval & 0xFF);
968 } 968 }
969 } 969 }
970 970
971 return (Emchar) -1; 971 return (Ichar) -1;
972 } 972 }
973 973
974 static void 974 static void
975 add_charsets_to_precedence_list (Lisp_Object list, int *lbs, 975 add_charsets_to_precedence_list (Lisp_Object list, int *lbs,
976 Lisp_Object_dynarr *dynarr) 976 Lisp_Object_dynarr *dynarr)
1098 Lisp_Object charset; 1098 Lisp_Object charset;
1099 1099
1100 CHECK_CHAR (character); 1100 CHECK_CHAR (character);
1101 CHECK_NATNUM (code); 1101 CHECK_NATNUM (code);
1102 1102
1103 charset = emchar_charset (XCHAR (character)); 1103 charset = ichar_charset (XCHAR (character));
1104 if (EQ (charset, Vcharset_ascii) || 1104 if (EQ (charset, Vcharset_ascii) ||
1105 EQ (charset, Vcharset_control_1) || 1105 EQ (charset, Vcharset_control_1) ||
1106 EQ (charset, Vcharset_composite)) 1106 EQ (charset, Vcharset_composite))
1107 signal_error (Qinvalid_argument, "Cannot set Unicode translation for ASCII, Control-1 or Composite chars", 1107 signal_error (Qinvalid_argument, "Cannot set Unicode translation for ASCII, Control-1 or Composite chars",
1108 character); 1108 character);
1120 */ 1120 */
1121 (character)) 1121 (character))
1122 { 1122 {
1123 CHECK_CHAR (character); 1123 CHECK_CHAR (character);
1124 #ifdef MULE 1124 #ifdef MULE
1125 return make_int (emchar_to_unicode (XCHAR (character))); 1125 return make_int (ichar_to_unicode (XCHAR (character)));
1126 #else 1126 #else
1127 return Fchar_to_int (character); 1127 return Fchar_to_int (character);
1128 #endif /* MULE */ 1128 #endif /* MULE */
1129 } 1129 }
1130 1130
1154 Fget_charset (elt); 1154 Fget_charset (elt);
1155 } 1155 }
1156 1156
1157 if (NILP (charsets)) 1157 if (NILP (charsets))
1158 { 1158 {
1159 Emchar ret = unicode_to_char (c, unicode_precedence_dynarr); 1159 Ichar ret = unicode_to_char (c, unicode_precedence_dynarr);
1160 if (ret == -1) 1160 if (ret == -1)
1161 return Qnil; 1161 return Qnil;
1162 return make_char (ret); 1162 return make_char (ret);
1163 } 1163 }
1164 1164
1165 dyn = Dynarr_new (Lisp_Object); 1165 dyn = Dynarr_new (Lisp_Object);
1166 memset (lbs, 0, NUM_LEADING_BYTES * sizeof (int)); 1166 memset (lbs, 0, NUM_LEADING_BYTES * sizeof (int));
1167 add_charsets_to_precedence_list (charsets, lbs, dyn); 1167 add_charsets_to_precedence_list (charsets, lbs, dyn);
1168 { 1168 {
1169 Emchar ret = unicode_to_char (c, unicode_precedence_dynarr); 1169 Ichar ret = unicode_to_char (c, unicode_precedence_dynarr);
1170 Dynarr_free (dyn); 1170 Dynarr_free (dyn);
1171 if (ret == -1) 1171 if (ret == -1)
1172 return Qnil; 1172 return Qnil;
1173 return make_char (ret); 1173 return make_char (ret);
1174 } 1174 }
1321 cp1high = cp1 >> 8; 1321 cp1high = cp1 >> 8;
1322 cp1low = cp1 & 255; 1322 cp1low = cp1 & 255;
1323 1323
1324 if (big5) 1324 if (big5)
1325 { 1325 {
1326 Emchar ch = decode_big5_char (cp1high, cp1low); 1326 Ichar ch = decode_big5_char (cp1high, cp1low);
1327 if (ch == -1) 1327 if (ch == -1)
1328 1328
1329 warn_when_safe (Qunicode, Qwarning, 1329 warn_when_safe (Qunicode, Qwarning,
1330 "Out of range Big5 codepoint 0x%x in " 1330 "Out of range Big5 codepoint 0x%x in "
1331 "translation file %s:\n%s", 1331 "translation file %s:\n%s",
1334 set_unicode_conversion (ch, cp2); 1334 set_unicode_conversion (ch, cp2);
1335 } 1335 }
1336 else 1336 else
1337 { 1337 {
1338 int l1, h1, l2, h2; 1338 int l1, h1, l2, h2;
1339 Emchar emch; 1339 Ichar emch;
1340 1340
1341 switch (XCHARSET_TYPE (charset)) 1341 switch (XCHARSET_TYPE (charset))
1342 { 1342 {
1343 case CHARSET_TYPE_94: l1 = 33; h1 = 126; l2 = 0; h2 = 0; break; 1343 case CHARSET_TYPE_94: l1 = 33; h1 = 126; l2 = 0; h2 = 0; break;
1344 case CHARSET_TYPE_96: l1 = 32; h1 = 127; l2 = 0; h2 = 0; break; 1344 case CHARSET_TYPE_96: l1 = 32; h1 = 127; l2 = 0; h2 = 0; break;
1350 } 1350 }
1351 1351
1352 if (cp1high < l2 || cp1high > h2 || cp1low < l1 || cp1low > h1) 1352 if (cp1high < l2 || cp1high > h2 || cp1low < l1 || cp1low > h1)
1353 goto out_of_range; 1353 goto out_of_range;
1354 1354
1355 emch = (cp1high == 0 ? make_emchar (charset, cp1low, 0) : 1355 emch = (cp1high == 0 ? make_ichar (charset, cp1low, 0) :
1356 make_emchar (charset, cp1high, cp1low)); 1356 make_ichar (charset, cp1high, cp1low));
1357 set_unicode_conversion (emch, cp2); 1357 set_unicode_conversion (emch, cp2);
1358 } 1358 }
1359 } 1359 }
1360 } 1360 }
1361 1361
1432 if (ch == 0xFEFF && !data->seen_char && ignore_bom) 1432 if (ch == 0xFEFF && !data->seen_char && ignore_bom)
1433 ; 1433 ;
1434 else 1434 else
1435 { 1435 {
1436 #ifdef MULE 1436 #ifdef MULE
1437 Emchar chr = unicode_to_char (ch, unicode_precedence_dynarr); 1437 Ichar chr = unicode_to_char (ch, unicode_precedence_dynarr);
1438 1438
1439 if (chr != -1) 1439 if (chr != -1)
1440 { 1440 {
1441 Intbyte work[MAX_EMCHAR_LEN]; 1441 Ibyte work[MAX_ICHAR_LEN];
1442 int len; 1442 int len;
1443 1443
1444 len = set_charptr_emchar (work, chr); 1444 len = set_itext_ichar (work, chr);
1445 Dynarr_add_many (dst, work, len); 1445 Dynarr_add_many (dst, work, len);
1446 } 1446 }
1447 else 1447 else
1448 { 1448 {
1449 Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208); 1449 Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208);
1450 Dynarr_add (dst, 34 + 128); 1450 Dynarr_add (dst, 34 + 128);
1451 Dynarr_add (dst, 46 + 128); 1451 Dynarr_add (dst, 46 + 128);
1452 } 1452 }
1453 #else 1453 #else
1454 Dynarr_add (dst, (Intbyte) ch); 1454 Dynarr_add (dst, (Ibyte) ch);
1455 #endif /* MULE */ 1455 #endif /* MULE */
1456 } 1456 }
1457 1457
1458 data->seen_char = 1; 1458 data->seen_char = 1;
1459 } 1459 }
1546 encode_unicode_char (Lisp_Object charset, int h, int l, 1546 encode_unicode_char (Lisp_Object charset, int h, int l,
1547 unsigned_char_dynarr *dst, enum unicode_type type, 1547 unsigned_char_dynarr *dst, enum unicode_type type,
1548 int little_endian) 1548 int little_endian)
1549 { 1549 {
1550 #ifdef MULE 1550 #ifdef MULE
1551 int code = emchar_to_unicode (make_emchar (charset, h & 127, l & 127)); 1551 int code = ichar_to_unicode (make_ichar (charset, h & 127, l & 127));
1552 1552
1553 if (code == -1) 1553 if (code == -1)
1554 { 1554 {
1555 if (type != UNICODE_UTF_16 && 1555 if (type != UNICODE_UTF_16 &&
1556 XCHARSET_DIMENSION (charset) == 2 && 1556 XCHARSET_DIMENSION (charset) == 2 &&
1695 1695
1696 #ifdef ENABLE_COMPOSITE_CHARS 1696 #ifdef ENABLE_COMPOSITE_CHARS
1697 /* flags for handling composite chars. We do a little switcheroo 1697 /* flags for handling composite chars. We do a little switcheroo
1698 on the source while we're outputting the composite char. */ 1698 on the source while we're outputting the composite char. */
1699 Bytecount saved_n = 0; 1699 Bytecount saved_n = 0;
1700 const Intbyte *saved_src = NULL; 1700 const Ibyte *saved_src = NULL;
1701 int in_composite = 0; 1701 int in_composite = 0;
1702 1702
1703 back_to_square_n: 1703 back_to_square_n:
1704 #endif /* ENABLE_COMPOSITE_CHARS */ 1704 #endif /* ENABLE_COMPOSITE_CHARS */
1705 1705
1709 data->wrote_bom = 1; 1709 data->wrote_bom = 1;
1710 } 1710 }
1711 1711
1712 while (n--) 1712 while (n--)
1713 { 1713 {
1714 Intbyte c = *src++; 1714 Ibyte c = *src++;
1715 1715
1716 #ifdef MULE 1716 #ifdef MULE
1717 if (byte_ascii_p (c)) 1717 if (byte_ascii_p (c))
1718 #endif /* MULE */ 1718 #endif /* MULE */
1719 { /* Processing ASCII character */ 1719 { /* Processing ASCII character */
1722 little_endian); 1722 little_endian);
1723 1723
1724 char_boundary = 1; 1724 char_boundary = 1;
1725 } 1725 }
1726 #ifdef MULE 1726 #ifdef MULE
1727 else if (intbyte_leading_byte_p (c) || intbyte_leading_byte_p (ch)) 1727 else if (ibyte_leading_byte_p (c) || ibyte_leading_byte_p (ch))
1728 { /* Processing Leading Byte */ 1728 { /* Processing Leading Byte */
1729 ch = 0; 1729 ch = 0;
1730 charset = charset_by_leading_byte (c); 1730 charset = charset_by_leading_byte (c);
1731 if (leading_byte_prefix_p(c)) 1731 if (leading_byte_prefix_p(c))
1732 ch = c; 1732 ch = c;
1766 dst, type, 1766 dst, type,
1767 little_endian); 1767 little_endian);
1768 } 1768 }
1769 else 1769 else
1770 { 1770 {
1771 Emchar emch = make_emchar (Vcharset_composite, 1771 Ichar emch = make_ichar (Vcharset_composite,
1772 ch & 0x7F, 1772 ch & 0x7F,
1773 c & 0x7F); 1773 c & 0x7F);
1774 Lisp_Object lstr = 1774 Lisp_Object lstr =
1775 composite_char_string (emch); 1775 composite_char_string (emch);
1776 saved_n = n; 1776 saved_n = n;