Mercurial > hg > xemacs-beta
comparison src/unicode.c @ 867:804517e16990
[xemacs-hg @ 2002-06-05 09:54:39 by ben]
Textual renaming: text/char names
abbrev.c, alloc.c, buffer.c, buffer.h, bytecode.c, callint.c, casefiddle.c, casetab.c, charset.h, chartab.c, chartab.h, cmds.c, console-gtk.h, console-msw.c, console-msw.h, console-stream.c, console-tty.c, console-x.c, console-x.h, console.h, data.c, device-msw.c, device-x.c, dialog-msw.c, dired-msw.c, dired.c, doc.c, doprnt.c, editfns.c, eldap.c, emodules.c, eval.c, event-Xt.c, event-gtk.c, event-msw.c, event-stream.c, event-unixoid.c, events.c, events.h, file-coding.c, file-coding.h, fileio.c, filelock.c, fns.c, font-lock.c, frame-gtk.c, frame-msw.c, frame-x.c, frame.c, glyphs-eimage.c, glyphs-msw.c, glyphs-x.c, glyphs.c, glyphs.h, gpmevent.c, gui-x.c, gui-x.h, gui.c, gui.h, hpplay.c, indent.c, insdel.c, insdel.h, intl-win32.c, keymap.c, line-number.c, line-number.h, lisp-disunion.h, lisp-union.h, lisp.h, lread.c, lrecord.h, lstream.c, lstream.h, md5.c, menubar-msw.c, menubar-x.c, menubar.c, minibuf.c, mule-ccl.c, mule-charset.c, mule-coding.c, mule-wnnfns.c, ndir.h, nt.c, objects-gtk.c, objects-gtk.h, objects-msw.c, objects-tty.c, objects-x.c, objects.c, objects.h, postgresql.c, print.c, process-nt.c, process-unix.c, process.c, procimpl.h, realpath.c, redisplay-gtk.c, redisplay-msw.c, redisplay-output.c, redisplay-tty.c, redisplay-x.c, redisplay.c, redisplay.h, regex.c, search.c, select-common.h, select-gtk.c, select-x.c, sound.h, symbols.c, syntax.c, syntax.h, sysdep.c, sysdep.h, sysdir.h, sysfile.h, sysproc.h, syspwd.h, systime.h, syswindows.h, termcap.c, tests.c, text.c, text.h, toolbar-common.c, tooltalk.c, ui-gtk.c, unexnt.c, unicode.c, win32.c: Text/char naming rationalization.
[a] distinguish between "charptr" when it refers to operations on
the pointer itself and when it refers to operations on text; and
[b] use consistent naming for everything referring to internal
format, i.e.
Itext == text in internal format
Ibyte == a byte in such text
Ichar == a char as represented in internal character format
Thus, e.g.,
set_charptr_emchar -> set_itext_ichar
The pre and post tags on either side of this change are:
pre-internal-format-textual-renaming
post-internal-format-textual-renaming
See the Internals Manual for details of exactly how this was done,
how to handle the change in your workspace, etc.
author | ben |
---|---|
date | Wed, 05 Jun 2002 09:58:45 +0000 |
parents | 6728e641994e |
children | 79c6ff3eef26 |
Comparison legend: equal | deleted | inserted | replaced
866:613552a02607 | 867:804517e16990 |
---|---|
59 default empty table; that way, memory usage is more reasonable but | 59 default empty table; that way, memory usage is more reasonable but |
60 lookup still fast. | 60 lookup still fast. |
61 | 61 |
62 -- If from_unicode_levels == 1, from_unicode_table is a 256-element | 62 -- If from_unicode_levels == 1, from_unicode_table is a 256-element |
63 array of shorts (octet 1 in high byte, octet 2 in low byte; we don't | 63 array of shorts (octet 1 in high byte, octet 2 in low byte; we don't |
64 store Emchars directly to save space). | 64 store Ichars directly to save space). |
65 | 65 |
66 -- If from_unicode_levels == 2, from_unicode_table is a | 66 -- If from_unicode_levels == 2, from_unicode_table is a |
67 256-element array of short * pointers, each of which points to a | 67 256-element array of short * pointers, each of which points to a |
68 256-element array of shorts. | 68 256-element array of shorts. |
69 | 69 |
584 if (tab[i] != -1) | 584 if (tab[i] != -1) |
585 { | 585 { |
586 Lisp_Object char_charset; | 586 Lisp_Object char_charset; |
587 int c1, c2; | 587 int c1, c2; |
588 | 588 |
589 assert (valid_emchar_p (tab[i])); | 589 assert (valid_ichar_p (tab[i])); |
590 BREAKUP_EMCHAR (tab[i], char_charset, c1, c2); | 590 BREAKUP_ICHAR (tab[i], char_charset, c1, c2); |
591 assert (EQ (charset, char_charset)); | 591 assert (EQ (charset, char_charset)); |
592 if (XCHARSET_DIMENSION (charset) == 1) | 592 if (XCHARSET_DIMENSION (charset) == 1) |
593 { | 593 { |
594 int *to_table = | 594 int *to_table = |
595 (int *) XCHARSET_TO_UNICODE_TABLE (charset); | 595 (int *) XCHARSET_TO_UNICODE_TABLE (charset); |
667 for (i = 0; i < 96; i++) | 667 for (i = 0; i < 96; i++) |
668 { | 668 { |
669 if (tab[i] != -1) | 669 if (tab[i] != -1) |
670 { | 670 { |
671 int u4, u3, u2, u1, levels; | 671 int u4, u3, u2, u1, levels; |
672 Emchar ch; | 672 Ichar ch; |
673 Emchar this_ch; | 673 Ichar this_ch; |
674 short val; | 674 short val; |
675 void *frtab = XCHARSET_FROM_UNICODE_TABLE (charset); | 675 void *frtab = XCHARSET_FROM_UNICODE_TABLE (charset); |
676 | 676 |
677 if (XCHARSET_DIMENSION (charset) == 1) | 677 if (XCHARSET_DIMENSION (charset) == 1) |
678 this_ch = make_emchar (charset, i + 32, 0); | 678 this_ch = make_ichar (charset, i + 32, 0); |
679 else | 679 else |
680 this_ch = make_emchar (charset, codetop + 32, i + 32); | 680 this_ch = make_ichar (charset, codetop + 32, i + 32); |
681 | 681 |
682 assert (tab[i] >= 0); | 682 assert (tab[i] >= 0); |
683 BREAKUP_UNICODE_CODE (tab[i], u4, u3, u2, u1, levels); | 683 BREAKUP_UNICODE_CODE (tab[i], u4, u3, u2, u1, levels); |
684 assert (levels <= XCHARSET_FROM_UNICODE_LEVELS (charset)); | 684 assert (levels <= XCHARSET_FROM_UNICODE_LEVELS (charset)); |
685 | 685 |
690 case 3: val = ((short ***) frtab)[u3][u2][u1]; break; | 690 case 3: val = ((short ***) frtab)[u3][u2][u1]; break; |
691 case 4: val = ((short ****) frtab)[u4][u3][u2][u1]; break; | 691 case 4: val = ((short ****) frtab)[u4][u3][u2][u1]; break; |
692 default: abort (); | 692 default: abort (); |
693 } | 693 } |
694 | 694 |
695 ch = make_emchar (charset, val >> 8, val & 0xFF); | 695 ch = make_ichar (charset, val >> 8, val & 0xFF); |
696 assert (ch == this_ch); | 696 assert (ch == this_ch); |
697 | 697 |
698 switch (XCHARSET_FROM_UNICODE_LEVELS (charset)) | 698 switch (XCHARSET_FROM_UNICODE_LEVELS (charset)) |
699 { | 699 { |
700 case 4: | 700 case 4: |
774 } | 774 } |
775 | 775 |
776 #endif /* SLEDGEHAMMER_CHECK_UNICODE */ | 776 #endif /* SLEDGEHAMMER_CHECK_UNICODE */ |
777 | 777 |
778 static void | 778 static void |
779 set_unicode_conversion (Emchar chr, int code) | 779 set_unicode_conversion (Ichar chr, int code) |
780 { | 780 { |
781 Lisp_Object charset; | 781 Lisp_Object charset; |
782 int c1, c2; | 782 int c1, c2; |
783 | 783 |
784 BREAKUP_EMCHAR (chr, charset, c1, c2); | 784 BREAKUP_ICHAR (chr, charset, c1, c2); |
785 | 785 |
786 assert (!EQ (charset, Vcharset_ascii)); | 786 assert (!EQ (charset, Vcharset_ascii)); |
787 assert (!EQ (charset, Vcharset_control_1)); | 787 assert (!EQ (charset, Vcharset_control_1)); |
788 assert (!EQ (charset, Vcharset_composite)); | 788 assert (!EQ (charset, Vcharset_composite)); |
789 | 789 |
911 sledgehammer_check_unicode_tables (charset); | 911 sledgehammer_check_unicode_tables (charset); |
912 #endif | 912 #endif |
913 } | 913 } |
914 | 914 |
915 int | 915 int |
916 emchar_to_unicode (Emchar chr) | 916 ichar_to_unicode (Ichar chr) |
917 { | 917 { |
918 Lisp_Object charset; | 918 Lisp_Object charset; |
919 int c1, c2; | 919 int c1, c2; |
920 | 920 |
921 type_checking_assert (valid_emchar_p (chr)); | 921 type_checking_assert (valid_ichar_p (chr)); |
922 if (chr < 256) | 922 if (chr < 256) |
923 return (int) chr; | 923 return (int) chr; |
924 | 924 |
925 BREAKUP_EMCHAR (chr, charset, c1, c2); | 925 BREAKUP_ICHAR (chr, charset, c1, c2); |
926 if (EQ (charset, Vcharset_composite)) | 926 if (EQ (charset, Vcharset_composite)) |
927 return -1; /* #### don't know how to handle */ | 927 return -1; /* #### don't know how to handle */ |
928 else if (XCHARSET_DIMENSION (charset) == 1) | 928 else if (XCHARSET_DIMENSION (charset) == 1) |
929 return ((int *) XCHARSET_TO_UNICODE_TABLE (charset))[c1 - 32]; | 929 return ((int *) XCHARSET_TO_UNICODE_TABLE (charset))[c1 - 32]; |
930 else | 930 else |
931 return ((int **) XCHARSET_TO_UNICODE_TABLE (charset))[c1 - 32][c2 - 32]; | 931 return ((int **) XCHARSET_TO_UNICODE_TABLE (charset))[c1 - 32][c2 - 32]; |
932 } | 932 } |
933 | 933 |
934 static Emchar | 934 static Ichar |
935 unicode_to_char (int code, Lisp_Object_dynarr *charsets) | 935 unicode_to_char (int code, Lisp_Object_dynarr *charsets) |
936 { | 936 { |
937 int u1, u2, u3, u4; | 937 int u1, u2, u3, u4; |
938 int code_levels; | 938 int code_levels; |
939 int i; | 939 int i; |
940 int n = Dynarr_length (charsets); | 940 int n = Dynarr_length (charsets); |
941 | 941 |
942 type_checking_assert (code >= 0); | 942 type_checking_assert (code >= 0); |
943 if (code < 256) | 943 if (code < 256) |
944 return (Emchar) code; | 944 return (Ichar) code; |
945 | 945 |
946 BREAKUP_UNICODE_CODE (code, u4, u3, u2, u1, code_levels); | 946 BREAKUP_UNICODE_CODE (code, u4, u3, u2, u1, code_levels); |
947 | 947 |
948 for (i = 0; i < n; i++) | 948 for (i = 0; i < n; i++) |
949 { | 949 { |
962 case 4: retval = ((short ****) table)[u4][u3][u2][u1]; break; | 962 case 4: retval = ((short ****) table)[u4][u3][u2][u1]; break; |
963 default: abort (); retval = 0; | 963 default: abort (); retval = 0; |
964 } | 964 } |
965 | 965 |
966 if (retval != -1) | 966 if (retval != -1) |
967 return make_emchar (charset, retval >> 8, retval & 0xFF); | 967 return make_ichar (charset, retval >> 8, retval & 0xFF); |
968 } | 968 } |
969 } | 969 } |
970 | 970 |
971 return (Emchar) -1; | 971 return (Ichar) -1; |
972 } | 972 } |
973 | 973 |
974 static void | 974 static void |
975 add_charsets_to_precedence_list (Lisp_Object list, int *lbs, | 975 add_charsets_to_precedence_list (Lisp_Object list, int *lbs, |
976 Lisp_Object_dynarr *dynarr) | 976 Lisp_Object_dynarr *dynarr) |
1098 Lisp_Object charset; | 1098 Lisp_Object charset; |
1099 | 1099 |
1100 CHECK_CHAR (character); | 1100 CHECK_CHAR (character); |
1101 CHECK_NATNUM (code); | 1101 CHECK_NATNUM (code); |
1102 | 1102 |
1103 charset = emchar_charset (XCHAR (character)); | 1103 charset = ichar_charset (XCHAR (character)); |
1104 if (EQ (charset, Vcharset_ascii) || | 1104 if (EQ (charset, Vcharset_ascii) || |
1105 EQ (charset, Vcharset_control_1) || | 1105 EQ (charset, Vcharset_control_1) || |
1106 EQ (charset, Vcharset_composite)) | 1106 EQ (charset, Vcharset_composite)) |
1107 signal_error (Qinvalid_argument, "Cannot set Unicode translation for ASCII, Control-1 or Composite chars", | 1107 signal_error (Qinvalid_argument, "Cannot set Unicode translation for ASCII, Control-1 or Composite chars", |
1108 character); | 1108 character); |
1120 */ | 1120 */ |
1121 (character)) | 1121 (character)) |
1122 { | 1122 { |
1123 CHECK_CHAR (character); | 1123 CHECK_CHAR (character); |
1124 #ifdef MULE | 1124 #ifdef MULE |
1125 return make_int (emchar_to_unicode (XCHAR (character))); | 1125 return make_int (ichar_to_unicode (XCHAR (character))); |
1126 #else | 1126 #else |
1127 return Fchar_to_int (character); | 1127 return Fchar_to_int (character); |
1128 #endif /* MULE */ | 1128 #endif /* MULE */ |
1129 } | 1129 } |
1130 | 1130 |
1154 Fget_charset (elt); | 1154 Fget_charset (elt); |
1155 } | 1155 } |
1156 | 1156 |
1157 if (NILP (charsets)) | 1157 if (NILP (charsets)) |
1158 { | 1158 { |
1159 Emchar ret = unicode_to_char (c, unicode_precedence_dynarr); | 1159 Ichar ret = unicode_to_char (c, unicode_precedence_dynarr); |
1160 if (ret == -1) | 1160 if (ret == -1) |
1161 return Qnil; | 1161 return Qnil; |
1162 return make_char (ret); | 1162 return make_char (ret); |
1163 } | 1163 } |
1164 | 1164 |
1165 dyn = Dynarr_new (Lisp_Object); | 1165 dyn = Dynarr_new (Lisp_Object); |
1166 memset (lbs, 0, NUM_LEADING_BYTES * sizeof (int)); | 1166 memset (lbs, 0, NUM_LEADING_BYTES * sizeof (int)); |
1167 add_charsets_to_precedence_list (charsets, lbs, dyn); | 1167 add_charsets_to_precedence_list (charsets, lbs, dyn); |
1168 { | 1168 { |
1169 Emchar ret = unicode_to_char (c, unicode_precedence_dynarr); | 1169 Ichar ret = unicode_to_char (c, unicode_precedence_dynarr); |
1170 Dynarr_free (dyn); | 1170 Dynarr_free (dyn); |
1171 if (ret == -1) | 1171 if (ret == -1) |
1172 return Qnil; | 1172 return Qnil; |
1173 return make_char (ret); | 1173 return make_char (ret); |
1174 } | 1174 } |
1321 cp1high = cp1 >> 8; | 1321 cp1high = cp1 >> 8; |
1322 cp1low = cp1 & 255; | 1322 cp1low = cp1 & 255; |
1323 | 1323 |
1324 if (big5) | 1324 if (big5) |
1325 { | 1325 { |
1326 Emchar ch = decode_big5_char (cp1high, cp1low); | 1326 Ichar ch = decode_big5_char (cp1high, cp1low); |
1327 if (ch == -1) | 1327 if (ch == -1) |
1328 | 1328 |
1329 warn_when_safe (Qunicode, Qwarning, | 1329 warn_when_safe (Qunicode, Qwarning, |
1330 "Out of range Big5 codepoint 0x%x in " | 1330 "Out of range Big5 codepoint 0x%x in " |
1331 "translation file %s:\n%s", | 1331 "translation file %s:\n%s", |
1334 set_unicode_conversion (ch, cp2); | 1334 set_unicode_conversion (ch, cp2); |
1335 } | 1335 } |
1336 else | 1336 else |
1337 { | 1337 { |
1338 int l1, h1, l2, h2; | 1338 int l1, h1, l2, h2; |
1339 Emchar emch; | 1339 Ichar emch; |
1340 | 1340 |
1341 switch (XCHARSET_TYPE (charset)) | 1341 switch (XCHARSET_TYPE (charset)) |
1342 { | 1342 { |
1343 case CHARSET_TYPE_94: l1 = 33; h1 = 126; l2 = 0; h2 = 0; break; | 1343 case CHARSET_TYPE_94: l1 = 33; h1 = 126; l2 = 0; h2 = 0; break; |
1344 case CHARSET_TYPE_96: l1 = 32; h1 = 127; l2 = 0; h2 = 0; break; | 1344 case CHARSET_TYPE_96: l1 = 32; h1 = 127; l2 = 0; h2 = 0; break; |
1350 } | 1350 } |
1351 | 1351 |
1352 if (cp1high < l2 || cp1high > h2 || cp1low < l1 || cp1low > h1) | 1352 if (cp1high < l2 || cp1high > h2 || cp1low < l1 || cp1low > h1) |
1353 goto out_of_range; | 1353 goto out_of_range; |
1354 | 1354 |
1355 emch = (cp1high == 0 ? make_emchar (charset, cp1low, 0) : | 1355 emch = (cp1high == 0 ? make_ichar (charset, cp1low, 0) : |
1356 make_emchar (charset, cp1high, cp1low)); | 1356 make_ichar (charset, cp1high, cp1low)); |
1357 set_unicode_conversion (emch, cp2); | 1357 set_unicode_conversion (emch, cp2); |
1358 } | 1358 } |
1359 } | 1359 } |
1360 } | 1360 } |
1361 | 1361 |
1432 if (ch == 0xFEFF && !data->seen_char && ignore_bom) | 1432 if (ch == 0xFEFF && !data->seen_char && ignore_bom) |
1433 ; | 1433 ; |
1434 else | 1434 else |
1435 { | 1435 { |
1436 #ifdef MULE | 1436 #ifdef MULE |
1437 Emchar chr = unicode_to_char (ch, unicode_precedence_dynarr); | 1437 Ichar chr = unicode_to_char (ch, unicode_precedence_dynarr); |
1438 | 1438 |
1439 if (chr != -1) | 1439 if (chr != -1) |
1440 { | 1440 { |
1441 Intbyte work[MAX_EMCHAR_LEN]; | 1441 Ibyte work[MAX_ICHAR_LEN]; |
1442 int len; | 1442 int len; |
1443 | 1443 |
1444 len = set_charptr_emchar (work, chr); | 1444 len = set_itext_ichar (work, chr); |
1445 Dynarr_add_many (dst, work, len); | 1445 Dynarr_add_many (dst, work, len); |
1446 } | 1446 } |
1447 else | 1447 else |
1448 { | 1448 { |
1449 Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208); | 1449 Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208); |
1450 Dynarr_add (dst, 34 + 128); | 1450 Dynarr_add (dst, 34 + 128); |
1451 Dynarr_add (dst, 46 + 128); | 1451 Dynarr_add (dst, 46 + 128); |
1452 } | 1452 } |
1453 #else | 1453 #else |
1454 Dynarr_add (dst, (Intbyte) ch); | 1454 Dynarr_add (dst, (Ibyte) ch); |
1455 #endif /* MULE */ | 1455 #endif /* MULE */ |
1456 } | 1456 } |
1457 | 1457 |
1458 data->seen_char = 1; | 1458 data->seen_char = 1; |
1459 } | 1459 } |
1546 encode_unicode_char (Lisp_Object charset, int h, int l, | 1546 encode_unicode_char (Lisp_Object charset, int h, int l, |
1547 unsigned_char_dynarr *dst, enum unicode_type type, | 1547 unsigned_char_dynarr *dst, enum unicode_type type, |
1548 int little_endian) | 1548 int little_endian) |
1549 { | 1549 { |
1550 #ifdef MULE | 1550 #ifdef MULE |
1551 int code = emchar_to_unicode (make_emchar (charset, h & 127, l & 127)); | 1551 int code = ichar_to_unicode (make_ichar (charset, h & 127, l & 127)); |
1552 | 1552 |
1553 if (code == -1) | 1553 if (code == -1) |
1554 { | 1554 { |
1555 if (type != UNICODE_UTF_16 && | 1555 if (type != UNICODE_UTF_16 && |
1556 XCHARSET_DIMENSION (charset) == 2 && | 1556 XCHARSET_DIMENSION (charset) == 2 && |
1695 | 1695 |
1696 #ifdef ENABLE_COMPOSITE_CHARS | 1696 #ifdef ENABLE_COMPOSITE_CHARS |
1697 /* flags for handling composite chars. We do a little switcheroo | 1697 /* flags for handling composite chars. We do a little switcheroo |
1698 on the source while we're outputting the composite char. */ | 1698 on the source while we're outputting the composite char. */ |
1699 Bytecount saved_n = 0; | 1699 Bytecount saved_n = 0; |
1700 const Intbyte *saved_src = NULL; | 1700 const Ibyte *saved_src = NULL; |
1701 int in_composite = 0; | 1701 int in_composite = 0; |
1702 | 1702 |
1703 back_to_square_n: | 1703 back_to_square_n: |
1704 #endif /* ENABLE_COMPOSITE_CHARS */ | 1704 #endif /* ENABLE_COMPOSITE_CHARS */ |
1705 | 1705 |
1709 data->wrote_bom = 1; | 1709 data->wrote_bom = 1; |
1710 } | 1710 } |
1711 | 1711 |
1712 while (n--) | 1712 while (n--) |
1713 { | 1713 { |
1714 Intbyte c = *src++; | 1714 Ibyte c = *src++; |
1715 | 1715 |
1716 #ifdef MULE | 1716 #ifdef MULE |
1717 if (byte_ascii_p (c)) | 1717 if (byte_ascii_p (c)) |
1718 #endif /* MULE */ | 1718 #endif /* MULE */ |
1719 { /* Processing ASCII character */ | 1719 { /* Processing ASCII character */ |
1722 little_endian); | 1722 little_endian); |
1723 | 1723 |
1724 char_boundary = 1; | 1724 char_boundary = 1; |
1725 } | 1725 } |
1726 #ifdef MULE | 1726 #ifdef MULE |
1727 else if (intbyte_leading_byte_p (c) || intbyte_leading_byte_p (ch)) | 1727 else if (ibyte_leading_byte_p (c) || ibyte_leading_byte_p (ch)) |
1728 { /* Processing Leading Byte */ | 1728 { /* Processing Leading Byte */ |
1729 ch = 0; | 1729 ch = 0; |
1730 charset = charset_by_leading_byte (c); | 1730 charset = charset_by_leading_byte (c); |
1731 if (leading_byte_prefix_p(c)) | 1731 if (leading_byte_prefix_p(c)) |
1732 ch = c; | 1732 ch = c; |
1766 dst, type, | 1766 dst, type, |
1767 little_endian); | 1767 little_endian); |
1768 } | 1768 } |
1769 else | 1769 else |
1770 { | 1770 { |
1771 Emchar emch = make_emchar (Vcharset_composite, | 1771 Ichar emch = make_ichar (Vcharset_composite, |
1772 ch & 0x7F, | 1772 ch & 0x7F, |
1773 c & 0x7F); | 1773 c & 0x7F); |
1774 Lisp_Object lstr = | 1774 Lisp_Object lstr = |
1775 composite_char_string (emch); | 1775 composite_char_string (emch); |
1776 saved_n = n; | 1776 saved_n = n; |