comparison src/lread.c @ 3367:84ee3ca77e7f

[xemacs-hg @ 2006-04-29 14:36:49 by aidan] Support Unicode escapes in the Lisp reader, taking the syntax from C#.
author aidan
date Sat, 29 Apr 2006 14:36:57 +0000
parents 21e33c0c844b
children d1754e7f0cea
comparison
equal deleted inserted replaced
3366:db585a1b4d86 3367:84ee3ca77e7f
205 static int saved_doc_string_position; 205 static int saved_doc_string_position;
206 #endif 206 #endif
207 207
208 static int locate_file_open_or_access_file (Ibyte *fn, int access_mode); 208 static int locate_file_open_or_access_file (Ibyte *fn, int access_mode);
209 EXFUN (Fread_from_string, 3); 209 EXFUN (Fread_from_string, 3);
210
211 EXFUN (Funicode_to_char, 2); /* In unicode.c. */
210 212
211 /* When errors are signaled, the actual readcharfun should not be used 213 /* When errors are signaled, the actual readcharfun should not be used
212 as an argument if it is an lstream, so that lstreams don't escape 214 as an argument if it is an lstream, so that lstreams don't escape
213 to the Lisp level. */ 215 to the Lisp level. */
214 #define READCHARFUN_MAYBE(x) (LSTREAMP (x) \ 216 #define READCHARFUN_MAYBE(x) (LSTREAMP (x) \
1673 static Ichar 1675 static Ichar
1674 read_escape (Lisp_Object readcharfun) 1676 read_escape (Lisp_Object readcharfun)
1675 { 1677 {
1676 /* This function can GC */ 1678 /* This function can GC */
1677 Ichar c = readchar (readcharfun); 1679 Ichar c = readchar (readcharfun);
1680 /* \u allows up to four hex digits, \U up to eight. Default to the
1681 behaviour for \u, and change this value in the case that \U is seen. */
1682 int unicode_hex_count = 4;
1678 1683
1679 if (c < 0) 1684 if (c < 0)
1680 signal_error (Qend_of_file, 0, READCHARFUN_MAYBE (readcharfun)); 1685 signal_error (Qend_of_file, 0, READCHARFUN_MAYBE (readcharfun));
1681 1686
1682 switch (c) 1687 switch (c)
1761 unreadchar (readcharfun, c); 1766 unreadchar (readcharfun, c);
1762 break; 1767 break;
1763 } 1768 }
1764 } 1769 }
1765 if (i >= 0400) 1770 if (i >= 0400)
1766 syntax_error ("Attempt to create non-ASCII/ISO-8859-1 character", 1771 syntax_error ("Non-ISO-8859-1 character specified with octal escape",
1767 make_int (i)); 1772 make_int (i));
1768 return i; 1773 return i;
1769 } 1774 }
1770 1775
1771 case 'x': 1776 case 'x':
1789 break; 1794 break;
1790 } 1795 }
1791 } 1796 }
1792 return i; 1797 return i;
1793 } 1798 }
1794 1799 case 'U':
1800 /* Post-Unicode-2.0: Up to eight hex chars */
1801 unicode_hex_count = 8;
1802 case 'u':
1803
1804 /* A Unicode escape, as in C# (though we only permit them in strings
1805 and characters, not arbitrarily in the source code.) */
1806 {
1807 REGISTER Ichar i = 0;
1808 REGISTER int count = 0;
1809 Lisp_Object lisp_char;
1810 while (++count <= unicode_hex_count)
1811 {
1812 c = readchar (readcharfun);
1813 /* Remember, can't use isdigit(), isalpha() etc. on Ichars */
1814 if (c >= '0' && c <= '9') i = (i << 4) + (c - '0');
1815 else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10;
1816 else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10;
1817 else
1818 {
1819 syntax_error ("Non-hex digit used for Unicode escape",
1820 make_char (c));
1821 break;
1822 }
1823 }
1824
1825 lisp_char = Funicode_to_char(make_int(i), Qnil);
1826
1827 if (EQ(Qnil, lisp_char))
1828 {
1829 /* This is ugly and horrible and trashes the user's data, but
1830 it's what unicode.c does. In the future, unicode-to-char
1831 should not return nil. */
1795 #ifdef MULE 1832 #ifdef MULE
1796 /* #### need some way of reading an extended character with 1833 i = make_ichar (Vcharset_japanese_jisx0208, 34 + 128, 46 + 128);
1797 an escape sequence. */ 1834 #else
1798 #endif 1835 i = '~';
1836 #endif
1837 return i;
1838 }
1839 else
1840 {
1841 return XCHAR(lisp_char);
1842 }
1843 }
1799 1844
1800 default: 1845 default:
1801 return c; 1846 return c;
1802 } 1847 }
1803 } 1848 }