Mercurial > hg > xemacs-beta
comparison src/lread.c @ 3367:84ee3ca77e7f
[xemacs-hg @ 2006-04-29 14:36:49 by aidan]
Support Unicode escapes in the Lisp reader, taking the syntax from C#.
author | aidan |
---|---|
date | Sat, 29 Apr 2006 14:36:57 +0000 |
parents | 21e33c0c844b |
children | d1754e7f0cea |
comparison legend: equal | deleted | inserted | replaced
3366:db585a1b4d86 | 3367:84ee3ca77e7f |
---|---|
205 static int saved_doc_string_position; | 205 static int saved_doc_string_position; |
206 #endif | 206 #endif |
207 | 207 |
208 static int locate_file_open_or_access_file (Ibyte *fn, int access_mode); | 208 static int locate_file_open_or_access_file (Ibyte *fn, int access_mode); |
209 EXFUN (Fread_from_string, 3); | 209 EXFUN (Fread_from_string, 3); |
210 | |
211 EXFUN (Funicode_to_char, 2); /* In unicode.c. */ | |
210 | 212 |
211 /* When errors are signaled, the actual readcharfun should not be used | 213 /* When errors are signaled, the actual readcharfun should not be used |
212 as an argument if it is an lstream, so that lstreams don't escape | 214 as an argument if it is an lstream, so that lstreams don't escape |
213 to the Lisp level. */ | 215 to the Lisp level. */ |
214 #define READCHARFUN_MAYBE(x) (LSTREAMP (x) \ | 216 #define READCHARFUN_MAYBE(x) (LSTREAMP (x) \ |
1673 static Ichar | 1675 static Ichar |
1674 read_escape (Lisp_Object readcharfun) | 1676 read_escape (Lisp_Object readcharfun) |
1675 { | 1677 { |
1676 /* This function can GC */ | 1678 /* This function can GC */ |
1677 Ichar c = readchar (readcharfun); | 1679 Ichar c = readchar (readcharfun); |
1680 /* \u allows up to four hex digits, \U up to eight. Default to the | |
1681 behaviour for \u, and change this value in the case that \U is seen. */ | |
1682 int unicode_hex_count = 4; | |
1678 | 1683 |
1679 if (c < 0) | 1684 if (c < 0) |
1680 signal_error (Qend_of_file, 0, READCHARFUN_MAYBE (readcharfun)); | 1685 signal_error (Qend_of_file, 0, READCHARFUN_MAYBE (readcharfun)); |
1681 | 1686 |
1682 switch (c) | 1687 switch (c) |
1761 unreadchar (readcharfun, c); | 1766 unreadchar (readcharfun, c); |
1762 break; | 1767 break; |
1763 } | 1768 } |
1764 } | 1769 } |
1765 if (i >= 0400) | 1770 if (i >= 0400) |
1766 syntax_error ("Attempt to create non-ASCII/ISO-8859-1 character", | 1771 syntax_error ("Non-ISO-8859-1 character specified with octal escape", |
1767 make_int (i)); | 1772 make_int (i)); |
1768 return i; | 1773 return i; |
1769 } | 1774 } |
1770 | 1775 |
1771 case 'x': | 1776 case 'x': |
1789 break; | 1794 break; |
1790 } | 1795 } |
1791 } | 1796 } |
1792 return i; | 1797 return i; |
1793 } | 1798 } |
1794 | 1799 case 'U': |
1800 /* Post-Unicode-2.0: Up to eight hex chars */ | |
1801 unicode_hex_count = 8; | |
1802 case 'u': | |
1803 | |
1804 /* A Unicode escape, as in C# (though we only permit them in strings | |
1805 and characters, not arbitrarily in the source code.) */ | |
1806 { | |
1807 REGISTER Ichar i = 0; | |
1808 REGISTER int count = 0; | |
1809 Lisp_Object lisp_char; | |
1810 while (++count <= unicode_hex_count) | |
1811 { | |
1812 c = readchar (readcharfun); | |
1813 /* Remember, can't use isdigit(), isalpha() etc. on Ichars */ | |
1814 if (c >= '0' && c <= '9') i = (i << 4) + (c - '0'); | |
1815 else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10; | |
1816 else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10; | |
1817 else | |
1818 { | |
1819 syntax_error ("Non-hex digit used for Unicode escape", | |
1820 make_char (c)); | |
1821 break; | |
1822 } | |
1823 } | |
1824 | |
1825 lisp_char = Funicode_to_char(make_int(i), Qnil); | |
1826 | |
1827 if (EQ(Qnil, lisp_char)) | |
1828 { | |
1829 /* This is ugly and horrible and trashes the user's data, but | |
1830 it's what unicode.c does. In the future, unicode-to-char | |
1831 should not return nil. */ | |
1795 #ifdef MULE | 1832 #ifdef MULE |
1796 /* #### need some way of reading an extended character with | 1833 i = make_ichar (Vcharset_japanese_jisx0208, 34 + 128, 46 + 128); |
1797 an escape sequence. */ | 1834 #else |
1798 #endif | 1835 i = '~'; |
1836 #endif | |
1837 return i; | |
1838 } | |
1839 else | |
1840 { | |
1841 return XCHAR(lisp_char); | |
1842 } | |
1843 } | |
1799 | 1844 |
1800 default: | 1845 default: |
1801 return c; | 1846 return c; |
1802 } | 1847 } |
1803 } | 1848 } |