Mercurial > hg > xemacs-beta
comparison src/lread.c @ 3543:c136144fe765
[xemacs-hg @ 2006-08-04 22:55:04 by aidan]
Raw strings, from Python via SXEmacs
author | aidan |
---|---|
date | Fri, 04 Aug 2006 22:55:19 +0000 |
parents | d1754e7f0cea |
children | 5724b7632db3 |
comparison
equal
deleted
inserted
replaced
3542:1ce31579a443 | 3543:c136144fe765 |
---|---|
1668 list1 (Fchar_to_string (make_char (c)))); | 1668 list1 (Fchar_to_string (make_char (c)))); |
1669 } | 1669 } |
1670 | 1670 |
1671 return val; | 1671 return val; |
1672 } | 1672 } |
1673 | |
1674 /* A Unicode escape, as in C# (though we only permit them in strings | |
1675 and characters, not arbitrarily in the source code.) */ | |
1676 static Ichar | |
1677 read_unicode_escape (Lisp_Object readcharfun, int unicode_hex_count) | |
1678 { | |
1679 REGISTER Ichar i = 0, c; | |
1680 REGISTER int count = 0; | |
1681 Lisp_Object lisp_char; | |
1682 while (++count <= unicode_hex_count) | |
1683 { | |
1684 c = readchar (readcharfun); | |
1685 /* Remember, can't use isdigit(), isalpha() etc. on Ichars */ | |
1686 if (c >= '0' && c <= '9') i = (i << 4) + (c - '0'); | |
1687 else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10; | |
1688 else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10; | |
1689 else | |
1690 { | |
1691 syntax_error ("Non-hex digit used for Unicode escape", | |
1692 make_char (c)); | |
1693 break; | |
1694 } | |
1695 } | |
1696 | |
1697 lisp_char = Funicode_to_char(make_int(i), Qnil); | |
1698 | |
1699 if (EQ(Qnil, lisp_char)) | |
1700 { | |
1701 /* This is ugly and horrible and trashes the user's data, but | |
1702 it's what unicode.c does. In the future, unicode-to-char | |
1703 should not return nil. */ | |
1704 #ifdef MULE | |
1705 i = make_ichar (Vcharset_japanese_jisx0208, 34 + 128, 46 + 128); | |
1706 #else | |
1707 i = '~'; | |
1708 #endif | |
1709 return i; | |
1710 } | |
1711 else | |
1712 { | |
1713 return XCHAR(lisp_char); | |
1714 } | |
1715 } | |
1716 | |
1673 | 1717 |
1674 static Ichar | 1718 static Ichar |
1675 read_escape (Lisp_Object readcharfun) | 1719 read_escape (Lisp_Object readcharfun) |
1676 { | 1720 { |
1677 /* This function can GC */ | 1721 /* This function can GC */ |
1678 Ichar c = readchar (readcharfun); | 1722 Ichar c = readchar (readcharfun); |
1679 /* \u allows up to four hex digits, \U up to eight. Default to the | |
1680 behaviour for \u, and change this value in the case that \U is seen. */ | |
1681 int unicode_hex_count = 4; | |
1682 | 1723 |
1683 if (c < 0) | 1724 if (c < 0) |
1684 signal_error (Qend_of_file, 0, READCHARFUN_MAYBE (readcharfun)); | 1725 signal_error (Qend_of_file, 0, READCHARFUN_MAYBE (readcharfun)); |
1685 | 1726 |
1686 switch (c) | 1727 switch (c) |
1795 } | 1836 } |
1796 return i; | 1837 return i; |
1797 } | 1838 } |
1798 case 'U': | 1839 case 'U': |
1799 /* Post-Unicode-2.0: Up to eight hex chars */ | 1840 /* Post-Unicode-2.0: Up to eight hex chars */ |
1800 unicode_hex_count = 8; | 1841 return read_unicode_escape(readcharfun, 8); |
1801 case 'u': | 1842 case 'u': |
1802 | 1843 /* Unicode-2.0 and before; four hex chars. */ |
1803 /* A Unicode escape, as in C# (though we only permit them in strings | 1844 return read_unicode_escape(readcharfun, 4); |
1804 and characters, not arbitrarily in the source code.) */ | |
1805 { | |
1806 REGISTER Ichar i = 0; | |
1807 REGISTER int count = 0; | |
1808 Lisp_Object lisp_char; | |
1809 while (++count <= unicode_hex_count) | |
1810 { | |
1811 c = readchar (readcharfun); | |
1812 /* Remember, can't use isdigit(), isalpha() etc. on Ichars */ | |
1813 if (c >= '0' && c <= '9') i = (i << 4) + (c - '0'); | |
1814 else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10; | |
1815 else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10; | |
1816 else | |
1817 { | |
1818 syntax_error ("Non-hex digit used for Unicode escape", | |
1819 make_char (c)); | |
1820 break; | |
1821 } | |
1822 } | |
1823 | |
1824 lisp_char = Funicode_to_char(make_int(i), Qnil); | |
1825 | |
1826 if (EQ(Qnil, lisp_char)) | |
1827 { | |
1828 /* This is ugly and horrible and trashes the user's data, but | |
1829 it's what unicode.c does. In the future, unicode-to-char | |
1830 should not return nil. */ | |
1831 #ifdef MULE | |
1832 i = make_ichar (Vcharset_japanese_jisx0208, 34 + 128, 46 + 128); | |
1833 #else | |
1834 i = '~'; | |
1835 #endif | |
1836 return i; | |
1837 } | |
1838 else | |
1839 { | |
1840 return XCHAR(lisp_char); | |
1841 } | |
1842 } | |
1843 | 1845 |
1844 default: | 1846 default: |
1845 return c; | 1847 return c; |
1846 } | 1848 } |
1847 } | 1849 } |
2267 list2_pure (int pure, Lisp_Object a, Lisp_Object b) | 2269 list2_pure (int pure, Lisp_Object a, Lisp_Object b) |
2268 { | 2270 { |
2269 return pure ? pure_cons (a, pure_cons (b, Qnil)) : list2 (a, b); | 2271 return pure ? pure_cons (a, pure_cons (b, Qnil)) : list2 (a, b); |
2270 } | 2272 } |
2271 #endif | 2273 #endif |
2274 | |
2275 static Lisp_Object | |
2276 read_string (Lisp_Object readcharfun, Ichar delim, int raw, | |
2277 int honor_unicode) | |
2278 { | |
2279 #ifdef I18N3 | |
2280 /* #### If the input stream is translating, then the string | |
2281 should be marked as translatable by setting its | |
2282 `string-translatable' property to t. .el and .elc files | |
2283 normally are translating input streams. See Fgettext() | |
2284 and print_internal(). */ | |
2285 #endif | |
2286 Ichar c; | |
2287 int cancel = 0; | |
2288 | |
2289 Lstream_rewind(XLSTREAM(Vread_buffer_stream)); | |
2290 while ((c = readchar(readcharfun)) >= 0 && c != delim) | |
2291 { | |
2292 if (c == '\\') | |
2293 { | |
2294 if (raw) | |
2295 { | |
2296 c = readchar(readcharfun); | |
2297 if (honor_unicode && ('u' == c || 'U' == c)) | |
2298 { | |
2299 c = read_unicode_escape(readcharfun, | |
2300 'U' == c ? 8 : 4); | |
2301 } | |
2302 else | |
2303 { | |
2304 /* For raw strings, insert the | |
2305 backslash and the next char, */ | |
2306 Lstream_put_ichar(XLSTREAM | |
2307 (Vread_buffer_stream), | |
2308 '\\'); | |
2309 } | |
2310 } | |
2311 else | |
2312 /* otherwise, backslash escapes the next char. */ | |
2313 c = read_escape(readcharfun); | |
2314 } | |
2315 /* c is -1 if \ newline has just been seen */ | |
2316 if (c == -1) | |
2317 { | |
2318 if (Lstream_byte_count | |
2319 (XLSTREAM(Vread_buffer_stream)) == | |
2320 0) | |
2321 cancel = 1; | |
2322 } | |
2323 else | |
2324 Lstream_put_ichar(XLSTREAM | |
2325 (Vread_buffer_stream), | |
2326 c); | |
2327 QUIT; | |
2328 } | |
2329 if (c < 0) | |
2330 return Fsignal(Qend_of_file, | |
2331 list1(READCHARFUN_MAYBE(readcharfun))); | |
2332 | |
2333 /* If purifying, and string starts with \ newline, | |
2334 return zero instead. This is for doc strings | |
2335 that we are really going to find in lib-src/DOC.nn.nn */ | |
2336 if (purify_flag && NILP(Vinternal_doc_file_name) | |
2337 && cancel) | |
2338 return Qzero; | |
2339 | |
2340 Lstream_flush(XLSTREAM(Vread_buffer_stream)); | |
2341 return make_string(resizing_buffer_stream_ptr | |
2342 (XLSTREAM(Vread_buffer_stream)), | |
2343 Lstream_byte_count(XLSTREAM(Vread_buffer_stream))); | |
2344 } | |
2345 | |
2346 static Lisp_Object | |
2347 read_raw_string (Lisp_Object readcharfun) | |
2348 { | |
2349 Ichar c; | |
2350 Ichar permit_unicode = 0; | |
2351 | |
2352 do { | |
2353 c = reader_nextchar(readcharfun); | |
2354 switch (c) { | |
2355 /* #r:engine"my sexy raw string" -- raw string w/ flags*/ | |
2356 /* case ':': */ | |
2357 /* #ru"Hi there\u20AC \U000020AC" -- raw string, honouring Unicode. */ | |
2358 case 'u': | |
2359 case 'U': | |
2360 permit_unicode = c; | |
2361 continue; | |
2362 | |
2363 /* #r"my raw string" -- raw string */ | |
2364 case '\"': | |
2365 return read_string(readcharfun, '\"', 1, permit_unicode); | |
2366 /* invalid syntax */ | |
2367 default: | |
2368 { | |
2369 if (permit_unicode) | |
2370 { | |
2371 unreadchar(readcharfun, permit_unicode); | |
2372 } | |
2373 unreadchar(readcharfun, c); | |
2374 return Fsignal(Qinvalid_read_syntax, | |
2375 list1(build_string | |
2376 ("unrecognized raw string syntax"))); | |
2377 } | |
2378 } | |
2379 } while (1); | |
2380 } | |
2272 | 2381 |
2273 /* Read the next Lisp object from the stream READCHARFUN and return it. | 2382 /* Read the next Lisp object from the stream READCHARFUN and return it. |
2274 If the return value is a cons whose car is Qunbound, then read1() | 2383 If the return value is a cons whose car is Qunbound, then read1() |
2275 encountered a misplaced token (e.g. a right bracket, right paren, | 2384 encountered a misplaced token (e.g. a right bracket, right paren, |
2276 or dot followed by a non-number). To filter this stuff out, | 2385 or dot followed by a non-number). To filter this stuff out, |
2507 case 'o': return read_integer (readcharfun, 8); | 2616 case 'o': return read_integer (readcharfun, 8); |
2508 /* #xdead => 57005 -- hex constant syntax */ | 2617 /* #xdead => 57005 -- hex constant syntax */ |
2509 case 'x': return read_integer (readcharfun, 16); | 2618 case 'x': return read_integer (readcharfun, 16); |
2510 /* #b010 => 2 -- binary constant syntax */ | 2619 /* #b010 => 2 -- binary constant syntax */ |
2511 case 'b': return read_integer (readcharfun, 2); | 2620 case 'b': return read_integer (readcharfun, 2); |
2621 /* #r"raw\stringt" -- raw string syntax */ | |
2622 case 'r': return read_raw_string(readcharfun); | |
2512 /* #s(foobar key1 val1 key2 val2) -- structure syntax */ | 2623 /* #s(foobar key1 val1 key2 val2) -- structure syntax */ |
2513 case 's': return read_structure (readcharfun); | 2624 case 's': return read_structure (readcharfun); |
2514 case '<': | 2625 case '<': |
2515 { | 2626 { |
2516 unreadchar (readcharfun, c); | 2627 unreadchar (readcharfun, c); |
2652 c = read_escape (readcharfun); | 2763 c = read_escape (readcharfun); |
2653 return make_char (c); | 2764 return make_char (c); |
2654 } | 2765 } |
2655 | 2766 |
2656 case '\"': | 2767 case '\"': |
2657 { | 2768 /* String */ |
2658 /* String */ | 2769 return read_string(readcharfun, '\"', 0, 1); |
2659 #ifdef I18N3 | |
2660 /* #### If the input stream is translating, then the string | |
2661 should be marked as translatable by setting its | |
2662 `string-translatable' property to t. .el and .elc files | |
2663 normally are translating input streams. See Fgettext() | |
2664 and print_internal(). */ | |
2665 #endif | |
2666 int cancel = 0; | |
2667 | |
2668 Lstream_rewind (XLSTREAM (Vread_buffer_stream)); | |
2669 while ((c = readchar (readcharfun)) >= 0 | |
2670 && c != '\"') | |
2671 { | |
2672 if (c == '\\') | |
2673 c = read_escape (readcharfun); | |
2674 /* c is -1 if \ newline has just been seen */ | |
2675 if (c == -1) | |
2676 { | |
2677 if (Lstream_byte_count (XLSTREAM (Vread_buffer_stream)) == 0) | |
2678 cancel = 1; | |
2679 } | |
2680 else | |
2681 Lstream_put_ichar (XLSTREAM (Vread_buffer_stream), c); | |
2682 QUIT; | |
2683 } | |
2684 if (c < 0) | |
2685 return Fsignal (Qend_of_file, list1 (READCHARFUN_MAYBE (readcharfun))); | |
2686 | |
2687 /* If purifying, and string starts with \ newline, | |
2688 return zero instead. This is for doc strings | |
2689 that we are really going to find in lib-src/DOC.nn.nn */ | |
2690 if (purify_flag && NILP (Vinternal_doc_file_name) && cancel) | |
2691 return Qzero; | |
2692 | |
2693 Lstream_flush (XLSTREAM (Vread_buffer_stream)); | |
2694 return | |
2695 make_string | |
2696 (resizing_buffer_stream_ptr (XLSTREAM (Vread_buffer_stream)), | |
2697 Lstream_byte_count (XLSTREAM (Vread_buffer_stream))); | |
2698 } | |
2699 | 2770 |
2700 default: | 2771 default: |
2701 { | 2772 { |
2702 /* Ignore whitespace and control characters */ | 2773 /* Ignore whitespace and control characters */ |
2703 if (c <= 040) | 2774 if (c <= 040) |