comparison src/lread.c @ 3543:c136144fe765

[xemacs-hg @ 2006-08-04 22:55:04 by aidan] Raw strings, from Python via SXEmacs
author aidan
date Fri, 04 Aug 2006 22:55:19 +0000
parents d1754e7f0cea
children 5724b7632db3
comparison
equal deleted inserted replaced
3542:1ce31579a443 3543:c136144fe765
1668 list1 (Fchar_to_string (make_char (c)))); 1668 list1 (Fchar_to_string (make_char (c))));
1669 } 1669 }
1670 1670
1671 return val; 1671 return val;
1672 } 1672 }
1673
1674 /* A Unicode escape, as in C# (though we only permit them in strings
1675 and characters, not arbitrarily in the source code.) */
1676 static Ichar
1677 read_unicode_escape (Lisp_Object readcharfun, int unicode_hex_count)
1678 {
1679 REGISTER Ichar i = 0, c;
1680 REGISTER int count = 0;
1681 Lisp_Object lisp_char;
1682 while (++count <= unicode_hex_count)
1683 {
1684 c = readchar (readcharfun);
1685 /* Remember, can't use isdigit(), isalpha() etc. on Ichars */
1686 if (c >= '0' && c <= '9') i = (i << 4) + (c - '0');
1687 else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10;
1688 else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10;
1689 else
1690 {
1691 syntax_error ("Non-hex digit used for Unicode escape",
1692 make_char (c));
1693 break;
1694 }
1695 }
1696
1697 lisp_char = Funicode_to_char(make_int(i), Qnil);
1698
1699 if (EQ(Qnil, lisp_char))
1700 {
1701 /* This is ugly and horrible and trashes the user's data, but
1702 it's what unicode.c does. In the future, unicode-to-char
1703 should not return nil. */
1704 #ifdef MULE
1705 i = make_ichar (Vcharset_japanese_jisx0208, 34 + 128, 46 + 128);
1706 #else
1707 i = '~';
1708 #endif
1709 return i;
1710 }
1711 else
1712 {
1713 return XCHAR(lisp_char);
1714 }
1715 }
1716
1673 1717
1674 static Ichar 1718 static Ichar
1675 read_escape (Lisp_Object readcharfun) 1719 read_escape (Lisp_Object readcharfun)
1676 { 1720 {
1677 /* This function can GC */ 1721 /* This function can GC */
1678 Ichar c = readchar (readcharfun); 1722 Ichar c = readchar (readcharfun);
1679 /* \u allows up to four hex digits, \U up to eight. Default to the
1680 behaviour for \u, and change this value in the case that \U is seen. */
1681 int unicode_hex_count = 4;
1682 1723
1683 if (c < 0) 1724 if (c < 0)
1684 signal_error (Qend_of_file, 0, READCHARFUN_MAYBE (readcharfun)); 1725 signal_error (Qend_of_file, 0, READCHARFUN_MAYBE (readcharfun));
1685 1726
1686 switch (c) 1727 switch (c)
1795 } 1836 }
1796 return i; 1837 return i;
1797 } 1838 }
1798 case 'U': 1839 case 'U':
1799 /* Post-Unicode-2.0: Up to eight hex chars */ 1840 /* Post-Unicode-2.0: Up to eight hex chars */
1800 unicode_hex_count = 8; 1841 return read_unicode_escape(readcharfun, 8);
1801 case 'u': 1842 case 'u':
1802 1843 /* Unicode-2.0 and before; four hex chars. */
1803 /* A Unicode escape, as in C# (though we only permit them in strings 1844 return read_unicode_escape(readcharfun, 4);
1804 and characters, not arbitrarily in the source code.) */
1805 {
1806 REGISTER Ichar i = 0;
1807 REGISTER int count = 0;
1808 Lisp_Object lisp_char;
1809 while (++count <= unicode_hex_count)
1810 {
1811 c = readchar (readcharfun);
1812 /* Remember, can't use isdigit(), isalpha() etc. on Ichars */
1813 if (c >= '0' && c <= '9') i = (i << 4) + (c - '0');
1814 else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10;
1815 else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10;
1816 else
1817 {
1818 syntax_error ("Non-hex digit used for Unicode escape",
1819 make_char (c));
1820 break;
1821 }
1822 }
1823
1824 lisp_char = Funicode_to_char(make_int(i), Qnil);
1825
1826 if (EQ(Qnil, lisp_char))
1827 {
1828 /* This is ugly and horrible and trashes the user's data, but
1829 it's what unicode.c does. In the future, unicode-to-char
1830 should not return nil. */
1831 #ifdef MULE
1832 i = make_ichar (Vcharset_japanese_jisx0208, 34 + 128, 46 + 128);
1833 #else
1834 i = '~';
1835 #endif
1836 return i;
1837 }
1838 else
1839 {
1840 return XCHAR(lisp_char);
1841 }
1842 }
1843 1845
1844 default: 1846 default:
1845 return c; 1847 return c;
1846 } 1848 }
1847 } 1849 }
2267 list2_pure (int pure, Lisp_Object a, Lisp_Object b) 2269 list2_pure (int pure, Lisp_Object a, Lisp_Object b)
2268 { 2270 {
2269 return pure ? pure_cons (a, pure_cons (b, Qnil)) : list2 (a, b); 2271 return pure ? pure_cons (a, pure_cons (b, Qnil)) : list2 (a, b);
2270 } 2272 }
2271 #endif 2273 #endif
2274
2275 static Lisp_Object
2276 read_string (Lisp_Object readcharfun, Ichar delim, int raw,
2277 int honor_unicode)
2278 {
2279 #ifdef I18N3
2280 /* #### If the input stream is translating, then the string
2281 should be marked as translatable by setting its
2282 `string-translatable' property to t. .el and .elc files
2283 normally are translating input streams. See Fgettext()
2284 and print_internal(). */
2285 #endif
2286 Ichar c;
2287 int cancel = 0;
2288
2289 Lstream_rewind(XLSTREAM(Vread_buffer_stream));
2290 while ((c = readchar(readcharfun)) >= 0 && c != delim)
2291 {
2292 if (c == '\\')
2293 {
2294 if (raw)
2295 {
2296 c = readchar(readcharfun);
2297 if (honor_unicode && ('u' == c || 'U' == c))
2298 {
2299 c = read_unicode_escape(readcharfun,
2300 'U' == c ? 8 : 4);
2301 }
2302 else
2303 {
2304 /* For raw strings, insert the
2305 backslash and the next char, */
2306 Lstream_put_ichar(XLSTREAM
2307 (Vread_buffer_stream),
2308 '\\');
2309 }
2310 }
2311 else
2312 /* otherwise, backslash escapes the next char. */
2313 c = read_escape(readcharfun);
2314 }
2315 /* c is -1 if \ newline has just been seen */
2316 if (c == -1)
2317 {
2318 if (Lstream_byte_count
2319 (XLSTREAM(Vread_buffer_stream)) ==
2320 0)
2321 cancel = 1;
2322 }
2323 else
2324 Lstream_put_ichar(XLSTREAM
2325 (Vread_buffer_stream),
2326 c);
2327 QUIT;
2328 }
2329 if (c < 0)
2330 return Fsignal(Qend_of_file,
2331 list1(READCHARFUN_MAYBE(readcharfun)));
2332
2333 /* If purifying, and string starts with \ newline,
2334 return zero instead. This is for doc strings
2335 that we are really going to find in lib-src/DOC.nn.nn */
2336 if (purify_flag && NILP(Vinternal_doc_file_name)
2337 && cancel)
2338 return Qzero;
2339
2340 Lstream_flush(XLSTREAM(Vread_buffer_stream));
2341 return make_string(resizing_buffer_stream_ptr
2342 (XLSTREAM(Vread_buffer_stream)),
2343 Lstream_byte_count(XLSTREAM(Vread_buffer_stream)));
2344 }
2345
2346 static Lisp_Object
2347 read_raw_string (Lisp_Object readcharfun)
2348 {
2349 Ichar c;
2350 Ichar permit_unicode = 0;
2351
2352 do {
2353 c = reader_nextchar(readcharfun);
2354 switch (c) {
2355 /* #r:engine"my sexy raw string" -- raw string w/ flags*/
2356 /* case ':': */
2357 /* #ru"Hi there\u20AC \U000020AC" -- raw string, honouring Unicode. */
2358 case 'u':
2359 case 'U':
2360 permit_unicode = c;
2361 continue;
2362
2363 /* #r"my raw string" -- raw string */
2364 case '\"':
2365 return read_string(readcharfun, '\"', 1, permit_unicode);
2366 /* invalid syntax */
2367 default:
2368 {
2369 if (permit_unicode)
2370 {
2371 unreadchar(readcharfun, permit_unicode);
2372 }
2373 unreadchar(readcharfun, c);
2374 return Fsignal(Qinvalid_read_syntax,
2375 list1(build_string
2376 ("unrecognized raw string syntax")));
2377 }
2378 }
2379 } while (1);
2380 }
2272 2381
2273 /* Read the next Lisp object from the stream READCHARFUN and return it. 2382 /* Read the next Lisp object from the stream READCHARFUN and return it.
2274 If the return value is a cons whose car is Qunbound, then read1() 2383 If the return value is a cons whose car is Qunbound, then read1()
2275 encountered a misplaced token (e.g. a right bracket, right paren, 2384 encountered a misplaced token (e.g. a right bracket, right paren,
2276 or dot followed by a non-number). To filter this stuff out, 2385 or dot followed by a non-number). To filter this stuff out,
2507 case 'o': return read_integer (readcharfun, 8); 2616 case 'o': return read_integer (readcharfun, 8);
2508 /* #xdead => 57005 -- hex constant syntax */ 2617 /* #xdead => 57005 -- hex constant syntax */
2509 case 'x': return read_integer (readcharfun, 16); 2618 case 'x': return read_integer (readcharfun, 16);
2510 /* #b010 => 2 -- binary constant syntax */ 2619 /* #b010 => 2 -- binary constant syntax */
2511 case 'b': return read_integer (readcharfun, 2); 2620 case 'b': return read_integer (readcharfun, 2);
2621 /* #r"raw\stringt" -- raw string syntax */
2622 case 'r': return read_raw_string(readcharfun);
2512 /* #s(foobar key1 val1 key2 val2) -- structure syntax */ 2623 /* #s(foobar key1 val1 key2 val2) -- structure syntax */
2513 case 's': return read_structure (readcharfun); 2624 case 's': return read_structure (readcharfun);
2514 case '<': 2625 case '<':
2515 { 2626 {
2516 unreadchar (readcharfun, c); 2627 unreadchar (readcharfun, c);
2652 c = read_escape (readcharfun); 2763 c = read_escape (readcharfun);
2653 return make_char (c); 2764 return make_char (c);
2654 } 2765 }
2655 2766
2656 case '\"': 2767 case '\"':
2657 { 2768 /* String */
2658 /* String */ 2769 return read_string(readcharfun, '\"', 0, 1);
2659 #ifdef I18N3
2660 /* #### If the input stream is translating, then the string
2661 should be marked as translatable by setting its
2662 `string-translatable' property to t. .el and .elc files
2663 normally are translating input streams. See Fgettext()
2664 and print_internal(). */
2665 #endif
2666 int cancel = 0;
2667
2668 Lstream_rewind (XLSTREAM (Vread_buffer_stream));
2669 while ((c = readchar (readcharfun)) >= 0
2670 && c != '\"')
2671 {
2672 if (c == '\\')
2673 c = read_escape (readcharfun);
2674 /* c is -1 if \ newline has just been seen */
2675 if (c == -1)
2676 {
2677 if (Lstream_byte_count (XLSTREAM (Vread_buffer_stream)) == 0)
2678 cancel = 1;
2679 }
2680 else
2681 Lstream_put_ichar (XLSTREAM (Vread_buffer_stream), c);
2682 QUIT;
2683 }
2684 if (c < 0)
2685 return Fsignal (Qend_of_file, list1 (READCHARFUN_MAYBE (readcharfun)));
2686
2687 /* If purifying, and string starts with \ newline,
2688 return zero instead. This is for doc strings
2689 that we are really going to find in lib-src/DOC.nn.nn */
2690 if (purify_flag && NILP (Vinternal_doc_file_name) && cancel)
2691 return Qzero;
2692
2693 Lstream_flush (XLSTREAM (Vread_buffer_stream));
2694 return
2695 make_string
2696 (resizing_buffer_stream_ptr (XLSTREAM (Vread_buffer_stream)),
2697 Lstream_byte_count (XLSTREAM (Vread_buffer_stream)));
2698 }
2699 2770
2700 default: 2771 default:
2701 { 2772 {
2702 /* Ignore whitespace and control characters */ 2773 /* Ignore whitespace and control characters */
2703 if (c <= 040) 2774 if (c <= 040)