Mercurial > hg > xemacs-beta
comparison src/lread.c @ 5908:6174848f3e6c
Use parse_integer() in read_atom(); support bases with ratios like integers
src/ChangeLog addition:
2015-05-08 Aidan Kehoe <kehoea@parhasard.net>
* data.c (init_errors_once_early):
Move the Qunsupported_type here from numbers.c, so it's available
when the majority of our types are not supported.
* general-slots.h: Add it here, too.
* number.c: Remove the definition of Qunsupported_type from here.
* lread.c (read_atom):
Check if the first character could reflect a rational, if so, call
parse_integer(), don't check the syntax of the other
characters. This allows us to accept the non-ASCII digit
characters too.
If that worked partially, but not completely, and the next char is
a slash, try to parse as a ratio.
If that fails, try isfloat_string(), but only if the first
character could plausibly be part of a float.
Otherwise, treat as a symbol.
* lread.c (read_rational):
Rename from read_integer. Handle ratios with the same radix
specification as was used for integers.
* lread.c (read1):
Rename read_integer in this function. Support the Common Lisp
#NNNrMMM syntax for parsing a number MMM of arbitrary radix NNN.
man/ChangeLog addition:
2015-05-08 Aidan Kehoe <kehoea@parhasard.net>
* lispref/numbers.texi (Numbers):
Describe the newly-supported arbitrary-base syntax for rationals
(integers and ratios). Describe that ratios can take the same base
specification as integers, something also new.
tests/ChangeLog addition:
2015-05-08 Aidan Kehoe <kehoea@parhasard.net>
* automated/lisp-reader-tests.el:
Check the arbitrary-base integer reader syntax support, just
added. Check the reader base support for ratios, just added.
Check the non-ASCII-digit support in the reader, just added.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Sat, 09 May 2015 00:40:57 +0100 |
parents | 750fab17b299 |
children | d138e600aa3a |
comparison
equal
deleted
inserted
replaced
5907:87e29d93e11b | 5908:6174848f3e6c |
---|---|
1928 int uninterned_symbol) | 1928 int uninterned_symbol) |
1929 { | 1929 { |
1930 /* This function can GC */ | 1930 /* This function can GC */ |
1931 int saw_a_backslash; | 1931 int saw_a_backslash; |
1932 Bytecount len = read_atom_0 (readcharfun, firstchar, &saw_a_backslash); | 1932 Bytecount len = read_atom_0 (readcharfun, firstchar, &saw_a_backslash); |
1933 char *read_ptr = (char *) | 1933 Ibyte *read_ptr |
1934 resizing_buffer_stream_ptr (XLSTREAM (Vread_buffer_stream)); | 1934 = (Ibyte *) resizing_buffer_stream_ptr (XLSTREAM (Vread_buffer_stream)); |
1935 | 1935 |
1936 /* Is it an integer? */ | 1936 /* Is it an integer? |
1937 if (! (saw_a_backslash || uninterned_symbol)) | 1937 |
1938 { | 1938 If a token had any backslashes in it, it is disqualified from being an |
1939 /* If a token had any backslashes in it, it is disqualified from | 1939 integer or a float. This means that 123\456 is a symbol, as is \123 |
1940 being an integer or a float. This means that 123\456 is a | 1940 (which is the way (intern "123") prints). Also, if token was preceded by |
1941 symbol, as is \123 (which is the way (intern "123") prints). | 1941 #:, it's always a symbol. */ |
1942 Also, if token was preceded by #:, it's always a symbol. | 1942 |
1943 */ | 1943 if (!(saw_a_backslash || uninterned_symbol)) |
1944 char *p = read_ptr + len; | 1944 { |
1945 char *p1 = read_ptr; | 1945 Lisp_Object got = get_char_table (firstchar, Vdigit_fixnum_map); |
1946 | 1946 Fixnum fixval = FIXNUMP (got) ? XREALFIXNUM (got) : -1; |
1947 if (*p1 == '+' || *p1 == '-') p1++; | 1947 Boolint starts_like_an_int_p = (fixval > -1 && fixval < 10) |
1948 if (p1 != p) | 1948 || firstchar == '+' || firstchar == '-'; |
1949 { | 1949 Ibyte *endp = NULL; |
1950 int c; | 1950 Lisp_Object num = Qnil; |
1951 | 1951 |
1952 while (p1 != p && (c = *p1) >= '0' && c <= '9') | 1952 /* Attempt to parse as an integer, with :JUNK-ALLOWED t. Do a gross |
1953 p1++; | 1953 plausibility check (above) first, though, we'd prefer not to call |
1954 /* Integers can have trailing decimal points. */ | 1954 parse_integer() on every symbol we see. */ |
1955 if (p1 > read_ptr && p1 < p && *p1 == '.') | 1955 if (starts_like_an_int_p) |
1956 p1++; | 1956 { |
1957 if (p1 == p) | 1957 num = parse_integer (read_ptr, &endp, len, 10, 1, Qnil); |
1958 } | |
1959 | |
1960 if (INTEGERP (num)) | |
1961 { | |
1962 if (endp == (read_ptr + len)) | |
1958 { | 1963 { |
1959 Ibyte *buf_end; | 1964 /* We consumed the whole atom, it's definitely an integer. */ |
1960 /* It is an integer. */ | 1965 return num; |
1961 if (p1[-1] == '.') | 1966 } |
1967 else if ('.' == itext_ichar (endp)) | |
1968 { | |
1969 /* Trailing decimal point is allowed in the Lisp reader, this is | |
1970 an integer. */ | |
1971 INC_IBYTEPTR (endp); | |
1972 if (endp == (read_ptr + len)) | |
1962 { | 1973 { |
1963 len -= 1; | 1974 return num; |
1964 } | 1975 } |
1965 | 1976 } |
1966 return parse_integer ((Ibyte *) read_ptr, &buf_end, len, 10, | 1977 else if ('/' == itext_ichar (endp)) |
1967 0, Qnil); | 1978 { |
1968 } | 1979 /* Maybe it's a ratio? */ |
1969 } | 1980 Lisp_Object denom = Qnil; |
1970 #ifdef HAVE_RATIO | 1981 |
1971 if (isratio_string (read_ptr)) | 1982 INC_IBYTEPTR (endp); |
1972 { | 1983 |
1973 /* GMP ratio_set_string has no effect with initial + sign */ | 1984 if (endp < (read_ptr + len)) |
1974 if (*read_ptr == '+') | 1985 { |
1975 read_ptr++; | 1986 Ichar cc = itext_ichar (endp); |
1976 ratio_set_string (scratch_ratio, read_ptr, 0); | 1987 /* No leading sign allowed in the denominator, that would |
1977 if (bignum_sign (ratio_denominator (scratch_ratio)) != 0) { | 1988 make it a symbol (according to Common Lisp, of course).*/ |
1978 ratio_canonicalize (scratch_ratio); | 1989 if (cc != '+' && cc != '-') |
1979 return Fcanonicalize_number (make_ratio_rt (scratch_ratio)); | 1990 { |
1980 } | 1991 denom = parse_integer (endp, &endp, |
1981 return Fsignal (Qinvalid_read_syntax, | 1992 len - (endp - read_ptr), 10, |
1982 list2 (build_msg_string | 1993 1, Qnil); |
1983 ("Invalid ratio constant in reader"), | 1994 } |
1984 make_string ((Ibyte *) read_ptr, len))); | 1995 } |
1985 } | 1996 |
1986 #endif | 1997 if (INTEGERP (denom) && endp == (read_ptr + len)) |
1987 if (isfloat_string (read_ptr)) | 1998 { |
1988 return make_float (atof (read_ptr)); | 1999 if (ZEROP (denom)) |
2000 { | |
2001 Fsignal (Qinvalid_read_syntax, | |
2002 list2 (build_msg_string | |
2003 ("Invalid ratio constant in reader"), | |
2004 make_string (read_ptr, len))); | |
2005 } | |
2006 #ifndef HAVE_RATIO | |
2007 /* Support a couple of trivial ratios in the reader to allow | |
2008 people to test ratio syntax: */ | |
2009 if (EQ (denom, make_fixnum (1))) | |
2010 { | |
2011 return num; | |
2012 } | |
2013 if (!NILP (Fequal (num, denom))) | |
2014 { | |
2015 return make_fixnum (1); | |
2016 } | |
2017 | |
2018 return Fsignal (Qunsupported_type, | |
2019 list3 (build_ascstring ("ratio"), | |
2020 num, denom)); | |
2021 #else | |
2022 switch (promote_args (&num, &denom)) | |
2023 { | |
2024 case FIXNUM_T: | |
2025 num = make_ratio (XREALFIXNUM (num), | |
2026 XREALFIXNUM (denom)); | |
2027 return Fcanonicalize_number (num); | |
2028 break; | |
2029 case BIGNUM_T: | |
2030 num = make_ratio_bg (XBIGNUM_DATA (num), | |
2031 XBIGNUM_DATA (denom)); | |
2032 return Fcanonicalize_number (num); | |
2033 break; | |
2034 default: | |
2035 assert (0); | |
2036 } | |
2037 #endif /* HAVE_RATIO */ | |
2038 } | |
2039 /* Otherwise, not a ratio or integer, despite that the partial | |
2040 parse may have succeeded. The trailing junk disqualifies | |
2041 it. */ | |
2042 } | |
2043 } | |
2044 | |
2045 if ((starts_like_an_int_p || '.' == firstchar) | |
2046 && isfloat_string ((char *) read_ptr)) | |
2047 { | |
2048 return make_float (atof ((char *) read_ptr)); | |
2049 } | |
1989 } | 2050 } |
1990 | 2051 |
1991 { | 2052 { |
1992 Lisp_Object sym; | 2053 Lisp_Object sym; |
1993 if (uninterned_symbol) | 2054 if (uninterned_symbol) |
2000 return sym; | 2061 return sym; |
2001 } | 2062 } |
2002 } | 2063 } |
2003 | 2064 |
2004 static Lisp_Object | 2065 static Lisp_Object |
2005 read_integer (Lisp_Object readcharfun, int base) | 2066 read_rational (Lisp_Object readcharfun, Fixnum base) |
2006 { | 2067 { |
2007 /* This function can GC */ | 2068 /* This function can GC */ |
2008 int saw_a_backslash; | 2069 int saw_a_backslash; |
2009 Ibyte *buf_end; | 2070 Ibyte *buf_end, *buf_ptr, *slash; |
2010 Bytecount len = read_atom_0 (readcharfun, -1, &saw_a_backslash); | 2071 Bytecount len = read_atom_0 (readcharfun, -1, &saw_a_backslash); |
2011 return (parse_integer | 2072 Lisp_Object num = Qnil, denom = Qzero; |
2012 (resizing_buffer_stream_ptr (XLSTREAM (Vread_buffer_stream)), | 2073 |
2013 &buf_end, len, base, 0, Qnil)); | 2074 buf_ptr = resizing_buffer_stream_ptr (XLSTREAM (Vread_buffer_stream)); |
2075 | |
2076 if ((slash = memchr (buf_ptr, '/', len)) == NULL) | |
2077 { | |
2078 /* Can't be a ratio, parse as as an integer. */ | |
2079 return parse_integer (buf_ptr, &buf_end, len, base, 0, Qnil); | |
2080 } | |
2081 | |
2082 /* No need to call isratio_string, the detailed parsing (and erroring, as | |
2083 necessary) will be done by parse_integer. */ | |
2084 num = parse_integer (buf_ptr, &buf_end, slash - buf_ptr, base, 0, Qnil); | |
2085 | |
2086 INC_IBYTEPTR (slash); | |
2087 if (slash < (buf_ptr + len)) | |
2088 { | |
2089 Ichar cc = itext_ichar (slash); | |
2090 if (cc != '+' && cc != '-') | |
2091 { | |
2092 denom = parse_integer (slash, &buf_end, len - (slash - buf_ptr), | |
2093 base, 0, Qnil); | |
2094 } | |
2095 } | |
2096 | |
2097 if (ZEROP (denom)) | |
2098 { | |
2099 /* The denominator was zero, or it had a sign specified; these are | |
2100 invalid ratios, for slightly different reasons. */ | |
2101 Fsignal (Qinvalid_read_syntax, | |
2102 list2 (build_msg_string ("Invalid ratio constant in reader"), | |
2103 make_string (buf_ptr, len))); | |
2104 } | |
2105 | |
2106 #ifndef HAVE_RATIO | |
2107 /* Support a couple of trivial ratios in the reader to allow people to test | |
2108 ratio syntax: */ | |
2109 if (EQ (denom, make_fixnum (1))) | |
2110 { | |
2111 return num; | |
2112 } | |
2113 if (!NILP (Fequal (num, denom))) | |
2114 { | |
2115 return make_fixnum (1); | |
2116 } | |
2117 | |
2118 return Fsignal (Qunsupported_type, list3 (build_ascstring ("ratio"), | |
2119 num, denom)); | |
2120 #else | |
2121 switch (promote_args (&num, &denom)) | |
2122 { | |
2123 case FIXNUM_T: | |
2124 num = make_ratio (XREALFIXNUM (num), XREALFIXNUM (denom)); | |
2125 return Fcanonicalize_number (num); | |
2126 break; | |
2127 case BIGNUM_T: | |
2128 num = make_ratio_bg (XBIGNUM_DATA (num), XBIGNUM_DATA (denom)); | |
2129 return Fcanonicalize_number (num); | |
2130 break; | |
2131 default: | |
2132 assert (0); /* promote_args() with two integers won't give us anything | |
2133 but fixnums or bignums. */ | |
2134 return Qnil; | |
2135 } | |
2136 #endif | |
2014 } | 2137 } |
2015 | 2138 |
2016 static Lisp_Object | 2139 static Lisp_Object |
2017 read_bit_vector (Lisp_Object readcharfun) | 2140 read_bit_vector (Lisp_Object readcharfun) |
2018 { | 2141 { |
2567 case '#': return intern (""); | 2690 case '#': return intern (""); |
2568 case '$': return Vload_file_name_internal; | 2691 case '$': return Vload_file_name_internal; |
2569 /* bit vectors */ | 2692 /* bit vectors */ |
2570 case '*': return read_bit_vector (readcharfun); | 2693 case '*': return read_bit_vector (readcharfun); |
2571 /* #o10 => 8 -- octal constant syntax */ | 2694 /* #o10 => 8 -- octal constant syntax */ |
2572 case 'o': case 'O': return read_integer (readcharfun, 8); | 2695 case 'o': case 'O': return read_rational (readcharfun, 8); |
2573 /* #xdead => 57005 -- hex constant syntax */ | 2696 /* #xdead => 57005 -- hex constant syntax */ |
2574 case 'x': case 'X': return read_integer (readcharfun, 16); | 2697 case 'x': case 'X': return read_rational (readcharfun, 16); |
2575 /* #b010 => 2 -- binary constant syntax */ | 2698 /* #b010 => 2 -- binary constant syntax */ |
2576 case 'b': case 'B': return read_integer (readcharfun, 2); | 2699 case 'b': case 'B': return read_rational (readcharfun, 2); |
2577 /* #r"raw\stringt" -- raw string syntax */ | 2700 /* #r"raw\stringt" -- raw string syntax */ |
2578 case 'r': return read_raw_string(readcharfun); | 2701 case 'r': return read_raw_string(readcharfun); |
2579 /* #s(foobar key1 val1 key2 val2) -- structure syntax */ | 2702 /* #s(foobar key1 val1 key2 val2) -- structure syntax */ |
2580 case 's': return read_structure (readcharfun); | 2703 case 's': return read_structure (readcharfun); |
2581 case '<': | 2704 case '<': |
2604 return obj; | 2727 return obj; |
2605 } | 2728 } |
2606 #endif | 2729 #endif |
2607 case '0': case '1': case '2': case '3': case '4': | 2730 case '0': case '1': case '2': case '3': case '4': |
2608 case '5': case '6': case '7': case '8': case '9': | 2731 case '5': case '6': case '7': case '8': case '9': |
2609 /* Reader forms that can reuse previously read objects. */ | 2732 hash_digit_syntax: |
2733 /* Reader forms that can reuse previously read objects, or the | |
2734 Common Lisp syntax for a rational of arbitrary base. */ | |
2610 { | 2735 { |
2611 Lisp_Object parsed, found; | 2736 Lisp_Object got = get_char_table (c, Vdigit_fixnum_map); |
2737 Fixnum fixval = FIXNUMP (got) ? XREALFIXNUM (got) : -1; | |
2738 Lisp_Object parsed, found; | |
2612 Ibyte *buf_end; | 2739 Ibyte *buf_end; |
2613 | 2740 |
2614 Lstream_rewind (XLSTREAM (Vread_buffer_stream)); | 2741 Lstream_rewind (XLSTREAM (Vread_buffer_stream)); |
2615 | 2742 |
2616 /* Using read_integer() here is impossible, because it | 2743 /* Using read_rational() here is impossible, because it |
2617 chokes on `='. */ | 2744 chokes on `='. */ |
2618 while (c >= '0' && c <= '9') | 2745 while (fixval >= 0 && fixval <= 9) |
2619 { | 2746 { |
2620 Lstream_put_ichar (XLSTREAM (Vread_buffer_stream), c); | 2747 Lstream_put_ichar (XLSTREAM (Vread_buffer_stream), c); |
2621 QUIT; | 2748 QUIT; |
2622 c = readchar (readcharfun); | 2749 c = readchar (readcharfun); |
2750 got = get_char_table (c, Vdigit_fixnum_map); | |
2751 fixval = FIXNUMP (got) ? XREALFIXNUM (got) : -1; | |
2623 } | 2752 } |
2624 | 2753 |
2625 /* blasted terminating 0 */ | |
2626 Lstream_put_ichar (XLSTREAM (Vread_buffer_stream), 0); | |
2627 Lstream_flush (XLSTREAM (Vread_buffer_stream)); | 2754 Lstream_flush (XLSTREAM (Vread_buffer_stream)); |
2628 | 2755 |
2629 parsed | 2756 parsed |
2630 = parse_integer (resizing_buffer_stream_ptr | 2757 = parse_integer (resizing_buffer_stream_ptr |
2631 (XLSTREAM (Vread_buffer_stream)), &buf_end, | 2758 (XLSTREAM (Vread_buffer_stream)), &buf_end, |
2632 Lstream_byte_count (XLSTREAM | 2759 Lstream_byte_count (XLSTREAM |
2633 (Vread_buffer_stream)) | 2760 (Vread_buffer_stream)), |
2634 - 1, 10, 0, Qnil); | 2761 10, 0, Qnil); |
2762 | |
2763 if ('r' == c || 'R' == c) | |
2764 { | |
2765 /* Common Lisp syntax to specify an integer of arbitrary | |
2766 base. */ | |
2767 CHECK_FIXNUM (parsed); | |
2768 return read_rational (readcharfun, XFIXNUM (parsed)); | |
2769 } | |
2635 | 2770 |
2636 found = assoc_no_quit (parsed, Vread_objects); | 2771 found = assoc_no_quit (parsed, Vread_objects); |
2637 if (c == '=') | 2772 if (c == '=') |
2638 { | 2773 { |
2639 /* #n=object returns object, but associates it with | 2774 /* #n=object returns object, but associates it with |
2679 return Fsignal (Qinvalid_read_syntax, | 2814 return Fsignal (Qinvalid_read_syntax, |
2680 list1 (build_ascstring ("#"))); | 2815 list1 (build_ascstring ("#"))); |
2681 } | 2816 } |
2682 default: | 2817 default: |
2683 { | 2818 { |
2819 Lisp_Object got = get_char_table (c, Vdigit_fixnum_map); | |
2820 Fixnum fixval = FIXNUMP (got) ? XREALFIXNUM (got) : -1; | |
2821 | |
2822 if (fixval > -1 && fixval < 10) | |
2823 { | |
2824 goto hash_digit_syntax; | |
2825 } | |
2826 | |
2684 unreadchar (readcharfun, c); | 2827 unreadchar (readcharfun, c); |
2685 return Fsignal (Qinvalid_read_syntax, | 2828 return Fsignal (Qinvalid_read_syntax, |
2686 list1 (build_ascstring ("#"))); | 2829 list1 (build_ascstring ("#"))); |
2687 } | 2830 } |
2688 } | 2831 } |