comparison src/lread.c @ 5908:6174848f3e6c

Use parse_integer() in read_atom(); support bases with ratios like integers src/ChangeLog addition: 2015-05-08 Aidan Kehoe <kehoea@parhasard.net> * data.c (init_errors_once_early): Move the Qunsupported_type here from numbers.c, so it's available when the majority of our types are not supported. * general-slots.h: Add it here, too. * number.c: Remove the definition of Qunsupported_type from here. * lread.c (read_atom): Check if the first character could reflect a rational, if so, call parse_integer(), don't check the syntax of the other characters. This allows us to accept the non-ASCII digit characters too. If that worked partially, but not completely, and the next char is a slash, try to parse as a ratio. If that fails, try isfloat_string(), but only if the first character could plausibly be part of a float. Otherwise, treat as a symbol. * lread.c (read_rational): Rename from read_integer. Handle ratios with the same radix specification as was used for integers. * lread.c (read1): Rename read_integer in this function. Support the Common Lisp #NNNrMMM syntax for parsing a number MMM of arbitrary radix NNN. man/ChangeLog addition: 2015-05-08 Aidan Kehoe <kehoea@parhasard.net> * lispref/numbers.texi (Numbers): Describe the newly-supported arbitrary-base syntax for rationals (integers and ratios). Describe that ratios can take the same base specification as integers, something also new. tests/ChangeLog addition: 2015-05-08 Aidan Kehoe <kehoea@parhasard.net> * automated/lisp-reader-tests.el: Check the arbitrary-base integer reader syntax support, just added. Check the reader base support for ratios, just added. Check the non-ASCII-digit support in the reader, just added.
author Aidan Kehoe <kehoea@parhasard.net>
date Sat, 09 May 2015 00:40:57 +0100
parents 750fab17b299
children d138e600aa3a
comparison
equal deleted inserted replaced
5907:87e29d93e11b 5908:6174848f3e6c
1928 int uninterned_symbol) 1928 int uninterned_symbol)
1929 { 1929 {
1930 /* This function can GC */ 1930 /* This function can GC */
1931 int saw_a_backslash; 1931 int saw_a_backslash;
1932 Bytecount len = read_atom_0 (readcharfun, firstchar, &saw_a_backslash); 1932 Bytecount len = read_atom_0 (readcharfun, firstchar, &saw_a_backslash);
1933 char *read_ptr = (char *) 1933 Ibyte *read_ptr
1934 resizing_buffer_stream_ptr (XLSTREAM (Vread_buffer_stream)); 1934 = (Ibyte *) resizing_buffer_stream_ptr (XLSTREAM (Vread_buffer_stream));
1935 1935
1936 /* Is it an integer? */ 1936 /* Is it an integer?
1937 if (! (saw_a_backslash || uninterned_symbol)) 1937
1938 { 1938 If a token had any backslashes in it, it is disqualified from being an
1939 /* If a token had any backslashes in it, it is disqualified from 1939 integer or a float. This means that 123\456 is a symbol, as is \123
1940 being an integer or a float. This means that 123\456 is a 1940 (which is the way (intern "123") prints). Also, if token was preceded by
1941 symbol, as is \123 (which is the way (intern "123") prints). 1941 #:, it's always a symbol. */
1942 Also, if token was preceded by #:, it's always a symbol. 1942
1943 */ 1943 if (!(saw_a_backslash || uninterned_symbol))
1944 char *p = read_ptr + len; 1944 {
1945 char *p1 = read_ptr; 1945 Lisp_Object got = get_char_table (firstchar, Vdigit_fixnum_map);
1946 1946 Fixnum fixval = FIXNUMP (got) ? XREALFIXNUM (got) : -1;
1947 if (*p1 == '+' || *p1 == '-') p1++; 1947 Boolint starts_like_an_int_p = (fixval > -1 && fixval < 10)
1948 if (p1 != p) 1948 || firstchar == '+' || firstchar == '-';
1949 { 1949 Ibyte *endp = NULL;
1950 int c; 1950 Lisp_Object num = Qnil;
1951 1951
1952 while (p1 != p && (c = *p1) >= '0' && c <= '9') 1952 /* Attempt to parse as an integer, with :JUNK-ALLOWED t. Do a gross
1953 p1++; 1953 plausibility check (above) first, though, we'd prefer not to call
1954 /* Integers can have trailing decimal points. */ 1954 parse_integer() on every symbol we see. */
1955 if (p1 > read_ptr && p1 < p && *p1 == '.') 1955 if (starts_like_an_int_p)
1956 p1++; 1956 {
1957 if (p1 == p) 1957 num = parse_integer (read_ptr, &endp, len, 10, 1, Qnil);
1958 }
1959
1960 if (INTEGERP (num))
1961 {
1962 if (endp == (read_ptr + len))
1958 { 1963 {
1959 Ibyte *buf_end; 1964 /* We consumed the whole atom, it's definitely an integer. */
1960 /* It is an integer. */ 1965 return num;
1961 if (p1[-1] == '.') 1966 }
1967 else if ('.' == itext_ichar (endp))
1968 {
1969 /* Trailing decimal point is allowed in the Lisp reader, this is
1970 an integer. */
1971 INC_IBYTEPTR (endp);
1972 if (endp == (read_ptr + len))
1962 { 1973 {
1963 len -= 1; 1974 return num;
1964 } 1975 }
1965 1976 }
1966 return parse_integer ((Ibyte *) read_ptr, &buf_end, len, 10, 1977 else if ('/' == itext_ichar (endp))
1967 0, Qnil); 1978 {
1968 } 1979 /* Maybe it's a ratio? */
1969 } 1980 Lisp_Object denom = Qnil;
1970 #ifdef HAVE_RATIO 1981
1971 if (isratio_string (read_ptr)) 1982 INC_IBYTEPTR (endp);
1972 { 1983
1973 /* GMP ratio_set_string has no effect with initial + sign */ 1984 if (endp < (read_ptr + len))
1974 if (*read_ptr == '+') 1985 {
1975 read_ptr++; 1986 Ichar cc = itext_ichar (endp);
1976 ratio_set_string (scratch_ratio, read_ptr, 0); 1987 /* No leading sign allowed in the denominator, that would
1977 if (bignum_sign (ratio_denominator (scratch_ratio)) != 0) { 1988 make it a symbol (according to Common Lisp, of course).*/
1978 ratio_canonicalize (scratch_ratio); 1989 if (cc != '+' && cc != '-')
1979 return Fcanonicalize_number (make_ratio_rt (scratch_ratio)); 1990 {
1980 } 1991 denom = parse_integer (endp, &endp,
1981 return Fsignal (Qinvalid_read_syntax, 1992 len - (endp - read_ptr), 10,
1982 list2 (build_msg_string 1993 1, Qnil);
1983 ("Invalid ratio constant in reader"), 1994 }
1984 make_string ((Ibyte *) read_ptr, len))); 1995 }
1985 } 1996
1986 #endif 1997 if (INTEGERP (denom) && endp == (read_ptr + len))
1987 if (isfloat_string (read_ptr)) 1998 {
1988 return make_float (atof (read_ptr)); 1999 if (ZEROP (denom))
2000 {
2001 Fsignal (Qinvalid_read_syntax,
2002 list2 (build_msg_string
2003 ("Invalid ratio constant in reader"),
2004 make_string (read_ptr, len)));
2005 }
2006 #ifndef HAVE_RATIO
2007 /* Support a couple of trivial ratios in the reader to allow
2008 people to test ratio syntax: */
2009 if (EQ (denom, make_fixnum (1)))
2010 {
2011 return num;
2012 }
2013 if (!NILP (Fequal (num, denom)))
2014 {
2015 return make_fixnum (1);
2016 }
2017
2018 return Fsignal (Qunsupported_type,
2019 list3 (build_ascstring ("ratio"),
2020 num, denom));
2021 #else
2022 switch (promote_args (&num, &denom))
2023 {
2024 case FIXNUM_T:
2025 num = make_ratio (XREALFIXNUM (num),
2026 XREALFIXNUM (denom));
2027 return Fcanonicalize_number (num);
2028 break;
2029 case BIGNUM_T:
2030 num = make_ratio_bg (XBIGNUM_DATA (num),
2031 XBIGNUM_DATA (denom));
2032 return Fcanonicalize_number (num);
2033 break;
2034 default:
2035 assert (0);
2036 }
2037 #endif /* HAVE_RATIO */
2038 }
2039 /* Otherwise, not a ratio or integer, despite that the partial
2040 parse may have succeeded. The trailing junk disqualifies
2041 it. */
2042 }
2043 }
2044
2045 if ((starts_like_an_int_p || '.' == firstchar)
2046 && isfloat_string ((char *) read_ptr))
2047 {
2048 return make_float (atof ((char *) read_ptr));
2049 }
1989 } 2050 }
1990 2051
1991 { 2052 {
1992 Lisp_Object sym; 2053 Lisp_Object sym;
1993 if (uninterned_symbol) 2054 if (uninterned_symbol)
2000 return sym; 2061 return sym;
2001 } 2062 }
2002 } 2063 }
2003 2064
2004 static Lisp_Object 2065 static Lisp_Object
2005 read_integer (Lisp_Object readcharfun, int base) 2066 read_rational (Lisp_Object readcharfun, Fixnum base)
2006 { 2067 {
2007 /* This function can GC */ 2068 /* This function can GC */
2008 int saw_a_backslash; 2069 int saw_a_backslash;
2009 Ibyte *buf_end; 2070 Ibyte *buf_end, *buf_ptr, *slash;
2010 Bytecount len = read_atom_0 (readcharfun, -1, &saw_a_backslash); 2071 Bytecount len = read_atom_0 (readcharfun, -1, &saw_a_backslash);
2011 return (parse_integer 2072 Lisp_Object num = Qnil, denom = Qzero;
2012 (resizing_buffer_stream_ptr (XLSTREAM (Vread_buffer_stream)), 2073
2013 &buf_end, len, base, 0, Qnil)); 2074 buf_ptr = resizing_buffer_stream_ptr (XLSTREAM (Vread_buffer_stream));
2075
2076 if ((slash = memchr (buf_ptr, '/', len)) == NULL)
2077 {
2078 /* Can't be a ratio, parse as as an integer. */
2079 return parse_integer (buf_ptr, &buf_end, len, base, 0, Qnil);
2080 }
2081
2082 /* No need to call isratio_string, the detailed parsing (and erroring, as
2083 necessary) will be done by parse_integer. */
2084 num = parse_integer (buf_ptr, &buf_end, slash - buf_ptr, base, 0, Qnil);
2085
2086 INC_IBYTEPTR (slash);
2087 if (slash < (buf_ptr + len))
2088 {
2089 Ichar cc = itext_ichar (slash);
2090 if (cc != '+' && cc != '-')
2091 {
2092 denom = parse_integer (slash, &buf_end, len - (slash - buf_ptr),
2093 base, 0, Qnil);
2094 }
2095 }
2096
2097 if (ZEROP (denom))
2098 {
2099 /* The denominator was zero, or it had a sign specified; these are
2100 invalid ratios, for slightly different reasons. */
2101 Fsignal (Qinvalid_read_syntax,
2102 list2 (build_msg_string ("Invalid ratio constant in reader"),
2103 make_string (buf_ptr, len)));
2104 }
2105
2106 #ifndef HAVE_RATIO
2107 /* Support a couple of trivial ratios in the reader to allow people to test
2108 ratio syntax: */
2109 if (EQ (denom, make_fixnum (1)))
2110 {
2111 return num;
2112 }
2113 if (!NILP (Fequal (num, denom)))
2114 {
2115 return make_fixnum (1);
2116 }
2117
2118 return Fsignal (Qunsupported_type, list3 (build_ascstring ("ratio"),
2119 num, denom));
2120 #else
2121 switch (promote_args (&num, &denom))
2122 {
2123 case FIXNUM_T:
2124 num = make_ratio (XREALFIXNUM (num), XREALFIXNUM (denom));
2125 return Fcanonicalize_number (num);
2126 break;
2127 case BIGNUM_T:
2128 num = make_ratio_bg (XBIGNUM_DATA (num), XBIGNUM_DATA (denom));
2129 return Fcanonicalize_number (num);
2130 break;
2131 default:
2132 assert (0); /* promote_args() with two integers won't give us anything
2133 but fixnums or bignums. */
2134 return Qnil;
2135 }
2136 #endif
2014 } 2137 }
2015 2138
2016 static Lisp_Object 2139 static Lisp_Object
2017 read_bit_vector (Lisp_Object readcharfun) 2140 read_bit_vector (Lisp_Object readcharfun)
2018 { 2141 {
2567 case '#': return intern (""); 2690 case '#': return intern ("");
2568 case '$': return Vload_file_name_internal; 2691 case '$': return Vload_file_name_internal;
2569 /* bit vectors */ 2692 /* bit vectors */
2570 case '*': return read_bit_vector (readcharfun); 2693 case '*': return read_bit_vector (readcharfun);
2571 /* #o10 => 8 -- octal constant syntax */ 2694 /* #o10 => 8 -- octal constant syntax */
2572 case 'o': case 'O': return read_integer (readcharfun, 8); 2695 case 'o': case 'O': return read_rational (readcharfun, 8);
2573 /* #xdead => 57005 -- hex constant syntax */ 2696 /* #xdead => 57005 -- hex constant syntax */
2574 case 'x': case 'X': return read_integer (readcharfun, 16); 2697 case 'x': case 'X': return read_rational (readcharfun, 16);
2575 /* #b010 => 2 -- binary constant syntax */ 2698 /* #b010 => 2 -- binary constant syntax */
2576 case 'b': case 'B': return read_integer (readcharfun, 2); 2699 case 'b': case 'B': return read_rational (readcharfun, 2);
2577 /* #r"raw\stringt" -- raw string syntax */ 2700 /* #r"raw\stringt" -- raw string syntax */
2578 case 'r': return read_raw_string(readcharfun); 2701 case 'r': return read_raw_string(readcharfun);
2579 /* #s(foobar key1 val1 key2 val2) -- structure syntax */ 2702 /* #s(foobar key1 val1 key2 val2) -- structure syntax */
2580 case 's': return read_structure (readcharfun); 2703 case 's': return read_structure (readcharfun);
2581 case '<': 2704 case '<':
2604 return obj; 2727 return obj;
2605 } 2728 }
2606 #endif 2729 #endif
2607 case '0': case '1': case '2': case '3': case '4': 2730 case '0': case '1': case '2': case '3': case '4':
2608 case '5': case '6': case '7': case '8': case '9': 2731 case '5': case '6': case '7': case '8': case '9':
2609 /* Reader forms that can reuse previously read objects. */ 2732 hash_digit_syntax:
2733 /* Reader forms that can reuse previously read objects, or the
2734 Common Lisp syntax for a rational of arbitrary base. */
2610 { 2735 {
2611 Lisp_Object parsed, found; 2736 Lisp_Object got = get_char_table (c, Vdigit_fixnum_map);
2737 Fixnum fixval = FIXNUMP (got) ? XREALFIXNUM (got) : -1;
2738 Lisp_Object parsed, found;
2612 Ibyte *buf_end; 2739 Ibyte *buf_end;
2613 2740
2614 Lstream_rewind (XLSTREAM (Vread_buffer_stream)); 2741 Lstream_rewind (XLSTREAM (Vread_buffer_stream));
2615 2742
2616 /* Using read_integer() here is impossible, because it 2743 /* Using read_rational() here is impossible, because it
2617 chokes on `='. */ 2744 chokes on `='. */
2618 while (c >= '0' && c <= '9') 2745 while (fixval >= 0 && fixval <= 9)
2619 { 2746 {
2620 Lstream_put_ichar (XLSTREAM (Vread_buffer_stream), c); 2747 Lstream_put_ichar (XLSTREAM (Vread_buffer_stream), c);
2621 QUIT; 2748 QUIT;
2622 c = readchar (readcharfun); 2749 c = readchar (readcharfun);
2750 got = get_char_table (c, Vdigit_fixnum_map);
2751 fixval = FIXNUMP (got) ? XREALFIXNUM (got) : -1;
2623 } 2752 }
2624 2753
2625 /* blasted terminating 0 */
2626 Lstream_put_ichar (XLSTREAM (Vread_buffer_stream), 0);
2627 Lstream_flush (XLSTREAM (Vread_buffer_stream)); 2754 Lstream_flush (XLSTREAM (Vread_buffer_stream));
2628 2755
2629 parsed 2756 parsed
2630 = parse_integer (resizing_buffer_stream_ptr 2757 = parse_integer (resizing_buffer_stream_ptr
2631 (XLSTREAM (Vread_buffer_stream)), &buf_end, 2758 (XLSTREAM (Vread_buffer_stream)), &buf_end,
2632 Lstream_byte_count (XLSTREAM 2759 Lstream_byte_count (XLSTREAM
2633 (Vread_buffer_stream)) 2760 (Vread_buffer_stream)),
2634 - 1, 10, 0, Qnil); 2761 10, 0, Qnil);
2762
2763 if ('r' == c || 'R' == c)
2764 {
2765 /* Common Lisp syntax to specify an integer of arbitrary
2766 base. */
2767 CHECK_FIXNUM (parsed);
2768 return read_rational (readcharfun, XFIXNUM (parsed));
2769 }
2635 2770
2636 found = assoc_no_quit (parsed, Vread_objects); 2771 found = assoc_no_quit (parsed, Vread_objects);
2637 if (c == '=') 2772 if (c == '=')
2638 { 2773 {
2639 /* #n=object returns object, but associates it with 2774 /* #n=object returns object, but associates it with
2679 return Fsignal (Qinvalid_read_syntax, 2814 return Fsignal (Qinvalid_read_syntax,
2680 list1 (build_ascstring ("#"))); 2815 list1 (build_ascstring ("#")));
2681 } 2816 }
2682 default: 2817 default:
2683 { 2818 {
2819 Lisp_Object got = get_char_table (c, Vdigit_fixnum_map);
2820 Fixnum fixval = FIXNUMP (got) ? XREALFIXNUM (got) : -1;
2821
2822 if (fixval > -1 && fixval < 10)
2823 {
2824 goto hash_digit_syntax;
2825 }
2826
2684 unreadchar (readcharfun, c); 2827 unreadchar (readcharfun, c);
2685 return Fsignal (Qinvalid_read_syntax, 2828 return Fsignal (Qinvalid_read_syntax,
2686 list1 (build_ascstring ("#"))); 2829 list1 (build_ascstring ("#")));
2687 } 2830 }
2688 } 2831 }