changeset 5904:ee27ca517e90

Revise print_symbol(), never calling is{float,ratio}_string(). src/ChangeLog addition: 2015-05-08 Aidan Kehoe <kehoea@parhasard.net> * print.c (print_symbol): Revise this. No longer call isfloat_string() and isratio_string() on practically every symbol seen; check explicitly for the known float format in this function, which turns out to be a more limited and cheap job than you would think. Also check for integer and ratio syntax in passing. Use Vdigit_fixnum_map when working out whether a given character is a digit. * lisp.h: Make Vdigit_fixnum_map available generally. tests/ChangeLog addition: 2015-05-08 Aidan Kehoe <kehoea@parhasard.net> * automated/lisp-reader-tests.el: Check read and print handling of symbols that look like numbers. In passing, check the read and print handling of the associated numbers.
author Aidan Kehoe <kehoea@parhasard.net>
date Fri, 08 May 2015 14:33:46 +0100
parents 5afddd952c46
children 85fd1ab80057
files src/ChangeLog src/lisp.h src/print.c tests/ChangeLog tests/automated/lisp-reader-tests.el
diffstat 5 files changed, 166 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Fri May 08 13:58:22 2015 +0100
+++ b/src/ChangeLog	Fri May 08 14:33:46 2015 +0100
@@ -1,3 +1,16 @@
+2015-05-08  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* print.c (print_symbol):
+	Revise this. No longer call isfloat_string() and isratio_string()
+	on practically every symbol seen; check explicitly for the known
+	float format in this function, which turns out to be a more
+	limited and cheap job than you would think.
+	Also check for integer and ratio syntax in passing.
+	Use Vdigit_fixnum_map when working out whether a given character
+	is a digit.
+	* lisp.h:
+	Make Vdigit_fixnum_map available generally.
+
 2015-05-08  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* data.c (Fstring_to_number):
--- a/src/lisp.h	Fri May 08 13:58:22 2015 +0100
+++ b/src/lisp.h	Fri May 08 14:33:46 2015 +0100
@@ -4605,6 +4605,8 @@
 			   Bytecount len, EMACS_INT base,
 			   Boolint junk_allowed, Lisp_Object base_table);
 
+extern Lisp_Object Vdigit_fixnum_map;
+
 extern Lisp_Object Qarrayp, Qbitp, Qchar_or_string_p, Qcharacterp,
     Qerror_conditions, Qerror_message, Qinteger_char_or_marker_p,
     Qinteger_or_char_p, Qinteger_or_marker_p, Qlambda, Qlistp, Qnatnump,
--- a/src/print.c	Fri May 08 13:58:22 2015 +0100
+++ b/src/print.c	Fri May 08 14:33:46 2015 +0100
@@ -2563,51 +2563,97 @@
       return;
     }
 
+  if (0 == size)
+    {
+      /* Compatible with GNU, but not with Common Lisp, where the syntax
+         for this symbol is ||. */
+      write_ascstring (printcharfun,
+                       (print_gensym && !IN_OBARRAY (obj)) ? "#:" : "##");
+      return;
+    }
+
   GCPRO2 (obj, printcharfun);
 
   if (print_gensym && !IN_OBARRAY (obj))
     {
       write_ascstring (printcharfun, "#:");
     }
-  else if (0 == size)
-    {
-      /* Compatible with GNU, but not with Common Lisp, where the syntax for
-         this symbol is ||. */
-      write_ascstring (printcharfun, "##");
-    }
-
-  /* Does it look like an integer or a float? */
+
+  /* Does it look like a rational or a float? */
   {
-    Ibyte *data = XSTRING_DATA (name);
-    Bytecount confusing = 0;
-
-    if (size == 0)
-      goto not_yet_confused;    /* Really confusing */
-    else if (isdigit (data[0]))
-      confusing = 0;
-    else if (size == 1)
-      goto not_yet_confused;
-    else if (data[0] == '-' || data[0] == '+')
-      confusing = 1;
-    else
-      goto not_yet_confused;
-
-    for (; confusing < size; confusing++)
+    Ibyte *data = XSTRING_DATA (name), *pend = data + XSTRING_LENGTH (name);
+    Fixnum nondigits = 0, fixval = -1;
+    Boolint confusing = 1;
+    Ichar cc = itext_ichar (data);
+    Lisp_Object got = Qnil;
+    
+    if (cc == '-' || cc == '+')
       {
-	if (!isdigit (data[confusing]) && '/' != data[confusing])
+        INC_IBYTEPTR (data);
+        if (data == pend)
           {
             confusing = 0;
-            break;
           }
       }
-  not_yet_confused:
-
-    if (!confusing)
-      /* #### Ugh, this is needlessly complex and slow for what we
-         need here.  It might be a good idea to copy equivalent code
-         from FSF.  --hniksic */
-      confusing = isfloat_string ((char *) data)
-	|| isratio_string ((char *) data);
+
+    /* No need to check for '.' when working out whether the symbol looks like
+       a number, '.' will get a backslash on printing no matter what,
+       disqualifying it from being a number when read. */
+    while (confusing && data < pend)
+      {
+        cc = itext_ichar (data);
+
+        switch (cc)
+          {
+            /* A symbol like 2e10 cound be confused with a float: */
+          case 'e':
+          case 'E':
+            /* As can one like 123/456: */
+          case '/':
+            nondigits++;
+            confusing = nondigits < 2
+              /* If it starts with an E or a slash, that's fine, it can't be a
+                 float. */
+              && data != XSTRING_DATA (name)
+              /* And if it ends with an e or a slash that's fine too. */
+              && (data + itext_ichar_len (data)) != pend;
+            break;
+
+            /* There can be a sign in the exponent. Such a sign needs to be
+               directly after an e and to have trailing digits after it to be
+               valid float syntax. */
+          case '+':
+          case '-':
+            confusing = (1 == nondigits)
+              && data != XSTRING_DATA (name)
+              && (data + itext_ichar_len (data));
+            if (confusing)
+              {
+                Ibyte *lastp = data;
+                Ichar clast;
+
+                DEC_IBYTEPTR (lastp);
+                clast = itext_ichar (lastp);
+
+                confusing = clast == 'E' || clast == 'e';
+              }
+            break;
+
+            /* A symbol that is all decimal digits could be confused with an
+               integer: */
+          default:
+            got = get_char_table (cc, Vdigit_fixnum_map);
+            fixval = FIXNUMP (got) ? XREALFIXNUM (got) : -1;
+            if (fixval < 0 || fixval > 9)
+              {
+                confusing = 0;
+              }
+            break;
+          }
+
+        INC_IBYTEPTR (data);
+      }
+
     if (confusing)
       write_ascstring (printcharfun, "\\");
   }
@@ -2618,6 +2664,9 @@
 
     for (i = 0; i < size; i++)
       {
+        /* In the event that we adopt a non-ASCII-compatible internal format,
+           this will no longer be Mule-safe. As of May 2015, that is very,
+           very unlikely. */
 	switch (string_byte (name, i))
 	  {
 	  case  0: case  1: case  2: case  3:
--- a/tests/ChangeLog	Fri May 08 13:58:22 2015 +0100
+++ b/tests/ChangeLog	Fri May 08 14:33:46 2015 +0100
@@ -1,3 +1,10 @@
+2015-05-08  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* automated/lisp-reader-tests.el:
+	Check read and print handling of symbols that look like
+	numbers. In passing, check the read and print handling of the
+	associated numbers.
+
 2015-04-11  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* automated/lisp-tests.el:
--- a/tests/automated/lisp-reader-tests.el	Fri May 08 13:58:22 2015 +0100
+++ b/tests/automated/lisp-reader-tests.el	Fri May 08 14:33:46 2015 +0100
@@ -155,3 +155,65 @@
 					   (+ most-positive-fixnum 2)
 					   (+ most-positive-fixnum 2)))))
 	  "checking bignum object labels don't wrap on reading"))
+
+(Assert (not (eq (intern "") (read (prin1-to-string (make-symbol "")))))
+        "checking uninterned zero-length symbol printed distinctly")
+
+;; Check the read and print handling of symbols that look like numbers. In
+;; passing, check the read and print handling of the associated numbers.
+(Assert (eql (log 1) '0e0) "checking float syntax with e accepted")
+(Assert (eql (log 1) 0.0) "checking float syntax with decimal point accepted")
+(Assert (not (ratiop (read "2/-3")))
+        "ratios can't have a negative sign in the denominator")
+(Assert (not (ratiop (read "2/+3")))
+        "ratios can't have a positive sign in the denominator")
+
+(macrolet
+    ((Assert-no-symbol-number-confusion (&rest values)
+       `(let ((print-gensym t)
+              (print-readably t))
+         ,@(loop
+            for (type . rest) in values
+            collect (cons
+                     'progn
+                     (loop for string in rest
+                           collect
+                           `(progn
+                             (Assert (symbolp (read (prin1-to-string
+                                                     (make-symbol ,string)))))
+                             (Assert (equal (symbol-name
+                                             (read (prin1-to-string
+                                                    (make-symbol ,string))))
+                                             ,string))
+                             ,@(when (ignore-errors (coerce-number 1 type))
+                                     `((Assert (typep (read ,string)
+                                                      ',type))
+                                       (Assert (eql (string-to-number
+                                                     ,string)
+                                                (read ,string))))))))))))
+  (Assert-no-symbol-number-confusion
+   (float "0.0" "0E0" "-.0" "0.0e0" "3.1415926535897932384E0"
+          "6.02E+23" "602E+21" "3.010299957e-1" "-0.000000001e9")
+   (fixnum "1" "1." "1073741823" "-1" "-1073741824")
+   (ratio "1/2" "2/5" "-1073741822/1073741823"
+          "+2/3" "-3/2"
+          "2894802230932904885589274625217197696331749616641014100986439600\
+1978282409984/20"
+          "+289480223093290488558927462521719769633174961664101410098643960\
+01978282409984/20"
+          "-289480223093290488558927462521719769633174961664101410098643960\
+01978282409984/20"
+          "20/2894802230932904885589274625217197696331749616641014100986439\
+6001978282409984"
+          "+20/289480223093290488558927462521719769633174961664101410098643\
+96001978282409984"
+          "-20/289480223093290488558927462521719769633174961664101410098643\
+96001978282409984")
+   ;; These two are (lsh 1 254) and (lognot (lsh 1 254)). The assumption that
+   ;; they are always bignums if they can be made into rationals should hold
+   ;; for another couple of processor generations at least.
+   (bignum
+    "2894802230932904885589274625217197696331749616641014100986439600197828\
+2409984"
+    "-289480223093290488558927462521719769633174961664101410098643960019782\
+82409985")))