view tests/automated/lisp-reader-tests.el @ 5908:6174848f3e6c

Use parse_integer() in read_atom(); support bases with ratios like integers src/ChangeLog addition: 2015-05-08 Aidan Kehoe <kehoea@parhasard.net> * data.c (init_errors_once_early): Move the Qunsupported_type here from numbers.c, so it's available when the majority of our types are not supported. * general-slots.h: Add it here, too. * number.c: Remove the definition of Qunsupported_type from here. * lread.c (read_atom): Check if the first character could reflect a rational, if so, call parse_integer(), don't check the syntax of the other characters. This allows us to accept the non-ASCII digit characters too. If that worked partially, but not completely, and the next char is a slash, try to parse as a ratio. If that fails, try isfloat_string(), but only if the first character could plausibly be part of a float. Otherwise, treat as a symbol. * lread.c (read_rational): Rename from read_integer. Handle ratios with the same radix specification as was used for integers. * lread.c (read1): Rename read_integer in this function. Support the Common Lisp #NNNrMMM syntax for parsing a number MMM of arbitrary radix NNN. man/ChangeLog addition: 2015-05-08 Aidan Kehoe <kehoea@parhasard.net> * lispref/numbers.texi (Numbers): Describe the newly-supported arbitrary-base syntax for rationals (integers and ratios). Describe that ratios can take the same base specification as integers, something also new. tests/ChangeLog addition: 2015-05-08 Aidan Kehoe <kehoea@parhasard.net> * automated/lisp-reader-tests.el: Check the arbitrary-base integer reader syntax support, just added. Check the reader base support for ratios, just added. Check the non-ASCII-digit support in the reader, just added.
author Aidan Kehoe <kehoea@parhasard.net>
date Sat, 09 May 2015 00:40:57 +0100
parents ee27ca517e90
children
line wrap: on
line source

;; Copyright (C) 2005 Martin Kuehl.

;; Author: Martin Kuehl <martin.kuehl@gmail.com>
;; Maintainer: Martin Kuehl <martin.kuehl@gmail.com>
;; Created: 2005
;; Keywords: tests

;; This file is part of XEmacs.

;; XEmacs is free software: you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the
;; Free Software Foundation, either version 3 of the License, or (at your
;; option) any later version.

;; XEmacs is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with XEmacs.  If not, see <http://www.gnu.org/licenses/>.

;;; Synched up with: Not in FSF.

;;; Commentary:

;; Test the lisp reader.
;; See test-harness.el for instructions on how to run these tests.

;;; Raw Strings
;;; ===========

;; Equality to "traditional" strings
;; ---------------------------------
;; Every entry pairs a raw-string literal with the conventionally escaped
;; string literal it has to be `string=' to.
(loop
  for strings in '((#r"xyz"   "xyz")     ; nothing to escape
                   (#r"\xyz"  "\\xyz")   ; leading backslash
                   (#r"\\xyz" "\\\\xyz") ; two leading backslashes
                   (#r"\nxyz" "\\nxyz")  ; leading escape sequence
                   (#r"\"xyz" "\\\"xyz") ; leading quote
                   (#r"xy\z"  "xy\\z")   ; embedded backslash
                   (#r"xy\\z" "xy\\\\z") ; two embedded backslashes
                   (#r"xy\nz" "xy\\nz")  ; embedded escape sequence
                   (#r"xy\"z" "xy\\\"z") ; embedded quote
                   ;; A single trailing backslash is an error, so it is
                   ;; not part of this list:
                   ;;(#r"xyz\"  "xyz\\")
                   (#r"xyz\\" "xyz\\\\") ; two trailing backslashes
                   (#r"xyz\n" "xyz\\n")  ; trailing escape sequence
                   (#r"xyz\"" "xyz\\\"") ; trailing quote
                   (#ru"\u00ABxyz" "\u00ABxyz") ; #ru, \u escape first
                   (#rU"\U000000ABxyz" "\U000000ABxyz") ; #rU, \U escape first
                   (#rU"xyz\u00AB" "xyz\u00AB") ; #rU, \u escape last
                   )
  do (Assert (apply #'string= strings)))

;; Odd number of backslashes at the end
;; ------------------------------------
;; Each piece of source text is inserted into a scratch buffer and evaluated;
;; the evaluation has to signal `end-of-file'.
(loop
  for source in '("#r\"xyz\\\""         ; `#r"xyz\"': escaped delimiter
                  "#r\"xyz\\\\\\\""     ; `#r"xyz\\\"': escaped delimiter
                  )
  do (with-temp-buffer
       (insert source)
       (Check-Error end-of-file (eval-buffer))))

;; Alternate string/regex delimiters
;; ---------------------------------
;; None of these delimiters is accepted after #r: evaluating each piece of
;; source text has to signal `invalid-read-syntax' with a message matching
;; "unrecognized raw string".
(loop
  for source in '("#r/xyz/"             ; Perl syntax
                  "#r:ix/xyz/"          ; Extended Perl syntax
                  "#r|xyz|"             ; TeX syntax
                  "#r[xyz]"             ; (uncommon) Perl syntax
                  "#r<xyz>"             ; Perl6 syntax?
                  "#r(xyz)"             ; arbitrary syntax
                  "#r{xyz}"             ; arbitrary syntax
                  "#r,xyz,"             ; arbitrary syntax
                  "#r!xyz!"             ; arbitrary syntax
                  )
  do (with-temp-buffer
       (insert source)
       (Check-Error-Message invalid-read-syntax "unrecognized raw string"
                            (eval-buffer))))

;; Reading a decimal integer written with an explicit leading `+' must give
;; the same number that was formatted, both just above and just below
;; `most-positive-fixnum'.  Only run when the `bignum' feature is available.
(when (featurep 'bignum)
  ;; This case failed, up to 20110501.
  (Assert (eql (1+ most-positive-fixnum)
	       (read (format "+%d" (1+ most-positive-fixnum))))
	  "checking leading + is handled properly if reading a bignum")
  ;; This case never failed.
  (Assert (eql (1- most-positive-fixnum)
	       (read (format "+%d" (1- most-positive-fixnum))))
	  "checking leading + is handled properly if reading a fixnum"))

;; Test print-circle, together with the reader's #N= / #N# labels.
;;
;; Each of the six literals bound below contains a reference to itself
;; through a read label.  They are first checked as read.  Then the hash
;; table, the char table and the range table are each tied into a cycle with
;; CONS, printed with `print-circle' and `print-readably' bound to t, read
;; back, and the identity relationships inside the copy are checked.
(let ((cons '#1=(1 2 3 4 5 6 . #1#))
      (vector #2=[1 2 3 4 5 6 #2#])
      (compiled-function #3=#[(argument) "\xc2\x09\x08\"\x87"
                              [pi argument #3#] 3])
      (char-table #4=#s(char-table :type generic :data (?\u0080 #4#)))
      (hash-table #5=#s(hash-table :test eql :data (a b c #5# e f)))
      (range-table #6=#s(range-table :type start-closed-end-open
                                     :data ((#x00 #xff) hello
                                            (#x100 #x1ff) #6#
                                            (#x200 #x2ff) everyone)))
      (print-readably t)
      (print-circle t)
      ;; Receive the results of the print/read round trips below.
      deserialized-char-table deserialized-hash-table deserialized-range-table)
  ;; The objects as read: every #N# must be the very object labelled #N=.
  (Assert (eq (nthcdr 6 cons) cons)
          "checking basic recursive cons read properly")
  (Assert (eq vector (aref vector (1- (length vector))))
          "checking basic recursive vector read properly")
  (Assert (eq compiled-function
              (find-if #'compiled-function-p
                       (compiled-function-constants compiled-function)))
          "checking basic recursive compiled-function read properly")
  ;; Calling the compiled function with one argument is expected to signal
  ;; `wrong-number-of-arguments'.
  (Check-Error wrong-number-of-arguments (funcall compiled-function 3))
  (Assert (eq char-table (get-char-table ?\u0080 char-table))
          "checking basic recursive char table read properly")
  ;; In the hash table's data, key `c' maps to the table itself.
  (Assert (eq hash-table (gethash 'c hash-table))
          "checking basic recursive hash table read properly")
  ;; #x180 lies in the (#x100 #x1ff) range, whose value is the table itself.
  (Assert (eq range-table (get-range-table #x180 range-table))
          "checking basic recursive range table read properly")

  ;; Hash table: store CONS under key `g' and make the table the car of CONS,
  ;; so table and list refer to each other, then print and re-read the table.
  (setf (gethash 'g hash-table) cons
        (car cons) hash-table
        deserialized-hash-table (read (prin1-to-string hash-table)))
  (Assert (not (eq deserialized-hash-table hash-table))
          "checking printing and reading hash-table creates a new object")
  (Assert (eq deserialized-hash-table (gethash 'c deserialized-hash-table))
          "checking the lisp reader handles deserialized hash-table identity")
  (Assert (eq deserialized-hash-table
              (car (gethash 'g deserialized-hash-table)))
          "checking the reader handles deserialization identity, hash-table")

  ;; Char table: same procedure, with CONS stored as the value for ?a.
  (setf (get-char-table ?a char-table) cons
        (car cons) char-table
        deserialized-char-table (read (prin1-to-string char-table)))
  (Assert (not (eq deserialized-char-table char-table))
          "checking printing and reading creates a new object")
  (Assert (eq deserialized-char-table
              (get-char-table ?\u0080 deserialized-char-table))
          "checking the lisp reader handles deserialization identity")
  (Assert (eq deserialized-char-table
              (car (get-char-table ?a deserialized-char-table)))
          "checking the lisp reader handles deserialization identity, mixed")

  ;; Range table: same procedure, with CONS stored for the range starting at
  ;; #x1000 and ending at #x1010.
  (put-range-table #x1000 #x1010 cons range-table)
  (setf (car cons) range-table
        deserialized-range-table (read (prin1-to-string range-table)))
  (Assert (not (eq deserialized-range-table range-table))
          "checking printing and reading creates a new object")
  (Assert (eq deserialized-range-table
              (get-range-table #x101 deserialized-range-table))
          "checking the lisp reader handles deserialization identity")
  (Assert (eq deserialized-range-table
              (car (get-range-table #x1001 deserialized-range-table)))
          "checking the lisp reader handles deserialization identity, mixed"))

;; Use a read label numbered (+ most-positive-fixnum 2), i.e. a bignum, for
;; the outer list, and the small label 1 for a nested list.  The trailing
;; back-reference uses the big label, so the outer list must come out
;; circular, which the test detects by `list-length' returning nil.  Only run
;; when the `bignum' feature is available.
(when (featurep 'bignum)
  (Assert (null (list-length (read (format "#%d=(1 #1=(5) 3 4 . #%d#)"
					   (+ most-positive-fixnum 2)
					   (+ most-positive-fixnum 2)))))
	  "checking bignum object labels don't wrap on reading"))

;; Printing an uninterned symbol whose name is the empty string and reading
;; the result back must not produce the interned symbol with the empty name.
(Assert (not (eq (intern "") (read (prin1-to-string (make-symbol "")))))
        "checking uninterned zero-length symbol printed distinctly")

;; Check the read and print handling of symbols that look like numbers. In
;; passing, check the read and print handling of the associated numbers.

;; Both `0e0' and `0.0' must read as the float that (log 1) returns.
(Assert (eql (log 1) '0e0) "checking float syntax with e accepted")
(Assert (eql (log 1) 0.0) "checking float syntax with decimal point accepted")
;; A sign after the slash must prevent the text from being read as a ratio.
(Assert (not (ratiop (read "2/-3")))
        "ratios can't have a negative sign in the denominator")
(Assert (not (ratiop (read "2/+3")))
        "ratios can't have a positive sign in the denominator")

(macrolet
    ((Assert-no-symbol-number-confusion (&rest values)
       ;; VALUES is a list of (TYPE STRING...) entries.  For each STRING the
       ;; expansion asserts that an uninterned symbol named STRING prints and
       ;; reads back as a symbol with that same name.  If, at macro-expansion
       ;; time, (coerce-number 1 TYPE) succeeds, it also asserts that reading
       ;; STRING gives an object of TYPE that is `eql' to what
       ;; `string-to-number' returns for STRING.  All assertions run with
       ;; `print-gensym' and `print-readably' bound to t.
       (let ((groups
              (mapcar
               #'(lambda (entry)
                   (let ((number-type (car entry)))
                     (cons
                      'progn
                      (mapcar
                       #'(lambda (text)
                           (append
                            (list
                             'progn
                             `(Assert (symbolp (read (prin1-to-string
                                                      (make-symbol ,text)))))
                             `(Assert (equal (symbol-name
                                              (read (prin1-to-string
                                                     (make-symbol ,text))))
                                             ,text)))
                            ;; Numeric checks only for types this build
                            ;; can actually produce.
                            (if (ignore-errors (coerce-number 1 number-type))
                                (list
                                 `(Assert (typep (read ,text)
                                                 ',number-type))
                                 `(Assert (eql (string-to-number ,text)
                                               (read ,text)))))))
                       (cdr entry)))))
               values)))
         `(let ((print-gensym t)
                (print-readably t))
            ,@groups))))
  (Assert-no-symbol-number-confusion
   (float "0.0" "0E0" "-.0" "0.0e0" "3.1415926535897932384E0"
          "6.02E+23" "602E+21" "3.010299957e-1" "-0.000000001e9")
   (fixnum "1" "1." "1073741823" "-1" "-1073741824")
   (ratio "1/2" "2/5" "-1073741822/1073741823"
          "+2/3" "-3/2"
          "2894802230932904885589274625217197696331749616641014100986439600\
1978282409984/20"
          "+289480223093290488558927462521719769633174961664101410098643960\
01978282409984/20"
          "-289480223093290488558927462521719769633174961664101410098643960\
01978282409984/20"
          "20/2894802230932904885589274625217197696331749616641014100986439\
6001978282409984"
          "+20/289480223093290488558927462521719769633174961664101410098643\
96001978282409984"
          "-20/289480223093290488558927462521719769633174961664101410098643\
96001978282409984")
   ;; The two bignum strings are (lsh 1 254) and (lognot (lsh 1 254)).  The
   ;; assumption that they are always bignums if they can be made into
   ;; rationals should hold for another couple of processor generations at
   ;; least.
   (bignum
    "2894802230932904885589274625217197696331749616641014100986439600197828\
2409984"
    "-289480223093290488558927462521719769633174961664101410098643960019782\
82409985")))

;; Check that the reader produces the same rational for each spelling of it:
;; plain decimal, #b, #o, #x and the #NNNr radix syntax, for integers and,
;; where the `ratio' feature is present, for ratios.  When the `mule' feature
;; is present the same checks are repeated with the ASCII digits in the test
;; strings replaced by the decimal digits of the scripts listed below.
(macrolet
    ((Assert-reading-rationals (&rest details)
       ;; Each element of DETAILS has the form (GUARD FIRST . REST).  The
       ;; expansion is a `progn' holding, per element, a (when GUARD ...)
       ;; form that asserts, for every string in REST, that reading it gives
       ;; a value `eql' to FIRST.
       (cons
        'progn
        (loop
            for (guard first . rest) in details
            collect `(when ,guard
                      ,@(loop for value in rest
                              collect `(Assert (eql ,first
                                                (read ,value))))))))
     (with-digits (ascii alternate &body body)
       ;; Expand to BODY wrapped in `progn', with every string in BODY that
       ;; contains a digit rewritten character by character: TEXT-ALIST pairs
       ;; the elements of ASCII with the elements of ALTERNATE at the same
       ;; position.  Non-string elements of BODY are left alone.
       ;;
       ;; The rewriting is done by `sublis' with a :test function that has a
       ;; side effect.  TREE-ALIST has one entry, whose key is never actually
       ;; compared.  When the test function decides a string needs
       ;; rewriting, it stores the rewritten string as the value of that
       ;; entry and returns non-nil, so `sublis' substitutes the stored value.
       (let ((tree-alist (list (cons 'old 'new)))
             (text-alist (mapcar* #'cons ascii alternate)))
         (list*
          'progn
          (sublis tree-alist body
                  :test #'(lambda (new old)
                            ;; This function replaces any ASCII decimal digits
                            ;; in any string encountered in the tree with the
                            ;; non-ASCII digits supplied in ALTERNATE.
                            ;; The first argument, NEW, is not used; OLD is
                            ;; the element of the tree being examined.
                            (when (and (stringp old)
                                       (find-if #'digit-char-p old))
                              (setf (cdar tree-alist)
                                    (concatenate 'string
                                                 (sublis text-alist
                                                         (append old nil))))
                              t))))))
     (with-all-digits (&body body)
       ;; Expand to BODY run through `with-digits' once with the ASCII digits
       ;; mapped to themselves and, if the `mule' feature is present when
       ;; this macro is expanded, once more for each (CODE-POINT . SCRIPT)
       ;; entry below.  CODE-POINT is the code point from which that
       ;; script's ten digits are generated; SCRIPT is only a label for the
       ;; reader of this file and is not used in the expansion.
       (list
        'progn
        (list* 'with-digits "0123456789" "0123456789" body)
        (when (featurep 'mule)
          (cons
           'progn
           (loop for (code-point . script)
             in '((#x0660 . "Arabic-Indic")
                  (#x06f0 . "Extended Arabic-Indic")
                  (#x07c0 . "Nko")
                  (#x0966 . "Devanagari")
                  (#x09e6 . "Bengali")
                  (#x0a66 . "Gurmukhi")
                  (#x0ae6 . "Gujarati")
                  (#x0b66 . "Oriya")
                  (#x0be6 . "Tamil")
                  (#x0c66 . "Telugu")
                  (#x0ce6 . "Kannada")
                  (#x0d66 . "Malayalam")
                  (#x0de6 . "Sinhala Lith")
                  (#x0e50 . "Thai")
                  (#x0ed0 . "Lao")
                  (#x0f20 . "Tibetan")
                  (#x1040 . "Myanmar")
                  (#x1090 . "Myanmar Shan")
                  (#x17e0 . "Khmer")
                  (#x1810 . "Mongolian")
                  (#x1946 . "Limbu")
                  (#x19d0 . "New Tai Lue")
                  (#x1a80 . "Tai Tham Hora")
                  (#x1a90 . "Tai Tham Tham")
                  (#x1b50 . "Balinese")
                  (#x1bb0 . "Sundanese")
                  (#x1c40 . "Lepcha")
                  (#x1c50 . "Ol Chiki")
                  (#xa620 . "Vai")
                  (#xa8d0 . "Saurashtra")
                  (#xa900 . "Kayah Li")
                  (#xa9d0 . "Javanese")
                  (#xa9f0 . "Myanmar Tai Laing")
                  (#xaa50 . "Cham")
                  (#xabf0 . "Meetei Mayek")
                  (#xff10 . "Fullwidth")
                  (#x000104a0 . "Osmanya")
                  (#x00011066 . "Brahmi")
                  (#x000110f0 . "Sora Sompeng")
                  (#x00011136 . "Chakma")
                  (#x000111d0 . "Sharada")
                  (#x000112f0 . "Khudawadi")
                  (#x000114d0 . "Tirhuta")
                  (#x00011650 . "Modi")
                  (#x000116c0 . "Takri")
                  (#x000118e0 . "Warang Citi")
                  (#x00016a60 . "Mro")
                  (#x00016b50 . "Pahawh Hmong")
                  (#x0001d7ce . "Mathematical Bold")
                  (#x0001d7d8 . "Mathematical Double-Struck")
                  (#x0001d7e2 . "Mathematical Sans-Serif")
                  (#x0001d7ec . "Mathematical Sans-Serif Bold")
                  (#x0001d7f6 . "Mathematical Monospace"))
           collect 
           (list* 'with-digits "0123456789"
                  ;; All the Unicode decimal digits have contiguous code
                  ;; point ranges as documented by the Unicode standard,
                  ;; we can just increment.
                  (concat (loop for fixnum from code-point
                                to (+ code-point 9)
                                collect (decode-char 'ucs fixnum))
                          "")
                  body)))))))
  (with-all-digits  
    ;; Every string after the expected value must read as that value.  The
    ;; first element of each entry guards it: t means always, (featurep
    ;; 'ratio) means only when ratios are available.
    (Assert-reading-rationals
     (t 1 "1" "#b1" "#o1" "#x1" "#2r1" "#20r1" "#2000r1")
     (t 0 "-0" "#b0" "#o0" "#x0" "#1r0" "#2r0" "#20r0" "#2000r0")
     (t -1 "-1" "#b-1" "#o-1" "#x-1" "#2r-1" "#20r-1" "#2000r-1")
     (t 1073741823 "#b111111111111111111111111111111" "#o7777777777"
        "#x3fffffff" "#32rVVVVVV")
     (t -1073741824 "#b-1000000000000000000000000000000" "#o-10000000000"
        "#x-40000000" "#32r-1000000")
     ((featurep 'ratio)
      1 "1/1" "2/2" "#b1/1" "#o2/2" "#x3/3" "#2r1/1" "#20r2000/2000")
     ((featurep 'ratio)
      -1 "-1/1" "-2/2" "#b-1/1" "#o-2/2" "#x-3/3" "#2r-1/1"
      "#20r-2000/2000"))
    ;; A zero denominator must be rejected with `invalid-read-syntax',
    ;; whatever the radix notation.
    (Check-Error invalid-read-syntax (read "1234567/0"))
    (Check-Error invalid-read-syntax (read "#x1234567/0"))
    (Check-Error invalid-read-syntax (read "#20000r1234567/0"))
    ;; Unintuitive, but that's the Common Lisp behaviour. Maybe we should
    ;; error.
    (Assert (symbolp (read "1234/-123")))))

;;; end of lisp-reader-tests.el