comparison tests/automated/lisp-tests.el @ 5867:e0f1dfaa821e

Add the non-ASCII digit support, now #'parse-integer can handle it. lisp/ChangeLog addition: 2015-03-08 Aidan Kehoe <kehoea@parhasard.net> * mule/digit.el: New file, specifying integer weights for those decimal digits specified by Unicode, and for the Latin characters in ASCII. * dumped-lisp.el (preloaded-file-list): Dump this file, so those weights are available at runtime. tests/ChangeLog addition: 2015-03-16 Aidan Kehoe <kehoea@parhasard.net> * automated/lisp-tests.el: Extend the tests for #'parse-integer, #'digit-char, #'digit-char-p substantially, testing the Unicode decimal digits in detail. src/ChangeLog addition: 2015-03-08 Aidan Kehoe <kehoea@parhasard.net> * data.c (Fparse_integer): Document a complication that arises when a leading minus sign has a digit weight but is nonetheles interpreted as a minus.
author Aidan Kehoe <kehoea@parhasard.net>
date Mon, 16 Mar 2015 00:28:18 +0000
parents a45722e74335
children c96000075e49
comparison
equal deleted inserted replaced
5866:5ea790936de9 5867:e0f1dfaa821e
3545 ;;----------------------------------------------------- 3545 ;;-----------------------------------------------------
3546 ;; Test #'parse-integer and friends. 3546 ;; Test #'parse-integer and friends.
3547 ;;----------------------------------------------------- 3547 ;;-----------------------------------------------------
3548 3548
3549 (Check-Error wrong-type-argument (parse-integer 123456789)) 3549 (Check-Error wrong-type-argument (parse-integer 123456789))
3550 (Check-Error wrong-type-argument (parse-integer "123456789" :start -1)) 3550
3551 (if (featurep 'bignum) 3551 (if (featurep 'bignum)
3552 (progn 3552 (progn
3553 (Check-Error args-out-of-range 3553 (Check-Error args-out-of-range
3554 (parse-integer "123456789" :start (1+ most-positive-fixnum))) 3554 (parse-integer "123456789" :start (1+ most-positive-fixnum)))
3555 (Check-Error args-out-of-range 3555 (Check-Error args-out-of-range
3564 (parse-integer "123456789" :radix (1+ most-positive-fixnum))) 3564 (parse-integer "123456789" :radix (1+ most-positive-fixnum)))
3565 (Check-Error wrong-number-of-arguments 3565 (Check-Error wrong-number-of-arguments
3566 (parse-integer "123456789" :junk-allowed)) 3566 (parse-integer "123456789" :junk-allowed))
3567 (Check-Error invalid-keyword-argument 3567 (Check-Error invalid-keyword-argument
3568 (parse-integer "123456789" :no-such-keyword t)) 3568 (parse-integer "123456789" :no-such-keyword t))
3569 (Check-Error wrong-type-argument (parse-integer "123456789" :start -1))
3570 (Check-Error invalid-argument (parse-integer "abc"))
3571 (Check-Error invalid-argument (parse-integer "efz" :radix 16))
3572
3573 (macrolet
3574 ((with-digits (ascii alternate script)
3575 (let ((tree-alist (list (cons 'old 'new)))
3576 (text-alist (mapcar* #'cons ascii alternate)))
3577 (list*
3578 'progn
3579 (cons
3580 'progn
3581 (loop for ascii-digit across ascii
3582 for non-ascii across alternate
3583 collect `(Assert (eql (digit-char-p ,non-ascii)
3584 ,(- ascii-digit ?0))
3585 ,(concat "checking `digit-char-p', base-10, "
3586 script))))
3587 (sublis
3588 tree-alist
3589 '((Assert (equal (multiple-value-list
3590 (parse-integer " -123 "))
3591 '(-123 7)))
3592 (Assert (equal (multiple-value-list
3593 (parse-integer "-3efz" :radix 16
3594 :junk-allowed t))
3595 '(-1007 4)))
3596 (Assert (equal (multiple-value-list
3597 (parse-integer "zzef9" :radix 16 :start 2))
3598 '(3833 5)))
3599 (Assert (equal (multiple-value-list
3600 (parse-integer "0123456789" :radix 8
3601 :junk-allowed t))
3602 '(342391 8)))
3603 (Assert (equal (multiple-value-list
3604 (parse-integer "":junk-allowed t))
3605 '(nil 0)))
3606 (Assert (equal (multiple-value-list
3607 (parse-integer "abc" :junk-allowed t))
3608 '(nil 0)))
3609 (Check-Error invalid-argument (parse-integer "0123456789"
3610 :radix 8))
3611 (Check-Error invalid-argument (parse-integer "abc"))
3612 (Check-Error invalid-argument (parse-integer "efz" :radix 16))
3613
3614 ;; In contravention of Common Lisp, we allow both 0 and 1 as
3615 ;; values for RADIX, useless as that is.
3616 (Assert (equal (multiple-value-list
3617 (parse-integer "00000" :radix 1)) '(0 5))
3618 "checking 1 is allowed as a value for RADIX")
3619 (Assert (equal (multiple-value-list
3620 (parse-integer "" :radix 0 :junk-allowed t))
3621 '(nil 0))
3622 "checking 0 is allowed as a value for RADIX"))
3623 :test #'(lambda (new old)
3624 ;; This function replaces any ASCII decimal digits in
3625 ;; any string encountered in the tree with the
3626 ;; non-ASCII digits supplied in ALTERNATE.
3627 (when (and (stringp old) (find-if #'digit-char-p old))
3628 (setf (cdar tree-alist)
3629 (concatenate 'string
3630 (sublis text-alist
3631 (append old nil))))
3632 t))))))
3633 (repeat-parse-integer-non-ascii ()
3634 (when (featurep 'mule)
3635 (cons
3636 'progn
3637 (loop for (code-point . script)
3638 in '((#x0660 . "Arabic-Indic")
3639 (#x06f0 . "Extended Arabic-Indic")
3640 (#x07c0 . "Nko")
3641 (#x0966 . "Devanagari")
3642 (#x09e6 . "Bengali")
3643 (#x0a66 . "Gurmukhi")
3644 (#x0ae6 . "Gujarati")
3645 (#x0b66 . "Oriya")
3646 (#x0be6 . "Tamil")
3647 (#x0c66 . "Telugu")
3648 (#x0ce6 . "Kannada")
3649 (#x0d66 . "Malayalam")
3650 (#x0de6 . "Sinhala Lith")
3651 (#x0e50 . "Thai")
3652 (#x0ed0 . "Lao")
3653 (#x0f20 . "Tibetan")
3654 (#x1040 . "Myanmar")
3655 (#x1090 . "Myanmar Shan")
3656 (#x17e0 . "Khmer")
3657 (#x1810 . "Mongolian")
3658 (#x1946 . "Limbu")
3659 (#x19d0 . "New Tai Lue")
3660 (#x1a80 . "Tai Tham Hora")
3661 (#x1a90 . "Tai Tham Tham")
3662 (#x1b50 . "Balinese")
3663 (#x1bb0 . "Sundanese")
3664 (#x1c40 . "Lepcha")
3665 (#x1c50 . "Ol Chiki")
3666 (#xa620 . "Vai")
3667 (#xa8d0 . "Saurashtra")
3668 (#xa900 . "Kayah Li")
3669 (#xa9d0 . "Javanese")
3670 (#xa9f0 . "Myanmar Tai Laing")
3671 (#xaa50 . "Cham")
3672 (#xabf0 . "Meetei Mayek")
3673 (#xff10 . "Fullwidth")
3674 (#x000104a0 . "Osmanya")
3675 (#x00011066 . "Brahmi")
3676 (#x000110f0 . "Sora Sompeng")
3677 (#x00011136 . "Chakma")
3678 (#x000111d0 . "Sharada")
3679 (#x000112f0 . "Khudawadi")
3680 (#x000114d0 . "Tirhuta")
3681 (#x00011650 . "Modi")
3682 (#x000116c0 . "Takri")
3683 (#x000118e0 . "Warang Citi")
3684 (#x00016a60 . "Mro")
3685 (#x00016b50 . "Pahawh Hmong")
3686 (#x0001d7ce . "Mathematical Bold")
3687 (#x0001d7d8 . "Mathematical Double-Struck")
3688 (#x0001d7e2 . "Mathematical Sans-Serif")
3689 (#x0001d7ec . "Mathematical Sans-Serif Bold")
3690 (#x0001d7f6 . "Mathematical Monospace"))
3691 collect `(with-digits "0123456789"
3692 ;; All the Unicode decimal digits have contiguous code
3693 ;; point ranges as documented by the Unicode standard,
3694 ;; we can just increment.
3695 ,(concat (loop for fixnum from code-point
3696 to (+ code-point 9)
3697 collect (decode-char 'ucs fixnum))
3698 "")
3699 ,script))))))
3700 (with-digits "0123456789" "0123456789" "ASCII")
3701 (repeat-parse-integer-non-ascii))
3569 3702
3570 ;; Next two paragraphs of tests from GNU, thank you Leo Liu. 3703 ;; Next two paragraphs of tests from GNU, thank you Leo Liu.
3571 (Assert (eql (digit-char-p ?3) 3)) 3704 (Assert (eql (digit-char-p ?3) 3))
3572 (Assert (eql (digit-char-p ?a 11) 10)) 3705 (Assert (eql (digit-char-p ?a 11) 10))
3573 (Assert (eql (digit-char-p ?w 36) 32)) 3706 (Assert (eql (digit-char-p ?w 36) 32))
3574 (Assert (not (digit-char-p ?a))) 3707 (Assert (not (digit-char-p ?a)))
3575 (Check-Error args-out-of-range (digit-char-p ?a 37)) 3708 (Check-Error args-out-of-range (digit-char-p ?a 37))
3576 (Assert (not (digit-char-p ?a 1))) 3709 (Assert (not (digit-char-p ?a 1)))
3577 3710
3578 (Assert (equal (multiple-value-list (parse-integer " -123 ")) '(-123 7))) 3711 (loop for fixnum from 0 below 36
3579 (Assert (equal (multiple-value-list 3712 do (Assert (eql fixnum (digit-char-p (digit-char fixnum 36) 36))))
3580 (parse-integer "-efz" :radix 16 :junk-allowed t)) 3713
3581 '(-239 3))) 3714 (let ((max -1)
3582 (Assert (equal (multiple-value-list (parse-integer "zzef" :radix 16 :start 2)) 3715 (min most-positive-fixnum)
3583 '(239 4))) 3716 (max-char nil) (min-char nil))
3584 (Assert (equal (multiple-value-list 3717 (map-char-table #'(lambda (key value)
3585 (parse-integer "0123456789" :radix 8 :junk-allowed t)) 3718 (if (> value max)
3586 '(342391 8))) 3719 (setf max value
3587 (Assert (equal (multiple-value-list (parse-integer "" :junk-allowed t)) 3720 max-char key))
3588 '(nil 0))) 3721 (if (< value min)
3589 (Assert (equal (multiple-value-list (parse-integer "abc" :junk-allowed t)) 3722 (setf min value
3590 '(nil 0))) 3723 min-char key))
3591 (Check-Error invalid-argument (parse-integer "0123456789" :radix 8)) 3724 nil)
3592 (Check-Error invalid-argument (parse-integer "abc")) 3725 digit-fixnum-map)
3593 (Check-Error invalid-argument (parse-integer "efz" :radix 16)) 3726 (Assert (>= 35 max) "checking base 36 is supported, `digit-fixnum-map'")
3594 3727 (Assert (<= min 2) "checking base two supported, `digit-fixnum-map'")
3595 ;; We don't allow a trailing decimal point, as the Lisp reader does. 3728 (Assert (eql (digit-char-p max-char (1+ max)) max))
3596 (Check-Error invalid-argument (parse-integer "12348.")) 3729 (Assert (eql (digit-char-p min-char (1+ min)) min))
3597
3598 ;; In contravention of Common Lisp, we allow both 0 and 1 as values for RADIX,
3599 ;; useless as that is.
3600 (Assert (equal (multiple-value-list (parse-integer "00000" :radix 1)) '(0 5))
3601 "checking 1 is allowed as a value for RADIX")
3602 (Assert (equal (multiple-value-list
3603 (parse-integer "" :radix 0 :junk-allowed t))
3604 '(nil 0))
3605 "checking 0 is allowed as a value for RADIX")
3606 3730
3607 (let ((binary-table 3731 (let ((binary-table
3608 (copy-char-table #s(char-table :type generic :default -1 :data ())))) 3732 (copy-char-table #s(char-table :type generic :default -1 :data ()))))
3609 (loop for fixnum from 00 to #xff 3733 (loop for fixnum from 00 to #xff
3610 do (put-char-table (int-char fixnum) fixnum binary-table)) 3734 do (put-char-table (int-char fixnum) fixnum binary-table))