Mercurial > hg > xemacs-beta
diff tests/automated/lisp-tests.el @ 5867:e0f1dfaa821e
Add the non-ASCII digit support, now #'parse-integer can handle it.
lisp/ChangeLog addition:
2015-03-08 Aidan Kehoe <kehoea@parhasard.net>
* mule/digit.el: New file, specifying integer weights for those
decimal digits specified by Unicode, and for the Latin characters
in ASCII.
* dumped-lisp.el (preloaded-file-list):
Dump this file, so those weights are available at runtime.
tests/ChangeLog addition:
2015-03-16 Aidan Kehoe <kehoea@parhasard.net>
* automated/lisp-tests.el:
Extend the tests for #'parse-integer, #'digit-char, #'digit-char-p
substantially, testing the Unicode decimal digits in detail.
src/ChangeLog addition:
2015-03-08 Aidan Kehoe <kehoea@parhasard.net>
* data.c (Fparse_integer):
Document a complication that arises when a leading minus sign has
a digit weight but is nonetheles interpreted as a minus.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Mon, 16 Mar 2015 00:28:18 +0000 |
parents | a45722e74335 |
children | c96000075e49 |
line wrap: on
line diff
--- a/tests/automated/lisp-tests.el Mon Mar 16 00:11:30 2015 +0000 +++ b/tests/automated/lisp-tests.el Mon Mar 16 00:28:18 2015 +0000 @@ -3547,7 +3547,7 @@ ;;----------------------------------------------------- (Check-Error wrong-type-argument (parse-integer 123456789)) -(Check-Error wrong-type-argument (parse-integer "123456789" :start -1)) + (if (featurep 'bignum) (progn (Check-Error args-out-of-range @@ -3566,6 +3566,139 @@ (parse-integer "123456789" :junk-allowed)) (Check-Error invalid-keyword-argument (parse-integer "123456789" :no-such-keyword t)) +(Check-Error wrong-type-argument (parse-integer "123456789" :start -1)) +(Check-Error invalid-argument (parse-integer "abc")) +(Check-Error invalid-argument (parse-integer "efz" :radix 16)) + +(macrolet + ((with-digits (ascii alternate script) + (let ((tree-alist (list (cons 'old 'new))) + (text-alist (mapcar* #'cons ascii alternate))) + (list* + 'progn + (cons + 'progn + (loop for ascii-digit across ascii + for non-ascii across alternate + collect `(Assert (eql (digit-char-p ,non-ascii) + ,(- ascii-digit ?0)) + ,(concat "checking `digit-char-p', base-10, " + script)))) + (sublis + tree-alist + '((Assert (equal (multiple-value-list + (parse-integer " -123 ")) + '(-123 7))) + (Assert (equal (multiple-value-list + (parse-integer "-3efz" :radix 16 + :junk-allowed t)) + '(-1007 4))) + (Assert (equal (multiple-value-list + (parse-integer "zzef9" :radix 16 :start 2)) + '(3833 5))) + (Assert (equal (multiple-value-list + (parse-integer "0123456789" :radix 8 + :junk-allowed t)) + '(342391 8))) + (Assert (equal (multiple-value-list + (parse-integer "":junk-allowed t)) + '(nil 0))) + (Assert (equal (multiple-value-list + (parse-integer "abc" :junk-allowed t)) + '(nil 0))) + (Check-Error invalid-argument (parse-integer "0123456789" + :radix 8)) + (Check-Error invalid-argument (parse-integer "abc")) + (Check-Error invalid-argument (parse-integer "efz" :radix 16)) + + ;; In contravention of Common Lisp, we allow both 0 and 1 as + ;; values for RADIX, useless as that is. + (Assert (equal (multiple-value-list + (parse-integer "00000" :radix 1)) '(0 5)) + "checking 1 is allowed as a value for RADIX") + (Assert (equal (multiple-value-list + (parse-integer "" :radix 0 :junk-allowed t)) + '(nil 0)) + "checking 0 is allowed as a value for RADIX")) + :test #'(lambda (new old) + ;; This function replaces any ASCII decimal digits in + ;; any string encountered in the tree with the + ;; non-ASCII digits supplied in ALTERNATE. + (when (and (stringp old) (find-if #'digit-char-p old)) + (setf (cdar tree-alist) + (concatenate 'string + (sublis text-alist + (append old nil)))) + t)))))) + (repeat-parse-integer-non-ascii () + (when (featurep 'mule) + (cons + 'progn + (loop for (code-point . script) + in '((#x0660 . "Arabic-Indic") + (#x06f0 . "Extended Arabic-Indic") + (#x07c0 . "Nko") + (#x0966 . "Devanagari") + (#x09e6 . "Bengali") + (#x0a66 . "Gurmukhi") + (#x0ae6 . "Gujarati") + (#x0b66 . "Oriya") + (#x0be6 . "Tamil") + (#x0c66 . "Telugu") + (#x0ce6 . "Kannada") + (#x0d66 . "Malayalam") + (#x0de6 . "Sinhala Lith") + (#x0e50 . "Thai") + (#x0ed0 . "Lao") + (#x0f20 . "Tibetan") + (#x1040 . "Myanmar") + (#x1090 . "Myanmar Shan") + (#x17e0 . "Khmer") + (#x1810 . "Mongolian") + (#x1946 . "Limbu") + (#x19d0 . "New Tai Lue") + (#x1a80 . "Tai Tham Hora") + (#x1a90 . "Tai Tham Tham") + (#x1b50 . "Balinese") + (#x1bb0 . "Sundanese") + (#x1c40 . "Lepcha") + (#x1c50 . "Ol Chiki") + (#xa620 . "Vai") + (#xa8d0 . "Saurashtra") + (#xa900 . "Kayah Li") + (#xa9d0 . "Javanese") + (#xa9f0 . "Myanmar Tai Laing") + (#xaa50 . "Cham") + (#xabf0 . "Meetei Mayek") + (#xff10 . "Fullwidth") + (#x000104a0 . "Osmanya") + (#x00011066 . "Brahmi") + (#x000110f0 . "Sora Sompeng") + (#x00011136 . "Chakma") + (#x000111d0 . "Sharada") + (#x000112f0 . "Khudawadi") + (#x000114d0 . "Tirhuta") + (#x00011650 . "Modi") + (#x000116c0 . "Takri") + (#x000118e0 . "Warang Citi") + (#x00016a60 . "Mro") + (#x00016b50 . "Pahawh Hmong") + (#x0001d7ce . "Mathematical Bold") + (#x0001d7d8 . "Mathematical Double-Struck") + (#x0001d7e2 . "Mathematical Sans-Serif") + (#x0001d7ec . "Mathematical Sans-Serif Bold") + (#x0001d7f6 . "Mathematical Monospace")) + collect `(with-digits "0123456789" + ;; All the Unicode decimal digits have contiguous code + ;; point ranges as documented by the Unicode standard, + ;; we can just increment. + ,(concat (loop for fixnum from code-point + to (+ code-point 9) + collect (decode-char 'ucs fixnum)) + "") + ,script)))))) + (with-digits "0123456789" "0123456789" "ASCII") + (repeat-parse-integer-non-ascii)) ;; Next two paragraphs of tests from GNU, thank you Leo Liu. (Assert (eql (digit-char-p ?3) 3)) @@ -3575,34 +3708,25 @@ (Check-Error args-out-of-range (digit-char-p ?a 37)) (Assert (not (digit-char-p ?a 1))) -(Assert (equal (multiple-value-list (parse-integer " -123 ")) '(-123 7))) -(Assert (equal (multiple-value-list - (parse-integer "-efz" :radix 16 :junk-allowed t)) - '(-239 3))) -(Assert (equal (multiple-value-list (parse-integer "zzef" :radix 16 :start 2)) - '(239 4))) -(Assert (equal (multiple-value-list - (parse-integer "0123456789" :radix 8 :junk-allowed t)) - '(342391 8))) -(Assert (equal (multiple-value-list (parse-integer "" :junk-allowed t)) - '(nil 0))) -(Assert (equal (multiple-value-list (parse-integer "abc" :junk-allowed t)) - '(nil 0))) -(Check-Error invalid-argument (parse-integer "0123456789" :radix 8)) -(Check-Error invalid-argument (parse-integer "abc")) -(Check-Error invalid-argument (parse-integer "efz" :radix 16)) +(loop for fixnum from 0 below 36 + do (Assert (eql fixnum (digit-char-p (digit-char fixnum 36) 36)))) -;; We don't allow a trailing decimal point, as the Lisp reader does. -(Check-Error invalid-argument (parse-integer "12348.")) - -;; In contravention of Common Lisp, we allow both 0 and 1 as values for RADIX, -;; useless as that is. -(Assert (equal (multiple-value-list (parse-integer "00000" :radix 1)) '(0 5)) - "checking 1 is allowed as a value for RADIX") -(Assert (equal (multiple-value-list - (parse-integer "" :radix 0 :junk-allowed t)) - '(nil 0)) - "checking 0 is allowed as a value for RADIX") +(let ((max -1) + (min most-positive-fixnum) + (max-char nil) (min-char nil)) + (map-char-table #'(lambda (key value) + (if (> value max) + (setf max value + max-char key)) + (if (< value min) + (setf min value + min-char key)) + nil) + digit-fixnum-map) + (Assert (>= 35 max) "checking base 36 is supported, `digit-fixnum-map'") + (Assert (<= min 2) "checking base two supported, `digit-fixnum-map'") + (Assert (eql (digit-char-p max-char (1+ max)) max)) + (Assert (eql (digit-char-p min-char (1+ min)) min)) (let ((binary-table (copy-char-table #s(char-table :type generic :default -1 :data ()))))