changeset 4792:95b04754ea8c

Make #'equalp more compatible with CL; add a compiler macro, test & doc it. lisp/ChangeLog addition: 2009-11-08 Aidan Kehoe <kehoea@parhasard.net> * cl-extra.el (cl-string-vector-equalp) (cl-bit-vector-vector-equalp, cl-vector-array-equalp) (cl-hash-table-contents-equalp): New functions, to implement equalp treating arrays with identical contents as equivalent, as specified by Common Lisp. (equalp): Revise this function to implement array equivalence, and the hash-table equalp behaviour specified by CL. * cl-macs.el (equalp): Add a compiler macro for this function, used when one of the arguments is constant, and as such, its type is known at compile time. man/ChangeLog addition: 2009-11-08 Aidan Kehoe <kehoea@parhasard.net> * lispref/objects.texi (Equality Predicates): Document #'equalp here, as well as #'equal and #'eq. tests/ChangeLog addition: 2009-12-31 Aidan Kehoe <kehoea@parhasard.net> * automated/lisp-tests.el: Test much of the functionality of equalp; add a pointer to Paul Dietz' ANSI test suite for this function, converted to Emacs Lisp. Not including the tests themselves in XEmacs because who owns the copyright on the files is unclear and the GCL people didn't respond to my queries.
author Aidan Kehoe <kehoea@parhasard.net>
date Thu, 31 Dec 2009 15:09:41 +0000
parents ea07b60c097f
children 8484c6c76837
files lisp/ChangeLog lisp/cl-extra.el lisp/cl-macs.el man/ChangeLog man/lispref/objects.texi tests/ChangeLog tests/automated/lisp-tests.el
diffstat 7 files changed, 379 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Thu Dec 31 08:21:30 2009 +0000
+++ b/lisp/ChangeLog	Thu Dec 31 15:09:41 2009 +0000
@@ -165,6 +165,19 @@
 	calls to it. Keep the information about the bytecode's numeric
 	value, we want that for disassembling code.
 
+2009-11-08  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* cl-extra.el (cl-string-vector-equalp)
+	(cl-bit-vector-vector-equalp, cl-vector-array-equalp)
+	(cl-hash-table-contents-equalp): New functions, to implement
+	equalp treating arrays with identical contents as equivalent, as
+	specified by Common Lisp. 
+	(equalp): Revise this function to implement array equivalence,
+	and the hash-table equalp behaviour specified by CL.
+	* cl-macs.el (equalp): Add a compiler macro for this function,
+	used when one of the arguments is constant, and as such, its type
+	is known at compile time. 
+
 2009-11-01  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* cl-extra.el (equalp): 
--- a/lisp/cl-extra.el	Thu Dec 31 08:21:30 2009 +0000
+++ b/lisp/cl-extra.el	Thu Dec 31 15:09:41 2009 +0000
@@ -89,35 +89,128 @@
 
 ;;; Predicates.
 
+;; I'd actually prefer not to have this inline, the space
+;; vs. amount-it's-called trade-off isn't reasonable, but that would
+;; introduce bytecode problems with the compiler macro in cl-macs.el.
+(defsubst cl-string-vector-equalp (cl-string cl-vector)
+  "Helper function for `equalp', which see."
+;  (check-argument-type #'stringp cl-string)
+;  (check-argument-type #'vector cl-vector)
+  (let ((cl-i (length cl-string))
+	cl-char cl-other)
+    (when (= cl-i (length cl-vector))
+      (while (and (>= (setq cl-i (1- cl-i)) 0)
+		  (or (eq (setq cl-char (aref cl-string cl-i))
+			  (setq cl-other (aref cl-vector cl-i)))
+		      (and (characterp cl-other) ; Note we want to call this
+					         ; as rarely as possible, it
+					         ; doesn't have a bytecode.
+			   (eq (downcase cl-char) (downcase cl-other))))))
+      (< cl-i 0))))
+
+;; See comment on cl-string-vector-equalp above.
+(defsubst cl-bit-vector-vector-equalp (cl-bit-vector cl-vector)
+  "Helper function for `equalp', which see."
+;  (check-argument-type #'bit-vector-p cl-bit-vector)
+;  (check-argument-type #'vectorp cl-vector)
+  (let ((cl-i (length cl-bit-vector))
+	cl-other)
+    (when (= cl-i (length cl-vector))
+      (while (and (>= (setq cl-i (1- cl-i)) 0)
+		  (numberp (setq cl-other (aref cl-vector cl-i)))
+		  ;; Differs from clisp here. 
+		  (= (aref cl-bit-vector cl-i) cl-other)))
+      (< cl-i 0))))
+
+;; These two helper functions call equalp recursively, the two above have no
+;; need to.
+(defsubst cl-vector-array-equalp (cl-vector cl-array)
+  "Helper function for `equalp', which see."
+;  (check-argument-type #'vector cl-vector)
+;  (check-argument-type #'arrayp cl-array)
+  (let ((cl-i (length cl-vector)))
+    (when (= cl-i (length cl-array))
+      (while (and (>= (setq cl-i (1- cl-i)) 0)
+		  (equalp (aref cl-vector cl-i) (aref cl-array cl-i))))
+      (< cl-i 0))))
+
+(defsubst cl-hash-table-contents-equalp (cl-hash-table-1 cl-hash-table-2)
+  "Helper function for `equalp', which see."
+  (symbol-macrolet
+      ;; If someone has gone and fished the uninterned symbol out of this
+      ;; function's constants vector, and subsequently stored it as a value
+      ;; in a hash table, it's their own damn fault when
+      ;; `cl-hash-table-contents-equalp' gives the wrong answer.
+      ((equalp-default '#:equalp-default))
+    (loop
+      for x-key being the hash-key in cl-hash-table-1
+      using (hash-value x-value)
+      with y-value = nil
+      always (and (not (eq equalp-default
+			   (setq y-value (gethash x-key cl-hash-table-2
+						  equalp-default))))
+		  (equalp y-value x-value)))))
+
 (defun equalp (x y)
   "Return t if two Lisp objects have similar structures and contents.
+
 This is like `equal', except that it accepts numerically equal
-numbers of different types (float vs. integer), and also compares
-strings case-insensitively."
-  (cond ((eq x y) t)
+numbers of different types (float, integer, bignum, bigfloat), and also
+compares strings and characters case-insensitively.
+
+Arrays (that is, strings, bit-vectors, and vectors) of the same length and
+with contents that are `equalp' are themselves `equalp'.
+
+Two hash tables are `equalp' if they have the same test (see
+`hash-table-test'), if they have the same number of entries, and if, for
+each entry in one hash table, its key is equivalent to a key in the other
+hash table using the hash table test, and its value is `equalp' to the other
+hash table's value for that key."
+  (cond ((eq x y))
 	((stringp x)
-	 ;; XEmacs change: avoid downcase
-	 (and (stringp y)
-	      (eq t (compare-strings x nil nil y nil nil t))))
-	;; XEmacs addition: compare characters
-	((characterp x)
-	 (and (characterp y)
-	      (or (char-equal x y)
-		  (char-equal (downcase x) (downcase y)))))
+	 (if (stringp y)
+	     (eq t (compare-strings x nil nil y nil nil t))
+	   (if (vectorp y)
+	       (cl-string-vector-equalp x y)
+	     ;; bit-vectors and strings are only equalp if they're
+	     ;; zero-length:
+	     (and (equal "" x) (equal #* y)))))
 	((numberp x)
 	 (and (numberp y) (= x y)))
 	((consp x)
 	 (while (and (consp x) (consp y) (equalp (car x) (car y)))
 	   (setq x (cdr x) y (cdr y)))
 	 (and (not (consp x)) (equalp x y)))
-	((vectorp x)
-	 (and (vectorp y) (= (length x) (length y))
-	      (let ((i (length x)))
-		(while (and (>= (setq i (1- i)) 0)
-			    (equalp (aref x i) (aref y i))))
-		(< i 0))))
-	(t (equal x y))))
-
+	(t
+	 ;; From here on, the type tests don't (yet) have bytecodes. 
+	 (let ((x-type (type-of x)))
+	   (cond ((eq 'vector x-type)
+		  (if (stringp y)
+		      (cl-string-vector-equalp y x)
+		    (if (vectorp y)
+			(cl-vector-array-equalp x y)
+		      (if (bit-vector-p y)
+			  (cl-bit-vector-vector-equalp y x)))))
+		 ((eq 'character x-type)
+		  (and (characterp y)
+		       ;; If the characters are actually identical, the
+		       ;; first eq test will have caught them above; we only
+		       ;; need to check them case-insensitively here.
+		       (eq (downcase x) (downcase y))))
+		 ((eq 'hash-table x-type)
+		  (and (hash-table-p y)
+		       (eq (hash-table-test x) (hash-table-test y))
+		       (= (hash-table-count x) (hash-table-count y))
+		       (cl-hash-table-contents-equalp x y)))
+		 ((eq 'bit-vector x-type)
+		  (if (bit-vector-p y)
+		      (equal x y)
+		    (if (vectorp y)
+			(cl-bit-vector-vector-equalp x y)
+		      ;; bit-vectors and strings are only equalp if they're
+		      ;; zero-length:
+		      (and (equal "" y) (equal #* x)))))
+		 (t (equal x y)))))))
 
 ;;; Control structures.
 
--- a/lisp/cl-macs.el	Thu Dec 31 08:21:30 2009 +0000
+++ b/lisp/cl-macs.el	Thu Dec 31 15:09:41 2009 +0000
@@ -3348,6 +3348,117 @@
       (regexp-quote string)
     form))
 
+(define-compiler-macro equalp (&whole form x y) 
+  "Expand calls to `equalp' where X or Y is a constant expression.
+
+Much of the processing that `equalp' does is dependent on the types of both
+of its arguments, and with type information for one of them, we can
+eliminate much of the body of the function at compile time.
+
+Where both X and Y are constant expressions, `equalp' is evaluated at
+compile time by byte-optimize.el--this compiler macro passes FORM through to
+the byte optimizer in those cases."
+  ;; Cases where both arguments are constant are handled in
+  ;; byte-optimize.el, we only need to handle those cases where one is
+  ;; constant here.
+  (let* ((equalp-sym (eval-when-compile (gensym)))
+	(let-form '(progn))
+	(check-bit-vector t)
+	(check-string t)
+	(original-y y)
+	equalp-temp checked)
+  (macrolet
+      ((unordered-check (check)
+	 `(prog1
+	     (setq checked
+		   (or ,check
+		       (prog1 ,(sublis '((x . y) (y . x)) check :test #'eq)
+			 (setq equalp-temp x x y y equalp-temp))))
+	   (when checked
+	     (unless (symbolp y)
+	       (setq let-form `(let ((,equalp-sym ,y))) y equalp-sym))))))
+    ;; In the bodies of the below clauses, x is always a constant expression
+    ;; of the type we're interested in, and y is always a symbol that refers
+    ;; to the result non-constant side of the comparison. 
+    (cond ((unordered-check (and (arrayp x) (not (cl-const-expr-p y))))
+	   ;; Strings and other arrays. A vector containing the same
+	   ;; character elements as a given string is equalp to that string;
+	   ;; a bit-vector can only be equalp to a string if both are
+	   ;; zero-length.
+	   (cond
+	    ((member x '("" #* []))
+	     ;; No need to protect against multiple evaluation here:
+	     `(and (member ,original-y '("" #* [])) t))
+	    ((stringp x)
+	     `(,@let-form
+	       (if (stringp ,y)
+		   (eq t (compare-strings ,x nil nil
+					  ,y nil nil t))
+		 (if (vectorp ,y) 
+		     (cl-string-vector-equalp ,x ,y)))))
+	    ((bit-vector-p x)
+	     `(,@let-form
+	       (if (bit-vector-p ,y)
+		   ;; No need to call equalp on each element here:
+		   (equal ,x ,y)
+		 (if (vectorp ,y) 
+		     (cl-bit-vector-vector-equalp ,x ,y)))))
+	    (t
+	     (loop
+	       for elt across x
+	       ;; We may not need to check the other argument if it's a
+	       ;; string or bit vector, depending on the contents of x:
+	       always (progn
+			(unless (characterp elt) (setq check-string nil))
+			(unless (and (numberp elt) (or (= elt 0) (= elt 1)))
+			  (setq check-bit-vector nil))
+			(or check-string check-bit-vector)))
+	     `(,@let-form
+	       (cond
+		,@(if check-string
+		      `(((stringp ,y) 
+			 (cl-string-vector-equalp ,y ,x))))
+		,@(if check-bit-vector 
+		      `(((bit-vector-p ,y)
+			 (cl-bit-vector-vector-equalp ,y ,x))))
+		((vectorp ,y)
+		 (cl-vector-array-equalp ,x ,y)))))))
+	  ((unordered-check (and (characterp x) (not (cl-const-expr-p y))))
+	   `(,@let-form
+	     (or (eq ,x ,y)
+		  ;; eq has a bytecode, char-equal doesn't.
+		 (and (characterp ,y)
+		      (eq (downcase ,x) (downcase ,y))))))
+	  ((unordered-check (and (numberp x) (not (cl-const-expr-p y))))
+	   `(,@let-form
+	     (and (numberp ,y)
+		  (= ,x ,y))))
+	  ((unordered-check (and (hash-table-p x) (not (cl-const-expr-p y))))
+	   ;; Hash tables; follow the CL spec.
+	   `(,@let-form
+	     (and (hash-table-p ,y)
+		  (eq ',(hash-table-test x) (hash-table-test ,y))
+		  (= ,(hash-table-count x) (hash-table-count ,y))
+		  (cl-hash-table-contents-equalp ,x ,y))))
+	  ((unordered-check
+	    ;; Symbols; eq. 
+	    (and (not (cl-const-expr-p y))
+		 (or (memq x '(nil t))
+		     (and (eq (car-safe x) 'quote) (symbolp (second x))))))
+	   (cons 'eq (cdr form)))
+	  ((unordered-check
+	    ;; Compare conses at runtime, there's no real upside to
+	    ;; unrolling the function -> they fall through to the next
+	    ;; clause in this function.
+	    (and (cl-const-expr-p x) (not (consp x))
+		 (not (cl-const-expr-p y))))
+	   ;; All other types; use equal.
+	   (cons 'equal (cdr form)))
+	  ;; Neither side is a constant expression, do all our evaluation at
+	  ;; runtime (or both are, and equalp will be called from
+	  ;; byte-optimize.el).
+	  (t form)))))
+
 (mapc
  #'(lambda (y)
      (put (car y) 'side-effect-free t)
--- a/man/ChangeLog	Thu Dec 31 08:21:30 2009 +0000
+++ b/man/ChangeLog	Thu Dec 31 15:09:41 2009 +0000
@@ -33,6 +33,11 @@
 	* xemacs-faq.texi (Q1.2.2: What versions of Unix does XEmacs run on?):
 	Ditto.
 
+2009-11-08  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* lispref/objects.texi (Equality Predicates): 
+	Document #'equalp here, as well as #'equal and #'eq. 
+
 2009-10-05  Jerry James  <james@xemacs.org>
 
 	* beta.texi (Building XEmacs from a full distribution): Remove
--- a/man/lispref/objects.texi	Thu Dec 31 08:21:30 2009 +0000
+++ b/man/lispref/objects.texi	Thu Dec 31 15:09:41 2009 +0000
@@ -2424,3 +2424,56 @@
 
   The test for equality is implemented recursively, and circular lists may
 therefore cause infinite recursion (leading to an error).
+
+@defun equalp object1 object2
+This function is like @code{equal}, but compares characters and strings
+case-insensitively; numbers are compared using @code{=}; arrays (that
+is, strings, bit-vectors and vectors) are regarded as being
+@code{equalp} if their contents are @code{equalp}; and
+@code{hash-tables} are @code{equalp} if their values are @code{equalp}
+and they would otherwise be @code{equal}.
+
+@code{equalp} is recursive with vectors, lists and hash-tables, but not
+with other complex types.  For types without a defined @code{equalp}
+behavior, @code{equalp} behaves as @code{equal} does. 
+
+@example
+@group
+(equalp "asdf" "ASDF")
+     @result{} t
+@end group
+@group
+(equalp "asdf" [?a ?s ?D ?F])
+     @result{} t
+@end group
+@group
+(equalp "asdf" [?a ?s ?D ?F ?g])
+     @result{} nil
+@end group
+@group
+(equalp "" (bit-vector))
+     @result{} t
+@end group
+@group
+(equalp #s(hash-table) (make-hash-table))
+     @result{} t
+@end group
+@group
+(equalp #s(hash-table data (t "hi there"))
+	(let ((ht (make-hash-table)))
+	  (puthash t "HI THERE" ht)
+	  ht))
+     @result{} t
+@group
+@end group
+(equalp #s(hash-table test eq data (1.0 "hi there"))
+	(let ((ht (make-hash-table :test 'eql)))
+	  (puthash 1.0 "HI THERE" ht)
+	  ht))
+     @result{} nil
+@end group
+@end example
+@end defun
+
+@code{equalp} can also provoke an error if handed a circular structure,
+as with @code{equal}. 
--- a/tests/ChangeLog	Thu Dec 31 08:21:30 2009 +0000
+++ b/tests/ChangeLog	Thu Dec 31 15:09:41 2009 +0000
@@ -1,3 +1,12 @@
+2009-12-31  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* automated/lisp-tests.el: 
+	Test much of the functionality of equalp; add a pointer to Paul
+	Dietz' ANSI test suite for this function, converted to Emacs
+	Lisp. Not including the tests themselves in XEmacs because who
+	owns the copyright on the files is unclear and the GCL people
+	didn't respond to my queries. 
+
 2009-12-21  Jerry James  <james@xemacs.org>
 
 	* Dnd/README: Remove references to OffiX drag-and-drop.
--- a/tests/automated/lisp-tests.el	Thu Dec 31 08:21:30 2009 +0000
+++ b/tests/automated/lisp-tests.el	Thu Dec 31 15:09:41 2009 +0000
@@ -2085,14 +2085,81 @@
         (* three one-four-one-five-nine)))
    "checking letf handles #'values in a basic sense"))
 
-(Assert (equalp "hi there" "Hi There")
-	"checking equalp isn't case-sensitive")
-(Assert (equalp 99 99.0)
-	"checking equalp compares numerical values of different types")
-(Assert (null (equalp 99 ?c))
-	"checking equalp does not convert characters to numbers")
-;; Fixed in Hg d0ea57eb3de4.
-(Assert (null (equalp "hi there" [hi there]))
-	"checking equalp doesn't error with string and non-string")
+;; #'equalp tests.
+(let ((string-variable "aBcDeeFgH\u00Edj")
+      (eacute-character ?\u00E9)
+      (Eacute-character ?\u00c9)
+      (+base-chars+ (loop
+		       with res = (make-string 96 ?\x20)
+		       for int-char from #x20 to #x7f
+		       for char being each element in-ref res
+		       do (setf char (int-to-char int-char))
+		       finally return res)))
+  (Assert (equalp "hi there" "Hi There")
+	  "checking equalp isn't case-sensitive")
+  (Assert (equalp 99 99.0)
+	  "checking equalp compares numerical values of different types")
+  (Assert (null (equalp 99 ?c))
+	  "checking equalp does not convert characters to numbers")
+  ;; Fixed in Hg d0ea57eb3de4.
+  (Assert (null (equalp "hi there" [hi there]))
+	  "checking equalp doesn't error with string and non-string")
+  (Assert (eq t (equalp "ABCDEEFGH\u00CDJ" string-variable))
+	  "checking #'equalp is case-insensitive with an upcased constant") 
+  (Assert (eq t (equalp "abcdeefgh\xedj" string-variable))
+	  "checking #'equalp is case-insensitive with a downcased constant")
+  (Assert (eq t (equalp string-variable string-variable))
+	  "checking #'equalp works when handed the same string twice")
+  (Assert (eq t (equalp string-variable "aBcDeeFgH\u00Edj"))
+	  "check #'equalp is case-insensitive with a variable-cased constant")
+  (Assert (eq t (equalp "" (bit-vector))) 
+	  "check empty string and empty bit-vector are #'equalp.")
+  (Assert (eq t (equalp (string) (bit-vector))) 
+	  "check empty string and empty bit-vector are #'equalp, no constants")
+  (Assert (eq t (equalp "hi there" (vector ?h ?i ?\  ?t ?h ?e ?r ?e)))
+	  "check string and vector with same contents #'equalp")
+  (Assert (eq t (equalp (string ?h ?i ?\  ?t ?h ?e ?r ?e)
+			(vector ?h ?i ?\  ?t ?h ?e ?r ?e)))
+	  "check string and vector with same contents #'equalp, no constants")
+  (Assert (eq t (equalp [?h ?i ?\  ?t ?h ?e ?r ?e]
+			(string ?h ?i ?\  ?t ?h ?e ?r ?e)))
+	  "check string and vector with same contents #'equalp, vector constant")
+  (Assert (eq t (equalp [0 1.0 0.0 0 1]
+			(bit-vector 0 1 0 0 1)))
+	  "check vector and bit-vector with same contents #'equalp,\
+ vector constant")
+  (Assert (eq t (equalp #*01001
+			(vector 0 1.0 0.0 0 1)))
+	  "check vector and bit-vector with same contents #'equalp,\
+ bit-vector constant")
+  (Assert (eq t (equalp ?\u00E9 Eacute-character))
+	  "checking characters are case-insensitive, one constant")
+  (Assert (eq nil (equalp ?\u00E9 (aref (format "%c" ?a) 0)))
+	  "checking distinct characters are not equalp, one constant")
+  (Assert (eq t (equalp t (and)))
+	  "checking symbols are correctly #'equalp")
+  (Assert (eq nil (equalp t (or nil '#:t)))
+	  "checking distinct symbols with the same name are not #'equalp")
+  (Assert (eq t (equalp #s(char-table type generic data (?\u0080 "hi-there"))
+			(let ((aragh (make-char-table 'generic)))
+			  (put-char-table ?\u0080 "hi-there" aragh)
+			  aragh)))
+	  "checking #'equalp succeeds correctly, char-tables")
+  (Assert (eq nil (equalp #s(char-table type generic data (?\u0080 "hi-there"))
+			  (let ((aragh (make-char-table 'generic)))
+			    (put-char-table ?\u0080 "HI-THERE" aragh)
+			    aragh)))
+	  "checking #'equalp fails correctly, char-tables"))
+
+;; There are more tests available for equalp here: 
+;;
+;; http://www.parhasard.net/xemacs/equalp-tests.el
+;;
+;; They are taken from Paul Dietz' GCL ANSI test suite, licensed under the
+;; LGPL and part of GNU Common Lisp; the GCL people didn't respond to
+;; several requests for information on who owned the copyright for the
+;; files, so I haven't included the tests with XEmacs. Anyone doing XEmacs
+;; development on equalp should still run them, though. Aidan Kehoe, Thu Dec
+;; 31 14:53:52 GMT 2009. 
 
 ;;; end of lisp-tests.el