Mercurial > hg > xemacs-beta

diff tests/automated/mule-tests.el @ 3439:d1754e7f0cea
[xemacs-hg @ 2006-06-03 17:50:39 by aidan] Just-in-time Unicode code point support.
author: aidan
date: Sat, 03 Jun 2006 17:51:06 +0000
parents: ca02e61c9829
children: 43b4a54fbf66
--- a/tests/automated/mule-tests.el	Fri Jun 02 22:18:08 2006 +0000
+++ b/tests/automated/mule-tests.el	Sat Jun 03 17:51:06 2006 +0000
@@ -78,6 +78,38 @@
 ;; once compiled, for no good reason.
 (test-chars t)
 
+(defun unicode-code-point-to-utf-8-string (code-point)
+  "Convert a Unicode code point to the equivalent UTF-8 string. 
+This is a naive implementation in Lisp.  "
+  (check-argument-type 'natnump code-point)
+  (check-argument-range code-point 0 #x1fffff)
+  (if (< code-point #x80)
+      (format "%c" code-point)
+    (if (< code-point #x800)
+	(format "%c%c" 
+		;; ochars[0] = 0xC0 | (input & ~(0xFFFFF83F)) >> 6;
+		(logior #xc0 (lsh (logand code-point #x7c0) -6))
+		;; ochars[1] = 0x80 | input & ~(0xFFFFFFC0);
+		(logior #x80 (logand code-point #x3f)))
+      (if (< code-point #x00010000)
+	  (format "%c%c%c" 
+		  ;; ochars[0] = 0xE0 | (input >> 12) & ~(0xFFFFFFF0); 
+		  (logior #xe0 (logand (lsh code-point -12) #x0f))
+		  ;; ochars[1] = 0x80 | (input >> 6) & ~(0xFFFFFFC0); 
+		  (logior #x80 (logand (lsh code-point -6) #x3f))
+		  ;; ochars[2] = 0x80 | input & ~(0xFFFFFFC0); 
+		  (logior #x80 (logand code-point #x3f)))
+	(if (< code-point #x200000)
+	    (format "%c%c%c%c" 
+		    ;; ochars[0] = 0xF0 | (input >> 18) & ~(0xFFFFFFF8)
+		    (logior #xF0 (logand (lsh code-point -18) #x7))
+		    ;; ochars[1] = 0x80 | (input >> 12) & ~(0xFFFFFFC0);
+		    (logior #x80 (logand (lsh code-point -12) #x3f))
+		    ;; ochars[2] = 0x80 | (input >> 6) & ~(0xFFFFFFC0); 
+		    (logior #x80 (logand (lsh code-point -6) #x3f))
+		    ;; ochars[3] = 0x80 | input & ~(0xFFFFFFC0); 
+		    (logior #x80 (logand code-point #x3f))))))))
+
 ;;-----------------------------------------------------------------
 ;; Test string modification functions that modify the length of a char.
 ;;-----------------------------------------------------------------
@@ -336,6 +368,37 @@
   
     (Check-Error wrong-type-argument (set-unicode-conversion scaron -10000)))
 
+  (dolist (utf-8-char 
+	   '("\xc6\x92"		  ;; U+0192 LATIN SMALL LETTER F WITH HOOK
+	     "\xe2\x81\x8a"	  ;; U+204A TIRONIAN SIGN ET
+	     "\xe2\x82\xae"	  ;; U+20AE TUGRIK SIGN
+	     "\xf0\x9d\x92\xbd"	  ;; U+1D4BD MATHEMATICAL SCRIPT SMALL H
+	     "\xf0\x9d\x96\x93"   ;; U+1D593 MATHEMATICAL BOLD FRAKTUR SMALL N
+	     "\xf0\xaf\xa8\x88"   ;; U+2FA08 CJK COMPATIBILITY FOR U+4BCE
+	     "\xf4\x8f\xbf\xbd")) ;; U+10FFFD <Plane 16 Private Use, Last>
+    (let* ((xemacs-character (car (append 
+				  (decode-coding-string utf-8-char 'utf-8) 
+				  nil)))
+	   (xemacs-charset (car (split-char xemacs-character))))
+
+      ;; Trivial test of the UTF-8 support of the escape-quoted character set. 
+      (Assert (equal (decode-coding-string utf-8-char 'utf-8)
+		     (decode-coding-string (concat "\033%G" utf-8-char)
+					   'escape-quoted)))
+
+      ;; Check that the reverse mapping holds. 
+      (Assert (equal (unicode-code-point-to-utf-8-string 
+		      (encode-char xemacs-character 'ucs))
+		     utf-8-char))
+
+      ;; Check that, if this character has been JIT-allocated, it is encoded
+      ;; in escape-quoted using the corresponding UTF-8 escape. 
+      (when (charset-property xemacs-charset 'encode-as-utf-8)
+	(Assert (equal (concat "\033%G" utf-8-char)
+		       (encode-coding-string xemacs-character 'escape-quoted)))
+	(Assert (equal (concat "\033%G" utf-8-char)
+		       (encode-coding-string xemacs-character 'ctext))))))
+
   ;;---------------------------------------------------------------
   ;; Test charset-in-* functions
   ;;---------------------------------------------------------------
author	aidan
date	Sat, 03 Jun 2006 17:51:06 +0000
parents	ca02e61c9829
children	43b4a54fbf66