comparison tests/automated/mule-tests.el @ 3439:d1754e7f0cea

[xemacs-hg @ 2006-06-03 17:50:39 by aidan] Just-in-time Unicode code point support.
author aidan
date Sat, 03 Jun 2006 17:51:06 +0000
parents ca02e61c9829
children 43b4a54fbf66
comparison
equal deleted inserted replaced
3438:14fbcab7c67b 3439:d1754e7f0cea
76 ;; run a test in byte-compiled mode only. It's tedious to have 76 ;; run a test in byte-compiled mode only. It's tedious to have
77 ;; time-consuming tests like this one run twice, once interpreted and 77 ;; time-consuming tests like this one run twice, once interpreted and
78 ;; once compiled, for no good reason. 78 ;; once compiled, for no good reason.
79 (test-chars t) 79 (test-chars t)
80 80
81 (defun unicode-code-point-to-utf-8-string (code-point)
82 "Convert a Unicode code point to the equivalent UTF-8 string.
83 This is a naive implementation in Lisp. "
84 (check-argument-type 'natnump code-point)
85 (check-argument-range code-point 0 #x1fffff)
86 (if (< code-point #x80)
87 (format "%c" code-point)
88 (if (< code-point #x800)
89 (format "%c%c"
90 ;; ochars[0] = 0xC0 | (input & ~(0xFFFFF83F)) >> 6;
91 (logior #xc0 (lsh (logand code-point #x7c0) -6))
92 ;; ochars[1] = 0x80 | input & ~(0xFFFFFFC0);
93 (logior #x80 (logand code-point #x3f)))
94 (if (< code-point #x00010000)
95 (format "%c%c%c"
96 ;; ochars[0] = 0xE0 | (input >> 12) & ~(0xFFFFFFF0);
97 (logior #xe0 (logand (lsh code-point -12) #x0f))
98 ;; ochars[1] = 0x80 | (input >> 6) & ~(0xFFFFFFC0);
99 (logior #x80 (logand (lsh code-point -6) #x3f))
100 ;; ochars[2] = 0x80 | input & ~(0xFFFFFFC0);
101 (logior #x80 (logand code-point #x3f)))
102 (if (< code-point #x200000)
103 (format "%c%c%c%c"
104 ;; ochars[0] = 0xF0 | (input >> 18) & ~(0xFFFFFFF8)
105 (logior #xF0 (logand (lsh code-point -18) #x7))
106 ;; ochars[1] = 0x80 | (input >> 12) & ~(0xFFFFFFC0);
107 (logior #x80 (logand (lsh code-point -12) #x3f))
108 ;; ochars[2] = 0x80 | (input >> 6) & ~(0xFFFFFFC0);
109 (logior #x80 (logand (lsh code-point -6) #x3f))
110 ;; ochars[3] = 0x80 | input & ~(0xFFFFFFC0);
111 (logior #x80 (logand code-point #x3f))))))))
112
81 ;;----------------------------------------------------------------- 113 ;;-----------------------------------------------------------------
82 ;; Test string modification functions that modify the length of a char. 114 ;; Test string modification functions that modify the length of a char.
83 ;;----------------------------------------------------------------- 115 ;;-----------------------------------------------------------------
84 116
85 (when (featurep 'mule) 117 (when (featurep 'mule)
334 (Assert (eq code (char-to-unicode scaron))) 366 (Assert (eq code (char-to-unicode scaron)))
335 (Assert (eq scaron (unicode-to-char code '(latin-iso8859-2)))))) 367 (Assert (eq scaron (unicode-to-char code '(latin-iso8859-2))))))
336 368
337 (Check-Error wrong-type-argument (set-unicode-conversion scaron -10000))) 369 (Check-Error wrong-type-argument (set-unicode-conversion scaron -10000)))
338 370
371 (dolist (utf-8-char
372 '("\xc6\x92" ;; U+0192 LATIN SMALL LETTER F WITH HOOK
373 "\xe2\x81\x8a" ;; U+204A TIRONIAN SIGN ET
374 "\xe2\x82\xae" ;; U+20AE TUGRIK SIGN
375 "\xf0\x9d\x92\xbd" ;; U+1D4BD MATHEMATICAL SCRIPT SMALL H
376 "\xf0\x9d\x96\x93" ;; U+1D593 MATHEMATICAL BOLD FRAKTUR SMALL N
377 "\xf0\xaf\xa8\x88" ;; U+2FA08 CJK COMPATIBILITY FOR U+4BCE
378 "\xf4\x8f\xbf\xbd")) ;; U+10FFFD <Plane 16 Private Use, Last>
379 (let* ((xemacs-character (car (append
380 (decode-coding-string utf-8-char 'utf-8)
381 nil)))
382 (xemacs-charset (car (split-char xemacs-character))))
383
384 ;; Trivial test of the UTF-8 support of the escape-quoted character set.
385 (Assert (equal (decode-coding-string utf-8-char 'utf-8)
386 (decode-coding-string (concat "\033%G" utf-8-char)
387 'escape-quoted)))
388
389 ;; Check that the reverse mapping holds.
390 (Assert (equal (unicode-code-point-to-utf-8-string
391 (encode-char xemacs-character 'ucs))
392 utf-8-char))
393
394 ;; Check that, if this character has been JIT-allocated, it is encoded
395 ;; in escape-quoted using the corresponding UTF-8 escape.
396 (when (charset-property xemacs-charset 'encode-as-utf-8)
397 (Assert (equal (concat "\033%G" utf-8-char)
398 (encode-coding-string xemacs-character 'escape-quoted)))
399 (Assert (equal (concat "\033%G" utf-8-char)
400 (encode-coding-string xemacs-character 'ctext))))))
401
339 ;;--------------------------------------------------------------- 402 ;;---------------------------------------------------------------
340 ;; Test charset-in-* functions 403 ;; Test charset-in-* functions
341 ;;--------------------------------------------------------------- 404 ;;---------------------------------------------------------------
342 (with-temp-buffer 405 (with-temp-buffer
343 (insert-file-contents (locate-data-file "HELLO")) 406 (insert-file-contents (locate-data-file "HELLO"))