Mercurial > hg > xemacs-beta
comparison tests/automated/mule-tests.el @ 3439:d1754e7f0cea
[xemacs-hg @ 2006-06-03 17:50:39 by aidan]
Just-in-time Unicode code point support.
author | aidan |
---|---|
date | Sat, 03 Jun 2006 17:51:06 +0000 |
parents | ca02e61c9829 |
children | 43b4a54fbf66 |
comparison
equal
deleted
inserted
replaced
3438:14fbcab7c67b | 3439:d1754e7f0cea |
---|---|
76 ;; run a test in byte-compiled mode only. It's tedious to have | 76 ;; run a test in byte-compiled mode only. It's tedious to have |
77 ;; time-consuming tests like this one run twice, once interpreted and | 77 ;; time-consuming tests like this one run twice, once interpreted and |
78 ;; once compiled, for no good reason. | 78 ;; once compiled, for no good reason. |
79 (test-chars t) | 79 (test-chars t) |
80 | 80 |
81 (defun unicode-code-point-to-utf-8-string (code-point) | |
82 "Convert a Unicode code point to the equivalent UTF-8 string. | |
83 This is a naive implementation in Lisp. " | |
84 (check-argument-type 'natnump code-point) | |
85 (check-argument-range code-point 0 #x1fffff) | |
86 (if (< code-point #x80) | |
87 (format "%c" code-point) | |
88 (if (< code-point #x800) | |
89 (format "%c%c" | |
90 ;; ochars[0] = 0xC0 | (input & ~(0xFFFFF83F)) >> 6; | |
91 (logior #xc0 (lsh (logand code-point #x7c0) -6)) | |
92 ;; ochars[1] = 0x80 | input & ~(0xFFFFFFC0); | |
93 (logior #x80 (logand code-point #x3f))) | |
94 (if (< code-point #x00010000) | |
95 (format "%c%c%c" | |
96 ;; ochars[0] = 0xE0 | (input >> 12) & ~(0xFFFFFFF0); | |
97 (logior #xe0 (logand (lsh code-point -12) #x0f)) | |
98 ;; ochars[1] = 0x80 | (input >> 6) & ~(0xFFFFFFC0); | |
99 (logior #x80 (logand (lsh code-point -6) #x3f)) | |
100 ;; ochars[2] = 0x80 | input & ~(0xFFFFFFC0); | |
101 (logior #x80 (logand code-point #x3f))) | |
102 (if (< code-point #x200000) | |
103 (format "%c%c%c%c" | |
104 ;; ochars[0] = 0xF0 | (input >> 18) & ~(0xFFFFFFF8) | |
105 (logior #xF0 (logand (lsh code-point -18) #x7)) | |
106 ;; ochars[1] = 0x80 | (input >> 12) & ~(0xFFFFFFC0); | |
107 (logior #x80 (logand (lsh code-point -12) #x3f)) | |
108 ;; ochars[2] = 0x80 | (input >> 6) & ~(0xFFFFFFC0); | |
109 (logior #x80 (logand (lsh code-point -6) #x3f)) | |
110 ;; ochars[3] = 0x80 | input & ~(0xFFFFFFC0); | |
111 (logior #x80 (logand code-point #x3f)))))))) | |
112 | |
81 ;;----------------------------------------------------------------- | 113 ;;----------------------------------------------------------------- |
82 ;; Test string modification functions that modify the length of a char. | 114 ;; Test string modification functions that modify the length of a char. |
83 ;;----------------------------------------------------------------- | 115 ;;----------------------------------------------------------------- |
84 | 116 |
85 (when (featurep 'mule) | 117 (when (featurep 'mule) |
334 (Assert (eq code (char-to-unicode scaron))) | 366 (Assert (eq code (char-to-unicode scaron))) |
335 (Assert (eq scaron (unicode-to-char code '(latin-iso8859-2)))))) | 367 (Assert (eq scaron (unicode-to-char code '(latin-iso8859-2)))))) |
336 | 368 |
337 (Check-Error wrong-type-argument (set-unicode-conversion scaron -10000))) | 369 (Check-Error wrong-type-argument (set-unicode-conversion scaron -10000))) |
338 | 370 |
371 (dolist (utf-8-char | |
372 '("\xc6\x92" ;; U+0192 LATIN SMALL LETTER F WITH HOOK | |
373 "\xe2\x81\x8a" ;; U+204A TIRONIAN SIGN ET | |
374 "\xe2\x82\xae" ;; U+20AE TUGRIK SIGN | |
375 "\xf0\x9d\x92\xbd" ;; U+1D4BD MATHEMATICAL SCRIPT SMALL H | |
376 "\xf0\x9d\x96\x93" ;; U+1D593 MATHEMATICAL BOLD FRAKTUR SMALL N | |
377 "\xf0\xaf\xa8\x88" ;; U+2FA08 CJK COMPATIBILITY FOR U+4BCE | |
378 "\xf4\x8f\xbf\xbd")) ;; U+10FFFD <Plane 16 Private Use, Last> | |
379 (let* ((xemacs-character (car (append | |
380 (decode-coding-string utf-8-char 'utf-8) | |
381 nil))) | |
382 (xemacs-charset (car (split-char xemacs-character)))) | |
383 | |
384 ;; Trivial test of the UTF-8 support of the escape-quoted character set. | |
385 (Assert (equal (decode-coding-string utf-8-char 'utf-8) | |
386 (decode-coding-string (concat "\033%G" utf-8-char) | |
387 'escape-quoted))) | |
388 | |
389 ;; Check that the reverse mapping holds. | |
390 (Assert (equal (unicode-code-point-to-utf-8-string | |
391 (encode-char xemacs-character 'ucs)) | |
392 utf-8-char)) | |
393 | |
394 ;; Check that, if this character has been JIT-allocated, it is encoded | |
395 ;; in escape-quoted using the corresponding UTF-8 escape. | |
396 (when (charset-property xemacs-charset 'encode-as-utf-8) | |
397 (Assert (equal (concat "\033%G" utf-8-char) | |
398 (encode-coding-string xemacs-character 'escape-quoted))) | |
399 (Assert (equal (concat "\033%G" utf-8-char) | |
400 (encode-coding-string xemacs-character 'ctext)))))) | |
401 | |
339 ;;--------------------------------------------------------------- | 402 ;;--------------------------------------------------------------- |
340 ;; Test charset-in-* functions | 403 ;; Test charset-in-* functions |
341 ;;--------------------------------------------------------------- | 404 ;;--------------------------------------------------------------- |
342 (with-temp-buffer | 405 (with-temp-buffer |
343 (insert-file-contents (locate-data-file "HELLO")) | 406 (insert-file-contents (locate-data-file "HELLO")) |