Mercurial > hg > xemacs-beta
changeset 4587:b25f081370e0
Automated merge with file:///Sources/xemacs-21.5-checked-out
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Sat, 31 Jan 2009 13:21:37 +0000 |
parents | 2669b1b7e33b (diff) 732e3243f2e4 (current diff) |
children | 03ec57ae1d1e dbe504bac1f4 |
files | |
diffstat | 4 files changed, 60 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/ChangeLog Sat Jan 31 13:21:32 2009 +0000
+++ b/src/ChangeLog Sat Jan 31 13:21:37 2009 +0000
@@ -1,3 +1,8 @@
+2009-01-31 Aidan Kehoe <kehoea@parhasard.net>
+
+        * unicode.c (unicode_convert):
+        Correct little-endian UTF-16 surrogate handling.
+
 2009-01-16 Aidan Kehoe <kehoea@parhasard.net>
 
         * chartab.c (print_table_entry):
```
```diff
--- a/src/unicode.c Sat Jan 31 13:21:32 2009 +0000
+++ b/src/unicode.c Sat Jan 31 13:21:37 2009 +0000
@@ -2115,23 +2115,47 @@
       {
         int tempch;
 
-        if (!valid_utf_16_last_surrogate(ch & 0xFFFF))
-          {
-            DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
-                                ignore_bom);
-            DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
-                                ignore_bom);
-            DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
-                                ignore_bom);
-            DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
-                                ignore_bom);
-          }
-        else
+        if (little_endian)
           {
-            tempch = utf_16_surrogates_to_code((ch >> 16),
-                                               (ch & 0xffff));
-            decode_unicode_char(tempch, dst, data, ignore_bom);
+            if (!valid_utf_16_last_surrogate(ch >> 16))
+              {
+                DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
+                                    ignore_bom);
+                DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
+                                    ignore_bom);
+                DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
+                                    ignore_bom);
+                DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
+                                    ignore_bom);
+              }
+            else
+              {
+                tempch = utf_16_surrogates_to_code((ch & 0xffff),
+                                                   (ch >> 16));
+                decode_unicode_char(tempch, dst, data, ignore_bom);
+              }
           }
+        else
+          {
+            if (!valid_utf_16_last_surrogate(ch & 0xFFFF))
+              {
+                DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
+                                    ignore_bom);
+                DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
+                                    ignore_bom);
+                DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
+                                    ignore_bom);
+                DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
+                                    ignore_bom);
+              }
+            else
+              {
+                tempch = utf_16_surrogates_to_code((ch >> 16),
+                                                   (ch & 0xffff));
+                decode_unicode_char(tempch, dst, data, ignore_bom);
+              }
+          }
+
         ch = 0;
         counter = 0;
       }
```
```diff
--- a/tests/ChangeLog Sat Jan 31 13:21:32 2009 +0000
+++ b/tests/ChangeLog Sat Jan 31 13:21:37 2009 +0000
@@ -1,3 +1,8 @@
+2009-01-31 Aidan Kehoe <kehoea@parhasard.net>
+
+        * automated/mule-tests.el:
+        Test little-endian Unicode surrogates too.
+
 2009-01-18 Aidan Kehoe <kehoea@parhasard.net>
 
         * automated/lisp-tests.el: (char-table-with-string):
```
```diff
--- a/tests/automated/mule-tests.el Sat Jan 31 13:21:32 2009 +0000
+++ b/tests/automated/mule-tests.el Sat Jan 31 13:21:37 2009 +0000
@@ -446,12 +446,17 @@
                (encode-coding-string xemacs-character 'ctext))))))
 
   (loop
-    for (code-point encoded)
-    in '((#x10000 "\xd8\x00\xdc\x00")
-         (#x10FFFD "\xdb\xff\xdf\xfd"))
-    do (Assert (equal (encode-coding-string
-                       (decode-char 'ucs code-point) 'utf-16)
-                      encoded)))
+    for (code-point utf-16-big-endian utf-16-little-endian)
+    in '((#x10000 "\xd8\x00\xdc\x00" "\x00\xd8\x00\xdc")
+         (#x10FFFD "\xdb\xff\xdf\xfd" "\xff\xdb\xfd\xdf"))
+    do
+    (Assert (equal (encode-coding-string
+                    (decode-char 'ucs code-point) 'utf-16)
+                   utf-16-big-endian))
+    (Assert (equal (encode-coding-string
+                    (decode-char 'ucs code-point) 'utf-16-le)
+                   utf-16-little-endian))
+
 
   ;;---------------------------------------------------------------
   ;; Regression test for a couple of CCL-related bugs.
```