# HG changeset patch # User Aidan Kehoe # Date 1431363898 -3600 # Node ID 1b2fdcc3cc5c5ea6726dfacecd6e15422cdbb76c # Parent 47ffa085a9addc8163d39765b612d6b12bddda44 Remove an old, buggy attempt to preserve East Asian chars, encode_unicode_char src/ChangeLog addition: 2015-05-11 Aidan Kehoe * unicode.c (encode_unicode_char): If we have no information on the Unicode mapping of a character, write U+FFFD unconditionally. Remove an old, incomplete, inappropriate attempt to preserve certain East Asian characters. tests/ChangeLog addition: 2015-05-11 Aidan Kehoe * automated/mule-tests.el: Assert that a character which we know has no Unicode mapping is represented as REPLACEMENT CHARACTER in UTF-8, rather than as a character corresponding to a very large codepoint. diff -r 47ffa085a9ad -r 1b2fdcc3cc5c src/ChangeLog --- a/src/ChangeLog Sun May 10 23:55:41 2015 +0100 +++ b/src/ChangeLog Mon May 11 18:04:58 2015 +0100 @@ -1,3 +1,10 @@ +2015-05-11 Aidan Kehoe + + * unicode.c (encode_unicode_char): + If we have no information on the Unicode mapping of a character, + write U+FFFD unconditionally. Remove an old, incomplete, + inappropriate attempt to preserve certain East Asian characters. + 2015-05-10 Aidan Kehoe * floatfns.c (truncate_one_float): diff -r 47ffa085a9ad -r 1b2fdcc3cc5c src/unicode.c --- a/src/unicode.c Sun May 10 23:55:41 2015 +0100 +++ b/src/unicode.c Mon May 11 18:04:58 2015 +0100 @@ -1966,20 +1966,7 @@ if (code == -1) { - if (type != UNICODE_UTF_16 && - XCHARSET_DIMENSION (charset) == 2 && - XCHARSET_CHARS (charset) == 94) - { - unsigned char final = XCHARSET_FINAL (charset); - - if (('@' <= final) && (final < 0x7f)) - code = (0xe00000 + (final - '@') * 94 * 94 - + ((h & 127) - 33) * 94 + (l & 127) - 33); - else - code = '?'; - } - else - code = '?'; + code = 0xFFFD; } #else int code = h; diff -r 47ffa085a9ad -r 1b2fdcc3cc5c tests/ChangeLog --- a/tests/ChangeLog Sun May 10 23:55:41 2015 +0100 +++ b/tests/ChangeLog Mon May 11 18:04:58 2015 +0100 @@ -1,3 +1,10 @@ +2015-05-11 Aidan Kehoe + + * automated/mule-tests.el: + Assert that a character which we know has no Unicode mapping is + represented as REPLACEMENT CHARACTER in UTF-8, rather than as a + character corresponding to a very large codepoint. + 2015-05-10 Aidan Kehoe * automated/lisp-tests.el: diff -r 47ffa085a9ad -r 1b2fdcc3cc5c tests/automated/mule-tests.el --- a/tests/automated/mule-tests.el Sun May 10 23:55:41 2015 +0100 +++ b/tests/automated/mule-tests.el Mon May 11 18:04:58 2015 +0100 @@ -460,6 +460,16 @@ finally (set-unicode-conversion scaron initial-unicode)) (Check-Error args-out-of-range (set-unicode-conversion scaron -10000))) + (Assert (not (natnump (char-to-unicode (make-char 'japanese-jisx0208 + 34 49)))) + "checking character with no Unicode mapping treated as such") + + (Assert (equal (decode-coding-string + (encode-coding-string (make-char 'japanese-jisx0208 34 49) + 'utf-8) 'utf-8) + "\uFFFD") + "checking REPLACEMENT CHARACTER used correctly") + (dolist (utf-8-char '("\xc6\x92" ;; U+0192 LATIN SMALL LETTER F WITH HOOK "\xe2\x81\x8a" ;; U+204A TIRONIAN SIGN ET