changeset 5913:1b2fdcc3cc5c

Remove an old, buggy attempt to preserve East Asian chars, encode_unicode_char

src/ChangeLog addition:

2015-05-11  Aidan Kehoe  <kehoea@parhasard.net>

	* unicode.c (encode_unicode_char):
	If we have no information on the Unicode mapping of a character,
	write U+FFFD unconditionally. Remove an old, incomplete,
	inappropriate attempt to preserve certain East Asian characters.

tests/ChangeLog addition:

2015-05-11  Aidan Kehoe  <kehoea@parhasard.net>

	* automated/mule-tests.el:
	Assert that a character which we know has no Unicode mapping is
	represented as REPLACEMENT CHARACTER in UTF-8, rather than as a
	character corresponding to a very large codepoint.
author Aidan Kehoe <kehoea@parhasard.net>
date Mon, 11 May 2015 18:04:58 +0100
parents 47ffa085a9ad
children bd4d2c8ef9cc
files src/ChangeLog src/unicode.c tests/ChangeLog tests/automated/mule-tests.el
diffstat 4 files changed, 25 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Sun May 10 23:55:41 2015 +0100
+++ b/src/ChangeLog	Mon May 11 18:04:58 2015 +0100
@@ -1,3 +1,10 @@
+2015-05-11  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* unicode.c (encode_unicode_char):
+	If we have no information on the Unicode mapping of a character,
+	write U+FFFD unconditionally. Remove an old, incomplete,
+	inappropriate attempt to preserve certain East Asian characters.
+
 2015-05-10  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* floatfns.c (truncate_one_float):
--- a/src/unicode.c	Sun May 10 23:55:41 2015 +0100
+++ b/src/unicode.c	Mon May 11 18:04:58 2015 +0100
@@ -1966,20 +1966,7 @@
 
   if (code == -1)
     {
-      if (type != UNICODE_UTF_16 &&
-	  XCHARSET_DIMENSION (charset) == 2 &&
-	  XCHARSET_CHARS (charset) == 94)
-	{
-	  unsigned char final = XCHARSET_FINAL (charset);
-
-	  if (('@' <= final) && (final < 0x7f))
-	    code = (0xe00000 + (final - '@') * 94 * 94
-		    + ((h & 127) - 33) * 94 + (l & 127) - 33);
-	  else
-	    code = '?';
-	}
-      else
-	code = '?';
+      code = 0xFFFD;
     }
 #else
   int code = h;
--- a/tests/ChangeLog	Sun May 10 23:55:41 2015 +0100
+++ b/tests/ChangeLog	Mon May 11 18:04:58 2015 +0100
@@ -1,3 +1,10 @@
+2015-05-11  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* automated/mule-tests.el:
+	Assert that a character which we know has no Unicode mapping is
+	represented as REPLACEMENT CHARACTER in UTF-8, rather than as a
+	character corresponding to a very large codepoint.
+
 2015-05-10  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* automated/lisp-tests.el:
--- a/tests/automated/mule-tests.el	Sun May 10 23:55:41 2015 +0100
+++ b/tests/automated/mule-tests.el	Mon May 11 18:04:58 2015 +0100
@@ -460,6 +460,16 @@
       finally (set-unicode-conversion scaron initial-unicode))
     (Check-Error args-out-of-range (set-unicode-conversion scaron -10000)))
 
+  (Assert (not (natnump (char-to-unicode (make-char 'japanese-jisx0208
+                                                    34 49))))
+          "checking character with no Unicode mapping treated as such")
+
+  (Assert (equal (decode-coding-string
+                  (encode-coding-string (make-char 'japanese-jisx0208 34 49)
+                                        'utf-8) 'utf-8)
+                 "\uFFFD")
+          "checking REPLACEMENT CHARACTER used correctly")
+
   (dolist (utf-8-char 
 	   '("\xc6\x92"		  ;; U+0192 LATIN SMALL LETTER F WITH HOOK
 	     "\xe2\x81\x8a"	  ;; U+204A TIRONIAN SIGN ET