changeset 3695:42e4605ef1de

[xemacs-hg @ 2006-11-23 13:43:17 by aidan] Handle bytes in the range 0x80-0xC0 better when dealing with ISO-IR 196.
author aidan
date Thu, 23 Nov 2006 13:43:29 +0000
parents a30f3a38c4ae
children ec0171167c5d
files src/ChangeLog src/mule-coding.c tests/ChangeLog tests/automated/mule-tests.el
diffstat 4 files changed, 23 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Wed Nov 22 22:51:11 2006 +0000
+++ b/src/ChangeLog	Thu Nov 23 13:43:29 2006 +0000
@@ -1,3 +1,9 @@
+2006-11-23  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* mule-coding.c (iso2022_decode):
+	Only take the lower seven bits of any eight-bit character that
+	would be illegal in UTF-8, when handling ISO/IR 196 escapes. 
+
 2006-11-14  Stephen J. Turnbull  <stephen@xemacs.org>
 
 	* buffer.c (buffer-file-name): Document invariant.
--- a/src/mule-coding.c	Wed Nov 22 22:51:11 2006 +0000
+++ b/src/mule-coding.c	Thu Nov 23 13:43:29 2006 +0000
@@ -1949,8 +1949,11 @@
 		  counter = 1;
 		}
 	      else
-		/* ASCII, or the lower control characters. */
-		Dynarr_add (dst, c);
+		/* ASCII, or the lower control characters.
+                   
+                   Perhaps we should signal an error if the character is in
+                   the range 0x80-0xc0; this is illegal UTF-8. */
+                Dynarr_add (dst, (c & 0x7f));
 
 	      break;
 	    case 1:
--- a/tests/ChangeLog	Wed Nov 22 22:51:11 2006 +0000
+++ b/tests/ChangeLog	Thu Nov 23 13:43:29 2006 +0000
@@ -1,3 +1,9 @@
+2006-11-23  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* automated/mule-tests.el (featurep):
+	Add a test that ISO/IR 196 escape handling in ISO-2022-based
+	charsets doesn't choke on invalid bytes in UTF-8 text.
+
 2006-11-20  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* automated/mule-tests.el (featurep):
--- a/tests/automated/mule-tests.el	Wed Nov 22 22:51:11 2006 +0000
+++ b/tests/automated/mule-tests.el	Thu Nov 23 13:43:29 2006 +0000
@@ -441,6 +441,12 @@
                (eq (aref ccl-vector 4)  
                    (encode-char (make-char 'control-1 31) 'ucs)))))
 
+  ;; This used to crash, at least in debug builds:
+
+  (Assert (decode-coding-string 
+           (string ?\33 ?\45 ?\107 ?\306 ?\222 ?\215 ?\306)
+           'iso-2022-jp))
+
   ;;---------------------------------------------------------------
   ;; Test charset-in-* functions
   ;;---------------------------------------------------------------