diff src/unicode.c @ 4583:2669b1b7e33b

Correct little-endian UTF-16 surrogate handling.

src/ChangeLog addition:
2009-01-31 Aidan Kehoe <kehoea@parhasard.net>
* unicode.c (unicode_convert): Correct little-endian UTF-16 surrogate handling.

tests/ChangeLog addition:
2009-01-31 Aidan Kehoe <kehoea@parhasard.net>
* automated/mule-tests.el: Test little-endian Unicode surrogates too.
author Aidan Kehoe <kehoea@parhasard.net>
date Sat, 31 Jan 2009 13:06:37 +0000
parents bd9b678f4db7
children 7e54adf407a1
line wrap: on
line diff
--- a/src/unicode.c	Sun Jan 18 12:56:51 2009 +0000
+++ b/src/unicode.c	Sat Jan 31 13:06:37 2009 +0000
@@ -2115,23 +2115,47 @@
 		{
 		  int tempch;
 
-		  if (!valid_utf_16_last_surrogate(ch & 0xFFFF))
-		    {
-                      DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
-                                        ignore_bom);
-                      DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
-                                        ignore_bom);
-                      DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
-                                        ignore_bom);
-                      DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
-                                        ignore_bom);
-		    }
-                  else 
+                  if (little_endian)
                     {
-                      tempch = utf_16_surrogates_to_code((ch >> 16), 
-                                                         (ch & 0xffff));
-                      decode_unicode_char(tempch, dst, data, ignore_bom);
+                      if (!valid_utf_16_last_surrogate(ch >> 16))
+                        {
+                          DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
+                                              ignore_bom);
+                          DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
+                                              ignore_bom);
+                          DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
+                                              ignore_bom);
+                          DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
+                                              ignore_bom);
+                        }
+                      else
+                        {
+                          tempch = utf_16_surrogates_to_code((ch & 0xffff),
+                                                             (ch >> 16));
+                          decode_unicode_char(tempch, dst, data, ignore_bom); 
+                        }
                     }
+                  else
+                    {
+                      if (!valid_utf_16_last_surrogate(ch & 0xFFFF))
+                        {
+                          DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
+                                              ignore_bom);
+                          DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
+                                              ignore_bom);
+                          DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
+                                              ignore_bom);
+                          DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
+                                              ignore_bom);
+                        }
+                      else 
+                        {
+                          tempch = utf_16_surrogates_to_code((ch >> 16), 
+                                                             (ch & 0xffff));
+                          decode_unicode_char(tempch, dst, data, ignore_bom); 
+                        }
+                    }
+
 		  ch = 0;
 		  counter = 0;
                 }