comparison src/unicode.c @ 4583:2669b1b7e33b

Correct little-endian UTF-16 surrogate handling.

src/ChangeLog addition:

2009-01-31  Aidan Kehoe  <kehoea@parhasard.net>

    * unicode.c (unicode_convert):
    Correct little-endian UTF-16 surrogate handling.

tests/ChangeLog addition:

2009-01-31  Aidan Kehoe  <kehoea@parhasard.net>

    * automated/mule-tests.el:
    Test little-endian Unicode surrogates too.
author Aidan Kehoe <kehoea@parhasard.net>
date Sat, 31 Jan 2009 13:06:37 +0000
parents bd9b678f4db7
children 7e54adf407a1
comparison
equal deleted inserted replaced
4582:00ed9903a988 4583:2669b1b7e33b
2113 } 2113 }
2114 else if (32 == counter) 2114 else if (32 == counter)
2115 { 2115 {
2116 int tempch; 2116 int tempch;
2117 2117
2118 if (!valid_utf_16_last_surrogate(ch & 0xFFFF)) 2118 if (little_endian)
2119 {
2120 DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
2121 ignore_bom);
2122 DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
2123 ignore_bom);
2124 DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
2125 ignore_bom);
2126 DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
2127 ignore_bom);
2128 }
2129 else
2130 { 2119 {
2131 tempch = utf_16_surrogates_to_code((ch >> 16), 2120 if (!valid_utf_16_last_surrogate(ch >> 16))
2132 (ch & 0xffff)); 2121 {
2133 decode_unicode_char(tempch, dst, data, ignore_bom); 2122 DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
2123 ignore_bom);
2124 DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
2125 ignore_bom);
2126 DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
2127 ignore_bom);
2128 DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
2129 ignore_bom);
2130 }
2131 else
2132 {
2133 tempch = utf_16_surrogates_to_code((ch & 0xffff),
2134 (ch >> 16));
2135 decode_unicode_char(tempch, dst, data, ignore_bom);
2136 }
2134 } 2137 }
2138 else
2139 {
2140 if (!valid_utf_16_last_surrogate(ch & 0xFFFF))
2141 {
2142 DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data,
2143 ignore_bom);
2144 DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data,
2145 ignore_bom);
2146 DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data,
2147 ignore_bom);
2148 DECODE_ERROR_OCTET (ch & 0xFF, dst, data,
2149 ignore_bom);
2150 }
2151 else
2152 {
2153 tempch = utf_16_surrogates_to_code((ch >> 16),
2154 (ch & 0xffff));
2155 decode_unicode_char(tempch, dst, data, ignore_bom);
2156 }
2157 }
2158
2135 ch = 0; 2159 ch = 0;
2136 counter = 0; 2160 counter = 0;
2137 } 2161 }
2138 else 2162 else
2139 assert(8 == counter || 24 == counter); 2163 assert(8 == counter || 24 == counter);