Mercurial > hg > xemacs-beta
comparison src/unicode.c @ 4583:2669b1b7e33b
Correct little-endian UTF-16 surrogate handling.
src/ChangeLog addition:
2009-01-31 Aidan Kehoe <kehoea@parhasard.net>
* unicode.c (unicode_convert):
Correct little-endian UTF-16 surrogate handling.
tests/ChangeLog addition:
2009-01-31 Aidan Kehoe <kehoea@parhasard.net>
* automated/mule-tests.el:
Test little-endian Unicode surrogates too.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Sat, 31 Jan 2009 13:06:37 +0000 |
parents | bd9b678f4db7 |
children | 7e54adf407a1 |
comparison (legend: equal | deleted | inserted | replaced)
4582:00ed9903a988 | 4583:2669b1b7e33b |
---|---|
2113 } | 2113 } |
2114 else if (32 == counter) | 2114 else if (32 == counter) |
2115 { | 2115 { |
2116 int tempch; | 2116 int tempch; |
2117 | 2117 |
2118 if (!valid_utf_16_last_surrogate(ch & 0xFFFF)) | 2118 if (little_endian) |
2119 { | |
2120 DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data, | |
2121 ignore_bom); | |
2122 DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data, | |
2123 ignore_bom); | |
2124 DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data, | |
2125 ignore_bom); | |
2126 DECODE_ERROR_OCTET (ch & 0xFF, dst, data, | |
2127 ignore_bom); | |
2128 } | |
2129 else | |
2130 { | 2119 { |
2131 tempch = utf_16_surrogates_to_code((ch >> 16), | 2120 if (!valid_utf_16_last_surrogate(ch >> 16)) |
2132 (ch & 0xffff)); | 2121 { |
2133 decode_unicode_char(tempch, dst, data, ignore_bom); | 2122 DECODE_ERROR_OCTET (ch & 0xFF, dst, data, |
2123 ignore_bom); | |
2124 DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data, | |
2125 ignore_bom); | |
2126 DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data, | |
2127 ignore_bom); | |
2128 DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data, | |
2129 ignore_bom); | |
2130 } | |
2131 else | |
2132 { | |
2133 tempch = utf_16_surrogates_to_code((ch & 0xffff), | |
2134 (ch >> 16)); | |
2135 decode_unicode_char(tempch, dst, data, ignore_bom); | |
2136 } | |
2134 } | 2137 } |
2138 else | |
2139 { | |
2140 if (!valid_utf_16_last_surrogate(ch & 0xFFFF)) | |
2141 { | |
2142 DECODE_ERROR_OCTET ((ch >> 24) & 0xFF, dst, data, | |
2143 ignore_bom); | |
2144 DECODE_ERROR_OCTET ((ch >> 16) & 0xFF, dst, data, | |
2145 ignore_bom); | |
2146 DECODE_ERROR_OCTET ((ch >> 8) & 0xFF, dst, data, | |
2147 ignore_bom); | |
2148 DECODE_ERROR_OCTET (ch & 0xFF, dst, data, | |
2149 ignore_bom); | |
2150 } | |
2151 else | |
2152 { | |
2153 tempch = utf_16_surrogates_to_code((ch >> 16), | |
2154 (ch & 0xffff)); | |
2155 decode_unicode_char(tempch, dst, data, ignore_bom); | |
2156 } | |
2157 } | |
2158 | |
2135 ch = 0; | 2159 ch = 0; |
2136 counter = 0; | 2160 counter = 0; |
2137 } | 2161 } |
2138 else | 2162 else |
2139 assert(8 == counter || 24 == counter); | 2163 assert(8 == counter || 24 == counter); |