Mercurial > hg > xemacs-beta
diff src/mule-coding.c @ 985:7f62a956b825
[xemacs-hg @ 2002-09-01 06:41:40 by youngs]
2002-08-31 Seiichi Ikiuo <ikiuo@polyphony.co.jp>
* code-init.el (reset-coding-categories-to-default): Add new
coding category 'utf-8-bom'.
* coding.el (coding-system-category): Add check for 'utf-8-bom'.
* unicode.el: Add new coding system 'utf-8-bom'.
2002-08-31 Seiichi Ikiuo <ikiuo@polyphony.co.jp>
* file-coding.h (detection_result): fix DET_LOWEST.
* mule-coding.c
(big5_detector): Add member 'seen_euc_char'.
(big5_detect): Changed so that it no longer interferes with EUC detection.
(iso2022_detector): Add member 'longest_even_high_byte'.
(iso2022_detect): Fix checking for even/odd_high_byte_group.
Change how EUC is judged.
* unicode.c: Add DETECTOR_CATEGORY utf_8_bom.
(utf_8_detector): Add member byteno, first_byte and second_byte.
(utf_8_detect): Add detection of UTF-8-BOM.
author | youngs |
---|---|
date | Sun, 01 Sep 2002 06:41:45 +0000 |
parents | 804517e16990 |
children | e22b0213b713 |
line wrap: on
line diff
--- a/src/mule-coding.c Sat Aug 31 11:04:01 2002 +0000
+++ b/src/mule-coding.c Sun Sep 01 06:41:45 2002 +0000
@@ -599,6 +599,7 @@
 struct big5_detector
 {
   int seen_big5_char;
+  int seen_euc_char;
   unsigned int seen_iso2022_esc:1;
   unsigned int seen_bad_first_byte:1;
   unsigned int seen_bad_second_byte:1;
@@ -628,7 +629,9 @@
           else
             {
               data->in_second_byte = 0;
-              if ((c >= 0x40 && c <= 0x7E) || (c >= 0xA1 && c <= 0xFE))
+              if (c >= 0xA1 && c <= 0xFE)
+                data->seen_euc_char++;
+              else if (c >= 0x40 && c <= 0x7E)
                 data->seen_big5_char++;
               else
                 data->seen_bad_second_byte = 1;
@@ -643,6 +646,8 @@
     DET_RESULT (st, big5) = DET_SOMEWHAT_UNLIKELY;
   else if (data->seen_big5_char >= 4)
     DET_RESULT (st, big5) = DET_SOMEWHAT_LIKELY;
+  else if (data->seen_euc_char)
+    DET_RESULT (st, big5) = DET_SLIGHTLY_LIKELY;
   else
     DET_RESULT (st, big5) = DET_AS_LIKELY_AS_UNLIKELY;
 }
@@ -2749,6 +2754,7 @@
   unsigned int bad_multibyte_escape_sequences;
   unsigned int good_multibyte_escape_sequences;
   int even_high_byte_groups;
+  int longest_even_high_byte;
   int odd_high_byte_groups;
 };
 
@@ -2794,7 +2800,11 @@
                   if (data->high_byte_count & 1)
                     data->odd_high_byte_groups++;
                   else
-                    data->even_high_byte_groups++;
+                    {
+                      data->even_high_byte_groups++;
+                      if (data->longest_even_high_byte < data->high_byte_count)
+                        data->longest_even_high_byte = data->high_byte_count;
+                    }
                 }
               data->high_byte_count = 0;
               data->saw_single_shift_just_now = 0;
@@ -2861,6 +2871,19 @@
     label_continue_loop:;
     }
 
+  if (data->high_byte_count &&
+      !data->saw_single_shift_just_now)
+    {
+      if (data->high_byte_count & 1)
+        data->odd_high_byte_groups++;
+      else
+        {
+          data->even_high_byte_groups++;
+          if (data->longest_even_high_byte < data->high_byte_count)
+            data->longest_even_high_byte = data->high_byte_count;
+        }
+    }
+
   if (data->bad_multibyte_escape_sequences > 2 ||
       (data->bad_multibyte_escape_sequences > 0 &&
        data->good_multibyte_escape_sequences /
@@ -2919,6 +2942,7 @@
   else if (data->odd_high_byte_groups == 0 &&
            data->even_high_byte_groups > 0)
     {
+#if 0
       SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY);
       if (data->even_high_byte_groups > 10)
         {
@@ -2930,6 +2954,15 @@
             DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_UNLIKELY;
           /* else it stays at quite improbable */
         }
+#else
+      SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY);
+      if (data->seen_single_shift)
+        DET_RESULT (st, iso_8_2) = DET_QUITE_PROBABLE;
+      else if (data->even_high_byte_groups > 10)
+        DET_RESULT (st, iso_8_2) = DET_SOMEWHAT_LIKELY;
+      else if (data->longest_even_high_byte > 6)
+        DET_RESULT (st, iso_8_2) = DET_SLIGHTLY_LIKELY;
+#endif
     }
   else if (data->odd_high_byte_groups > 0 &&
            data->even_high_byte_groups > 0)