xemacs-beta: src/mule-coding.c comparison

[xemacs-hg @ 2006-05-11 08:57:59 by stephent] Improve detection of ISO-8-1 coding systems. <874pzx2bn2.fsf@tleepslib.sk.tsukuba.ac.jp>

comparison

equal deleted inserted replaced

-:824c3c18a129
+:96ec8f16af45
 	DET_RESULT (st, iso_8_2) = DET_SLIGHTLY_LIKELY;
 #endif
 }
 else if (data->odd_high_byte_groups > 0 &&
 	   data->even_high_byte_groups > 0)
-SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY);
+{
+/* Well, this could be Latin-1 text in which most high-byte
+	 characters occur singly, but occasionally two occur together,
+	 though less often. This is common for Western
+	 European languages like German, French, Danish, Swedish, etc.
+	 In that case, either the file is rather small and
+	 even_high_byte_groups is low,
+	 or the file is larger and the ratio of odd to even
+	 groups is very high. */
+SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY);
+if (data->even_high_byte_groups <= 3 ||
+	  data->odd_high_byte_groups >= 10 * data->even_high_byte_groups)
+	DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_LIKELY;
+}
 else
 SET_DET_RESULTS (st, iso2022, DET_AS_LIKELY_AS_UNLIKELY);
 }
 static void

Mercurial > hg > xemacs-beta