diff lisp/mule/mule-cmds.el @ 4489:b75b075a9041

Support displaying invalid UTF-8 in language-environment-specific ways. 2008-08-05 Aidan Kehoe <kehoea@parhasard.net> * specifier.el (current-display-table): Initialise this here, not in x-init.el, since we want it even on non-X builds to use the support for displaying Unicode error sequences according to the current locale. * mule/mule-cmds.el (set-language-info): Document error-sequence-coding-system, used to describe how to display characters that are not valid Unicode on disk. * mule/mule-cmds.el (finish-set-language-environment): Implement error-sequence-coding-system. * unicode.el (unicode-error-sequence-warning-face): New face, to make it possible to distinguish invalid Unicode sequences from the characters given by the valid Unicode sequences. * mule/cyrillic.el ("Russian"): ("Ukrainian"): ("Bulgarian"): ("Belarusian"): ("Cyrillic-ALT"): Add support for error-sequence-coding-system for all these languages. * mule/latin.el: Add support for error-sequence-coding-system for the Latin-alphabet language environments.
author Aidan Kehoe <kehoea@parhasard.net>
date Tue, 05 Aug 2008 09:06:41 +0200
parents a78d697ccd2c
children 67fbcaf3dbdc
line wrap: on
line diff
--- a/lisp/mule/mule-cmds.el	Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/mule/mule-cmds.el	Tue Aug 05 09:06:41 2008 +0200
@@ -225,7 +225,15 @@
 
                      If there is no value for this property, the MS Windows
                      locale is assumed to have the same name as the
-                     language environment."
+                     language environment.
+
+  error-sequence-coding-system
+                     VALUE is a fixed-width 8-bit coding system used to
+                     display Unicode error sequences (using a face to make
+                     it clear that the data is invalid).  In Western Europe
+                     this is normally windows-1252; in Russia and the
+                     former Soviet Union koi8-ru or windows-1251 makes more
+                     sense."
   (if (symbolp lang-env)
       (setq lang-env (symbol-name lang-env)))
   (let (lang-slot prop-slot)
@@ -760,6 +768,24 @@
     (if (functionp func)
 	(funcall func)))
 
+  (let ((error-sequence-coding-system
+         (get-language-info language-name 'error-sequence-coding-system))
+        (disp-table (specifier-instance current-display-table))
+        glyph)
+    (when (consp error-sequence-coding-system)
+      (setq error-sequence-coding-system (car error-sequence-coding-system)))
+    (map-char-table
+     #'(lambda (key entry)
+         (setq glyph (make-glyph
+                      (vector
+                       'string :data
+                       (decode-coding-string (string entry)
+                                             error-sequence-coding-system))))
+         (set-glyph-face glyph 'unicode-error-sequence-warning-face)
+         (put-char-table key glyph disp-table)
+         nil)
+     unicode-error-default-translation-table))
+
   ;; Fit the charsets preferences in unicode conversions for the
   ;; language environment.
   (set-language-unicode-precedence-list