changeset 4489:b75b075a9041

Support displaying invalid UTF-8 in language-environment-specific ways. 2008-08-05 Aidan Kehoe <kehoea@parhasard.net> * specifier.el (current-display-table): Initialise this here, not in x-init.el, since we want it even on non-X builds to use the support for displaying Unicode error sequences according to the current locale. * mule/mule-cmds.el (set-language-info): Document error-sequence-coding-system, used to describe how to display characters that are not valid Unicode on disk. * mule/mule-cmds.el (finish-set-language-environment): Implement error-sequence-coding-system. * unicode.el (unicode-error-sequence-warning-face): New face, to make it possible to distinguish invalid Unicode sequences from the characters given by the valid Unicode sequences. * mule/cyrillic.el ("Russian"): ("Ukrainian"): ("Bulgarian"): ("Belarusian"): ("Cyrillic-ALT"): Add support for error-sequence-coding-system for all these languages. * mule/latin.el: Add support for error-sequence-coding-system for the Latin-alphabet language environments.
author Aidan Kehoe <kehoea@parhasard.net>
date Tue, 05 Aug 2008 09:06:41 +0200
parents 6b0000935adc
children 67fbcaf3dbdc
files lisp/ChangeLog lisp/mule/cyrillic.el lisp/mule/greek.el lisp/mule/latin.el lisp/mule/mule-cmds.el lisp/specifier.el lisp/unicode.el lisp/x-init.el
diffstat 8 files changed, 80 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/ChangeLog	Tue Aug 05 09:06:41 2008 +0200
@@ -1,3 +1,28 @@
+2008-08-05  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* specifier.el (current-display-table): Initialise this here, not
+	in x-init.el, since we want it even on non-X builds to use the
+	support for displaying Unicode error sequences according to the
+	current locale.
+	* mule/mule-cmds.el (set-language-info): 
+	Document error-sequence-coding-system, used to describe how to
+	display characters that are not valid Unicode on disk. 
+	* mule/mule-cmds.el (finish-set-language-environment): 
+	Implement error-sequence-coding-system. 
+	* unicode.el (unicode-error-sequence-warning-face): 
+	New face, to make it possible to distinguish invalid Unicode
+	sequences from the characters given by the valid Unicode
+	sequences. 
+	* mule/cyrillic.el ("Russian"): 
+	("Ukrainian"): 
+	("Bulgarian"): 
+	("Belarusian"): 
+	("Cyrillic-ALT"): Add support for error-sequence-coding-system for
+	all these languages.
+	* mule/greek.el:
+	* mule/latin.el: Add support for error-sequence-coding-system for
+	the Greek and Latin-alphabet language environments.
+
 2008-07-26  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* x-init.el (x-initialize-compose): 
--- a/lisp/mule/cyrillic.el	Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/mule/cyrillic.el	Tue Aug 05 09:06:41 2008 +0200
@@ -370,6 +370,7 @@
 	     (coding-system koi8-r)
 	     (native-coding-system koi8-r)
 	     (coding-priority koi8-r)
+	     (error-sequence-coding-system koi8-r)
 	     (input-method . "cyrillic-yawerty")
 	     (features cyril-util)
 	     (locale "ru")
@@ -543,6 +544,7 @@
  "Ukrainian" '((coding-system koi8-u)
                (coding-priority koi8-u)
                (locale "uk")
+               (error-sequence-coding-system koi8-u)
                (input-method . "cyrillic-ukrainian")
                (documentation
                 . "Support for Ukrainian."))
@@ -689,6 +691,7 @@
 (set-language-info-alist
  "Bulgarian" '((coding-system windows-1251)
                (coding-priority windows-1251)
+	       (error-sequence-coding-system windows-1251)
                (input-method . "bulgarian-bds")
                (locale "bg")
                (documentation
@@ -699,6 +702,7 @@
 (set-language-info-alist
  "Belarusian" '((coding-system windows-1251)
                 (coding-priority windows-1251)
+		(error-sequence-coding-system windows-1251)
                 (locale "be")
                 (input-method . "belarusian")
                 (documentation
@@ -845,6 +849,7 @@
  "Cyrillic-ALT" '((charset cyrillic-iso8859-5)
                   (coding-system alternativnyj)
                   (native-coding-system alternativnyj)
+		  (error-sequence-coding-system alternativnyj)
                   (coding-priority alternativnyj)
                   (input-method . "cyrillic-yawerty")
                   (features cyril-util)
--- a/lisp/mule/greek.el	Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/mule/greek.el	Tue Aug 05 09:06:41 2008 +0200
@@ -328,6 +328,7 @@
 	   (coding-system iso-8859-7)
 	   (coding-priority iso-8859-7)
 	   (native-coding-system iso-8859-7)
+	   (error-sequence-coding-system iso-8859-7)
 	   (locale "el")
 	   (input-method . "greek")
 	   (sample-text . "Greek (,FGkk]mija(B)	,FCei\(B ,Fsar(B")
--- a/lisp/mule/latin.el	Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/mule/latin.el	Tue Aug 05 09:06:41 2008 +0200
@@ -957,11 +957,12 @@
   for ((charset codesys default-input nice-charset-1 nice-charset-2
                 ;; supported-langs is a list if the doc string is replaced
                 ;; entirely
-                supported-langs) 
+                supported-langs error-sequence-coding-system) 
        langenvs) in
   '(((latin-iso8859-1 iso-8859-1 "latin-1-prefix" "Latin-1" "ISO-8859-1"
 " Danish, Dutch, English, Faeroese, Finnish, French, German, Icelandic,
- Irish, Italian, Norwegian, Portuguese, Spanish, and Swedish.")
+ Irish, Italian, Norwegian, Portuguese, Spanish, and Swedish."
+      windows-1252)
      (("Danish" "da")
       ("Dutch" "nl" "TUTORIAL.nl")
       ("Faeroese" "fo")
@@ -1024,6 +1025,8 @@
      (coding-system ,codesys)
      (coding-priority ,codesys)
      (native-coding-system ,codesys)
+     (error-sequence-coding-system ,(or error-sequence-coding-system
+                                        codesys))
      (documentation . ,(if (listp supported-langs) (car supported-langs)
 			 (format "\
 Generic language environment for %s (%s)." nice-charset-1 nice-charset-2))))
--- a/lisp/mule/mule-cmds.el	Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/mule/mule-cmds.el	Tue Aug 05 09:06:41 2008 +0200
@@ -225,7 +225,15 @@
 
                      If there is no value for this property, the MS Windows
                      locale is assumed to have the same name as the
-                     language environment."
+                     language environment.
+
+  error-sequence-coding-system
+                     VALUE is a fixed-width 8-bit coding system used to
+                     display Unicode error sequences (using a face to make
+                     it clear that the data is invalid).  In Western Europe
+                     this is normally windows-1252; in Russia and the
+                     former Soviet Union koi8-ru or windows-1251 makes more
+                     sense."
   (if (symbolp lang-env)
       (setq lang-env (symbol-name lang-env)))
   (let (lang-slot prop-slot)
@@ -760,6 +768,24 @@
     (if (functionp func)
 	(funcall func)))
 
+  (let ((error-sequence-coding-system
+         (get-language-info language-name 'error-sequence-coding-system))
+        (disp-table (specifier-instance current-display-table))
+        glyph)
+    (when (consp error-sequence-coding-system)
+      (setq error-sequence-coding-system (car error-sequence-coding-system)))
+    (map-char-table
+     #'(lambda (key entry)
+         (setq glyph (make-glyph
+                      (vector
+                       'string :data
+                       (decode-coding-string (string entry)
+                                             error-sequence-coding-system))))
+         (set-glyph-face glyph 'unicode-error-sequence-warning-face)
+         (put-char-table key glyph disp-table)
+         nil)
+     unicode-error-default-translation-table))
+
   ;; Fit the charsets preferences in unicode conversions for the
   ;; language environment.
   (set-language-unicode-precedence-list
--- a/lisp/specifier.el	Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/specifier.el	Tue Aug 05 09:06:41 2008 +0200
@@ -988,4 +988,18 @@
 			 (specifier-instance specifier domain))))
 		   (list (cons nil inst))))))))))
 
+;; Character 160 (octal 0240) displays incorrectly under some X
+;; installations apparently due to a universally crocked font width
+;; specification.  Display it as a space since that's what's expected. 
+;;
+;; (make-char-table 'generic) instead of (make-display-table) because
+;; make-display-table isn't dumped, and this file is. 
+;;
+;; We also want the global display table to be actually globally
+;; initialised; that's why this is here, and not in x-init.el, these days.
+
+(set-specifier current-display-table 
+               #s(char-table type generic data (?\xA0 ?\x20))
+               'global)
+
 ;;; specifier.el ends here
--- a/lisp/unicode.el	Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/unicode.el	Tue Aug 05 09:06:41 2008 +0200
@@ -611,6 +611,9 @@
        (translate-region start finish table))
      begin end buffer))
 
+;; Sure would be nice to be able to use defface here. 
+(copy-face 'highlight 'unicode-error-sequence-warning-face)
+
 (unless (featurep 'mule)
   ;; We do this in such a roundabout way--instead of having the above defun
   ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have
--- a/lisp/x-init.el	Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/x-init.el	Tue Aug 05 09:06:41 2008 +0200
@@ -312,15 +312,4 @@
   (if (equal display "") (setq display nil))
   (make-frame-on-device 'x display props))
 
-;; Character 160 (octal 0240) displays incorrectly under X apparently
-;; due to a universally crocked font width specification.  Display it
-;; as a space since that's what seems to be expected.
-;;
-;; (make-char-table 'generic) instead of (make-display-table) because
-;; make-display-table isn't dumped, and this file is. 
-
-(let ((tab (make-char-table 'generic)))
-  (put-char-table 160 " " tab)
-  (set-specifier current-display-table tab 'global 'x))
-
 ;;; x-init.el ends here