changeset 4492:121aadac896e

Merge.
author Aidan Kehoe <kehoea@parhasard.net>
date Fri, 08 Aug 2008 21:28:13 +0200
parents 67fbcaf3dbdc (diff) d402d7b18bd8 (current diff)
children 89406c31b125
files lisp/ChangeLog lisp/unicode.el
diffstat 8 files changed, 96 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Tue Aug 05 08:37:17 2008 +0200
+++ b/lisp/ChangeLog	Fri Aug 08 21:28:13 2008 +0200
@@ -1,3 +1,37 @@
+2008-08-08  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* unicode.el: 
+	* mule/mule-cmds.el: 
+	* mule/latin.el: 
+	* mule/cyrillic.el: 
+	Rework the various identifiers using error-sequence to use
+	invalid-sequence instead.
+
+2008-08-05  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* specifier.el (current-display-table): Initialise this here, not
+	in x-init.el, since we want it even on non-X builds to use the
+	support for displaying Unicode error sequences according to the
+	current locale.
+	* mule/mule-cmds.el (set-language-info): 
+	Document error-sequence-coding-system, used to describe how to
+	display characters that are not valid Unicode on disk. 
+	* mule/mule-cmds.el (finish-set-language-environment): 
+	Implement error-sequence-coding-system. 
+	* unicode.el (unicode-error-sequence-warning-face): 
+	New face, to make it possible to distinguish invalid Unicode
+	sequences from the characters given by the valid Unicode
+	sequences. 
+	* mule/cyrillic.el ("Russian"): 
+	("Ukrainian"): 
+	("Bulgarian"): 
+	("Belarusian"): 
+	("Cyrillic-ALT"): Add support for error-sequence-coding-system for
+	all these languages.
+	* mule/latin.el: 
+	Add support for error-sequence-coding-system for the
+	Latin-alphabet language environments.
+
 2008-08-05  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* mule/iso-with-esc.el (greek-iso-8bit-with-esc): 
--- a/lisp/mule/cyrillic.el	Tue Aug 05 08:37:17 2008 +0200
+++ b/lisp/mule/cyrillic.el	Fri Aug 08 21:28:13 2008 +0200
@@ -370,6 +370,7 @@
 	     (coding-system koi8-r)
 	     (native-coding-system koi8-r)
 	     (coding-priority koi8-r)
+	     (invalid-sequence-coding-system koi8-r)
 	     (input-method . "cyrillic-yawerty")
 	     (features cyril-util)
 	     (locale "ru")
@@ -543,6 +544,7 @@
  "Ukrainian" '((coding-system koi8-u)
                (coding-priority koi8-u)
                (locale "uk")
+               (invalid-sequence-coding-system koi8-u)
                (input-method . "cyrillic-ukrainian")
                (documentation
                 . "Support for Ukrainian."))
@@ -689,6 +691,7 @@
 (set-language-info-alist
  "Bulgarian" '((coding-system windows-1251)
                (coding-priority windows-1251)
+	       (invalid-sequence-coding-system windows-1251)
                (input-method . "bulgarian-bds")
                (locale "bg")
                (documentation
@@ -699,6 +702,7 @@
 (set-language-info-alist
  "Belarusian" '((coding-system windows-1251)
                 (coding-priority windows-1251)
+		(invalid-sequence-coding-system windows-1251)
                 (locale "be")
                 (input-method . "belarusian")
                 (documentation
@@ -845,6 +849,7 @@
  "Cyrillic-ALT" '((charset cyrillic-iso8859-5)
                   (coding-system alternativnyj)
                   (native-coding-system alternativnyj)
+		  (invalid-sequence-coding-system alternativnyj)
                   (coding-priority alternativnyj)
                   (input-method . "cyrillic-yawerty")
                   (features cyril-util)
--- a/lisp/mule/greek.el	Tue Aug 05 08:37:17 2008 +0200
+++ b/lisp/mule/greek.el	Fri Aug 08 21:28:13 2008 +0200
@@ -328,6 +328,7 @@
 	   (coding-system iso-8859-7)
 	   (coding-priority iso-8859-7)
 	   (native-coding-system iso-8859-7)
+	   (invalid-sequence-coding-system iso-8859-7)
 	   (locale "el")
 	   (input-method . "greek")
 	   (sample-text . "Greek (,FGkk]mija(B)	,FCei\(B ,Fsar(B")
--- a/lisp/mule/latin.el	Tue Aug 05 08:37:17 2008 +0200
+++ b/lisp/mule/latin.el	Fri Aug 08 21:28:13 2008 +0200
@@ -957,11 +957,12 @@
   for ((charset codesys default-input nice-charset-1 nice-charset-2
                 ;; supported-langs is a list if the doc string is replaced
                 ;; entirely
-                supported-langs) 
+                supported-langs invalid-sequence-coding-system) 
        langenvs) in
   '(((latin-iso8859-1 iso-8859-1 "latin-1-prefix" "Latin-1" "ISO-8859-1"
 " Danish, Dutch, English, Faeroese, Finnish, French, German, Icelandic,
- Irish, Italian, Norwegian, Portuguese, Spanish, and Swedish.")
+ Irish, Italian, Norwegian, Portuguese, Spanish, and Swedish."
+      windows-1252)
      (("Danish" "da")
       ("Dutch" "nl" "TUTORIAL.nl")
       ("Faeroese" "fo")
@@ -1024,6 +1025,8 @@
      (coding-system ,codesys)
      (coding-priority ,codesys)
      (native-coding-system ,codesys)
+     (invalid-sequence-coding-system ,(or invalid-sequence-coding-system
+                                          codesys))
      (documentation . ,(if (listp supported-langs) (car supported-langs)
 			 (format "\
 Generic language environment for %s (%s)." nice-charset-1 nice-charset-2))))
--- a/lisp/mule/mule-cmds.el	Tue Aug 05 08:37:17 2008 +0200
+++ b/lisp/mule/mule-cmds.el	Fri Aug 08 21:28:13 2008 +0200
@@ -225,7 +225,15 @@
 
                      If there is no value for this property, the MS Windows
                      locale is assumed to have the same name as the
-                     language environment."
+                     language environment.
+
+  invalid-sequence-coding-system
+                     VALUE is a fixed-width 8-bit coding system used to
+                     display Unicode error sequences (using a face to make
+                     it clear that the data is invalid).  In Western Europe
+                     this is normally windows-1252; in Russia and the
+                     former Soviet Union koi8-ru or windows-1251 makes more
+                     sense."
   (if (symbolp lang-env)
       (setq lang-env (symbol-name lang-env)))
   (let (lang-slot prop-slot)
@@ -760,6 +768,25 @@
     (if (functionp func)
 	(funcall func)))
 
+  (let ((invalid-sequence-coding-system
+         (get-language-info language-name 'invalid-sequence-coding-system))
+        (disp-table (specifier-instance current-display-table))
+        glyph)
+    (when (consp invalid-sequence-coding-system)
+      (setq invalid-sequence-coding-system
+            (car invalid-sequence-coding-system)))
+    (map-char-table
+     #'(lambda (key entry)
+         (setq glyph (make-glyph
+                      (vector
+                       'string :data
+                       (decode-coding-string (string entry)
+                                             invalid-sequence-coding-system))))
+         (set-glyph-face glyph 'unicode-invalid-sequence-warning-face)
+         (put-char-table key glyph disp-table)
+         nil)
+     unicode-error-default-translation-table))
+
   ;; Fit the charsets preferences in unicode conversions for the
   ;; language environment.
   (set-language-unicode-precedence-list
--- a/lisp/specifier.el	Tue Aug 05 08:37:17 2008 +0200
+++ b/lisp/specifier.el	Fri Aug 08 21:28:13 2008 +0200
@@ -988,4 +988,18 @@
 			 (specifier-instance specifier domain))))
 		   (list (cons nil inst))))))))))
 
+;; Character 160 (octal 0240) displays incorrectly under some X
+;; installations apparently due to a universally crocked font width
+;; specification.  Display it as a space since that's what's expected. 
+;;
+;; A literal generic char table instead of (make-display-table) because
+;; make-display-table isn't dumped, and this file is. 
+;;
+;; We also want the global display table to be actually globally
+;; initialised; that's why this is here, and not in x-init.el, these days.
+
+(set-specifier current-display-table 
+               #s(char-table type generic data (?\xA0 ?\x20))
+               'global)
+
 ;;; specifier.el ends here
--- a/lisp/unicode.el	Tue Aug 05 08:37:17 2008 +0200
+++ b/lisp/unicode.el	Fri Aug 08 21:28:13 2008 +0200
@@ -525,7 +525,7 @@
 To transform XEmacs Unicode error sequences to the Latin-1 characters that
 correspond to the octets on disk, you can use this variable.  ")
 
-(defvar unicode-error-sequence-regexp-range
+(defvar unicode-invalid-sequence-regexp-range
   (and (featurep 'mule)
        (format "%c%c-%c"
                (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0)
@@ -563,7 +563,7 @@
     ;; Comment out until the issue in
     ;; 18179.49815.622843.336527@parhasard.net is fixed.
     (assert t ; (re-search-forward (concat "[" 
-              ;                        unicode-error-sequence-regexp-range
+              ;                        unicode-invalid-sequence-regexp-range
               ;                        "]"))
             nil
             (format "Could not find char ?\\x%x in buffer" i))))
@@ -585,12 +585,12 @@
 	  (setq begin
 		(progn
 		  (skip-chars-forward
-		   (concat "^" unicode-error-sequence-regexp-range))
+		   (concat "^" unicode-invalid-sequence-regexp-range))
 		  (point))
 		end (and (not (= (point) (point-max)))
 			 (progn
 			   (skip-chars-forward
-			    unicode-error-sequence-regexp-range)
+			    unicode-invalid-sequence-regexp-range)
 			   (point))))
 	  (if end
 	      (funcall frob-function begin end))))))
@@ -610,6 +610,9 @@
        (translate-region start finish table))
      begin end buffer))
 
+;; Sure would be nice to be able to use defface here. 
+(copy-face 'highlight 'unicode-invalid-sequence-warning-face)
+
 (unless (featurep 'mule)
   ;; We do this in such a roundabout way--instead of having the above defun
   ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have
@@ -618,8 +621,8 @@
   ;; Lisp.
   (mapcar #'unintern
           '(ccl-encode-to-ucs-2 unicode-error-default-translation-table
-            unicode-error-sequence-regexp-range
-            frob-unicode-errors-region unicode-error-translate-region)))
+            unicode-invalid-sequence-regexp-range
+            frob-unicode-errors-region unicode-error-translate-region)))
 
 ;; #### UTF-7 is not yet implemented, and it's tricky to do.  There's
 ;; an implementation in appendix A.1 of the Unicode Standard, Version
--- a/lisp/x-init.el	Tue Aug 05 08:37:17 2008 +0200
+++ b/lisp/x-init.el	Fri Aug 08 21:28:13 2008 +0200
@@ -312,15 +312,4 @@
   (if (equal display "") (setq display nil))
   (make-frame-on-device 'x display props))
 
-;; Character 160 (octal 0240) displays incorrectly under X apparently
-;; due to a universally crocked font width specification.  Display it
-;; as a space since that's what seems to be expected.
-;;
-;; (make-char-table 'generic) instead of (make-display-table) because
-;; make-display-table isn't dumped, and this file is. 
-
-(let ((tab (make-char-table 'generic)))
-  (put-char-table 160 " " tab)
-  (set-specifier current-display-table tab 'global 'x))
-
 ;;; x-init.el ends here