Mercurial > hg > xemacs-beta
changeset 4489:b75b075a9041
Support displaying invalid UTF-8 in language-environment-specific ways.
2008-08-05 Aidan Kehoe <kehoea@parhasard.net>
* specifier.el (current-display-table): Initialise this here, not
in x-init.el, since we want it even on non-X builds to use the
support for displaying Unicode error sequences according to the
current locale.
* mule/mule-cmds.el (set-language-info):
Document error-sequence-coding-system, used to describe how to
display characters that are not valid Unicode on disk.
* mule/mule-cmds.el (finish-set-language-environment):
Implement error-sequence-coding-system.
* unicode.el (unicode-error-sequence-warning-face):
New face, to make it possible to distinguish invalid Unicode
sequences from the characters given by the valid Unicode
sequences.
* mule/cyrillic.el ("Russian"):
("Ukrainian"):
("Bulgarian"):
("Belarusian"):
("Cyrillic-ALT"): Add support for error-sequence-coding-system for
all these languages.
* mule/latin.el:
Add support for error-sequence-coding-system for the
Latin-alphabet language environments.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Tue, 05 Aug 2008 09:06:41 +0200 |
parents | 6b0000935adc |
children | 67fbcaf3dbdc |
files | lisp/ChangeLog lisp/mule/cyrillic.el lisp/mule/greek.el lisp/mule/latin.el lisp/mule/mule-cmds.el lisp/specifier.el lisp/unicode.el lisp/x-init.el |
diffstat | 8 files changed, 80 insertions(+), 14 deletions(-) |
line wrap: on
line diff
--- a/lisp/ChangeLog Sat Jul 26 13:50:27 2008 +0300 +++ b/lisp/ChangeLog Tue Aug 05 09:06:41 2008 +0200 @@ -1,3 +1,28 @@ +2008-08-05 Aidan Kehoe <kehoea@parhasard.net> + + * specifier.el (current-display-table): Initialise this here, not + in x-init.el, since we want it even on non-X builds to use the + support for displaying Unicode error sequences according to the + current locale. + * mule/mule-cmds.el (set-language-info): + Document error-sequence-coding-system, used to describe how to + display characters that are not valid Unicode on disk. + * mule/mule-cmds.el (finish-set-language-environment): + Implement error-sequence-coding-system. + * unicode.el (unicode-error-sequence-warning-face): + New face, to make it possible to distinguish invalid Unicode + sequences from the characters given by the valid Unicode + sequences. + * mule/cyrillic.el ("Russian"): + ("Ukrainian"): + ("Bulgarian"): + ("Belarusian"): + ("Cyrillic-ALT"): Add support for error-sequence-coding-system for + all these languages. + * mule/latin.el: + Add support for error-sequence-coding-system for the + Latin-alphabet language environments. + 2008-07-26 Aidan Kehoe <kehoea@parhasard.net> * x-init.el (x-initialize-compose):
--- a/lisp/mule/cyrillic.el Sat Jul 26 13:50:27 2008 +0300 +++ b/lisp/mule/cyrillic.el Tue Aug 05 09:06:41 2008 +0200 @@ -370,6 +370,7 @@ (coding-system koi8-r) (native-coding-system koi8-r) (coding-priority koi8-r) + (error-sequence-coding-system koi8-r) (input-method . "cyrillic-yawerty") (features cyril-util) (locale "ru") @@ -543,6 +544,7 @@ "Ukrainian" '((coding-system koi8-u) (coding-priority koi8-u) (locale "uk") + (error-sequence-coding-system koi8-u) (input-method . "cyrillic-ukrainian") (documentation . "Support for Ukrainian.")) @@ -689,6 +691,7 @@ (set-language-info-alist "Bulgarian" '((coding-system windows-1251) (coding-priority windows-1251) + (error-sequence-coding-system windows-1251) (input-method . "bulgarian-bds") (locale "bg") (documentation @@ -699,6 +702,7 @@ (set-language-info-alist "Belarusian" '((coding-system windows-1251) (coding-priority windows-1251) + (error-sequence-coding-system windows-1251) (locale "be") (input-method . "belarusian") (documentation @@ -845,6 +849,7 @@ "Cyrillic-ALT" '((charset cyrillic-iso8859-5) (coding-system alternativnyj) (native-coding-system alternativnyj) + (error-sequence-coding-system alternativnyj) (coding-priority alternativnyj) (input-method . "cyrillic-yawerty") (features cyril-util)
--- a/lisp/mule/greek.el Sat Jul 26 13:50:27 2008 +0300 +++ b/lisp/mule/greek.el Tue Aug 05 09:06:41 2008 +0200 @@ -328,6 +328,7 @@ (coding-system iso-8859-7) (coding-priority iso-8859-7) (native-coding-system iso-8859-7) + (error-sequence-coding-system iso-8859-7) (locale "el") (input-method . "greek") (sample-text . "Greek (,FGkk]mija(B) ,FCei\(B ,Fsar(B")
--- a/lisp/mule/latin.el Sat Jul 26 13:50:27 2008 +0300 +++ b/lisp/mule/latin.el Tue Aug 05 09:06:41 2008 +0200 @@ -957,11 +957,12 @@ for ((charset codesys default-input nice-charset-1 nice-charset-2 ;; supported-langs is a list if the doc string is replaced ;; entirely - supported-langs) + supported-langs error-sequence-coding-system) langenvs) in '(((latin-iso8859-1 iso-8859-1 "latin-1-prefix" "Latin-1" "ISO-8859-1" " Danish, Dutch, English, Faeroese, Finnish, French, German, Icelandic, - Irish, Italian, Norwegian, Portuguese, Spanish, and Swedish.") + Irish, Italian, Norwegian, Portuguese, Spanish, and Swedish." + windows-1252) (("Danish" "da") ("Dutch" "nl" "TUTORIAL.nl") ("Faeroese" "fo") @@ -1024,6 +1025,8 @@ (coding-system ,codesys) (coding-priority ,codesys) (native-coding-system ,codesys) + (error-sequence-coding-system ,(or error-sequence-coding-system + codesys)) (documentation . ,(if (listp supported-langs) (car supported-langs) (format "\ Generic language environment for %s (%s)." nice-charset-1 nice-charset-2))))
--- a/lisp/mule/mule-cmds.el Sat Jul 26 13:50:27 2008 +0300 +++ b/lisp/mule/mule-cmds.el Tue Aug 05 09:06:41 2008 +0200 @@ -225,7 +225,15 @@ If there is no value for this property, the MS Windows locale is assumed to have the same name as the - language environment." + language environment. + + error-sequence-coding-system + VALUE is a fixed-width 8-bit coding system used to + display Unicode error sequences (using a face to make + it clear that the data is invalid). In Western Europe + this is normally windows-1252; in the Russia and the + former Soviet Union koi8-ru or windows-1251 makes more + sense." (if (symbolp lang-env) (setq lang-env (symbol-name lang-env))) (let (lang-slot prop-slot) @@ -760,6 +768,24 @@ (if (functionp func) (funcall func))) + (let ((error-sequence-coding-system + (get-language-info language-name 'error-sequence-coding-system)) + (disp-table (specifier-instance current-display-table)) + glyph) + (when (consp error-sequence-coding-system) + (setq error-sequence-coding-system (car error-sequence-coding-system))) + (map-char-table + #'(lambda (key entry) + (setq glyph (make-glyph + (vector + 'string :data + (decode-coding-string (string entry) + error-sequence-coding-system)))) + (set-glyph-face glyph 'unicode-error-sequence-warning-face) + (put-char-table key glyph disp-table) + nil) + unicode-error-default-translation-table)) + ;; Fit the charsets preferences in unicode conversions for the ;; language environment. (set-language-unicode-precedence-list
--- a/lisp/specifier.el Sat Jul 26 13:50:27 2008 +0300 +++ b/lisp/specifier.el Tue Aug 05 09:06:41 2008 +0200 @@ -988,4 +988,18 @@ (specifier-instance specifier domain)))) (list (cons nil inst)))))))))) +;; Character 160 (octal 0240) displays incorrectly under some X +;; installations apparently due to a universally crocked font width +;; specification. Display it as a space since that's what's expected. +;; +;; (make-char-table 'generic) instead of (make-display-table) because +;; make-display-table isn't dumped, and this file is. +;; +;; We also want the global display table to be actually globally +;; initialised; that's why this is here, and not in x-init.el, these days. + +(set-specifier current-display-table + #s(char-table type generic data (?\xA0 ?\x20)) + 'global) + ;;; specifier.el ends here
--- a/lisp/unicode.el Sat Jul 26 13:50:27 2008 +0300 +++ b/lisp/unicode.el Tue Aug 05 09:06:41 2008 +0200 @@ -611,6 +611,9 @@ (translate-region start finish table)) begin end buffer)) +;; Sure would be nice to be able to use defface here. +(copy-face 'highlight 'unicode-error-sequence-warning-face) + (unless (featurep 'mule) ;; We do this in such a roundabout way--instead of having the above defun ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have
--- a/lisp/x-init.el Sat Jul 26 13:50:27 2008 +0300 +++ b/lisp/x-init.el Tue Aug 05 09:06:41 2008 +0200 @@ -312,15 +312,4 @@ (if (equal display "") (setq display nil)) (make-frame-on-device 'x display props)) -;; Character 160 (octal 0240) displays incorrectly under X apparently -;; due to a universally crocked font width specification. Display it -;; as a space since that's what seems to be expected. -;; -;; (make-char-table 'generic) instead of (make-display-table) because -;; make-display-table isn't dumped, and this file is. - -(let ((tab (make-char-table 'generic))) - (put-char-table 160 " " tab) - (set-specifier current-display-table tab 'global 'x)) - ;;; x-init.el ends here