diff lisp/mule/mule-cmds.el @ 4604:e0a8715fdb1f

Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region. lisp/ChangeLog addition: 2009-02-07 Aidan Kehoe <kehoea@parhasard.net> * coding.el (query-coding-clear-highlights): Rename the BUFFER argument to BUFFER-OR-STRING, describe it as possibly being a string in its documentation. (default-query-coding-region): Add a new IGNORE-INVALID-SEQUENCESP argument, document that this function does not support it. Bind case-fold-search to nil, we don't want this to influence what the function thinks is encodable or not. (query-coding-region): Add a new IGNORE-INVALID-SEQUENCESP argument, document what it does; reflect this new argument in the associated compiler macro. (query-coding-string): Add a new IGNORE-INVALID-SEQUENCESP argument, document what it does. Support the HIGHLIGHT argument correctly. * unicode.el (unicode-query-coding-region): Add a new IGNORE-INVALID-SEQUENCESP argument, document what it does, implement this. Document a potential problem. Use #'query-coding-clear-highlights instead of reimplementing it ourselves. Remove some debugging messages. * mule/arabic.el (iso-8859-6): * mule/cyrillic.el (iso-8859-5): * mule/greek.el (iso-8859-7): * mule/hebrew.el (iso-8859-8): * mule/latin.el (iso-8859-2): * mule/latin.el (iso-8859-3): * mule/latin.el (iso-8859-4): * mule/latin.el (iso-8859-14): * mule/latin.el (iso-8859-15): * mule/latin.el (iso-8859-16): * mule/latin.el (iso-8859-9): * mule/latin.el (windows-1252): * mule/mule-coding.el (iso-8859-1): Avoid the assumption that characters not given an explicit mapping in these coding systems map to the ISO 8859-1 characters corresponding to the octets on disk; this makes it much more reasonable to implement the IGNORE-INVALID-SEQUENCESP argument to query-coding-region. * mule/mule-cmds.el (set-language-info): Correct the docstring. * mule/mule-cmds.el (finish-set-language-environment): Treat invalid Unicode sequences produced from invalid-sequence-coding-system and corresponding to control characters the same as control characters in redisplay. * mule/mule-cmds.el: Document that encode-coding-char is available in coding.el * mule/mule-coding.el (make-8-bit-generate-helper): Change to return the both the encode-program generated and the relevant non-ASCII charset; update the docstring to reflect this. * mule/mule-coding.el (make-8-bit-generate-encode-program-and-skip-chars-strings): Rename this function; have it return skip-chars-strings as well as the encode program. Have these skip-chars-strings use ranges for charsets, where possible. * mule/mule-coding.el (make-8-bit-create-decode-encode-tables): Revise this to allow people to specify explicitly characters that should be undefined (= corresponding to keys in unicode-error-default-translation-table), and treating unspecified octets above #x7f as undefined by default. * mule/mule-coding.el (8-bit-fixed-query-coding-region): Add a new IGNORE-INVALID-SEQUENCESP argument, implement support for it using the 8-bit-fixed-invalid-sequences-skip-chars coding system property; remove some debugging messages. * mule/mule-coding.el (make-8-bit-coding-system): This function is dumped, autoloading it makes no sense. Document what happens when characters above #x7f are not specified, implement this. * mule/vietnamese.el: Correct spelling. tests/ChangeLog addition: 2009-02-07 Aidan Kehoe <kehoea@parhasard.net> * automated/query-coding-tests.el: Add FAILING-CASE arguments to the Assert calls, making #'q-c-debug mostly unnecessary. Remove #'q-c-debug. Add new tests that use the IGNORE-INVALID-SEQUENCESP argument to #'query-coding-region; rework the existing ones to respect it.
author Aidan Kehoe <kehoea@parhasard.net>
date Sat, 07 Feb 2009 17:13:37 +0000
parents c83cab5a4f04
children ba06a6cae484
line wrap: on
line diff
--- a/lisp/mule/mule-cmds.el	Thu Feb 05 21:18:37 2009 -0500
+++ b/lisp/mule/mule-cmds.el	Sat Feb 07 17:13:37 2009 +0000
@@ -231,9 +231,9 @@
                      VALUE is a fixed-width 8-bit coding system used to
                      display Unicode error sequences (using a face to make
                      it clear that the data is invalid).  In Western Europe
-                     this is normally windows-1252; in the Russia and the
-                     former Soviet Union koi8-ru or windows-1251 makes more
-                     sense."
+                     and the Americas this is normally windows-1252; in
+                     Russia and the former Soviet Union koi8-ru or
+                     windows-1251 makes more sense."
   (if (symbolp lang-env)
       (setq lang-env (symbol-name lang-env)))
   (let (lang-slot prop-slot)
@@ -771,7 +771,7 @@
   (let ((invalid-sequence-coding-system
          (get-language-info language-name 'invalid-sequence-coding-system))
         (disp-table (specifier-instance current-display-table))
-        glyph string)
+        glyph string unicode-error-lookup)
     (when (consp invalid-sequence-coding-system)
       (setq invalid-sequence-coding-system
             (car invalid-sequence-coding-system)))
@@ -779,9 +779,15 @@
      #'(lambda (key entry)
          (setq string (decode-coding-string (string entry)
                                             invalid-sequence-coding-system))
-         ;; Treat control characters specially:
-         (when (string-match "^[\x00-\x1f\x80-\x9f]$" string)
-           (setq string (format "^%c" (+ ?@ (aref string 0)))))
+         (when (= 1 (length string))
+           ;; Treat control characters specially:
+           (cond
+            ((string-match "^[\x00-\x1f\x80-\x9f]$" string)
+             (setq string (format "^%c" (+ ?@ (aref string 0)))))
+            ((setq unicode-error-lookup
+                   (get-char-table (aref string 0)
+                                   unicode-error-default-translation-table))
+             (setq string (format "^%c" (+ ?@ unicode-error-lookup))))))
          (setq glyph (make-glyph (vector 'string :data string)))
          (set-glyph-face glyph 'unicode-invalid-sequence-warning-face)
          (put-char-table key glyph disp-table)
@@ -939,7 +945,7 @@
 
 (defun encoded-string-description (str coding-system)
   "Return a pretty description of STR that is encoded by CODING-SYSTEM."
-;  (setq str (string-as-unibyte str))
+  ;; XEmacs; no transformation to unibyte.
   (mapconcat
    (if (and coding-system (eq (coding-system-type coding-system) 'iso2022))
        ;; Try to get a pretty description for ISO 2022 escape sequences.
@@ -948,36 +954,8 @@
      (function (lambda (x) (format "#x%02X" x))))
    str " "))
 
-;; (defun encode-coding-char (char coding-system)
-;;   "Encode CHAR by CODING-SYSTEM and return the resulting string.
-;; If CODING-SYSTEM can't safely encode CHAR, return nil."
-;;   (if (cmpcharp char)
-;;       (setq char (car (decompose-composite-char char 'list))))
-;;   (let ((str1 (char-to-string char))
-;;         (str2 (make-string 2 char))
-;;         (safe-charsets (and coding-system
-;;                             (coding-system-get coding-system 'safe-charsets)))
-;;         enc1 enc2 i1 i2)
-;;     (when (or (eq safe-charsets t)
-;;               (memq (char-charset char) safe-charsets))
-;;       ;; We must find the encoded string of CHAR.  But, just encoding
-;;       ;; CHAR will put extra control sequences (usually to designate
-;;       ;; ASCII charset) at the tail if type of CODING is ISO 2022.
-;;       ;; To exclude such tailing bytes, we at first encode one-char
-;;       ;; string and two-char string, then check how many bytes at the
-;;       ;; tail of both encoded strings are the same.
-;; 
-;;       (setq enc1 (string-as-unibyte (encode-coding-string str1 coding-system))
-;;             i1 (length enc1)
-;;             enc2 (string-as-unibyte (encode-coding-string str2 coding-system))
-;;             i2 (length enc2))
-;;       (while (and (> i1 0) (= (aref enc1 (1- i1)) (aref enc2 (1- i2))))
-;;         (setq i1 (1- i1) i2 (1- i2)))
-;; 
-;;       ;; Now (substring enc1 i1) and (substring enc2 i2) are the same,
-;;       ;; and they are the extra control sequences at the tail to
-;;       ;; exclude.
-;;       (substring enc2 0 i2))))
+;; XEmacs; 
+;; (defun encode-coding-char (char coding-system) in coding.el.
 
 
 ;; #### The following section is utter junk from mule-misc.el.