changeset 4557:790bd95b84c1

Automated merge with file:/Sources/xemacs-21.5-checked-out
author Aidan Kehoe <kehoea@parhasard.net>
date Tue, 13 May 2008 20:26:47 +0200
parents a1f8c5c250c2 (diff) d9b9b5f90386 (current diff)
children d9fcb5442c95
files
diffstat 6 files changed, 565 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Tue May 13 20:16:53 2008 +0200
+++ b/lisp/ChangeLog	Tue May 13 20:26:47 2008 +0200
@@ -66,6 +66,13 @@
 	* mule/mule-win32-init.el: Don't use the Windows-specific CP1250
 	implementation, rely on that in latin.el instead. 
 
+2008-05-11  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* coding.el (query-coding-clear-highlights): 
+	New function--clear any face information added by
+	`query-coding-region'. 
+	(default-query-coding-region): Use it.
+
 2008-04-13  Henry S. Thompson <ht@inf.ed.ac.uk>, Mike Sperber  <mike@xemacs.org>
 
 	* window-xemacs.el (save-window-excursion/mapping,
@@ -151,6 +158,55 @@
 
 	* info.el (Info-suffix-list):
 	Support LZMA compression, as used--oddly--by Mandriva Linux.
+	* coding.el (decode-coding-string): 
+	(encode-coding-string): Accept GNU's NOCOPY argument for
+	these. Todo; write compiler macros to use it. 
+	(query-coding-warning-face): New face, to show unencodable
+	characters. 
+	(default-query-coding-region-safe-charset-skip-chars-map): 
+	New variable, a cache used by #'default-query-coding-region. 
+	(default-query-coding-region): Default implementation of
+	#'query-coding-region, using the safe-charsets and safe-chars
+	coding systemproperties. 
+	(query-coding-region): New function; can a given coding system
+	encode a given region? 
+	(query-coding-string): New function; can a given coding system
+	encode a given string? 
+	(unencodable-char-position): Function API taken from GNU; return
+	the first unencodable position given a string and coding system. 
+	(encode-coding-char): Function API taken from GNU; return CHAR
+	encoded using CODING-SYSTEM, or nil if CODING-SYSTEM would trash
+	CHAR. 
+	((unless (featurep 'mule)): Override the default
+	query-coding-region implementation on non-Mule. 
+	* mule/mule-coding.el (make-8-bit-generate-helper): Eliminate a
+	duplicate comment. 
+	(make-8-bit-choose-category): Simplify implementation. 
+	(8-bit-fixed-query-coding-region): Implementation of
+	#'query-coding-region for coding systems created with
+	#'make-8-bit-coding-system. 
+	(make-8-bit-coding-system): Initialise the #'query-coding-region
+	implementation for these character sets. 
+	(make-8-bit-coding-system): Ditto for the compiler macro version
+	of this function. 
+	* unicode.el (unicode-query-coding-skip-chars-arg): New variable,
+	used by unicode-query-coding-region, initialised in
+	mule/general-late.el. 
+	(unicode-query-coding-region): New function, the
+	#'query-coding-region implementation for Unicode coding systems. 
+	Initialise the query-coding-function property for the Unicode
+	coding systems to #'unicode-query-coding-region.
+	* mule/mule-charset.el (charset-skip-chars-string): New
+	function. Return a #'skip-chars-forward argument that skips all
+	characters in CHARSET. 
+	(map-charset-chars): Function synced from GNU, modified to work
+	with XEmacs. Map FUNC across the int value charset ranges of
+	CHARSET. 
+
+2008-01-21  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* info.el (Info-suffix-list): 
+	Support LZMA compression, as used--oddly--by Mandriva Linux. 
 
 2008-01-17  Mike Sperber  <mike@xemacs.org>
 
--- a/lisp/coding.el	Tue May 13 20:16:53 2008 +0200
+++ b/lisp/coding.el	Tue May 13 20:26:47 2008 +0200
@@ -125,15 +125,20 @@
   (interactive "r\nP")
   (princ (detect-coding-region start end)))
 
-(defun decode-coding-string (str coding-system)
+(defun decode-coding-string (str coding-system &optional nocopy)
   "Decode the string STR which is encoded in CODING-SYSTEM.
-Does not modify STR.  Returns the decoded string on successful conversion."
+Normally does not modify STR.  Returns the decoded string on
+successful conversion.
+Optional argument NOCOPY says that modifying STR and returning it is
+allowed."
   (with-string-as-buffer-contents
    str (decode-coding-region (point-min) (point-max) coding-system)))
 
-(defun encode-coding-string (str coding-system)
+(defun encode-coding-string (str coding-system &optional nocopy)
   "Encode the string STR using CODING-SYSTEM.
-Does not modify STR.  Returns the encoded string on successful conversion."
+Does not modify STR.  Returns the encoded string on successful conversion.
+Optional argument NOCOPY says that the original string may be returned
+if does not differ from the encoded string. "
   (with-string-as-buffer-contents
    str (encode-coding-region (point-min) (point-max) coding-system)))
 
@@ -274,4 +279,225 @@
 
 (make-compatible-variable 'enable-multibyte-characters "Unimplemented")
 
+;; Sure would be nice to be able to use defface here. 
+(copy-face 'highlight 'query-coding-warning-face)
+
+(defvar default-query-coding-region-safe-charset-skip-chars-map
+  #s(hash-table test equal data ())
+  "A map from list of charsets to `skip-chars-forward' arguments for them.")
+
+(defsubst query-coding-clear-highlights (begin end &optional buffer)
+  "Remove extent faces added by `query-coding-region' between BEGIN and END.
+
+Optional argument BUFFER is the buffer to use, and defaults to the current
+buffer.
+
+The HIGHLIGHTP argument to `query-coding-region' indicates that it should
+display unencodable characters using `query-coding-warning-face'.  After
+this function has been called, this will no longer be the case.  "
+  (map-extents #'(lambda (extent ignored-arg)
+                   (when (eq 'query-coding-warning-face
+                             (extent-face extent))
+                     (delete-extent extent))) buffer begin end))
+
+(defun default-query-coding-region (begin end coding-system
+				    &optional buffer errorp highlightp)
+  "The default `query-coding-region' implementation.
+
+Uses the `safe-charsets' and `safe-chars' coding system properties.
+The former is a list of XEmacs character sets that can be safely
+encoded by CODING-SYSTEM; the latter a char table describing, in
+addition, characters that can be safely encoded by CODING-SYSTEM."
+  (check-argument-type #'coding-system-p
+                       (setq coding-system (find-coding-system coding-system)))
+  (check-argument-type #'integer-or-marker-p begin)
+  (check-argument-type #'integer-or-marker-p end)
+  (let* ((safe-charsets
+          (or (coding-system-get coding-system 'safe-charsets)
+	      (coding-system-get (coding-system-base coding-system)
+				 'safe-charsets)))
+         (safe-chars
+	  (or (coding-system-get coding-system 'safe-chars)
+	      (coding-system-get (coding-system-base coding-system)
+				 'safe-chars)))
+         (skip-chars-arg
+          (gethash safe-charsets
+                   default-query-coding-region-safe-charset-skip-chars-map))
+         (ranges (make-range-table))
+         fail-range-start fail-range-end previous-fail char-after
+	 looking-at-arg failed extent)
+    (unless skip-chars-arg
+      (setq skip-chars-arg
+	    (puthash safe-charsets
+		     (mapconcat #'charset-skip-chars-string
+				safe-charsets "")
+		     default-query-coding-region-safe-charset-skip-chars-map)))
+    (when highlightp
+      (query-coding-clear-highlights begin end buffer))
+    (if (and (zerop (length skip-chars-arg)) (null safe-chars))
+	(progn
+	    ;; Uh-oh, nothing known about this coding system. Fail. 
+	    (when errorp 
+	      (error 'text-conversion-error
+		     "Coding system doesn't say what it can encode"
+		     (coding-system-name coding-system)))
+	    (put-range-table begin end t ranges)
+	    (when highlightp
+	      (setq extent (make-extent begin end buffer))
+	      (set-extent-priority extent (+ mouse-highlight-priority 2))
+	      (set-extent-face extent 'query-coding-warning-face))
+	    (values nil ranges))
+      (setq looking-at-arg (if (equal "" skip-chars-arg)
+			       ;; Regexp that will never match.
+			       #r".\{0,0\}" 
+                             (concat "[" skip-chars-arg "]")))
+      (save-excursion
+	(goto-char begin buffer)
+	(skip-chars-forward skip-chars-arg end buffer)
+	(while (< (point buffer) end)
+	  (message
+	   "fail-range-start is %S, previous-fail %S, point is %S, end is %S"
+	   fail-range-start previous-fail (point buffer) end)
+	  (setq char-after (char-after (point buffer) buffer)
+		fail-range-start (point buffer))
+	  (while (and
+		  (< (point buffer) end)
+		  (not (looking-at looking-at-arg))
+		  (or (not safe-chars)
+		      (not (get-char-table char-after safe-chars))))
+	    (forward-char 1 buffer)
+	    (setq char-after (char-after (point buffer) buffer)
+		  failed t))
+	  (if (= fail-range-start (point buffer))
+	      ;; The character can actually be encoded by the coding
+	      ;; system; check the characters past it.
+	      (forward-char 1 buffer)
+            ;; Can't be encoded; note this.
+	    (when errorp 
+	      (error 'text-conversion-error
+		     (format "Cannot encode %s using coding system"
+			     (buffer-substring fail-range-start (point buffer)
+					       buffer))
+		     (coding-system-name coding-system)))
+	    (put-range-table fail-range-start
+			     ;; If char-after is non-nil, we're not at
+			     ;; the end of the buffer.
+			     (setq fail-range-end (if char-after
+						      (point buffer)
+						    (point-max buffer)))
+			     t ranges)
+	    (when highlightp
+	      (setq extent (make-extent fail-range-start fail-range-end buffer))
+	      (set-extent-priority extent (+ mouse-highlight-priority 2))
+	      (set-extent-face extent 'query-coding-warning-face)))
+	  (skip-chars-forward skip-chars-arg end buffer))
+	(if failed
+	    (values nil ranges)
+	  (values t nil))))))
+
+(defun query-coding-region (start end coding-system &optional buffer
+                               errorp highlight)
+  "Work out whether CODING-SYSTEM can losslessly encode a region.
+
+START and END are the beginning and end of the region to check.
+CODING-SYSTEM is the coding system to try.
+
+Optional argument BUFFER is the buffer to check, and defaults to the current
+buffer.  Optional argument ERRORP says to signal a `text-conversion-error'
+if some character in the region cannot be encoded, and defaults to nil. 
+
+Optional argument HIGHLIGHT says to display unencodable characters in the
+region using `query-coding-warning-face'. It defaults to nil.
+
+This function returns a list; the intention is that callers use 
+`multiple-value-bind' or the related CL multiple value functions to deal
+with it.  The first element is `t' if the string can be encoded using
+CODING-SYSTEM, or `nil' if not.  The second element is `nil' if the string
+can be encoded using CODING-SYSTEM; otherwise, it is a range table
+describing the positions of the unencodable characters. See
+`make-range-table'."
+  (funcall (or (coding-system-get coding-system 'query-coding-function)
+               #'default-query-coding-region)
+           start end coding-system buffer errorp highlight))
+
+(defun query-coding-string (string coding-system &optional errorp highlight)
+  "Work out whether CODING-SYSTEM can losslessly encode STRING.
+CODING-SYSTEM is the coding system to check.
+
+Optional argument ERRORP says to signal a `text-conversion-error' if some
+character in the region cannot be encoded, and defaults to nil.
+
+Optional argument HIGHLIGHT says to display unencodable characters in the
+region using `query-coding-warning-face'. It defaults to nil.
+
+This function returns a list; the intention is that callers use use
+`multiple-value-bind' or the related CL multiple value functions to deal
+with it.  The first element is `t' if the string can be encoded using
+CODING-SYSTEM, or `nil' if not.  The second element is `nil' if the string
+can be encoded using CODING-SYSTEM; otherwise, it is a range table
+describing the positions of the unencodable characters. See
+`make-range-table'."
+  (with-temp-buffer 
+    (insert string)
+    (query-coding-region (point-min) (point-max) coding-system (current-buffer)
+                         ;; ### Will highlight work here?
+                         errorp highlight)))
+
+(defun unencodable-char-position  (start end coding-system
+                                   &optional count string) 
+  "Return position of first un-encodable character in a region.
+START and END specify the region and CODING-SYSTEM specifies the
+encoding to check.  Return nil if CODING-SYSTEM does encode the region.
+
+If optional 4th argument COUNT is non-nil, it specifies at most how
+many un-encodable characters to search.  In this case, the value is a
+list of positions.
+
+If optional 5th argument STRING is non-nil, it is a string to search
+for un-encodable characters.  In that case, START and END are indexes
+in the string."
+  (flet ((thunk ()
+	   (multiple-value-bind (result ranges)
+	       (query-coding-region start end coding-system)
+	     (if result
+		 ;; If query-coding-region thinks the entire region is
+		 ;; encodable, result will be t, and the thunk should
+		 ;; return nil, because there are no unencodable
+		 ;; positions in the region.
+                 nil
+               (if count 
+                   (block counted
+                     (map-range-table
+                      #'(lambda (begin end value)
+                          (while (and (<= begin end) (<= begin count))
+                            (push begin result)
+                            (incf begin))
+                          (if (> begin count) (return-from counted)))
+                      ranges))
+                 (map-range-table
+                  #'(lambda (begin end value)
+		      (while (<= begin end)
+			(push begin result)
+			(incf begin))) ranges))
+	       result))))
+    (if string
+	(with-temp-buffer (insert string) (thunk))
+      (thunk))))
+
+(defun encode-coding-char (char coding-system)
+  "Encode CHAR by CODING-SYSTEM and return the resulting string.
+If CODING-SYSTEM can't safely encode CHAR, return nil."
+  (check-argument-type #'characterp char)
+  (multiple-value-bind (succeededp)
+      (query-coding-string char coding-system)
+    (when succeededp
+      (encode-coding-string char coding-system))))
+
+(unless (featurep 'mule)
+  ;; If we're under non-Mule, every XEmacs character can be encoded
+  ;; with every XEmacs coding system.
+  (fset #'default-query-coding-region
+	#'(lambda (&rest ignored) (values t nil)))
+  (unintern 'default-query-coding-region-safe-charset-skip-chars-map))
+
 ;;; coding.el ends here
--- a/lisp/mule/general-late.el	Tue May 13 20:16:53 2008 +0200
+++ b/lisp/mule/general-late.el	Tue May 13 20:26:47 2008 +0200
@@ -63,7 +63,34 @@
 			      (decode-coding-string
 			       Installation-string
 			       Installation-file-coding-system)
-			    Installation-string))
+			    Installation-string)
+
+      ;; Convince the byte compiler that, really, this file can't be encoded
+      ;; as binary. Ugh.
+      system-type (symbol-value (intern "\u0073ystem-type"))
+
+      unicode-query-coding-skip-chars-arg
+      (eval-when-compile 
+        (when-fboundp #'map-charset-chars 
+          (loop
+            for charset in (charset-list)
+            with skip-chars-string = ""
+            do
+            (block no-ucs-mapping
+              (map-charset-chars
+               #'(lambda (begin end)
+                   (loop
+                     while (/= end begin)
+                     do
+                     (when (= -1 (char-to-unicode begin))
+                       (setq this-charset-works nil)
+                       (return-from no-ucs-mapping))
+                     (setq begin (int-to-char (1+ begin)))))
+               charset)
+              (setq skip-chars-string
+                    (concat skip-chars-string
+                            (charset-skip-chars-string charset))))
+            finally return skip-chars-string))))
 
 ;; At this point in the dump, all the charsets have been loaded. Now, load
 ;; their Unicode mappings.
--- a/lisp/mule/mule-charset.el	Tue May 13 20:16:53 2008 +0200
+++ b/lisp/mule/mule-charset.el	Tue May 13 20:26:47 2008 +0200
@@ -117,6 +117,65 @@
   "Useless in XEmacs, returns 1."
    1)
 
+(defun charset-skip-chars-string (charset)
+  "Given  CHARSET, return a string suitable for for `skip-chars-forward'.
+Passing the string to `skip-chars-forward' will cause it to skip all
+characters in CHARSET."
+  (setq charset (get-charset charset))
+  (cond 
+   ;; Aargh, the general algorithm doesn't work for these charsets, because
+   ;; make-char strips the high bit. Hard code them.
+   ((eq (find-charset 'ascii) charset) "\x00-\x7f")
+   ((eq (find-charset 'control-1) charset) "\x80-\x9f")
+   (t 
+    (let (charset-lower charset-upper row-upper row-lower)
+      (if (= 1 (charset-dimension charset))
+          (condition-case args-out-of-range
+              (make-char charset #x100)
+            (args-out-of-range 
+             (setq charset-lower (third args-out-of-range)
+                   charset-upper (fourth args-out-of-range))
+             (format "%c-%c"
+                     (make-char charset charset-lower)
+                     (make-char charset charset-upper))))
+        (condition-case args-out-of-range
+            (make-char charset #x100 #x22)
+          (args-out-of-range
+           (setq row-lower (third args-out-of-range)
+                 row-upper (fourth args-out-of-range))))
+        (condition-case args-out-of-range
+            (make-char charset #x22 #x100)
+          (args-out-of-range
+           (setq charset-lower (third args-out-of-range)
+                 charset-upper (fourth args-out-of-range))))
+        (format "%c-%c"
+                (make-char charset row-lower charset-lower)
+                (make-char charset row-upper charset-upper)))))))
+;; From GNU. 
+(defun map-charset-chars (func charset)
+  "Use FUNC to map over all characters in CHARSET for side effects.
+FUNC is a function of two args, the start and end (inclusive) of a
+character code range.  Thus FUNC should iterate over [START, END]."
+  (check-argument-type #'functionp func)
+  (check-argument-type #'charsetp (setq charset (find-charset charset)))
+  (let* ((dim (charset-dimension charset))
+	 (chars (charset-chars charset))
+	 (start (if (= chars 94)
+		    33
+		  32)))
+    (if (= dim 1)
+        (cond 
+         ((eq (find-charset 'ascii) charset) (funcall func ?\x00 ?\x7f))
+         ((eq (find-charset 'control-1) charset) (funcall func ?\x80 ?\x9f))
+         (t 
+          (funcall func
+                   (make-char charset start)
+                   (make-char charset (+ start chars -1)))))
+      (dotimes (i chars)
+	(funcall func
+		 (make-char charset (+ i start) start)
+		 (make-char charset (+ i start) (+ start chars -1)))))))
+
 ;;;; Define setf methods for all settable Charset properties
 
 (defsetf charset-registry    set-charset-registry)
--- a/lisp/mule/mule-coding.el	Tue May 13 20:16:53 2008 +0200
+++ b/lisp/mule/mule-coding.el	Tue May 13 20:26:47 2008 +0200
@@ -240,8 +240,6 @@
                            ((if (r0 == #xABAB)
                                 ;; #xBFFE is a sentinel in the compiled
                                 ;; program.
-                                ;; #xBFFE is a sentinel in the compiled
-                                ;; program.
 				((r0 = r1 & #x7F)
 				 (write r0 ,(make-vector vec-len #xBFFE)))
                               ((mule-to-unicode r0 r1)
@@ -531,12 +529,94 @@
 disk to XEmacs characters for some fixed-width 8-bit coding system.  "
   (check-argument-type #'vectorp decode-table)
   (check-argument-range (length decode-table) #x100 #x100)
-  (block category
-    (loop
-      for i from #x80 to #xBF
-      do (unless (= i (aref decode-table i))
-           (return-from category 'no-conversion)))
-    'iso-8-1))
+  (loop
+    named category
+    for i from #x80 to #xBF
+    do (unless (= i (aref decode-table i))
+	 (return-from category 'no-conversion))
+    finally return 'iso-8-1))
+
+(defun 8-bit-fixed-query-coding-region (begin end coding-system
+                                        &optional buffer errorp highlightp)
+  "The `query-coding-region' implementation for 8-bit-fixed coding systems.
+
+Uses the `8-bit-fixed-query-from-unicode' and `8-bit-fixed-query-skip-chars'
+coding system properties.  The former is a hash table mapping from valid
+Unicode code points to on-disk octets in the coding system; the latter a set
+of characters as used by `skip-chars-forward'.  Both of these properties are
+generated automatically by `make-8-bit-coding-system'.
+
+See that the documentation of `query-coding-region'; see also
+`make-8-bit-coding-system'. "
+  (check-argument-type #'coding-system-p
+                       (setq coding-system (find-coding-system coding-system)))
+  (check-argument-type #'integer-or-marker-p begin)
+  (check-argument-type #'integer-or-marker-p end)
+  (let ((from-unicode
+         (or (coding-system-get coding-system '8-bit-fixed-query-from-unicode)
+	     (coding-system-get (coding-system-base coding-system)
+				'8-bit-fixed-query-from-unicode)))
+        (skip-chars-arg
+         (or (coding-system-get coding-system '8-bit-fixed-query-skip-chars)
+	     (coding-system-get (coding-system-base coding-system)
+				'8-bit-fixed-query-skip-chars)))
+	(ranges (make-range-table))
+        char-after fail-range-start fail-range-end previous-fail extent
+	failed)
+    (check-type from-unicode hash-table)
+    (check-type skip-chars-arg string)
+    (save-excursion
+      (when highlightp
+	(map-extents #'(lambda (extent ignored-arg)
+			 (when (eq 'query-coding-warning-face
+				   (extent-face extent))
+			   (delete-extent extent))) buffer begin end))
+      (goto-char begin buffer)
+      (skip-chars-forward skip-chars-arg end buffer)
+      (while (< (point buffer) end)
+        (message
+	 "fail-range-start is %S, previous-fail %S, point is %S, end is %S"
+	 fail-range-start previous-fail (point buffer) end)
+	(setq char-after (char-after (point buffer) buffer)
+	      fail-range-start (point buffer))
+	(message "arguments are %S %S"
+		 (< (point buffer) end)
+		 (not (gethash (encode-char char-after 'ucs) from-unicode)))
+	(while (and
+		(< (point buffer) end)
+		(not (gethash (encode-char char-after 'ucs) from-unicode)))
+	  (forward-char 1 buffer)
+	  (setq char-after (char-after (point buffer) buffer)
+		failed t))
+	(if (= fail-range-start (point buffer))
+	    ;; The character can actually be encoded by the coding
+	    ;; system; check the characters past it.
+	    (forward-char 1 buffer)
+	  ;; The character actually failed. 
+	  (message "past the move through, point now %S" (point buffer))
+	  (when errorp 
+	    (error 'text-conversion-error
+		   (format "Cannot encode %s using coding system"
+			   (buffer-substring fail-range-start (point buffer)
+					     buffer))
+		   (coding-system-name coding-system)))
+	  (put-range-table fail-range-start
+			   ;; If char-after is non-nil, we're not at
+			   ;; the end of the buffer.
+			   (setq fail-range-end (if char-after
+						    (point buffer)
+						  (point-max buffer)))
+			   t ranges)
+	  (when highlightp
+	    (message "highlighting")
+	    (setq extent (make-extent fail-range-start fail-range-end buffer))
+	    (set-extent-priority extent (+ mouse-highlight-priority 2))
+	    (set-extent-face extent 'query-coding-warning-face))
+	  (skip-chars-forward skip-chars-arg end buffer)))
+      (message "about to give the result, ranges %S" ranges)
+      (if failed 
+	  (values nil ranges)
+	(values t nil)))))
 
 ;;;###autoload
 (defun make-8-bit-coding-system (name unicode-map &optional description props)
@@ -618,13 +698,27 @@
     (coding-system-put name '8-bit-fixed t)
     (coding-system-put name 'category 
                        (make-8-bit-choose-category decode-table))
+    (coding-system-put name '8-bit-fixed-query-skip-chars
+                       (apply #'string (append decode-table nil)))
+    (coding-system-put name '8-bit-fixed-query-from-unicode encode-table)
+
+    (coding-system-put name 'query-coding-function
+                       #'8-bit-fixed-query-coding-region)
+    (coding-system-put (intern (format "%s-unix" name))
+		       'query-coding-function
+                       #'8-bit-fixed-query-coding-region)
+    (coding-system-put (intern (format "%s-dos" name))
+		       'query-coding-function
+                       #'8-bit-fixed-query-coding-region)
+    (coding-system-put (intern (format "%s-mac" name))
+		       'query-coding-function
+                       #'8-bit-fixed-query-coding-region)
     (loop for alias in aliases
       do (define-coding-system-alias alias name))
     result))
 
 (define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map
 						 &optional description props)
-
   ;; We provide the compiler macro (= macro that is expanded only on
   ;; compilation, and that can punt to a runtime version of the
   ;; associate function if necessary) not for reasons of speed, though
@@ -674,8 +768,9 @@
              ;; (invalid-read-syntax "Multiply defined symbol label" 1)
              ;;
              ;; when the file is byte compiled.
-             (case-fold-search t))
-        (define-translation-hash-table encode-table-sym ,encode-table)
+             (case-fold-search t)
+             (encode-table ,encode-table))
+        (define-translation-hash-table encode-table-sym encode-table)
         (make-coding-system 
          ',name 'ccl ,description
          (plist-put (plist-put ',props 'decode 
@@ -688,8 +783,22 @@
                                    (symbol-value 'encode-table-sym)))
                             ',encode-program))))
 	(coding-system-put ',name '8-bit-fixed t)
-        (coding-system-put ',name 'category ',
-                           (make-8-bit-choose-category decode-table))
+        (coding-system-put ',name 'category 
+                           ',(make-8-bit-choose-category decode-table))
+        (coding-system-put ',name '8-bit-fixed-query-skip-chars
+                           ',(apply #'string (append decode-table nil)))
+        (coding-system-put ',name '8-bit-fixed-query-from-unicode encode-table)
+        (coding-system-put ',name 'query-coding-function
+                           #'8-bit-fixed-query-coding-region)
+	(coding-system-put ',(intern (format "%s-unix" name))
+			   'query-coding-function
+			   #'8-bit-fixed-query-coding-region)
+	(coding-system-put ',(intern (format "%s-dos" name))
+			   'query-coding-function
+			   #'8-bit-fixed-query-coding-region)
+	(coding-system-put ',(intern (format "%s-mac" name))
+			   'query-coding-function
+			   #'8-bit-fixed-query-coding-region)
         ,(macroexpand `(loop for alias in ',aliases
                         do (define-coding-system-alias alias
                              ',name)))
@@ -703,4 +812,3 @@
  '(mnemonic "Latin 1"
    documentation "The most used encoding of Western Europe and the Americas."
    aliases (iso-latin-1 latin-1)))
-
--- a/lisp/unicode.el	Tue May 13 20:16:53 2008 +0200
+++ b/lisp/unicode.el	Tue May 13 20:26:47 2008 +0200
@@ -611,6 +611,76 @@
        (translate-region start finish table))
      begin end buffer))
 
+(defvar unicode-query-coding-skip-chars-arg nil ;; Set in general-late.el
+  "Used by `unicode-query-coding-region' to skip chars with known mappings.")
+
+(defun unicode-query-coding-region (begin end coding-system
+				    &optional buffer errorp highlightp)
+  "The `query-coding-region' implementation for Unicode coding systems."
+  (check-argument-type #'coding-system-p
+                       (setq coding-system (find-coding-system coding-system)))
+  (check-argument-type #'integer-or-marker-p begin)
+  (check-argument-type #'integer-or-marker-p end)
+  (let* ((skip-chars-arg unicode-query-coding-skip-chars-arg)
+         (ranges (make-range-table))
+         (looking-at-arg (concat "[" skip-chars-arg "]"))
+         fail-range-start fail-range-end previous-fail char-after failed
+	 extent)
+    (save-excursion
+      (when highlightp
+	(map-extents #'(lambda (extent ignored-arg)
+			 (when (eq 'query-coding-warning-face
+				   (extent-face extent))
+			   (delete-extent extent))) buffer begin end))
+      (goto-char begin buffer)
+      (skip-chars-forward skip-chars-arg end buffer)
+      (while (< (point buffer) end)
+;        (message
+;         "fail-range-start is %S, previous-fail %S, point is %S, end is %S"
+;         fail-range-start previous-fail (point buffer) end)
+        (setq char-after (char-after (point buffer) buffer)
+              fail-range-start (point buffer))
+        (while (and
+                (< (point buffer) end)
+                (not (looking-at looking-at-arg))
+                (= -1 (char-to-unicode char-after)))
+          (forward-char 1 buffer)
+	  (message "what?!?")
+          (setq char-after (char-after (point buffer) buffer)
+                failed t))
+        (if (= fail-range-start (point buffer))
+            ;; The character can actually be encoded by the coding
+            ;; system; check the characters past it.
+	    (forward-char 1 buffer)
+          ;; Can't be encoded; note this.
+          (when errorp 
+            (error 'text-conversion-error
+                   (format "Cannot encode %s using coding system"
+                           (buffer-substring fail-range-start (point buffer)
+                                             buffer))
+                   (coding-system-name coding-system)))
+          (put-range-table fail-range-start
+                           ;; If char-after is non-nil, we're not at
+                           ;; the end of the buffer.
+                           (setq fail-range-end (if char-after
+                                                    (point buffer)
+                                                  (point-max buffer)))
+                           t ranges)
+          (when highlightp
+            (setq extent (make-extent fail-range-start fail-range-end buffer))
+            (set-extent-priority extent (+ mouse-highlight-priority 2))
+            (set-extent-face extent 'query-coding-warning-face)))
+        (skip-chars-forward skip-chars-arg end buffer))
+      (if failed
+          (values nil ranges)
+        (values t nil)))))
+
+(loop
+  for coding-system in (coding-system-list)
+  do (when (eq 'unicode (coding-system-type coding-system))
+       (coding-system-put coding-system 'query-coding-function
+			  #'unicode-query-coding-region)))
+
 (unless (featurep 'mule)
   ;; We do this in such a roundabout way--instead of having the above defun
   ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have