diff lisp/mule/mule-coding.el @ 4549:68d1ca56cffa

First part of interactive checks that coding systems encode regions. 2008-01-21 Aidan Kehoe <kehoea@parhasard.net> * coding.el (decode-coding-string): (encode-coding-string): Accept GNU's NOCOPY argument for these. Todo; write compiler macros to use it. (query-coding-warning-face): New face, to show unencodable characters. (default-query-coding-region-safe-charset-skip-chars-map): New variable, a cache used by #'default-query-coding-region. (default-query-coding-region): Default implementation of #'query-coding-region, using the safe-charsets and safe-chars coding systemproperties. (query-coding-region): New function; can a given coding system encode a given region? (query-coding-string): New function; can a given coding system encode a given string? (unencodable-char-position): Function API taken from GNU; return the first unencodable position given a string and coding system. (encode-coding-char): Function API taken from GNU; return CHAR encoded using CODING-SYSTEM, or nil if CODING-SYSTEM would trash CHAR. ((unless (featurep 'mule)): Override the default query-coding-region implementation on non-Mule. * mule/mule-coding.el (make-8-bit-generate-helper): Eliminate a duplicate comment. (make-8-bit-choose-category): Simplify implementation. (8-bit-fixed-query-coding-region): Implementation of #'query-coding-region for coding systems created with #'make-8-bit-coding-system. (make-8-bit-coding-system): Initialise the #'query-coding-region implementation for these character sets. (make-8-bit-coding-system): Ditto for the compiler macro version of this function. * unicode.el (unicode-query-coding-skip-chars-arg): New variable, used by unicode-query-coding-region, initialised in mule/general-late.el. (unicode-query-coding-region): New function, the #'query-coding-region implementation for Unicode coding systems. Initialise the query-coding-function property for the Unicode coding systems to #'unicode-query-coding-region. * mule/mule-charset.el (charset-skip-chars-string): New function. Return a #'skip-chars-forward argument that skips all characters in CHARSET. (map-charset-chars): Function synced from GNU, modified to work with XEmacs. Map FUNC across the int value charset ranges of CHARSET.
author Aidan Kehoe <kehoea@parhasard.net>
date Mon, 21 Jan 2008 22:51:21 +0100
parents f4c3ffe60a4f
children 6812571bfcb9
line wrap: on
line diff
--- a/lisp/mule/mule-coding.el	Thu Jan 17 11:55:11 2008 +0100
+++ b/lisp/mule/mule-coding.el	Mon Jan 21 22:51:21 2008 +0100
@@ -240,8 +240,6 @@
                            ((if (r0 == #xABAB)
                                 ;; #xBFFE is a sentinel in the compiled
                                 ;; program.
-                                ;; #xBFFE is a sentinel in the compiled
-                                ;; program.
 				((r0 = r1 & #x7F)
 				 (write r0 ,(make-vector vec-len #xBFFE)))
                               ((mule-to-unicode r0 r1)
@@ -531,12 +529,85 @@
 disk to XEmacs characters for some fixed-width 8-bit coding system.  "
   (check-argument-type #'vectorp decode-table)
   (check-argument-range (length decode-table) #x100 #x100)
-  (block category
-    (loop
-      for i from #x80 to #xBF
-      do (unless (= i (aref decode-table i))
-           (return-from category 'no-conversion)))
-    'iso-8-1))
+  (loop
+    named category
+    for i from #x80 to #xBF
+    do (unless (= i (aref decode-table i))
+	 (return-from category 'no-conversion))
+    finally return 'iso-8-1))
+
+(defun 8-bit-fixed-query-coding-region (begin end coding-system
+                                        &optional buffer errorp highlightp)
+  "The `query-coding-region' implementation for 8-bit-fixed coding systems.
+
+Uses the `8-bit-fixed-query-from-unicode' and `8-bit-fixed-query-skip-chars'
+coding system properties.  The former is a hash table mapping from valid
+Unicode code points to on-disk octets in the coding system; the latter a set
+of characters as used by `skip-chars-forward'.  Both of these properties are
+generated automatically by `make-8-bit-coding-system'.
+
+See that the documentation of `query-coding-region'; see also
+`make-8-bit-coding-system'. "
+  (check-argument-type #'coding-system-p
+                       (setq coding-system (find-coding-system coding-system)))
+  (check-argument-type #'integer-or-marker-p begin)
+  (check-argument-type #'integer-or-marker-p end)
+  (let ((from-unicode
+         (coding-system-get coding-system '8-bit-fixed-query-from-unicode))
+        (skip-chars-arg
+         (coding-system-get coding-system '8-bit-fixed-query-skip-chars))
+	(ranges (make-range-table))
+        char-after fail-range-start fail-range-end previous-fail extent
+	failed)
+    (check-type from-unicode hash-table)
+    (check-type skip-chars-arg string)
+    (save-excursion
+      (goto-char begin buffer)
+      (skip-chars-forward skip-chars-arg end buffer)
+      (while (< (point buffer) end)
+        (message
+	 "fail-range-start is %S, previous-fail %S, point is %S, end is %S"
+	 fail-range-start previous-fail (point buffer) end)
+	(setq char-after (char-after (point buffer) buffer)
+	      fail-range-start (point buffer))
+	(message "arguments are %S %S"
+		 (< (point buffer) end)
+		 (not (gethash (encode-char char-after 'ucs) from-unicode)))
+	(while (and
+		(< (point buffer) end)
+		(not (gethash (encode-char char-after 'ucs) from-unicode)))
+	  (forward-char 1 buffer)
+	  (setq char-after (char-after (point buffer) buffer)
+		failed t))
+	(if (= fail-range-start (point buffer))
+	    ;; The character can actually be encoded by the coding
+	    ;; system; check the characters past it.
+	    (forward-char 1 buffer)
+	  ;; The character actually failed. 
+	  (message "past the move through, point now %S" (point buffer))
+	  (when errorp 
+	    (error 'text-conversion-error
+		   (format "Cannot encode %s using coding system"
+			   (buffer-substring fail-range-start (point buffeR)
+					     buffer))
+		   (coding-system-name coding-system)))
+	  (put-range-table fail-range-start
+			   ;; If char-after is non-nil, we're not at
+			   ;; the end of the buffer.
+			   (setq fail-range-end (if char-after
+						    (point buffer)
+						  (point-max buffer)))
+			   t ranges)
+	  (when highlightp
+	    (message "highlighting")
+	    (setq extent (make-extent fail-range-start fail-range-end buffer))
+	    (set-extent-priority extent (+ mouse-highlight-priority 2))
+	    (set-extent-face extent 'query-coding-warning-face))
+	  (skip-chars-forward skip-chars-arg end buffer))
+	(message "about to give the result, ranges %S" ranges))
+      (if failed 
+	  (values nil ranges)
+	(values t nil)))))
 
 ;;;###autoload
 (defun make-8-bit-coding-system (name unicode-map &optional description props)
@@ -618,13 +689,27 @@
     (coding-system-put name '8-bit-fixed t)
     (coding-system-put name 'category 
                        (make-8-bit-choose-category decode-table))
+    (coding-system-put name '8-bit-fixed-query-skip-chars
+                       (apply #'string (append decode-table nil)))
+    (coding-system-put name '8-bit-fixed-query-from-unicode encode-table)
+
+    (coding-system-put name 'query-coding-function
+                       #'8-bit-fixed-query-coding-region)
+    (coding-system-put (intern (format "%s-unix" name))
+		       'query-coding-function
+                       #'8-bit-fixed-query-coding-region)
+    (coding-system-put (intern (format "%s-dos" name))
+		       'query-coding-function
+                       #'8-bit-fixed-query-coding-region)
+    (coding-system-put (intern (format "%s-mac" name))
+		       'query-coding-function
+                       #'8-bit-fixed-query-coding-region)
     (loop for alias in aliases
       do (define-coding-system-alias alias name))
     result))
 
 (define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map
 						 &optional description props)
-
   ;; We provide the compiler macro (= macro that is expanded only on
   ;; compilation, and that can punt to a runtime version of the
   ;; associate function if necessary) not for reasons of speed, though
@@ -674,8 +759,9 @@
              ;; (invalid-read-syntax "Multiply defined symbol label" 1)
              ;;
              ;; when the file is byte compiled.
-             (case-fold-search t))
-        (define-translation-hash-table encode-table-sym ,encode-table)
+             (case-fold-search t)
+             (encode-table ,encode-table))
+        (define-translation-hash-table encode-table-sym encode-table)
         (make-coding-system 
          ',name 'ccl ,description
          (plist-put (plist-put ',props 'decode 
@@ -688,8 +774,22 @@
                                    (symbol-value 'encode-table-sym)))
                             ',encode-program))))
 	(coding-system-put ',name '8-bit-fixed t)
-        (coding-system-put ',name 'category ',
-                           (make-8-bit-choose-category decode-table))
+        (coding-system-put ',name 'category 
+                           ',(make-8-bit-choose-category decode-table))
+        (coding-system-put ',name '8-bit-fixed-query-skip-chars
+                           ',(apply #'string (append decode-table nil)))
+        (coding-system-put ',name '8-bit-fixed-query-from-unicode encode-table)
+        (coding-system-put ',name 'query-coding-function
+                           #'8-bit-fixed-query-coding-region)
+	(coding-system-put ',(intern (format "%s-unix" name))
+			   'query-coding-function
+			   #'8-bit-fixed-query-coding-region)
+	(coding-system-put ',(intern (format "%s-dos" name))
+			   'query-coding-function
+			   #'8-bit-fixed-query-coding-region)
+	(coding-system-put ',(intern (format "%s-mac" name))
+			   'query-coding-function
+			   #'8-bit-fixed-query-coding-region)
         ,(macroexpand `(loop for alias in ',aliases
                         do (define-coding-system-alias alias
                              ',name)))
@@ -703,4 +803,3 @@
  '(mnemonic "Latin 1"
    documentation "The most used encoding of Western Europe and the Americas."
    aliases (iso-latin-1 latin-1)))
-