diff lisp/coding.el @ 4568:1d74a1d115ee

Add #'query-coding-region tests; do the work necessary to get them running. lisp/ChangeLog addition: 2008-12-28 Aidan Kehoe <kehoea@parhasard.net> * coding.el (default-query-coding-region): Declare using defun*, so we can #'return-from to it on encountering a safe-charsets value of t. Comment out a few debug messages. (query-coding-region): Correct the docstring, it deals with a region, not a string. (unencodable-char-position): Correct the implementation for non-nil COUNT, special-case a zero value for count, treat it as one. Don't rely on dynamic scope when calling the main lambda. * unicode.el (unicode-query-coding-region): Comment out some debug messages here. * mule/mule-coding.el (8-bit-fixed-query-coding-region): Comment out some debug messages here. * code-init.el (raw-text): Add a safe-charsets property to this coding system. * mule/korean.el (iso-2022-int-1): * mule/korean.el (euc-kr): * mule/korean.el (iso-2022-kr): Add safe-charsets properties for these coding systems. * mule/japanese.el (iso-2022-jp): * mule/japanese.el (jis7): * mule/japanese.el (jis8): * mule/japanese.el (shift-jis): * mule/japanese.el (iso-2022-jp-1978-irv): * mule/japanese.el (euc-jp): Add safe-charsets properties for all these coding systems. * mule/iso-with-esc.el: Add safe-charsets properties to all the coding systems in here. Comment on the downside of a safe-charsets value of t for iso-latin-1-with-esc. * mule/hebrew.el (ctext-hebrew): Add a safe-charsets property for this coding system. * mule/devanagari.el (in-is13194-devanagari): Add a safe-charsets property for this coding system. * mule/chinese.el (cn-gb-2312): * mule/chinese.el (hz-gb-2312): * mule/chinese.el (big5): Add safe-charsets properties for these coding systems. * mule/latin.el (iso-8859-14): Add an implementation for this, using #'make-8-bit-coding-system. * mule/mule-coding.el (ctext): * mule/mule-coding.el (iso-2022-8bit-ss2): * mule/mule-coding.el (iso-2022-7bit-ss2): * mule/mule-coding.el (iso-2022-jp-2): * mule/mule-coding.el (iso-2022-7bit): * mule/mule-coding.el (iso-2022-8): * mule/mule-coding.el (escape-quoted): * mule/mule-coding.el (iso-2022-lock): Add safe-charsets properties for all these coding systems. src/ChangeLog addition: 2008-12-28 Aidan Kehoe <kehoea@parhasard.net> * file-coding.c (Fmake_coding_system): Document our use of the safe-chars and safe-charsets properties, and the differences compared to GNU. (make_coding_system_1): Don't drop the safe-chars and safe-charsets properties. (Fcoding_system_property): Return the safe-chars and safe-charsets properties when asked for them. * file-coding.h (CODING_SYSTEM_SAFE_CHARSETS): * coding-system-slots.h: Make the safe-chars and safe-charsets slots available in these headers. tests/ChangeLog addition: 2008-12-28 Aidan Kehoe <kehoea@parhasard.net> * automated/query-coding-tests.el: New file, testing the functionality of #'query-coding-region and #'query-coding-string.
author Aidan Kehoe <kehoea@parhasard.net>
date Sun, 28 Dec 2008 14:46:24 +0000
parents 46ddeaa7c738
children e6a7054a9c30
line wrap: on
line diff
--- a/lisp/coding.el	Sat Aug 09 13:15:09 2008 +0200
+++ b/lisp/coding.el	Sun Dec 28 14:46:24 2008 +0000
@@ -300,8 +300,8 @@
                              (extent-face extent))
                      (delete-extent extent))) buffer begin end))
 
-(defun default-query-coding-region (begin end coding-system
-				    &optional buffer errorp highlightp)
+(defun* default-query-coding-region (begin end coding-system
+				     &optional buffer errorp highlightp)
   "The default `query-coding-region' implementation.
 
 Uses the `safe-charsets' and `safe-chars' coding system properties.
@@ -324,8 +324,11 @@
           (gethash safe-charsets
                    default-query-coding-region-safe-charset-skip-chars-map))
          (ranges (make-range-table))
-         fail-range-start fail-range-end previous-fail char-after
+         fail-range-start fail-range-end char-after
 	 looking-at-arg failed extent)
+    ;; Coding systems with a value of t for safe-charsets support everything.
+    (when (eq t safe-charsets)
+      (return-from default-query-coding-region (values t nil)))
     (unless skip-chars-arg
       (setq skip-chars-arg
 	    (puthash safe-charsets
@@ -355,9 +358,9 @@
 	(goto-char begin buffer)
 	(skip-chars-forward skip-chars-arg end buffer)
 	(while (< (point buffer) end)
-	  (message
-	   "fail-range-start is %S, previous-fail %S, point is %S, end is %S"
-	   fail-range-start previous-fail (point buffer) end)
+	  ; (message
+	  ; "fail-range-start is %S, point is %S, end is %S"
+	  ;  fail-range-start (point buffer) end)
 	  (setq char-after (char-after (point buffer) buffer)
 		fail-range-start (point buffer))
 	  (while (and
@@ -411,8 +414,8 @@
 
 This function returns a list; the intention is that callers use 
 `multiple-value-bind' or the related CL multiple value functions to deal
-with it.  The first element is `t' if the string can be encoded using
-CODING-SYSTEM, or `nil' if not.  The second element is `nil' if the string
+with it.  The first element is `t' if the region can be encoded using
+CODING-SYSTEM, or `nil' if not.  The second element is `nil' if the region
 can be encoded using CODING-SYSTEM; otherwise, it is a range table
 describing the positions of the unencodable characters. See
 `make-range-table'."
@@ -456,33 +459,42 @@
 If optional 5th argument STRING is non-nil, it is a string to search
 for un-encodable characters.  In that case, START and END are indexes
 in the string."
-  (flet ((thunk ()
-	   (multiple-value-bind (result ranges)
-	       (query-coding-region start end coding-system)
-	     (if result
-		 ;; If query-coding-region thinks the entire region is
-		 ;; encodable, result will be t, and the thunk should
-		 ;; return nil, because there are no unencodable
-		 ;; positions in the region.
-                 nil
-               (if count 
-                   (block counted
-                     (map-range-table
-                      #'(lambda (begin end value)
-                          (while (and (<= begin end) (<= begin count))
-                            (push begin result)
-                            (incf begin))
-                          (if (> begin count) (return-from counted)))
-                      ranges))
-                 (map-range-table
-                  #'(lambda (begin end value)
-		      (while (<= begin end)
-			(push begin result)
-			(incf begin))) ranges))
-	       result))))
+  (let ((thunk
+	 #'(lambda (start end coding-system &optional count)
+	     (multiple-value-bind (result ranges)
+		 (query-coding-region start end coding-system)
+	       (if result
+		   nil
+		 (block worked-it-all-out
+		   (if count
+		       (map-range-table
+			#'(lambda (begin end value)
+			    (while (and (< begin end)
+					(< (length result) count))
+			      (push begin result)
+			      (incf begin))
+			    (when (= (length result) count)
+			      (return-from worked-it-all-out result)))
+			ranges)
+		     (map-range-table
+		      #'(lambda (begin end value)
+			  (return-from worked-it-all-out begin))
+		      ranges))
+		   (assert (not (null count)) t
+			   "We should never reach this point with null COUNT.")
+		   result))))))
+    (check-argument-type #'integer-or-marker-p start)
+    (check-argument-type #'integer-or-marker-p end)
+    (check-coding-system coding-system)
+    (and count (check-argument-type #'natnump count)
+	 ;; Special-case zero, sigh. 
+	 (if (zerop count) (setq count 1)))
+    (and string (check-argument-type #'stringp string))
     (if string
-	(with-temp-buffer (insert string) (thunk))
-      (thunk))))
+	(with-temp-buffer
+	  (insert string)
+	  (funcall thunk start end coding-system count))
+      (funcall thunk start end coding-system count))))
 
 (defun encode-coding-char (char coding-system)
   "Encode CHAR by CODING-SYSTEM and return the resulting string.