changeset 4623:a9f83990e6bf

Fix a byte compiler bug with characters above ?\xFF. lisp/ChangeLog addition: 2009-02-22 Aidan Kehoe <kehoea@parhasard.net> * bytecomp.el (byte-compile-force-escape-quoted): New variable, used to force `byte-compile-insert-header' to treat the output as having characters above ?\xFF. (byte-compile-from-buffer): If the compiled output contains characters above ?\xFF, and byte-compile-dynamic-docstrings or byte-compile-dynamic is non-nil (or we're using an inappropriate coding system) recompile the file, turning off the dynamic features and using a more appropriate header. (byte-compile-insert-header): Pay attention to byte-compile-force-escape-quoted. tests/ChangeLog addition: 2009-02-22 Aidan Kehoe <kehoea@parhasard.net> * automated/mule-tests.el: Use more realistic tests for the escape-quoted mule encoding checks; update a comment, change a Known-Bug-Expect-Failure to a normal test now that we've addressed an old bug.
author Aidan Kehoe <kehoea@parhasard.net>
date Sun, 22 Feb 2009 19:57:28 +0000
parents 8cbca852bcd4
children f1bb4cc3144a
files lisp/ChangeLog lisp/bytecomp.el tests/ChangeLog tests/automated/mule-tests.el
diffstat 4 files changed, 98 insertions(+), 48 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Wed Feb 18 07:53:34 2009 +0000
+++ b/lisp/ChangeLog	Sun Feb 22 19:57:28 2009 +0000
@@ -1,3 +1,17 @@
+2009-02-22  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* bytecomp.el (byte-compile-force-escape-quoted): New variable,
+	used to force `byte-compile-insert-header' to treat the output as
+	having characters above ?\xFF. 
+	(byte-compile-from-buffer): 
+	If the compiled output contains characters above ?\xFF, and
+	byte-compile-dynamic-docstrings or byte-compile-dynamic is non-nil
+	(or we're using an inappropriate coding system), recompile the
+	file, turning off the dynamic features and using a more
+	appropriate header. 
+	(byte-compile-insert-header): Pay attention to
+	byte-compile-force-escape-quoted. 
+
 2009-02-18  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* coding.el (check-coding-systems-region): 
--- a/lisp/bytecomp.el	Wed Feb 18 07:53:34 2009 +0000
+++ b/lisp/bytecomp.el	Sun Feb 22 19:57:28 2009 +0000
@@ -439,6 +439,13 @@
 that is, the current lexical environment.  This list lives partly
 on the specbind stack.  The cdr of each cell is an integer bitmask.")
 
+(defvar byte-compile-force-escape-quoted nil
+  "If non-nil, `byte-compile-insert-header' always adds a coding cookie.
+
+This is for situations where the byte compiler output file needs to be
+able to encode character values above ?\\xFF, but this cannot be
+easily determined from the input file.")
+
 (defconst byte-compile-referenced-bit 1)
 (defconst byte-compile-assigned-bit 2)
 (defconst byte-compile-arglist-bit 4)
@@ -1710,6 +1717,9 @@
 	;;	  (byte-compile-warnings (if (eq byte-compile-warnings t)
 	;;				     byte-compile-warning-types
 	;;				   byte-compile-warnings))
+        (byte-compile-force-escape-quoted byte-compile-force-escape-quoted)
+        (byte-compile-using-dynamic nil)
+        (byte-compile-using-escape-quoted nil)
 	)
     (byte-compile-close-variables
      (save-excursion
@@ -1723,7 +1733,11 @@
 	    (byte-compile-insert-header filename
 					byte-compile-inbuffer
 					byte-compile-outbuffer))
-
+       (setq byte-compile-using-dynamic
+             (or (symbol-value-in-buffer 'byte-compile-dynamic
+                                         byte-compile-inbuffer)
+                 (symbol-value-in-buffer 'byte-compile-dynamic-docstrings
+                                         byte-compile-inbuffer)))
        ;; This is a kludge.  Some operating systems (OS/2, DOS) need to
        ;; write files containing binary information specially.
        ;; Under most circumstances, such files will be in binary
@@ -1733,6 +1747,9 @@
        (setq overwrite-mode 'overwrite-mode-binary))
      (displaying-byte-compile-warnings
       (save-excursion
+	;; All our save-excursions may have led to a less-than-useful
+	;; value for point in the outbuffer:
+	(goto-char (point-max byte-compile-outbuffer) byte-compile-outbuffer)
 	(set-buffer byte-compile-inbuffer)
 	(goto-char 1)
 
@@ -1753,7 +1770,22 @@
 	(setq byte-compile-unresolved-functions nil)))
      (save-excursion
        (set-buffer byte-compile-outbuffer)
-       (goto-char (point-min))))
+       (goto-char (point-min))
+       (when (and (or byte-compile-using-dynamic
+                      (eq buffer-file-coding-system 'raw-text-unix))
+                  (re-search-forward "[^\x00-\xff]" nil t))
+	 (when (or noninteractive byte-compile-verbose)
+	   (message
+	    "%s: includes char above ?\\xFF, recompiling sans dynamic features."
+	    filename))
+         (set-symbol-value-in-buffer 'byte-compile-dynamic nil
+                                     byte-compile-inbuffer)
+         (set-symbol-value-in-buffer 'byte-compile-dynamic-docstrings nil
+                                     byte-compile-inbuffer)
+         (setq byte-compile-force-escape-quoted t
+               byte-compile-outbuffer
+               (byte-compile-from-buffer byte-compile-inbuffer 
+                                         filename eval)))))
     (if (not eval)
 	byte-compile-outbuffer
       (let (form)
@@ -1842,23 +1874,25 @@
   ;; Otherwise, use `raw-text' for maximum portability with non-Mule
   ;; Emacsen.
   (if (or (featurep '(not mule)) ;; Don't scan buffer if we are not muleized
-	  (save-excursion
-	    (set-buffer byte-compile-inbuffer)
-	    (goto-char (point-min))
-            ;; Look for any non-Latin-1 literals or Unicode character
-            ;; escapes. Any such occurrences in a @#COUNT comment will lead
-            ;; to an escape-quoted coding cookie being inserted, but this is
-            ;; not true of ordinary comments.
-            (let ((non-latin-1-re
-                   (concat "[^\000-\377]" 
-                           #r"\|\\u[0-9a-fA-F]\{4,4\}\|\\U[0-9a-fA-F]\{8,8\}"))
-                  (case-fold-search nil))
-              (catch 'need-to-escape-quote
-                (while (re-search-forward non-latin-1-re nil t)
-                  (skip-chars-backward "^;" (point-at-bol))
-                  (if (bolp) (throw 'need-to-escape-quote nil))
-                  (forward-line 1))
-                t))))
+          (and
+	   (not byte-compile-force-escape-quoted)
+	   (save-excursion
+	     (set-buffer byte-compile-inbuffer)
+	     (goto-char (point-min))
+	     ;; Look for any non-Latin-1 literals or Unicode character
+	     ;; escapes. Any such occurrences in a @#COUNT comment will lead
+	     ;; to an escape-quoted coding cookie being inserted, but this is
+	     ;; not true of ordinary comments.
+	     (let ((non-latin-1-re
+		    (concat "[^\000-\377]" 
+			    #r"\|\\u[0-9a-fA-F]\{4,4\}\|\\U[0-9a-fA-F]\{8,8\}"))
+		   (case-fold-search nil))
+	       (catch 'need-to-escape-quote
+		 (while (re-search-forward non-latin-1-re nil t)
+		   (skip-chars-backward "^;" (point-at-bol))
+		   (if (bolp) (throw 'need-to-escape-quote nil))
+		   (forward-line 1))
+		 t)))))
       (setq buffer-file-coding-system 'raw-text-unix)
     (insert "(or (featurep 'mule) (error \"Loading this file requires Mule support\"))
 ;;;###coding system: escape-quoted\n")
--- a/tests/ChangeLog	Wed Feb 18 07:53:34 2009 +0000
+++ b/tests/ChangeLog	Sun Feb 22 19:57:28 2009 +0000
@@ -1,3 +1,10 @@
+2009-02-22  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* automated/mule-tests.el: 
+	Use more realistic tests for the escape-quoted mule encoding
+	checks; update a comment, change a Known-Bug-Expect-Failure to a
+	normal test now that we've addressed an old bug. 
+
 2009-02-18  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* automated/query-coding-tests.el : Check that
--- a/tests/automated/mule-tests.el	Wed Feb 18 07:53:34 2009 +0000
+++ b/tests/automated/mule-tests.el	Sun Feb 22 19:57:28 2009 +0000
@@ -599,30 +599,24 @@
         ((Assert-elc-is-escape-quoted ()
            "Assert the current buffer has an escape-quoted cookie if compiled."
            (save-excursion
-             (let ((byte-compile-result (byte-compile-from-buffer
-                                         (current-buffer) nil nil))
-                   (temporary-file-name (make-temp-name
-                                         (expand-file-name "zjPQ2Pk"
-                                                           (temp-directory)))))
-               (byte-compile-insert-header
-                temporary-file-name
-                (current-buffer)
-                byte-compile-result)
+             (let* ((temporary-file-name (make-temp-name
+					  (expand-file-name "zjPQ2Pk"
+							    (temp-directory))))
+		    (byte-compile-result (byte-compile-from-buffer
+					  (current-buffer) temporary-file-name
+					  nil)))
                (Assert (string-match
                         "^;;;###coding system: escape-quoted"
                         (buffer-substring nil nil byte-compile-result))))))
          (Assert-elc-has-no-specified-encoding ()
            "Assert the current buffer has no coding cookie if compiled."
            (save-excursion
-             (let ((byte-compile-result (byte-compile-from-buffer
-                                         (current-buffer) nil nil))
-                   (temporary-file-name (make-temp-name
-                                         (expand-file-name "zjPQ2Pk"
-                                                           (temp-directory)))))
-               (byte-compile-insert-header
-                temporary-file-name
-                (current-buffer)
-                byte-compile-result)
+             (let* ((temporary-file-name (make-temp-name
+					  (expand-file-name "zjPQ2Pk"
+							    (temp-directory))))
+		    (byte-compile-result (byte-compile-from-buffer
+					  (current-buffer) temporary-file-name
+					  nil)))
                (Assert (not (string-match
                              ";;;###coding system:"
                              (buffer-substring nil nil
@@ -703,8 +697,8 @@
       (Assert-elc-has-no-specified-encoding)
       (delete-region (point-min) (point-max))
 
-      ;; This bug exists because the coding-cookie insertion code looks at
-      ;; the input buffer, not the output buffer.
+      ;; There used to be a bug here because the coding-cookie insertion code
+      ;; looks at the input buffer, not the output buffer.
       ;;
       ;; It looks at the input buffer because byte-compile-dynamic and
       ;; byte-compile-dynamic-docstrings currently need to be
@@ -712,19 +706,20 @@
       ;; compilation of function bodies and docstrings fails if you can't
       ;; call (point) and trivially get the byte offset in the file.
       ;;
-      ;; And to unconditionally turn those two features off, you need to
-      ;; know before byte-compilation whether the byte-compilation output
-      ;; file contains non-Latin-1 characters, or perhaps to check after
-      ;; compilation and redo; but we don't do the latter.
+      ;; And to unconditionally turn those two features off, you need either
+      ;; to know before byte-compilation whether the byte-compilation output
+      ;; file contains non-Latin-1 characters, or to check after compilation
+      ;; and redo; the latter is what we do right now. This will only be
+      ;; necessary in a very small minority of cases, so it's not a
+      ;; performance-critical issue.
       ;; 
-      ;; To fix this bug, we need to add Mule support to
-      ;; byte-compile-dynamic and byte-compile-dynamic-docstrings. Or drop
-      ;; support for those features entirely.
+      ;; Martin Buchholz thinks, in bytecomp.el, that we should implement lazy
+      ;; loading for Mule files; I (Aidan Kehoe) don't think that's worth the
+      ;; effort today (February 2009).
       (insert
        "(defvar testing-mule-compilation-handling (eval-when-compile
 	(decode-char 'ucs #x371e))) ;; kDefinition beautiful; pretty, used\"")
-      (Known-Bug-Expect-Failure
-       (Assert-elc-is-escape-quoted))
+      (Assert-elc-is-escape-quoted)
       (delete-region (point-min) (point-max))))
 
   (Known-Bug-Expect-Error