changeset 3948:adecfd791c9b

[xemacs-hg @ 2007-05-12 10:17:00 by aidan] Non-Latin-1 escapes can lead to corrupted ELC code.
author aidan
date Sat, 12 May 2007 10:17:09 +0000
parents 67e8a09db7ed
children 33b9323388c5
files lisp/ChangeLog lisp/bytecomp.el tests/ChangeLog tests/automated/mule-tests.el
diffstat 4 files changed, 135 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Fri May 11 21:51:38 2007 +0000
+++ b/lisp/ChangeLog	Sat May 12 10:17:09 2007 +0000
@@ -1,9 +1,17 @@
+2007-05-12  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* bytecomp.el (byte-compile-insert-header):
+	Check for any Unicode escapes in the source file text when
+	deciding whether Mule support is necessary for it, and whether to
+	use escape-quoted as the .elc coding system. Thanks to Stephen for
+	the suggestion as to how to ignore appearances in comments. 
+
 2007-05-01  Stephen J. Turnbull  <stephen@xemacs.org>
 
 	* dumped-lisp.el (preloaded-file-list): Move resize-minibuffer
 	before simple.
 
-	* resize-minibuffer.el: Remove CVS $Id: ChangeLog,v 1.793 2007/05/01 14:32:50 stephent Exp $ cookie..
+	* resize-minibuffer.el: Remove CVS $Id: ChangeLog,v 1.794 2007/05/12 10:17:00 aidan Exp $ cookie..
 
 	* resize-minibuffer.el (resize-minibuffer-mode): Remove autoload.
 
--- a/lisp/bytecomp.el	Fri May 11 21:51:38 2007 +0000
+++ b/lisp/bytecomp.el	Sat May 12 10:17:09 2007 +0000
@@ -1842,10 +1842,20 @@
 	  (save-excursion
 	    (set-buffer byte-compile-inbuffer)
 	    (goto-char (point-min))
-	    ;; mrb- There must be a better way than skip-chars-forward
-	    (skip-chars-forward (concat (char-to-string 0) "-"
-					(char-to-string 255)))
-	    (eq (point) (point-max))))
+            ;; Look for any non-Latin-1 literals or Unicode character
+            ;; escapes. Any such occurrences in a #@COUNT comment will lead
+            ;; to an escape-quoted coding cookie being inserted, but this is
+            ;; not true of ordinary comments.
+            (let ((non-latin-1-re
+                   (concat "[^\000-\377]" 
+                           #r"\|\\u[0-9a-fA-F]\{4,4\}\|\\U[0-9a-fA-F]\{8,8\}"))
+                  (case-fold-search nil))
+              (catch 'need-to-escape-quote
+                (while (re-search-forward non-latin-1-re nil t)
+                  (skip-chars-backward "^;" (point-at-bol))
+                  (if (bolp) (throw 'need-to-escape-quote nil))
+                  (forward-line 1))
+                t))))
       (setq buffer-file-coding-system 'raw-text-unix)
     (insert "(or (featurep 'mule) (error \"Loading this file requires Mule support\"))
 ;;;###coding system: escape-quoted\n")
--- a/tests/ChangeLog	Fri May 11 21:51:38 2007 +0000
+++ b/tests/ChangeLog	Sat May 12 10:17:09 2007 +0000
@@ -1,3 +1,12 @@
+2007-05-12  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* automated/mule-tests.el:
+	* automated/mule-tests.el (bytecomp):
+	Require it, since we're testing its Unicode support. 
+	* automated/mule-tests.el (featurep):
+	Assert that the escape-quoted coding cookie is added when needed,
+	and ignored when not. 
+
 2007-04-29  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* automated/mule-tests.el (featurep):
--- a/tests/automated/mule-tests.el	Fri May 11 21:51:38 2007 +0000
+++ b/tests/automated/mule-tests.el	Sat May 12 10:17:09 2007 +0000
@@ -33,6 +33,8 @@
 ;; This file will be (read)ed by a non-mule XEmacs, so don't use
 ;; literal non-Latin1 characters.  Use (make-char) instead.
 
+(require 'bytecomp)
+
 ;;-----------------------------------------------------------------
 ;; Test whether all legal chars may be safely inserted to a buffer.
 ;;-----------------------------------------------------------------
@@ -465,4 +467,105 @@
                hebrew-iso8859-8 japanese-jisx0208 japanese-jisx0212
                katakana-jisx0201 korean-ksc5601 latin-iso8859-1
                latin-iso8859-2 thai-xtis vietnamese-viscii-lower))))
+
+  (with-temp-buffer
+    (flet
+        ((Assert-elc-is-escape-quoted ()
+           "Assert the current buffer has an escape-quoted cookie if compiled."
+           (save-excursion
+             (let ((byte-compile-result (byte-compile-from-buffer
+                                         (current-buffer) nil nil))
+                   (temporary-file-name (make-temp-name
+                                         (expand-file-name "zjPQ2Pk"
+                                                           (temp-directory)))))
+               (byte-compile-insert-header
+                temporary-file-name
+                (current-buffer)
+                byte-compile-result)
+               (Assert (string-match "^;;;###coding system: escape-quoted"
+                                     (buffer-substring nil nil
+                                                       byte-compile-result))))))
+         (Assert-elc-has-no-specified-encoding ()
+           "Assert the current buffer has no coding cookie if compiled."
+           (save-excursion
+             (let ((byte-compile-result (byte-compile-from-buffer
+                                         (current-buffer) nil nil))
+                   (temporary-file-name (make-temp-name
+                                         (expand-file-name "zjPQ2Pk"
+                                                           (temp-directory)))))
+               (byte-compile-insert-header
+                temporary-file-name
+                (current-buffer)
+                byte-compile-result)
+               (Assert (not (string-match
+                             ";;;###coding system:"
+                             (buffer-substring nil nil byte-compile-result))))))))
+      (insert 
+       ;; Create a buffer containing the Unicode escapes.
+       #r" (defvar testing-mule-compilation-handling 
+            (string ?\u371E   ;; kDefinition beautiful; pretty, used 
+                              ;; in girl's name
+                ?\U0002A6A9   ;; kDefinition	(Cant.) sound of shouting
+                ?\U0002A65B   ;; kDefinition	(Cant.) decayed teeth; 
+                              ;; tongue-tied
+                ?\U00010400   ;; DESERET CAPITAL LETTER LONG I
+                    ?\u3263)) ;; CIRCLED HANGUL RIEUL ")
+
+      (Assert-elc-is-escape-quoted)
+      (delete-region (point-min) (point-max))
+
+      (insert
+       ;; This time, the buffer will contain the actual characters, because of
+       ;; the u flag to the #r.
+       #ru" (defvar testing-mule-compilation-handling 
+            (string ?\u371E   ;; kDefinition beautiful; pretty, used 
+                              ;; in girl's name
+                ?\U0002A6A9   ;; kDefinition	(Cant.) sound of shouting
+                ?\U0002A65B   ;; kDefinition	(Cant.) decayed teeth; 
+                              ;; tongue-tied
+                ?\U00010400   ;; DESERET CAPITAL LETTER LONG I
+                    ?\u3263)) ;; CIRCLED HANGUL RIEUL ")
+    
+      (Assert-elc-is-escape-quoted)
+      (delete-region (point-min) (point-max))
+
+      (insert
+       ;; Just a single four character escape. 
+       #r" (defvar testing-mule-compilation-handling 
+            (string ?\u371E))   ;; kDefinition beautiful; pretty, used")
+
+      (Assert-elc-is-escape-quoted)
+      (delete-region (point-min) (point-max))
+
+      (insert
+       ;; Just a single eight character escape. 
+       #r" (defvar testing-mule-compilation-handling 
+            (string ?\U0002A65B))   ;; kDefinition (Cant.) decayed teeth;")
+
+      (Assert-elc-is-escape-quoted)
+      (delete-region (point-min) (point-max))
+
+      (insert
+       ;; A single latin-1 hex digit escape
+       #r" (defvar testing-mule-compilation-handling 
+            (string ?\xab))   ;; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK")
+      
+      (Assert-elc-has-no-specified-encoding)
+      (delete-region (point-min) (point-max))
+
+      (insert
+       ;; A single latin-1 character
+       #ru" (defvar testing-mule-compilation-handling 
+            (string ?\u00AB))   ;; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK")
+      
+      (Assert-elc-has-no-specified-encoding)
+      (delete-region (point-min) (point-max))
+
+      (insert
+       ;; Just ASCII. 
+       #r" (defvar testing-mule-compilation-handling 
+            (string ?A))   ;; LATIN CAPITAL LETTER A")
+      
+      (Assert-elc-has-no-specified-encoding)
+      (delete-region (point-min) (point-max))))
   )