changeset 5682:dae33b5feffe

Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie() src/ChangeLog addition: 2012-09-07 Aidan Kehoe <kehoea@parhasard.net> * file-coding.c: * file-coding.c (snarf_coding_system): Take a new parameter, FIND_CODING_SYSTEM_P, which indicates that find_coding_system() should be called. * file-coding.c (look_for_coding_system_magic_cookie): * file-coding.c (determine_real_coding_system): * file-coding.c (undecided_convert): Use this parameter. * file-coding.c (Ffind_coding_system_magic_cookie_in_file): New, moved from files.el, so we can use look_for_coding_system_magic_cookie's implementation. * file-coding.c (syms_of_file_coding): Make Ffind_coding_system_magic_cookie_in_file available. lisp/ChangeLog addition: 2012-09-07 Aidan Kehoe <kehoea@parhasard.net> * files.el: * files.el (find-coding-system-magic-cookie-in-file): Removed. Move this to C, so we can use look_for_coding_system_magic_cookie().
author Aidan Kehoe <kehoea@parhasard.net>
date Fri, 07 Sep 2012 22:06:01 +0100
parents 4af5a3435c94
children 98f762d06c5f
files lisp/ChangeLog lisp/files.el src/ChangeLog src/file-coding.c
diffstat 4 files changed, 89 insertions(+), 56 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Wed Sep 05 20:37:58 2012 +0100
+++ b/lisp/ChangeLog	Fri Sep 07 22:06:01 2012 +0100
@@ -1,3 +1,10 @@
+2012-09-07  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* files.el:
+	* files.el (find-coding-system-magic-cookie-in-file):
+	Removed. Move this to C, so we can use
+	look_for_coding_system_magic_cookie().
+
 2012-08-02  Stephen J. Turnbull  <stephen@xemacs.org>
 
 	* XEmacs 21.5.32 "habanero" is released.
--- a/lisp/files.el	Wed Sep 05 20:37:58 2012 +0100
+++ b/lisp/files.el	Fri Sep 07 22:06:01 2012 +0100
@@ -2126,52 +2126,6 @@
 	;; Ordinary variable, really set it.
 	(t (make-local-variable var)
 	   (set var val))))
-
-(defun find-coding-system-magic-cookie-in-file (file)
-  "Look for the coding-system magic cookie in FILE.
-The coding-system magic cookie is either the local variable specification
--*- ... coding: ... -*- on the first line, or the exact string
-\";;;###coding system: \" somewhere within the first 3000 characters
-of the file.  If found, the coding system name (as a string) is returned;
-otherwise nil is returned.  Note that it is extremely unlikely that
-either such string would occur coincidentally as the result of encoding
-some characters in a non-ASCII charset, and that the spaces make it
-even less likely since the space character is not a valid octet in any
-ISO 2022 encoding of most non-ASCII charsets."
-  (save-excursion
-    (with-temp-buffer
-      (let ((coding-system-for-read 'raw-text))
-	(insert-file-contents file nil 0 3000))
-      (goto-char (point-min))
-      (or (and (looking-at
-		"^[^\n]*-\\*-[^\n]*coding: \\([^ \t\n;]+\\)[^\n]*-\\*-")
-	       (buffer-substring (match-beginning 1) (match-end 1)))
-	  ;; (save-excursion
-	  ;;   (let (start end)
-	  ;;     (and (re-search-forward "^;+[ \t]*Local Variables:" nil t)
-	  ;;          (setq start (match-end 0))
-	  ;;          (re-search-forward "\n;+[ \t]*End:")
-	  ;;          (setq end (match-beginning 0))
-	  ;;          (save-restriction
-	  ;;            (narrow-to-region start end)
-	  ;;            (goto-char start)
-	  ;;            (re-search-forward "^;;; coding: \\([^\n]+\\)$" nil t)
-	  ;;            )
-	  ;;          (let ((codesys
-	  ;;                 (intern (buffer-substring
-	  ;;                          (match-beginning 1)(match-end 1)))))
-	  ;;            (if (find-coding-system codesys) codesys))
-	  ;;          )))
-	  (let ((case-fold-search nil))
-	    (if (search-forward
-		 ";;;###coding system: " (+ (point-min) 3000) t)
-		(let ((start (point))
-		      (end (progn
-			     (skip-chars-forward "^ \t\n\r")
-			     (point))))
-		  (if (> end start) (buffer-substring start end))
-		  )))
-	  ))))
 
 
 (defcustom change-major-mode-with-file-name t
--- a/src/ChangeLog	Wed Sep 05 20:37:58 2012 +0100
+++ b/src/ChangeLog	Fri Sep 07 22:06:01 2012 +0100
@@ -1,3 +1,19 @@
+2012-09-07  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* file-coding.c:
+	* file-coding.c (snarf_coding_system):
+	Take a new parameter, FIND_CODING_SYSTEM_P, which indicates that
+	find_coding_system() should be called.
+	* file-coding.c (look_for_coding_system_magic_cookie):
+	* file-coding.c (determine_real_coding_system):
+	* file-coding.c (undecided_convert):
+	Use this parameter.
+	* file-coding.c (Ffind_coding_system_magic_cookie_in_file):	
+	New, moved from files.el, so we can use
+	look_for_coding_system_magic_cookie's implementation.
+	* file-coding.c (syms_of_file_coding):
+	Make Ffind_coding_system_magic_cookie_in_file available.
+
 2012-09-05  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* keymap.c:
--- a/src/file-coding.c	Wed Sep 05 20:37:58 2012 +0100
+++ b/src/file-coding.c	Fri Sep 07 22:06:01 2012 +0100
@@ -79,6 +79,7 @@
 #include "extents.h"
 #include "rangetab.h"
 #include "chartab.h"
+#include "sysfile.h"
 
 #ifdef HAVE_ZLIB
 #include "zlib.h"
@@ -3674,7 +3675,8 @@
    blanks).  If found, return it, otherwise nil. */
 
 static Lisp_Object
-snarf_coding_system (const UExtbyte *p, Bytecount len)
+snarf_coding_system (const UExtbyte *p, Bytecount len,
+                     Boolint find_coding_system_p)
 {
   Bytecount n;
   UExtbyte *name;
@@ -3698,7 +3700,16 @@
       name[n] = '\0';
       /* This call to intern_istring() is OK because we already verified that
 	 there are only ASCII characters in the string */
-      return find_coding_system_for_text_file (intern_istring ((Ibyte *) name), 0);
+      if (find_coding_system_p)
+        {
+          return
+            find_coding_system_for_text_file (intern_istring ((Ibyte *) name),
+                                              0);
+        }
+      else
+        {
+          return build_ascstring ((const Ascbyte *) name);
+        }
     }
 
   return Qnil;
@@ -3725,11 +3736,9 @@
   return Qnil;
 }
 
-/* #### This duplicates code in `find-coding-system-magic-cookie-in-file'
-   in files.el.  Look into combining them. */
-
 static Lisp_Object
-look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len)
+look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len,
+                                     Boolint find_coding_system_p)
 {
   const UExtbyte *p;
   const UExtbyte *scan_end;
@@ -3767,7 +3776,8 @@
 			    *(p-1) == ';')))
 		  {
 		    p += LENGTH ("coding:");
-		    return snarf_coding_system (p, suffix - p);
+		    return snarf_coding_system (p, suffix - p,
+                                                find_coding_system_p);
 		    break;
 		  }
 	      break;
@@ -3792,7 +3802,7 @@
 	suffix = p;
 	while (suffix < scan_end && !isspace (*suffix))
 	  suffix++;
-	return snarf_coding_system (p, suffix - p);
+	return snarf_coding_system (p, suffix - p, find_coding_system_p);
       }
   }
 
@@ -3807,7 +3817,8 @@
 				     make_opaque_ptr (st));
   UExtbyte buf[4096];
   Bytecount nread = Lstream_read (stream, buf, sizeof (buf));
-  Lisp_Object coding_system = look_for_coding_system_magic_cookie (buf, nread);
+  Lisp_Object coding_system
+    = look_for_coding_system_magic_cookie (buf, nread, 1);
 
   if (NILP (coding_system))
     {
@@ -3971,7 +3982,7 @@
 		/* #### This is cheesy.  What we really ought to do is buffer
 		   up a certain minimum amount of data to get a better result.
 		   */
-		data->actual = look_for_coding_system_magic_cookie (src, n);
+		data->actual = look_for_coding_system_magic_cookie (src, n, 1);
 	      if (NILP (data->actual))
 		{
 		  /* #### This is cheesy.  What we really ought to do is buffer
@@ -4216,6 +4227,50 @@
   return val;
 }
 
+DEFUN ("find-coding-system-magic-cookie-in-file",
+       Ffind_coding_system_magic_cookie_in_file, 1, 1, 0, /*
+Look for the coding-system magic cookie in FILENAME.
+The coding-system magic cookie is either the local variable specification
+-*- ... coding: ... -*- on the first line, or the exact string
+\";;;###coding system: \" somewhere within the first 3000 characters
+of the file.  If found, the coding system name (as a string) is returned;
+otherwise nil is returned.  Note that it is extremely unlikely that
+either such string would occur coincidentally as the result of encoding
+some characters in a non-ASCII charset, and that the spaces make it
+even less likely since the space character is not a valid octet in any
+ISO 2022 encoding of most non-ASCII charsets.
+*/
+       (filename))
+{
+  Lisp_Object lstream;
+  UExtbyte buf[4096];
+  Bytecount nread;
+  int fd = -1;
+  struct stat st;
+
+  filename = Fexpand_file_name (filename, Qnil);
+
+  if (qxe_stat (XSTRING_DATA (filename), &st) < 0)
+    {
+    badopen:
+      report_file_error ("Opening input file", filename);
+    }
+
+  if (fd < 0)
+    {
+      if ((fd = qxe_interruptible_open (XSTRING_DATA (filename),
+					O_RDONLY | OPEN_BINARY, 0)) < 0)
+	goto badopen;
+    }
+
+  lstream = make_filedesc_input_stream (fd, 0, -1, 0);
+  Lstream_set_buffering (XLSTREAM (lstream), LSTREAM_UNBUFFERED, 0);
+  nread = Lstream_read (XLSTREAM (lstream), buf, sizeof (buf));
+  Lstream_delete (XLSTREAM (lstream));
+  retry_close (fd);
+
+  return look_for_coding_system_magic_cookie (buf, nread, 0);
+}
 
 
 #ifdef DEBUG_XEMACS
@@ -4524,6 +4579,7 @@
   DEFSUBR (Fdecode_coding_region);
   DEFSUBR (Fencode_coding_region);
   DEFSUBR (Fquery_coding_region);
+  DEFSUBR (Ffind_coding_system_magic_cookie_in_file);
   DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp);
   DEFSYMBOL (Qno_conversion);
   DEFSYMBOL (Qconvert_eol);