changeset 4303:cee827542370

[xemacs-hg @ 2007-12-04 20:18:33 by aidan] Implement coding system autoloads; use them for the variable-length ISO 2022 Latin coding systems.
author aidan
date Tue, 04 Dec 2007 20:18:43 +0000
parents 2f5ccbd44293
children 4ac3a83867c6
files lisp/ChangeLog lisp/autoload.el lisp/mule/iso-with-esc.el src/ChangeLog src/file-coding.c
diffstat 5 files changed, 319 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Mon Dec 03 22:51:15 2007 +0000
+++ b/lisp/ChangeLog	Tue Dec 04 20:18:43 2007 +0000
@@ -1,3 +1,16 @@
+2007-12-04  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* mule/iso-with-esc.el:
+	* mule/iso-with-esc.el ('iso-latin-1-with-esc): New.
+	Provide the variable-length rarely-used ISO 2022 compatible coding
+	systems for Latin (that is, iso-8859-[1-16]) again, to address
+	Stephen's veto. 
+
+2007-12-04  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* autoload.el (make-autoload):
+	Support auto-autoloads for coding systems. 
+
 2007-12-01  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* mule/mule-coding.el (iso-8859-1):
--- a/lisp/autoload.el	Mon Dec 03 22:51:15 2007 +0000
+++ b/lisp/autoload.el	Tue Dec 04 20:18:43 2007 +0000
@@ -280,7 +280,9 @@
 				  ',varname 'custom-variable)
 	     (custom-add-load ',varname
 			      ,(plist-get rest :require))))))
-
+     ;; Coding systems. #### Would be nice to handle the docstring here too.
+     ((memq car '(make-coding-system make-8-bit-coding-system))
+      `(autoload-coding-system ,(nth 1 form) '(load ,file)))
      ;; nil here indicates that this is not a special autoload form.
      (t nil))))
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lisp/mule/iso-with-esc.el	Tue Dec 04 20:18:43 2007 +0000
@@ -0,0 +1,100 @@
+;;; iso-with-esc.el --
+;;; Provision of the hateful and never widely implemented Latin, Greek and
+;;; Cyrillic variable-length ISO 2022 coding systems that passed for Latin
+;;; 2, Latin 10, (etc) support in XEmacs for so long.
+;;                      
+;; Copyright (C) 2006 Free Software Foundation
+
+;; Author: Aidan Kehoe
+
+;; This file is part of XEmacs.
+
+;; XEmacs is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; XEmacs is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with XEmacs; see the file COPYING.  If not, write to the
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;;; Commentary:
+
+;;; Code:
+
+;;;###autoload
+(define-coding-system-alias 'iso-latin-1-with-esc 'iso-2022-8)
+
+;;;###autoload
+(make-coding-system
+ 'iso-latin-2-with-esc 'iso2022 "ISO-8859-2 (Latin-2)"
+ '(charset-g0 ascii
+   charset-g1 latin-iso8859-2
+   charset-g2 t
+   charset-g3 t
+   mnemonic "MIME/Ltn-2"))
+
+;;;###autoload
+(make-coding-system
+ 'iso-latin-3-with-esc 'iso2022 "ISO-8859-3 (Latin-3)"
+ '(charset-g0 ascii
+   charset-g1 latin-iso8859-3
+   charset-g2 t
+   charset-g3 t
+   mnemonic "MIME/Ltn-3"))
+
+;;;###autoload
+(make-coding-system
+ 'iso-latin-4-with-esc 'iso2022 "ISO-8859-4 (Latin-4)"
+ '(charset-g0 ascii
+   charset-g1 latin-iso8859-4
+   charset-g2 t
+   charset-g3 t
+   mnemonic "MIME/Ltn-4"))
+
+;;;###autoload
+(make-coding-system
+ 'iso-latin-9-with-esc 'iso2022
+  "ISO 4873 conforming 8-bit code (ASCII + Latin 9; aka Latin-1 with Euro)"
+  '(mnemonic "MIME/Ltn-9"		; bletch
+    eol-type nil
+    charset-g0 ascii
+    charset-g1 latin-iso8859-15
+    charset-g2 t
+    charset-g3 t))
+
+;;;###autoload
+(make-coding-system
+ 'iso-latin-5-with-esc 'iso2022 "ISO-8859-9 (Latin-5)"
+ '(charset-g0 ascii
+   charset-g1 latin-iso8859-9
+   charset-g2 t
+   charset-g3 t
+   mnemonic "MIME/Ltn-5"))
+
+;;;###autoload
+(make-coding-system
+ 'cyrillic-iso-8bit-with-esc 'iso2022
+ "ISO-8859-5 (Cyrillic)"
+ '(charset-g0 ascii
+   charset-g1 cyrillic-iso8859-5
+   charset-g2 t
+   charset-g3 t
+   mnemonic "ISO8/Cyr"))
+
+;;;###autoload
+(make-coding-system
+ 'hebrew-iso-8bit-with-esc 'iso2022
+ "ISO-8859-8 (Hebrew)"
+ '(charset-g0 ascii
+   charset-g1 hebrew-iso8859-8
+   charset-g2 t
+  charset-g3 t
+   no-iso6429 t
+   mnemonic "MIME/Hbrw"))
--- a/src/ChangeLog	Mon Dec 03 22:51:15 2007 +0000
+++ b/src/ChangeLog	Tue Dec 04 20:18:43 2007 +0000
@@ -1,3 +1,32 @@
+2007-12-04  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* file-coding.c:
+	* file-coding.c (find_coding_system):
+	C-accessible version of #'find-coding-system that doesn't
+	necessarily call the autoload code, for use in
+	#'autoload-coding-system (which we allow to overwrite autoloaded
+	coding systems) and make_coding_system_1 (which has to).
+	* file-coding.c (Ffind_coding_system):
+	Move the implementation to find_coding_system; call that function
+	with a do_autoloads argument of 1. 
+	* file-coding.c (Fautoload_coding_system):
+	New.
+	* file-coding.c (add_coding_system_to_list_mapper):
+	When returning a list of coding systems, don't call the autoload
+	code.
+	* file-coding.c (make_coding_system_1):
+	* file-coding.c (Fcopy_coding_system):
+	* file-coding.c (syms_of_file_coding):
+
+	Implement autoloaded coding systems. The form to be evaluated to
+	load a given coding system is stored as the value in
+	Vcoding_system_hash_table; this form is evaluated if
+	find-coding-system is called with the symbol name of the coding
+	system as its argument.
+
+	This is also tied in with the POSIX locale infrastructure by means
+	of posix-charset-to-coding-system-hash. 
+
 2007-11-29  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* mule-ccl.c (ccl_driver):
--- a/src/file-coding.c	Mon Dec 03 22:51:15 2007 +0000
+++ b/src/file-coding.c	Tue Dec 04 20:18:43 2007 +0000
@@ -229,6 +229,8 @@
 
 Lisp_Object QScoding_system_cookie;
 
+Lisp_Object Qposix_charset_to_coding_system_hash;
+
 /* This is used to convert autodetected coding systems into existing
    systems.  For example, the chain undecided->convert-eol-autodetect may
    have its separate parts detected as mswindows-multibyte and
@@ -469,6 +471,89 @@
   return CODING_SYSTEMP (object) ? Qt : Qnil;
 }
 
+static Lisp_Object
+find_coding_system (Lisp_Object coding_system_or_name,
+                    int do_autoloads)
+{
+  Lisp_Object lookup;
+
+  if (NILP (coding_system_or_name))
+    coding_system_or_name = Qbinary;
+  else if (CODING_SYSTEMP (coding_system_or_name))
+    return coding_system_or_name;
+  else
+    CHECK_SYMBOL (coding_system_or_name);
+
+  while (1)
+    {
+      lookup =
+	Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
+
+      if (CONSP (lookup) && do_autoloads)
+        {
+          struct gcpro gcpro1;
+          int length;
+          DECLARE_EISTRING (desired_base);
+          DECLARE_EISTRING (warning_info);
+
+          eicpy_lstr (desired_base, XSYMBOL_NAME (coding_system_or_name));
+
+          /* Work out the name of the base coding system. */
+          length = eilen (desired_base);
+          if (length > (int)(sizeof ("-unix") - 1))
+            {
+              if (0 == qxestrcmp ((UAscbyte *)"-unix", (eidata (desired_base))
+                                  + (length - (sizeof ("-unix") - 1))))
+                {
+                  eidel (desired_base, length - (sizeof ("-unix") - 1),
+                         -1, 5, 5);
+                }
+            }
+          else if (length > (int)(sizeof ("-dos") - 1))
+            {
+              if ((0 == qxestrcmp ((UAscbyte *)"-dos", (eidata (desired_base))
+                                   + (length - (sizeof ("-dos") - 1)))) ||
+                  (0 == qxestrcmp ((UAscbyte *)"-mac", (eidata (desired_base))
+                                   + (length - (sizeof ("-mac") - 1)))))
+                {
+                  eidel (desired_base, length - (sizeof ("-dos") - 1), -1,
+                         4, 4);
+                }
+            }
+
+          coding_system_or_name = intern_int (eidata (desired_base));
+
+          /* Remove this coding system and its subsidiary coding
+             systems from the hash, to avoid calling this code recursively. */
+          Fremhash (coding_system_or_name, Vcoding_system_hash_table);
+          Fremhash (add_suffix_to_symbol(coding_system_or_name, "-unix"),
+                    Vcoding_system_hash_table);
+          Fremhash (add_suffix_to_symbol(coding_system_or_name, "-dos"),
+                    Vcoding_system_hash_table);
+          Fremhash (add_suffix_to_symbol(coding_system_or_name, "-mac"),
+                    Vcoding_system_hash_table);
+
+          eicpy_ascii (warning_info, "Error autoloading coding system ");
+          eicat_lstr (warning_info, XSYMBOL_NAME (coding_system_or_name));
+
+          /* Keep around the form so it doesn't disappear from under
+             #'eval's feet. */
+          GCPRO1 (lookup);
+          call1_trapping_problems ((const CIbyte *)eidata (warning_info),
+                                   Qeval, lookup, 0);
+          UNGCPRO;
+
+          lookup =
+            Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
+        }
+
+      if (CODING_SYSTEMP (lookup) || NILP (lookup))
+        return lookup;
+
+      coding_system_or_name = lookup;
+    }
+}
+
 DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /*
 Retrieve the coding system of the given name.
 
@@ -479,22 +564,77 @@
 */
        (coding_system_or_name))
 {
-  if (NILP (coding_system_or_name))
-    coding_system_or_name = Qbinary;
-  else if (CODING_SYSTEMP (coding_system_or_name))
-    return coding_system_or_name;
-  else
-    CHECK_SYMBOL (coding_system_or_name);
-
-  while (1)
+  return find_coding_system(coding_system_or_name, 1);
+}
+
+DEFUN ("autoload-coding-system", Fautoload_coding_system, 2, 2, 0, /*
+Define SYMBOL as a coding-system that is loaded on demand.
+
+FORM is a form to evaluate to define the coding-system. 
+*/
+       (symbol, form))
+{
+  Lisp_Object lookup;
+
+  CHECK_SYMBOL (symbol);
+  CHECK_CONS (form);
+
+  lookup = find_coding_system (symbol, 0);
+
+  if (!NILP (lookup) &&
+      /* Allow autoloads to be redefined. */
+      !CONSP (lookup))
     {
-      coding_system_or_name =
-	Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
-
-      if (CODING_SYSTEMP (coding_system_or_name)
-	  || NILP (coding_system_or_name))
-	return coding_system_or_name;
+      invalid_operation ("Cannot redefine existing coding system",
+                         symbol);
     }
+
+  Fputhash (symbol, form, Vcoding_system_hash_table);
+  Fputhash (add_suffix_to_symbol(symbol, "-unix"), form,
+            Vcoding_system_hash_table);
+  Fputhash (add_suffix_to_symbol(symbol, "-dos"), form,
+            Vcoding_system_hash_table);
+  Fputhash (add_suffix_to_symbol(symbol, "-mac"), form,
+            Vcoding_system_hash_table);
+
+  /* Tell the POSIX locale infrastructure about this coding system (though
+     unfortunately it'll be too late for the startup locale sniffing. */
+  if (!UNBOUNDP (Qposix_charset_to_coding_system_hash))
+    {
+      Lisp_Object val = Fsymbol_value (Qposix_charset_to_coding_system_hash);
+      DECLARE_EISTRING (minimal_name);
+      Ibyte *full_name;
+      int len = XSTRING_LENGTH (XSYMBOL_NAME (symbol)), i;
+
+      if (!NILP (val))
+        {
+          full_name = XSTRING_DATA (XSYMBOL_NAME (symbol));
+          for (i = 0; i < len; ++i)
+            {
+              if (full_name[i] >= '0' && full_name[i] <= '9')
+                {
+                  eicat_ch (minimal_name, full_name[i]);
+                }
+              else if (full_name[i] >= 'a' && full_name[i] <= 'z')
+                {
+                  eicat_ch (minimal_name, full_name[i]);
+                }
+              else if (full_name[i] >= 'A'  && full_name[i] <= 'Z')
+                {
+                  eicat_ch (minimal_name, full_name[i] + 
+                            ('a' - 'A'));
+                }
+            }
+
+          if (eilen (minimal_name))
+            {
+              CHECK_HASH_TABLE (val);
+              Fputhash (eimake_string(minimal_name), symbol, val);
+            }
+        }
+    }
+
+  return Qt;
 }
 
 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
@@ -651,7 +791,7 @@
 };
 
 static int
-add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object UNUSED (value),
+add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value,
 				  void *coding_system_list_closure)
 {
   /* This function can GC */
@@ -660,9 +800,13 @@
   Lisp_Object *coding_system_list = cscl->coding_system_list;
 
   /* We can't just use VALUE because KEY might be an alias, and we need
-     the real coding system object. */
-  if (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ?
-      cscl->internal : cscl->normal)
+     the real coding system object.
+
+     Autoloaded coding systems have conses for their values, and can't be
+     internal coding systems, or coding system aliases.  */
+  if (CONSP (value) ||
+      (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ?
+       cscl->internal : cscl->normal))
     *coding_system_list = Fcons (key, *coding_system_list);
   return 0;
 }
@@ -921,9 +1065,13 @@
   else
     CHECK_SYMBOL (name_or_existing);
 
-  if (!NILP (Ffind_coding_system (name_or_existing)))
+  /* See is there an entry for name_or_existing in the defined coding system
+     hash table. */
+  csobj = find_coding_system (name_or_existing, 0);
+  /* Error if it's there and not an autoload form. */
+  if (!NILP (csobj) && !CONSP (csobj))
     invalid_operation ("Cannot redefine existing coding system",
-		       name_or_existing);
+                       name_or_existing);
 
   cs = allocate_coding_system (meths, meths->extra_data_size,
 			       name_or_existing);
@@ -999,6 +1147,8 @@
     XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system
 							 below */
 
+    Fputhash (name_or_existing, csobj, Vcoding_system_hash_table);
+
     if (need_to_setup_eol_systems && !cs->internal_p)
       setup_eol_coding_systems (csobj);
     else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF)
@@ -1037,8 +1187,6 @@
       }
     XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper;
   }
-  
-  Fputhash (name_or_existing, csobj, Vcoding_system_hash_table);
 
   return csobj;
 }
@@ -1396,7 +1544,7 @@
   Lisp_Object new_coding_system;
   old_coding_system = Fget_coding_system (old_coding_system);
   new_coding_system =
-    UNBOUNDP (new_name) ? Qnil : Ffind_coding_system (new_name);
+    UNBOUNDP (new_name) ? Qnil : find_coding_system (new_name, 0);
   if (NILP (new_coding_system))
     {
       new_coding_system =
@@ -4386,6 +4534,7 @@
   DEFSUBR (Fvalid_coding_system_type_p);
   DEFSUBR (Fcoding_system_type_list);
   DEFSUBR (Fcoding_system_p);
+  DEFSUBR (Fautoload_coding_system);
   DEFSUBR (Ffind_coding_system);
   DEFSUBR (Fget_coding_system);
   DEFSUBR (Fcoding_system_list);
@@ -4457,6 +4606,8 @@
 
   DEFSYMBOL (Qcanonicalize_after_coding);
 
+  DEFSYMBOL (Qposix_charset_to_coding_system_hash);
+
   DEFSYMBOL (Qescape_quoted);
 
 #ifdef HAVE_ZLIB