Mercurial > hg > xemacs-beta
changeset 4303:cee827542370
[xemacs-hg @ 2007-12-04 20:18:33 by aidan]
Implement coding system autoloads; use them for the variable-length ISO 2022
Latin coding systems.
author | aidan |
---|---|
date | Tue, 04 Dec 2007 20:18:43 +0000 |
parents | 2f5ccbd44293 |
children | 4ac3a83867c6 |
files | lisp/ChangeLog lisp/autoload.el lisp/mule/iso-with-esc.el src/ChangeLog src/file-coding.c |
diffstat | 5 files changed, 319 insertions(+), 24 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/ChangeLog Mon Dec 03 22:51:15 2007 +0000 +++ b/lisp/ChangeLog Tue Dec 04 20:18:43 2007 +0000 @@ -1,3 +1,16 @@ +2007-12-04 Aidan Kehoe <kehoea@parhasard.net> + + * mule/iso-with-esc.el: + * mule/iso-with-esc.el ('iso-latin-1-with-esc): New. + Provide the variable-length rarely-used ISO 2022 compatible coding + systems for Latin (that is, iso-8859-[1-16]) again, to address + Stephen's veto. + +2007-12-04 Aidan Kehoe <kehoea@parhasard.net> + + * autoload.el (make-autoload): + Support auto-autoloads for coding systems. + 2007-12-01 Aidan Kehoe <kehoea@parhasard.net> * mule/mule-coding.el (iso-8859-1):
--- a/lisp/autoload.el Mon Dec 03 22:51:15 2007 +0000 +++ b/lisp/autoload.el Tue Dec 04 20:18:43 2007 +0000 @@ -280,7 +280,9 @@ ',varname 'custom-variable) (custom-add-load ',varname ,(plist-get rest :require)))))) - + ;; Coding systems. #### Would be nice to handle the docstring here too. + ((memq car '(make-coding-system make-8-bit-coding-system)) + `(autoload-coding-system ,(nth 1 form) '(load ,file))) ;; nil here indicates that this is not a special autoload form. (t nil))))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lisp/mule/iso-with-esc.el Tue Dec 04 20:18:43 2007 +0000 @@ -0,0 +1,100 @@ +;;; iso-with-esc.el -- +;;; Provision of the hateful and never widely implemented Latin, Greek and +;;; Cyrillic variable-length ISO 2022 coding systems that passed for Latin +;;; 2, Latin 10, (etc) support in XEmacs for so long. +;; +;; Copyright (C) 2006 Free Software Foundation + +;; Author: Aidan Kehoe + +;; This file is part of XEmacs. + +;; XEmacs is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; XEmacs is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with XEmacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. + +;;; Commentary: + +;;; Code: + +;;;###autoload +(define-coding-system-alias 'iso-latin-1-with-esc 'iso-2022-8) + +;;;###autoload +(make-coding-system + 'iso-latin-2-with-esc 'iso2022 "ISO-8859-2 (Latin-2)" + '(charset-g0 ascii + charset-g1 latin-iso8859-2 + charset-g2 t + charset-g3 t + mnemonic "MIME/Ltn-2")) + +;;;###autoload +(make-coding-system + 'iso-latin-3-with-esc 'iso2022 "ISO-8859-3 (Latin-3)" + '(charset-g0 ascii + charset-g1 latin-iso8859-3 + charset-g2 t + charset-g3 t + mnemonic "MIME/Ltn-3")) + +;;;###autoload +(make-coding-system + 'iso-latin-4-with-esc 'iso2022 "ISO-8859-4 (Latin-4)" + '(charset-g0 ascii + charset-g1 latin-iso8859-4 + charset-g2 t + charset-g3 t + mnemonic "MIME/Ltn-4")) + +;;;###autoload +(make-coding-system + 'iso-latin-9-with-esc 'iso2022 + "ISO 4873 conforming 8-bit code (ASCII + Latin 9; aka Latin-1 with Euro)" + '(mnemonic "MIME/Ltn-9" ; bletch + eol-type nil + charset-g0 ascii + charset-g1 latin-iso8859-15 + charset-g2 t + charset-g3 t)) + +;;;###autoload +(make-coding-system + 'iso-latin-5-with-esc 'iso2022 "ISO-8859-9 (Latin-5)" + '(charset-g0 ascii + charset-g1 latin-iso8859-9 + charset-g2 t + charset-g3 t + mnemonic "MIME/Ltn-5")) + +;;;###autoload +(make-coding-system + 'cyrillic-iso-8bit-with-esc 'iso2022 + "ISO-8859-5 (Cyrillic)" + '(charset-g0 ascii + charset-g1 cyrillic-iso8859-5 + charset-g2 t + charset-g3 t + mnemonic "ISO8/Cyr")) + +;;;###autoload +(make-coding-system + 'hebrew-iso-8bit-with-esc 'iso2022 + "ISO-8859-8 (Hebrew)" + '(charset-g0 ascii + charset-g1 hebrew-iso8859-8 + charset-g2 t + charset-g3 t + no-iso6429 t + mnemonic "MIME/Hbrw"))
--- a/src/ChangeLog Mon Dec 03 22:51:15 2007 +0000 +++ b/src/ChangeLog Tue Dec 04 20:18:43 2007 +0000 @@ -1,3 +1,32 @@ +2007-12-04 Aidan Kehoe <kehoea@parhasard.net> + + * file-coding.c: + * file-coding.c (find_coding_system): + C-accessible version of #'find-coding-system that doesn't + necessarily call the autoload code, for use in + #'autoload-coding-system (which we allow to overwrite autoloaded + coding systems) and make_coding_system_1 (which has to). + * file-coding.c (Ffind_coding_system): + Move the implementation to find_coding_system; call that function + with a do_autoloads argument of 1. + * file-coding.c (Fautoload_coding_system): + New. + * file-coding.c (add_coding_system_to_list_mapper): + When returning a list of coding systems, don't call the autoload + code. + * file-coding.c (make_coding_system_1): + * file-coding.c (Fcopy_coding_system): + * file-coding.c (syms_of_file_coding): + + Implement autoloaded coding systems. The form to be evaluated to + load a given coding system is stored as the value in + Vcoding_system_hash_table; this form is evaluated if + find-coding-system is called with the symbol name of the coding + system as its argument. + + This is also tied in with the POSIX locale infrastructure by means + of posix-charset-to-coding-system-hash. + 2007-11-29 Aidan Kehoe <kehoea@parhasard.net> * mule-ccl.c (ccl_driver):
--- a/src/file-coding.c Mon Dec 03 22:51:15 2007 +0000 +++ b/src/file-coding.c Tue Dec 04 20:18:43 2007 +0000 @@ -229,6 +229,8 @@ Lisp_Object QScoding_system_cookie; +Lisp_Object Qposix_charset_to_coding_system_hash; + /* This is used to convert autodetected coding systems into existing systems. For example, the chain undecided->convert-eol-autodetect may have its separate parts detected as mswindows-multibyte and @@ -469,6 +471,89 @@ return CODING_SYSTEMP (object) ? Qt : Qnil; } +static Lisp_Object +find_coding_system (Lisp_Object coding_system_or_name, + int do_autoloads) +{ + Lisp_Object lookup; + + if (NILP (coding_system_or_name)) + coding_system_or_name = Qbinary; + else if (CODING_SYSTEMP (coding_system_or_name)) + return coding_system_or_name; + else + CHECK_SYMBOL (coding_system_or_name); + + while (1) + { + lookup = + Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); + + if (CONSP (lookup) && do_autoloads) + { + struct gcpro gcpro1; + int length; + DECLARE_EISTRING (desired_base); + DECLARE_EISTRING (warning_info); + + eicpy_lstr (desired_base, XSYMBOL_NAME (coding_system_or_name)); + + /* Work out the name of the base coding system. */ + length = eilen (desired_base); + if (length > (int)(sizeof ("-unix") - 1)) + { + if (0 == qxestrcmp ((UAscbyte *)"-unix", (eidata (desired_base)) + + (length - (sizeof ("-unix") - 1)))) + { + eidel (desired_base, length - (sizeof ("-unix") - 1), + -1, 5, 5); + } + } + else if (length > (int)(sizeof ("-dos") - 1)) + { + if ((0 == qxestrcmp ((UAscbyte *)"-dos", (eidata (desired_base)) + + (length - (sizeof ("-dos") - 1)))) || + (0 == qxestrcmp ((UAscbyte *)"-mac", (eidata (desired_base)) + + (length - (sizeof ("-mac") - 1))))) + { + eidel (desired_base, length - (sizeof ("-dos") - 1), -1, + 4, 4); + } + } + + coding_system_or_name = intern_int (eidata (desired_base)); + + /* Remove this coding system and its subsidiary coding + systems from the hash, to avoid calling this code recursively. */ + Fremhash (coding_system_or_name, Vcoding_system_hash_table); + Fremhash (add_suffix_to_symbol(coding_system_or_name, "-unix"), + Vcoding_system_hash_table); + Fremhash (add_suffix_to_symbol(coding_system_or_name, "-dos"), + Vcoding_system_hash_table); + Fremhash (add_suffix_to_symbol(coding_system_or_name, "-mac"), + Vcoding_system_hash_table); + + eicpy_ascii (warning_info, "Error autoloading coding system "); + eicat_lstr (warning_info, XSYMBOL_NAME (coding_system_or_name)); + + /* Keep around the form so it doesn't disappear from under + #'eval's feet. */ + GCPRO1 (lookup); + call1_trapping_problems ((const CIbyte *)eidata (warning_info), + Qeval, lookup, 0); + UNGCPRO; + + lookup = + Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); + } + + if (CODING_SYSTEMP (lookup) || NILP (lookup)) + return lookup; + + coding_system_or_name = lookup; + } +} + DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /* Retrieve the coding system of the given name. @@ -479,22 +564,77 @@ */ (coding_system_or_name)) { - if (NILP (coding_system_or_name)) - coding_system_or_name = Qbinary; - else if (CODING_SYSTEMP (coding_system_or_name)) - return coding_system_or_name; - else - CHECK_SYMBOL (coding_system_or_name); - - while (1) + return find_coding_system(coding_system_or_name, 1); +} + +DEFUN ("autoload-coding-system", Fautoload_coding_system, 2, 2, 0, /* +Define SYMBOL as a coding-system that is loaded on demand. + +FORM is a form to evaluate to define the coding-system. +*/ + (symbol, form)) +{ + Lisp_Object lookup; + + CHECK_SYMBOL (symbol); + CHECK_CONS (form); + + lookup = find_coding_system (symbol, 0); + + if (!NILP (lookup) && + /* Allow autoloads to be redefined. */ + !CONSP (lookup)) { - coding_system_or_name = - Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); - - if (CODING_SYSTEMP (coding_system_or_name) - || NILP (coding_system_or_name)) - return coding_system_or_name; + invalid_operation ("Cannot redefine existing coding system", + symbol); } + + Fputhash (symbol, form, Vcoding_system_hash_table); + Fputhash (add_suffix_to_symbol(symbol, "-unix"), form, + Vcoding_system_hash_table); + Fputhash (add_suffix_to_symbol(symbol, "-dos"), form, + Vcoding_system_hash_table); + Fputhash (add_suffix_to_symbol(symbol, "-mac"), form, + Vcoding_system_hash_table); + + /* Tell the POSIX locale infrastructure about this coding system (though + unfortunately it'll be too late for the startup locale sniffing. */ + if (!UNBOUNDP (Qposix_charset_to_coding_system_hash)) + { + Lisp_Object val = Fsymbol_value (Qposix_charset_to_coding_system_hash); + DECLARE_EISTRING (minimal_name); + Ibyte *full_name; + int len = XSTRING_LENGTH (XSYMBOL_NAME (symbol)), i; + + if (!NILP (val)) + { + full_name = XSTRING_DATA (XSYMBOL_NAME (symbol)); + for (i = 0; i < len; ++i) + { + if (full_name[i] >= '0' && full_name[i] <= '9') + { + eicat_ch (minimal_name, full_name[i]); + } + else if (full_name[i] >= 'a' && full_name[i] <= 'z') + { + eicat_ch (minimal_name, full_name[i]); + } + else if (full_name[i] >= 'A' && full_name[i] <= 'Z') + { + eicat_ch (minimal_name, full_name[i] + + ('a' - 'A')); + } + } + + if (eilen (minimal_name)) + { + CHECK_HASH_TABLE (val); + Fputhash (eimake_string(minimal_name), symbol, val); + } + } + } + + return Qt; } DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /* @@ -651,7 +791,7 @@ }; static int -add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object UNUSED (value), +add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value, void *coding_system_list_closure) { /* This function can GC */ @@ -660,9 +800,13 @@ Lisp_Object *coding_system_list = cscl->coding_system_list; /* We can't just use VALUE because KEY might be an alias, and we need - the real coding system object. */ - if (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ? - cscl->internal : cscl->normal) + the real coding system object. + + Autoloaded coding systems have conses for their values, and can't be + internal coding systems, or coding system aliases. */ + if (CONSP (value) || + (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ? + cscl->internal : cscl->normal)) *coding_system_list = Fcons (key, *coding_system_list); return 0; } @@ -921,9 +1065,13 @@ else CHECK_SYMBOL (name_or_existing); - if (!NILP (Ffind_coding_system (name_or_existing))) + /* See is there an entry for name_or_existing in the defined coding system + hash table. */ + csobj = find_coding_system (name_or_existing, 0); + /* Error if it's there and not an autoload form. */ + if (!NILP (csobj) && !CONSP (csobj)) invalid_operation ("Cannot redefine existing coding system", - name_or_existing); + name_or_existing); cs = allocate_coding_system (meths, meths->extra_data_size, name_or_existing); @@ -999,6 +1147,8 @@ XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system below */ + Fputhash (name_or_existing, csobj, Vcoding_system_hash_table); + if (need_to_setup_eol_systems && !cs->internal_p) setup_eol_coding_systems (csobj); else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF) @@ -1037,8 +1187,6 @@ } XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper; } - - Fputhash (name_or_existing, csobj, Vcoding_system_hash_table); return csobj; } @@ -1396,7 +1544,7 @@ Lisp_Object new_coding_system; old_coding_system = Fget_coding_system (old_coding_system); new_coding_system = - UNBOUNDP (new_name) ? Qnil : Ffind_coding_system (new_name); + UNBOUNDP (new_name) ? Qnil : find_coding_system (new_name, 0); if (NILP (new_coding_system)) { new_coding_system = @@ -4386,6 +4534,7 @@ DEFSUBR (Fvalid_coding_system_type_p); DEFSUBR (Fcoding_system_type_list); DEFSUBR (Fcoding_system_p); + DEFSUBR (Fautoload_coding_system); DEFSUBR (Ffind_coding_system); DEFSUBR (Fget_coding_system); DEFSUBR (Fcoding_system_list); @@ -4457,6 +4606,8 @@ DEFSYMBOL (Qcanonicalize_after_coding); + DEFSYMBOL (Qposix_charset_to_coding_system_hash); + DEFSYMBOL (Qescape_quoted); #ifdef HAVE_ZLIB