view lisp/unicode.el @ 5127:a9c41067dd88 ben-lisp-object

more cleanups, terminology clarification, lots of doc work -------------------- ChangeLog entries follow: -------------------- man/ChangeLog addition: 2010-03-05 Ben Wing <ben@xemacs.org> * internals/internals.texi (Introduction to Allocation): * internals/internals.texi (Integers and Characters): * internals/internals.texi (Allocation from Frob Blocks): * internals/internals.texi (lrecords): * internals/internals.texi (Low-level allocation): Rewrite section on allocation of Lisp objects to reflect the new reality. Remove references to nonexistent XSETINT and XSETCHAR. modules/ChangeLog addition: 2010-03-05 Ben Wing <ben@xemacs.org> * postgresql/postgresql.c (allocate_pgconn): * postgresql/postgresql.c (allocate_pgresult): * postgresql/postgresql.h (struct Lisp_PGconn): * postgresql/postgresql.h (struct Lisp_PGresult): * ldap/eldap.c (allocate_ldap): * ldap/eldap.h (struct Lisp_LDAP): Same changes as in src/ dir. See large log there in ChangeLog, but basically: ALLOC_LISP_OBJECT -> ALLOC_NORMAL_LISP_OBJECT LISP_OBJECT_HEADER -> NORMAL_LISP_OBJECT_HEADER ../hlo/src/ChangeLog addition: 2010-03-05 Ben Wing <ben@xemacs.org> * alloc.c: * alloc.c (old_alloc_sized_lcrecord): * alloc.c (very_old_free_lcrecord): * alloc.c (copy_lisp_object): * alloc.c (zero_sized_lisp_object): * alloc.c (zero_nonsized_lisp_object): * alloc.c (lisp_object_storage_size): * alloc.c (free_normal_lisp_object): * alloc.c (FREE_FIXED_TYPE_WHEN_NOT_IN_GC): * alloc.c (ALLOC_FROB_BLOCK_LISP_OBJECT): * alloc.c (Fcons): * alloc.c (noseeum_cons): * alloc.c (make_float): * alloc.c (make_bignum): * alloc.c (make_bignum_bg): * alloc.c (make_ratio): * alloc.c (make_ratio_bg): * alloc.c (make_ratio_rt): * alloc.c (make_bigfloat): * alloc.c (make_bigfloat_bf): * alloc.c (size_vector): * alloc.c (make_compiled_function): * alloc.c (Fmake_symbol): * alloc.c (allocate_extent): * alloc.c (allocate_event): * alloc.c (make_key_data): * alloc.c (make_button_data): * alloc.c (make_motion_data): * alloc.c (make_process_data): * alloc.c (make_timeout_data): * alloc.c (make_magic_data): * alloc.c (make_magic_eval_data): * alloc.c (make_eval_data): * alloc.c (make_misc_user_data): * alloc.c (Fmake_marker): * alloc.c (noseeum_make_marker): * alloc.c (size_string_direct_data): * alloc.c (make_uninit_string): * alloc.c (make_string_nocopy): * alloc.c (mark_lcrecord_list): * alloc.c (alloc_managed_lcrecord): * alloc.c (free_managed_lcrecord): * alloc.c (sweep_lcrecords_1): * alloc.c (malloced_storage_size): * buffer.c (allocate_buffer): * buffer.c (compute_buffer_usage): * buffer.c (DEFVAR_BUFFER_LOCAL_1): * buffer.c (nuke_all_buffer_slots): * buffer.c (common_init_complex_vars_of_buffer): * buffer.h (struct buffer_text): * buffer.h (struct buffer): * bytecode.c: * bytecode.c (make_compiled_function_args): * bytecode.c (size_compiled_function_args): * bytecode.h (struct compiled_function_args): * casetab.c (allocate_case_table): * casetab.h (struct Lisp_Case_Table): * charset.h (struct Lisp_Charset): * chartab.c (fill_char_table): * chartab.c (Fmake_char_table): * chartab.c (make_char_table_entry): * chartab.c (copy_char_table_entry): * chartab.c (Fcopy_char_table): * chartab.c (put_char_table): * chartab.h (struct Lisp_Char_Table_Entry): * chartab.h (struct Lisp_Char_Table): * console-gtk-impl.h (struct gtk_device): * console-gtk-impl.h (struct gtk_frame): * console-impl.h (struct console): * console-msw-impl.h (struct Lisp_Devmode): * console-msw-impl.h (struct mswindows_device): * console-msw-impl.h (struct msprinter_device): * console-msw-impl.h (struct mswindows_frame): * console-msw-impl.h (struct mswindows_dialog_id): * console-stream-impl.h (struct stream_console): * console-stream.c (stream_init_console): * console-tty-impl.h (struct tty_console): * console-tty-impl.h (struct tty_device): * console-tty.c (allocate_tty_console_struct): * console-x-impl.h (struct x_device): * console-x-impl.h (struct x_frame): * console.c (allocate_console): * console.c (nuke_all_console_slots): * console.c (DEFVAR_CONSOLE_LOCAL_1): * console.c (common_init_complex_vars_of_console): * data.c (make_weak_list): * data.c (make_weak_box): * data.c (make_ephemeron): * database.c: * database.c (struct Lisp_Database): * database.c (allocate_database): * database.c (finalize_database): * device-gtk.c (allocate_gtk_device_struct): * device-impl.h (struct device): * device-msw.c: * device-msw.c (mswindows_init_device): * device-msw.c (msprinter_init_device): * device-msw.c (finalize_devmode): * device-msw.c (allocate_devmode): * device-tty.c (allocate_tty_device_struct): * device-x.c (allocate_x_device_struct): * device.c: * device.c (nuke_all_device_slots): * device.c (allocate_device): * dialog-msw.c (handle_question_dialog_box): * elhash.c: * elhash.c (struct Lisp_Hash_Table): * elhash.c (finalize_hash_table): * elhash.c (make_general_lisp_hash_table): * elhash.c (Fcopy_hash_table): * elhash.h (htentry): * emacs.c (main_1): * eval.c: * eval.c (size_multiple_value): * event-stream.c (finalize_command_builder): * event-stream.c (allocate_command_builder): * event-stream.c (free_command_builder): * event-stream.c (event_stream_generate_wakeup): * event-stream.c (event_stream_resignal_wakeup): * event-stream.c (event_stream_disable_wakeup): * event-stream.c (event_stream_wakeup_pending_p): * events.h (struct Lisp_Timeout): * events.h (struct command_builder): * extents-impl.h: * extents-impl.h (struct extent_auxiliary): * extents-impl.h (struct extent_info): * extents-impl.h (set_extent_no_chase_aux_field): * extents-impl.h (set_extent_no_chase_normal_field): * extents.c: * extents.c (gap_array_marker): * extents.c (gap_array): * extents.c (extent_list_marker): * extents.c (extent_list): * extents.c (stack_of_extents): * extents.c (gap_array_make_marker): * extents.c (extent_list_make_marker): * extents.c (allocate_extent_list): * extents.c (SLOT): * extents.c (mark_extent_auxiliary): * extents.c (allocate_extent_auxiliary): * extents.c (attach_extent_auxiliary): * extents.c (size_gap_array): * extents.c (finalize_extent_info): * extents.c (allocate_extent_info): * extents.c (uninit_buffer_extents): * extents.c (allocate_soe): * extents.c (copy_extent): * extents.c (vars_of_extents): * extents.h: * faces.c (allocate_face): * faces.h (struct Lisp_Face): * faces.h (struct face_cachel): * file-coding.c: * file-coding.c (finalize_coding_system): * file-coding.c (sizeof_coding_system): * file-coding.c (Fcopy_coding_system): * file-coding.h (struct Lisp_Coding_System): * file-coding.h (MARKED_SLOT): * fns.c (size_bit_vector): * font-mgr.c: * font-mgr.c (finalize_fc_pattern): * font-mgr.c (print_fc_pattern): * font-mgr.c (Ffc_pattern_p): * font-mgr.c (Ffc_pattern_create): * font-mgr.c (Ffc_name_parse): * font-mgr.c (Ffc_name_unparse): * font-mgr.c (Ffc_pattern_duplicate): * font-mgr.c (Ffc_pattern_add): * font-mgr.c (Ffc_pattern_del): * font-mgr.c (Ffc_pattern_get): * font-mgr.c (fc_config_create_using): * font-mgr.c (fc_strlist_to_lisp_using): * font-mgr.c (fontset_to_list): * font-mgr.c (Ffc_config_p): * font-mgr.c (Ffc_config_up_to_date): * font-mgr.c (Ffc_config_build_fonts): * font-mgr.c (Ffc_config_get_cache): * font-mgr.c (Ffc_config_get_fonts): * font-mgr.c (Ffc_config_set_current): * font-mgr.c (Ffc_config_get_blanks): * font-mgr.c (Ffc_config_get_rescan_interval): * font-mgr.c (Ffc_config_set_rescan_interval): * font-mgr.c (Ffc_config_app_font_add_file): * font-mgr.c (Ffc_config_app_font_add_dir): * font-mgr.c (Ffc_config_app_font_clear): * font-mgr.c (size): * font-mgr.c (Ffc_config_substitute): * font-mgr.c (Ffc_font_render_prepare): * font-mgr.c (Ffc_font_match): * font-mgr.c (Ffc_font_sort): * font-mgr.c (finalize_fc_config): * font-mgr.c (print_fc_config): * font-mgr.h: * font-mgr.h (struct fc_pattern): * font-mgr.h (XFC_PATTERN): * font-mgr.h (struct fc_config): * font-mgr.h (XFC_CONFIG): * frame-gtk.c (allocate_gtk_frame_struct): * frame-impl.h (struct frame): * frame-msw.c (mswindows_init_frame_1): * frame-x.c (allocate_x_frame_struct): * frame.c (nuke_all_frame_slots): * frame.c (allocate_frame_core): * gc.c: * gc.c (GC_CHECK_NOT_FREE): * glyphs.c (finalize_image_instance): * glyphs.c (allocate_image_instance): * glyphs.c (Fcolorize_image_instance): * glyphs.c (allocate_glyph): * glyphs.c (unmap_subwindow_instance_cache_mapper): * glyphs.c (register_ignored_expose): * glyphs.h (struct Lisp_Image_Instance): * glyphs.h (struct Lisp_Glyph): * glyphs.h (struct glyph_cachel): * glyphs.h (struct expose_ignore): * gui.c (allocate_gui_item): * gui.h (struct Lisp_Gui_Item): * keymap.c (struct Lisp_Keymap): * keymap.c (make_keymap): * lisp.h: * lisp.h (struct Lisp_String_Direct_Data): * lisp.h (struct Lisp_String_Indirect_Data): * lisp.h (struct Lisp_Vector): * lisp.h (struct Lisp_Bit_Vector): * lisp.h (DECLARE_INLINE_LISP_BIT_VECTOR): * lisp.h (struct weak_box): * lisp.h (struct ephemeron): * lisp.h (struct weak_list): * lrecord.h: * lrecord.h (struct lrecord_implementation): * lrecord.h (MC_ALLOC_CALL_FINALIZER): * lrecord.h (struct lcrecord_list): * lstream.c (finalize_lstream): * lstream.c (sizeof_lstream): * lstream.c (Lstream_new): * lstream.c (Lstream_delete): * lstream.h (struct lstream): * marker.c: * marker.c (finalize_marker): * marker.c (compute_buffer_marker_usage): * mule-charset.c: * mule-charset.c (make_charset): * mule-charset.c (compute_charset_usage): * objects-impl.h (struct Lisp_Color_Instance): * objects-impl.h (struct Lisp_Font_Instance): * objects-tty-impl.h (struct tty_color_instance_data): * objects-tty-impl.h (struct tty_font_instance_data): * objects-tty.c (tty_initialize_color_instance): * objects-tty.c (tty_initialize_font_instance): * objects.c (finalize_color_instance): * objects.c (Fmake_color_instance): * objects.c (finalize_font_instance): * objects.c (Fmake_font_instance): * objects.c (reinit_vars_of_objects): * opaque.c: * opaque.c (sizeof_opaque): * opaque.c (make_opaque_ptr): * opaque.c (free_opaque_ptr): * opaque.h: * opaque.h (Lisp_Opaque): * opaque.h (Lisp_Opaque_Ptr): * print.c (printing_unreadable_lcrecord): * print.c (external_object_printer): * print.c (debug_p4): * process.c (finalize_process): * process.c (make_process_internal): * procimpl.h (struct Lisp_Process): * rangetab.c (Fmake_range_table): * rangetab.c (Fcopy_range_table): * rangetab.h (struct Lisp_Range_Table): * scrollbar.c: * scrollbar.c (create_scrollbar_instance): * scrollbar.c (compute_scrollbar_instance_usage): * scrollbar.h (struct scrollbar_instance): * specifier.c (finalize_specifier): * specifier.c (sizeof_specifier): * specifier.c (set_specifier_caching): * specifier.h (struct Lisp_Specifier): * specifier.h (struct specifier_caching): * symeval.h: * symeval.h (SYMBOL_VALUE_MAGIC_P): * symeval.h (DEFVAR_SYMVAL_FWD): * symsinit.h: * syntax.c (init_buffer_syntax_cache): * syntax.h (struct syntax_cache): * toolbar.c: * toolbar.c (allocate_toolbar_button): * toolbar.c (update_toolbar_button): * toolbar.h (struct toolbar_button): * tooltalk.c (struct Lisp_Tooltalk_Message): * tooltalk.c (make_tooltalk_message): * tooltalk.c (struct Lisp_Tooltalk_Pattern): * tooltalk.c (make_tooltalk_pattern): * ui-gtk.c: * ui-gtk.c (allocate_ffi_data): * ui-gtk.c (emacs_gtk_object_finalizer): * ui-gtk.c (allocate_emacs_gtk_object_data): * ui-gtk.c (allocate_emacs_gtk_boxed_data): * ui-gtk.h: * window-impl.h (struct window): * window-impl.h (struct window_mirror): * window.c (finalize_window): * window.c (allocate_window): * window.c (new_window_mirror): * window.c (mark_window_as_deleted): * window.c (make_dummy_parent): * window.c (compute_window_mirror_usage): * window.c (compute_window_usage): Overall point of this change and previous ones in this repository: (1) Introduce new, clearer terminology: everything other than int or char is a "record" object, which comes in two types: "normal objects" and "frob-block objects". Fix up all places that referred to frob-block objects as "simple", "basic", etc. (2) Provide an advertised interface for doing operations on Lisp objects, including creating new types, that is clean and consistent in its naming, uses the above-referenced terms and avoids referencing "lrecords", "old lcrecords", etc., which should hide under the surface. (3) Make the size_in_bytes and finalizer methods take a Lisp_Object rather than a void * for consistency with other methods. (4) Separate finalizer method into finalizer and disksaver, so that normal finalize methods don't have to worry about disksaving. Other specifics: (1) Renaming: LISP_OBJECT_HEADER -> NORMAL_LISP_OBJECT_HEADER ALLOC_LISP_OBJECT -> ALLOC_NORMAL_LISP_OBJECT implementation->basic_p -> implementation->frob_block_p ALLOCATE_FIXED_TYPE_AND_SET_IMPL -> ALLOC_FROB_BLOCK_LISP_OBJECT *FCCONFIG*, wrap_fcconfig -> *FC_CONFIG*, wrap_fc_config *FCPATTERN*, wrap_fcpattern -> *FC_PATTERN*, wrap_fc_pattern (the last two changes make the naming of these macros consistent with the naming of all other macros, since the objects are named fc-config and fc-pattern with a hyphen) (2) Lots of documentation fixes in lrecord.h. (3) Eliminate macros for copying, freeing, zeroing objects, getting their storage size. Instead, new functions: zero_sized_lisp_object() zero_nonsized_lisp_object() lisp_object_storage_size() free_normal_lisp_object() (copy_lisp_object() already exists) LISP_OBJECT_FROB_BLOCK_P() (actually a macro) Eliminated: free_lrecord() zero_lrecord() copy_lrecord() copy_sized_lrecord() old_copy_lcrecord() old_copy_sized_lcrecord() old_zero_lcrecord() old_zero_sized_lcrecord() LISP_OBJECT_STORAGE_SIZE() COPY_SIZED_LISP_OBJECT() COPY_SIZED_LCRECORD() COPY_LISP_OBJECT() ZERO_LISP_OBJECT() FREE_LISP_OBJECT() (4) Catch the remaining places where lrecord stuff was used directly and use the advertised interface, e.g. alloc_sized_lrecord() -> ALLOC_SIZED_LISP_OBJECT(). (5) Make certain statically-declared pseudo-objects (buffer_local_flags, console_local_flags) have their lheader initialized correctly, so things like copy_lisp_object() can work on them. Make extent_auxiliary_defaults a proper heap object Vextent_auxiliary_defaults, and make extent auxiliaries dumpable so that this object can be dumped. allocate_extent_auxiliary() now just creates the object, and attach_extent_auxiliary() creates an extent auxiliary and attaches to an extent, like the old allocate_extent_auxiliary(). (6) Create EXTENT_AUXILIARY_SLOTS macro, similar to the foo-slots.h files but in a macro instead of a file. The purpose is to avoid duplication when iterating over all the slots in an extent auxiliary. Use it. (7) In lstream.c, don't zero out object after allocation because allocation routines take care of this. (8) In marker.c, fix a mistake in computing marker overhead. (9) In print.c, clean up printing_unreadable_lcrecord(), external_object_printer() to avoid lots of ifdef NEW_GC's. (10) Separate toolbar-button allocation into a separate allocate_toolbar_button() function for use in the example code in lrecord.h.
author Ben Wing <ben@xemacs.org>
date Fri, 05 Mar 2010 04:08:17 -0600
parents c0934cef10c6
children 75469840109b 308d34e9f07d
line wrap: on
line source

;;; unicode.el --- Unicode support -*- coding: utf-8; -*-

;; Copyright (C) 2001, 2002 Ben Wing.

;; Keywords: multilingual, Unicode

;; This file is part of XEmacs.

;; XEmacs is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; XEmacs is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with XEmacs; see the file COPYING.  If not, write to the Free
;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
;; 02111-1307, USA.

;;; Synched up with: Not in FSF.

;;; Commentary:

;; Lisp support for Unicode, e.g. initialize the translation tables.

;;; Code:

;; GNU Emacs has the charsets: 

;;     mule-unicode-2500-33ff
;;     mule-unicode-e000-ffff
;;     mule-unicode-0100-24ff

;; built-in.  This is hack--and an incomplete hack at that--against the
;; spirit and the letter of standard ISO 2022 character sets.  Instead of
;; this, we have the jit-ucs-charset-N Mule character sets, created in
;; unicode.c on encountering a Unicode code point that we don't recognise,
;; and saved in ISO 2022 coding systems using the UTF-8 escape described in
;; ISO-IR 196.

(eval-when-compile (when (featurep 'mule) (require 'ccl)))

;; accessed in loadup.el, mule-cmds.el; see discussion in unicode.c
(defvar load-unicode-tables-at-dump-time (eq system-type 'windows-nt)
  "[INTERNAL] Whether to load the Unicode tables at dump time.
Setting this at run-time does nothing.")

;; NOTE: This takes only a fraction of a second on my Pentium III
;; 700Mhz even with a totally optimization-disabled XEmacs.
(defun load-unicode-tables ()
  "Initialize the Unicode translation tables for all standard charsets."
  (let ((parse-args
	 '(("unicode/unicode-consortium"
	    ;; Due to the braindamaged way Mule treats the ASCII and Control-1
	    ;; charsets' types, trying to load them results in out-of-range
	    ;; warnings at unicode.c:1439.  They're no-ops anyway, they're
	    ;; hardwired in unicode.c (unicode_to_ichar, ichar_to_unicode).
	    ;; ("8859-1.TXT" ascii #x00 #x7F #x0)
	    ;; ("8859-1.TXT" control-1 #x80 #x9F #x-80)
            ;; The 8859-1.TXT G1 assignments are half no-ops, hardwired in
	    ;; unicode.c ichar_to_unicode, but not in unicode_to_ichar.
	    ("8859-1.TXT" latin-iso8859-1 #xA0 #xFF #x-80)
	    ;; "8859-10.TXT"
	    ;; "8859-13.TXT"
	    ("8859-14.TXT" latin-iso8859-14 #xA0 #xFF #x-80)
	    ("8859-15.TXT" latin-iso8859-15 #xA0 #xFF #x-80)
	    ("8859-16.TXT" latin-iso8859-16 #xA0 #xFF #x-80)
	    ("8859-2.TXT" latin-iso8859-2 #xA0 #xFF #x-80)
	    ("8859-3.TXT" latin-iso8859-3 #xA0 #xFF #x-80)
	    ("8859-4.TXT" latin-iso8859-4 #xA0 #xFF #x-80)
	    ("8859-5.TXT" cyrillic-iso8859-5 #xA0 #xFF #x-80)
	    ("8859-6.TXT" arabic-iso8859-6 #xA0 #xFF #x-80)
	    ("8859-7.TXT" greek-iso8859-7 #xA0 #xFF #x-80)
	    ("8859-8.TXT" hebrew-iso8859-8 #xA0 #xFF #x-80)
	    ("8859-9.TXT" latin-iso8859-9 #xA0 #xFF #x-80)
	    ;; charset for Big5 does not matter; specifying `big5' will
	    ;; automatically make the right thing happen
	    ("BIG5.TXT" chinese-big5-1 nil nil nil big5)
	    ("CNS11643.TXT" chinese-cns11643-1 #x10000 #x1FFFF #x-10000)
	    ("CNS11643.TXT" chinese-cns11643-2 #x20000 #x2FFFF #x-20000)
	    ;; "CP1250.TXT" 
	    ;; "CP1251.TXT" 
	    ;; "CP1252.TXT" 
	    ;; "CP1253.TXT" 
	    ;; "CP1254.TXT" 
	    ;; "CP1255.TXT" 
	    ;; "CP1256.TXT" 
	    ;; "CP1257.TXT" 
	    ;; "CP1258.TXT" 
	    ;; "CP874.TXT" 
	    ;; "CP932.TXT" 
	    ;; "CP936.TXT" 
	    ;; "CP949.TXT" 
	    ;; "CP950.TXT" 
	    ;; "GB12345.TXT" 
	    ("GB2312.TXT" chinese-gb2312)
	    ;; "HANGUL.TXT"
	    ;; #### shouldn't JIS X 0201's upper limit be 7f?
	    ("JIS0201.TXT" latin-jisx0201 #x21 #x80)
	    ("JIS0201.TXT" katakana-jisx0201 #xA0 #xFF #x-80)
	    ("JIS0208.TXT" japanese-jisx0208 nil nil nil ignore-first-column)
	    ("JIS0212.TXT" japanese-jisx0212)
	    ;; "JOHAB.TXT" 
	    ;; "KOI8-R.TXT" 
	    ;; "KSC5601.TXT" 
	    ;; note that KSC5601.TXT as currently distributed is NOT what
	    ;; it claims to be!  see comments in KSX1001.TXT.
	    ("KSX1001.TXT" korean-ksc5601)
	    ;; "OLD5601.TXT" 
	    ;; "SHIFTJIS.TXT"
	    )
	   ("unicode/mule-ucs"
	    ;; #### we don't support surrogates?!??
	    ;; use these instead of the above ones once we support surrogates
	    ;;("chinese-cns11643-1.txt" chinese-cns11643-1)
	    ;;("chinese-cns11643-2.txt" chinese-cns11643-2)
	    ;;("chinese-cns11643-3.txt" chinese-cns11643-3)
	    ;;("chinese-cns11643-4.txt" chinese-cns11643-4)
	    ;;("chinese-cns11643-5.txt" chinese-cns11643-5)
	    ;;("chinese-cns11643-6.txt" chinese-cns11643-6)
	    ;;("chinese-cns11643-7.txt" chinese-cns11643-7)
	    ("chinese-sisheng.txt" chinese-sisheng)
	    ("ethiopic.txt" ethiopic)
	    ("indian-is13194.txt" indian-is13194)
	    ("ipa.txt" ipa)
	    ("thai-tis620.txt" thai-tis620)
	    ("tibetan.txt" tibetan)
	    ("vietnamese-viscii-lower.txt" vietnamese-viscii-lower)
	    ("vietnamese-viscii-upper.txt" vietnamese-viscii-upper)
	    )
	   ("unicode/other"
	    ("lao.txt" lao)
	    )
	   )))
    (mapc #'(lambda (tables)
              (let ((undir
                     (expand-file-name (car tables) data-directory)))
                (mapc #'(lambda (args)
                          (apply 'load-unicode-mapping-table
                                 (expand-file-name (car args) undir)
                                 (cdr args)))
                      (cdr tables))))
	    parse-args)
    ;; The default-unicode-precedence-list. We set this here to default to
    ;; *not* mapping various European characters to East Asian characters;
    ;; otherwise the default-unicode-precedence-list is numerically ordered
    ;; by charset ID.
    (declare-fboundp
     (set-default-unicode-precedence-list
      '(ascii control-1 latin-iso8859-1 latin-iso8859-2 latin-iso8859-15
	greek-iso8859-7 hebrew-iso8859-8 ipa cyrillic-iso8859-5
	latin-iso8859-16 latin-iso8859-3 latin-iso8859-4 latin-iso8859-9
	vietnamese-viscii-lower vietnamese-viscii-upper arabic-iso8859-6
	jit-ucs-charset-0 japanese-jisx0208 japanese-jisx0208-1978
	japanese-jisx0212 japanese-jisx0213-1 japanese-jisx0213-2
	chinese-gb2312 chinese-sisheng chinese-big5-1 chinese-big5-2
	indian-is13194 korean-ksc5601 chinese-cns11643-1 chinese-cns11643-2
	chinese-isoir165 
	composite ethiopic indian-1-column indian-2-column jit-ucs-charset-0
	katakana-jisx0201 lao thai-tis620 thai-xtis tibetan tibetan-1-column
	latin-jisx0201 chinese-cns11643-3 chinese-cns11643-4
	chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7)))))

(defconst ccl-encode-to-ucs-2
  (eval-when-compile
    (let ((pre-existing 
           ;; This is the compiled CCL program from the assert
           ;; below. Since this file is dumped and ccl.el isn't (and
           ;; even when it was, it was dumped much later than this
           ;; one), we can't compile the program at dump time. We can
           ;; check at byte compile time that the program is as
           ;; expected, though.
           [1 16 131127 7 98872 65823 1307 5 -65536 65313 64833 1028
              147513 8 82009 255 22]))
      (when (featurep 'mule)
        ;; Check that the pre-existing constant reflects the intended
        ;; CCL program.
        (assert
         (equal pre-existing
                (ccl-compile
                 `(1 
                   ( ;; mule-to-unicode's first argument is the
                    ;; charset ID, the second its first byte
                    ;; left shifted by 7 bits masked with its
                    ;; second byte.
                    (r1 = (r1 << 7)) 
                    (r1 = (r1 | r2)) 
                    (mule-to-unicode r0 r1) 
                    (if (r0 & ,(lognot #xFFFF))
                        ;; Redisplay looks in r1 and r2 for the first
                        ;; and second bytes of the X11 font,
                        ;; respectively. For non-BMP characters we
                        ;; display U+FFFD.
                        ((r1 = #xFF)
                         (r2 = #xFD))
                      ((r1 = (r0 >> 8)) 
                       (r2 = (r0 & #xFF))))))))
         nil 
         "The pre-compiled CCL program appears broken. "))
      pre-existing))
  "CCL program to transform Mule characters to UCS-2.")

(when (featurep 'mule)
  (put 'ccl-encode-to-ucs-2 'ccl-program-idx
       (declare-fboundp
	(register-ccl-program 'ccl-encode-to-ucs-2 ccl-encode-to-ucs-2))))

(defun decode-char (quote-ucs code &optional restriction) 
  "FSF compatibility--return Mule character with Unicode codepoint CODE.
The second argument must be 'ucs, the third argument is ignored.  "
  ;; We're prepared to accept invalid Unicode in unicode-to-char, but not in
  ;; this function, which is the API that should actually be used, since
  ;; it's available in GNU and in Mule-UCS.
  (check-argument-range code #x0 #x10FFFF)
  (assert (eq quote-ucs 'ucs) t
	  "Sorry, decode-char doesn't yet support anything but the UCS.  ")
  (unicode-to-char code))

(defun encode-char (char quote-ucs &optional restriction)
  "FSF compatibility--return the Unicode code point of CHAR.
The second argument must be 'ucs, the third argument is ignored.  "
  (assert (eq quote-ucs 'ucs) t
	  "Sorry, encode-char doesn't yet support anything but the UCS.  ")
  (char-to-unicode char))

(make-coding-system
 'utf-16 'unicode
 "UTF-16"
 '(mnemonic "UTF-16"
   documentation 
   "UTF-16 Unicode encoding -- the standard (almost-) fixed-width
two-byte encoding, with surrogates.  It will be fixed-width if all
characters are in the BMP (Basic Multilingual Plane -- first 65536
codepoints).  Cannot represent characters with codepoints above
0x10FFFF (a little more than 1,000,000).  Unicode and ISO guarantee
never to encode any characters outside this range -- all the rest are
for private, corporate or internal use."
   unicode-type utf-16))

(define-coding-system-alias 'utf-16-be 'utf-16) 

(make-coding-system
 'utf-16-bom 'unicode
 "UTF-16 w/BOM"
 '(mnemonic "UTF16-BOM"
   documentation 
   "UTF-16 Unicode encoding with byte order mark (BOM) at the beginning.
The BOM is Unicode character U+FEFF -- i.e. the first two bytes are
0xFE and 0xFF, respectively, or reversed in a little-endian
representation.  It has been sanctioned by the Unicode Consortium for
use at the beginning of a Unicode stream as a marker of the byte order
of the stream, and commonly appears in Unicode files under Microsoft
Windows, where it also functions as a magic cookie identifying a
Unicode file.  The character is called \"ZERO WIDTH NO-BREAK SPACE\"
and is suitable as a byte-order marker because:

  -- it has no displayable representation
  -- due to its semantics it never normally appears at the beginning
     of a stream
  -- its reverse U+FFFE is not a legal Unicode character
  -- neither byte sequence is at all likely in any other standard
     encoding, particularly at the beginning of a stream

This coding system will insert a BOM at the beginning of a stream when
writing and strip it off when reading."
   unicode-type utf-16
   need-bom t))

(make-coding-system
 'utf-16-little-endian 'unicode
 "UTF-16 Little Endian"
 '(mnemonic "UTF16-LE"
   documentation
   "Little-endian version of UTF-16 Unicode encoding.
See `utf-16' coding system."
   unicode-type utf-16
   little-endian t))

(define-coding-system-alias 'utf-16-le 'utf-16-little-endian) 

(make-coding-system
 'utf-16-little-endian-bom 'unicode
 "UTF-16 Little Endian w/BOM"
 '(mnemonic "MSW-Unicode"
   documentation
   "Little-endian version of UTF-16 Unicode encoding, with byte order mark.
Standard encoding for representing Unicode under MS Windows.  See
`utf-16-bom' coding system."
   unicode-type utf-16
   little-endian t
   need-bom t))

(make-coding-system
 'ucs-4 'unicode
 "UCS-4"
 '(mnemonic "UCS4"
   documentation
   "UCS-4 Unicode encoding -- fully fixed-width four-byte encoding."
   unicode-type ucs-4))

(make-coding-system
 'ucs-4-little-endian 'unicode
 "UCS-4 Little Endian"
 '(mnemonic "UCS4-LE"
   documentation
   ;; #### I don't think this is permitted by ISO 10646, only Unicode.
   ;; Call it UTF-32 instead?
   "Little-endian version of UCS-4 Unicode encoding.  See `ucs-4' coding system."
   unicode-type ucs-4
   little-endian t))

(make-coding-system
 'utf-32 'unicode
 "UTF-32"
 '(mnemonic "UTF32"
   documentation
   "UTF-32 Unicode encoding -- fixed-width four-byte encoding,
characters less than #x10FFFF are not supported.  "
   unicode-type utf-32))

(make-coding-system
 'utf-32-little-endian 'unicode
 "UTF-32 Little Endian"
 '(mnemonic "UTF32-LE"
   documentation
   "Little-endian version of UTF-32 Unicode encoding.

A fixed-width four-byte encoding, characters less than #x10FFFF are not
supported.  "
   unicode-type ucs-4 little-endian t))

;; Now defined in unicode.c.

;;(make-coding-system
;; 'utf-8 'unicode
;; "UTF-8"
;; '(mnemonic "UTF8"
;;   documentation "..."
;;   unicode-type utf-8))

(make-coding-system
 'utf-8-bom 'unicode
 "UTF-8 w/BOM"
 '(mnemonic "MSW-UTF8"
   documentation
   "UTF-8 Unicode encoding, with byte order mark.
Standard encoding for representing UTF-8 under MS Windows."
   unicode-type utf-8
   little-endian t
   need-bom t))

;; Now, create jit-ucs-charset-0 entries for those characters in Windows
;; Glyph List 4 that would otherwise end up in East Asian character sets.
;; 
;; WGL4 is a character repertoire from Microsoft that gives a guideline
;; for font implementors as to what characters are sufficient for
;; pan-European support.  The intention of this code is to avoid the
;; situation where these characters end up mapping to East Asian XEmacs
;; characters, which generally clash strongly with European characters
;; both in font choice and character width; jit-ucs-charset-0 is a
;; single-width character set which comes before the East Asian character
;; sets in the default-unicode-precedence-list above.
(loop for (ucs ascii-or-latin-1)
  in '((#x2013 ?-) ;; U+2013 EN DASH
       (#x2014 ?-) ;; U+2014 EM DASH
       (#x2105 ?%) ;; U+2105 CARE OF
       (#x203e ?-) ;; U+203E OVERLINE
       (#x221f ?|) ;; U+221F RIGHT ANGLE
       (#x2584 ?|) ;; U+2584 LOWER HALF BLOCK
       (#x2588 ?|) ;; U+2588 FULL BLOCK
       (#x258c ?|) ;; U+258C LEFT HALF BLOCK
       (#x2550 ?|) ;; U+2550 BOX DRAWINGS DOUBLE HORIZONTAL
       (#x255e ?|) ;; U+255E BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
       (#x256a ?|) ;; U+256A BOX DRAWINGS VERTICAL SINGLE & HORIZONTAL DOUBLE
       (#x2561 ?|) ;; U+2561 BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
       (#x2215 ?/) ;; U+2215 DIVISION SLASH
       (#x02c9 ?`) ;; U+02C9 MODIFIER LETTER MACRON
       (#x2211 ?s) ;; U+2211 N-ARY SUMMATION
       (#x220f ?s) ;; U+220F N-ARY PRODUCT
       (#x2248 ?=) ;; U+2248 ALMOST EQUAL TO
       (#x2264 ?=) ;; U+2264 LESS-THAN OR EQUAL TO
       (#x2265 ?=) ;; U+2265 GREATER-THAN OR EQUAL TO
       (#x201c ?') ;; U+201C LEFT DOUBLE QUOTATION MARK
       (#x2026 ?.) ;; U+2026 HORIZONTAL ELLIPSIS
       (#x2212 ?-) ;; U+2212 MINUS SIGN
       (#x2260 ?=) ;; U+2260 NOT EQUAL TO
       (#x221e ?=) ;; U+221E INFINITY
       (#x2642 ?=) ;; U+2642 MALE SIGN
       (#x2640 ?=) ;; U+2640 FEMALE SIGN
       (#x2032 ?=) ;; U+2032 PRIME
       (#x2033 ?=) ;; U+2033 DOUBLE PRIME
       (#x25cb ?=) ;; U+25CB WHITE CIRCLE
       (#x25cf ?=) ;; U+25CF BLACK CIRCLE
       (#x25a1 ?=) ;; U+25A1 WHITE SQUARE
       (#x25a0 ?=) ;; U+25A0 BLACK SQUARE
       (#x25b2 ?=) ;; U+25B2 BLACK UP-POINTING TRIANGLE
       (#x25bc ?=) ;; U+25BC BLACK DOWN-POINTING TRIANGLE
       (#x2192 ?=) ;; U+2192 RIGHTWARDS ARROW
       (#x2190 ?=) ;; U+2190 LEFTWARDS ARROW
       (#x2191 ?=) ;; U+2191 UPWARDS ARROW
       (#x2193 ?=) ;; U+2193 DOWNWARDS ARROW
       (#x2229 ?=) ;; U+2229 INTERSECTION
       (#x2202 ?=) ;; U+2202 PARTIAL DIFFERENTIAL
       (#x2261 ?=) ;; U+2261 IDENTICAL TO
       (#x221a ?=) ;; U+221A SQUARE ROOT
       (#x222b ?=) ;; U+222B INTEGRAL
       (#x2030 ?=) ;; U+2030 PER MILLE SIGN
       (#x266a ?=) ;; U+266A EIGHTH NOTE
       (#x2020 ?*) ;; U+2020 DAGGER
       (#x2021 ?*) ;; U+2021 DOUBLE DAGGER
       (#x2500 ?|) ;; U+2500 BOX DRAWINGS LIGHT HORIZONTAL
       (#x2502 ?|) ;; U+2502 BOX DRAWINGS LIGHT VERTICAL
       (#x250c ?|) ;; U+250C BOX DRAWINGS LIGHT DOWN AND RIGHT
       (#x2510 ?|) ;; U+2510 BOX DRAWINGS LIGHT DOWN AND LEFT
       (#x2518 ?|) ;; U+2518 BOX DRAWINGS LIGHT UP AND LEFT
       (#x2514 ?|) ;; U+2514 BOX DRAWINGS LIGHT UP AND RIGHT
       (#x251c ?|) ;; U+251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT
       (#x252c ?|) ;; U+252C BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
       (#x2524 ?|) ;; U+2524 BOX DRAWINGS LIGHT VERTICAL AND LEFT
       (#x2534 ?|) ;; U+2534 BOX DRAWINGS LIGHT UP AND HORIZONTAL
       (#x253c ?|) ;; U+253C BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
       (#x02da ?^) ;; U+02DA RING ABOVE
       (#x2122 ?\xa9) ;; U+2122 TRADE MARK SIGN, ?©

       (#x0132 ?\xe6) ;; U+0132 LATIN CAPITAL LIGATURE IJ, ?æ
       (#x013f ?\xe6) ;; U+013F LATIN CAPITAL LETTER L WITH MIDDLE DOT, ?æ

       (#x0133 ?\xe6) ;; U+0133 LATIN SMALL LIGATURE IJ, ?æ
       (#x0140 ?\xe6) ;; U+0140 LATIN SMALL LETTER L WITH MIDDLE DOT, ?æ
       (#x0149 ?\xe6) ;; U+0149 LATIN SMALL LETTER N PRECEDED BY APOSTROPH,?æ

       (#x2194 ?|) ;; U+2194 LEFT RIGHT ARROW
       (#x2660 ?*) ;; U+2660 BLACK SPADE SUIT
       (#x2665 ?*) ;; U+2665 BLACK HEART SUIT
       (#x2663 ?*) ;; U+2663 BLACK CLUB SUIT
       (#x2592 ?|) ;; U+2592 MEDIUM SHADE
       (#x2195 ?|) ;; U+2195 UP DOWN ARROW

       (#x2113 ?\xb9) ;; U+2113 SCRIPT SMALL L, ?¹
       (#x215b ?\xbe) ;; U+215B VULGAR FRACTION ONE EIGHTH, ?¾
       (#x215c ?\xbe) ;; U+215C VULGAR FRACTION THREE EIGHTHS, ?¾
       (#x215d ?\xbe) ;; U+215D VULGAR FRACTION FIVE EIGHTHS, ?¾
       (#x215e ?\xbe) ;; U+215E VULGAR FRACTION SEVEN EIGHTHS, ?¾
       (#x207f ?\xbe) ;; U+207F SUPERSCRIPT LATIN SMALL LETTER N, ?¾
  
       ;; These are not in WGL 4, but are IPA characters that should not
       ;; be double width. They are the only IPA characters that both
       ;; occur in packages/mule-packages/leim/ipa.el and end up in East
       ;; Asian character sets when that file is loaded in an XEmacs
       ;; without packages.
       (#x2197 ?|) ;; U+2197 NORTH EAST ARROW
       (#x2199 ?|) ;; U+2199 SOUTH WEST ARROW
       (#x2191 ?|) ;; U+2191 UPWARDS ARROW
       (#x207f ?\xb9)) ;; U+207F SUPERSCRIPT LATIN SMALL LETTER N, ?¹
  with decoded = nil
  with syntax-table = (standard-syntax-table)
  initially (unless (featurep 'mule) (return))
  ;; This creates jit-ucs-charset-0 entries because:
  ;;
  ;;    1. If the tables are dumped, it is run at dump time before they are
  ;;    dumped, and as such before the relevant conversions are available
  ;;    (they are made available in mule/general-late.el). 
  ;;
  ;;    2. If the tables are not dumped, it is run at dump time, long before
  ;;    any of the other mappings are available.
  ;;
  do
  (setq decoded (decode-char 'ucs ucs))
  (assert (eq (declare-fboundp (char-charset decoded))
              'jit-ucs-charset-0) nil 
              "Unexpected Unicode decoding behavior.  ")
  (modify-syntax-entry decoded
                       (string 
                        (char-syntax ascii-or-latin-1))
                       syntax-table))

;; *Sigh*, declarations needs to be at the start of the line to be picked up
;; by make-docfile. Not so much an issue with ccl-encode-to-ucs-2, which we
;; don't necessarily want to advertise, but the following are important.

;; Create all the Unicode error sequences, normally as jit-ucs-charset-0
;; characters starting at U+200000 (which isn't a valid Unicode code
;; point). Make them available to user code. 
(defvar unicode-error-default-translation-table
  (loop 
    with char-table = (make-char-table 'generic)
    for i from ?\x00 to ?\xFF
    initially (unless (featurep 'mule) (return))
    do
    (put-char-table (aref
                     ;; #xd800 is the first leading surrogate;
                     ;; trailing surrogates must be in the range
                     ;; #xdc00-#xdfff. These examples are not, so we
                     ;; intentionally provoke an error sequence.
                     (decode-coding-string (format "\xd8\x00\x00%c" i)
                                           'utf-16-be)
                     3)
                    i
                    char-table)
    finally return char-table)
  "Translation table mapping Unicode error sequences to Latin-1 chars.

To transform XEmacs Unicode error sequences to the Latin-1 characters that
correspond to the octets on disk, you can use this variable.  ")

(defvar unicode-invalid-sequence-regexp-range
  (and (featurep 'mule)
       (format "%c%c-%c"
               (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0)
               (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3)
               (aref (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be) 3)))
  "Regular expression range to match Unicode error sequences in XEmacs.

Invalid Unicode sequences on input are represented as XEmacs
characters with values stored as the keys in
`unicode-error-default-translation-table', one character for each
invalid octet.  You can use this variable (with `re-search-forward' or
`skip-chars-forward') to search for such characters; see also
`unicode-error-translate-region'.  ")

;; Check that the lookup table is correct, and that all the actual error
;; sequences are caught by the regexp.
(with-temp-buffer
  (loop
    for i from ?\x00 to ?\xFF
    with to-check = (make-string 20 ?\x20) 
    initially (unless (featurep 'mule) (return))
    do 
    (delete-region (point-min) (point-max))
    (insert to-check)
    (goto-char 10)
    (insert (decode-coding-string (format "\xd8\x00\x00%c" i)
                                  'utf-16-be))
    (backward-char)
    (assert (= i (get-char-table (char-after (point)) 
                                 unicode-error-default-translation-table))
            (format "Char ?\\x%x not the expected error sequence!"
                    i))

    (goto-char (point-min))
    ;; Comment out until the issue in
    ;; 18179.49815.622843.336527@parhasard.net is fixed.
    (assert t ; (re-search-forward (concat "[" 
              ;                        unicode-invalid-sequence-regexp-range
              ;                        "]"))
            nil
            (format "Could not find char ?\\x%x in buffer" i))))

(defun frob-unicode-errors-region (frob-function begin end &optional buffer)
  "Call FROB-FUNCTION on the Unicode error sequences between BEGIN and END.

Optional argument BUFFER specifies the buffer that should be examined for
such sequences.  "
  (check-argument-type #'functionp frob-function)
  (check-argument-range begin (point-min buffer) (point-max buffer))
  (check-argument-range end (point-min buffer) (point-max buffer))
    (save-excursion
      (save-restriction
	(if buffer (set-buffer buffer))
	(narrow-to-region begin end)
	(goto-char (point-min))
	(while end
	  (setq begin
		(progn
		  (skip-chars-forward
		   (concat "^" unicode-invalid-sequence-regexp-range))
		  (point))
		end (and (not (= (point) (point-max)))
			 (progn
			   (skip-chars-forward
			    unicode-invalid-sequence-regexp-range)
			   (point))))
	  (if end
	      (funcall frob-function begin end))))))

(defun unicode-error-translate-region (begin end &optional buffer table)
  "Translate the Unicode error sequences in BUFFER between BEGIN and END.

The error sequences are transformed, by default, into the ASCII,
control-1 and latin-iso8859-1 characters with the numeric values
corresponding to the incorrect octets encountered.  This is achieved
by using `unicode-error-default-translation-table' (which see) for
TABLE; you can change this by supplying another character table,
mapping from the error sequences to the desired characters.  "
    (unless table (setq table unicode-error-default-translation-table))
    (frob-unicode-errors-region
     (lambda (start finish)
       (translate-region start finish table))
     begin end buffer))

;; Sure would be nice to be able to use defface here. 
(copy-face 'highlight 'unicode-invalid-sequence-warning-face)

(unless (featurep 'mule)
  ;; We do this in such a roundabout way--instead of having the above defun
  ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have
  ;; make-docfile.c pick up symbol and function documentation correctly. An
  ;; alternative approach would be to fix make-docfile.c to be able to read
  ;; Lisp.
  (mapc #'unintern
        '(ccl-encode-to-ucs-2 unicode-error-default-translation-table
          unicode-invalid-regexp-range frob-unicode-errors-region
          unicode-error-translate-region unicode-query-coding-region
          unicode-query-coding-skip-chars-arg)))

;; #### UTF-7 is not yet implemented, and it's tricky to do.  There's
;; an implementation in appendix A.1 of the Unicode Standard, Version
;; 2.0, but I don't know its licensing characteristics.

; (make-coding-system
;  'utf-7 'unicode
;  "UTF-7"
;  '(mnemonic "UTF7"
;    documentation;    "UTF-7 Unicode encoding -- 7-bit-ASCII modal Internet-mail-compatible
; encoding especially designed for headers, with the following
; properties:

;   -- Only characters that are considered safe for passing through any mail
;      gateway without damage are used.

;   -- This is a modal encoding, with two states.  The first, default
;      state encodes the most common Unicode characters (upper and
;      lowercase letters, digits, and 9 common punctuation marks) as
;      themselves, and the second state, entered using '+' and
;      terminated with '-' or any character disallowed in state 2,
;      encodes any Unicode characters by first converting to UTF-16,
;      most significant byte first, and then to a slightly modified
;      Base64 encoding. (Thus, UTF-7 has the same limitations on the
;      characters it can encode as UTF-16.)

;   -- The modified Base64 encoding deviates from standard Base64 in
;      that it omits the `=' pad character.  This is eliminated so as to
;      avoid conflicts with the use of `=' as an escape in the
;      Quoted-Printable encoding and the related Q encoding for headers:
;      With this modification, non-whitespace chars in UTF-7 will be
;      represented in Quoted-Printable and in Q as-is, with no further
;      encoding.

; For more information, see Appendix A.1 of The Unicode Standard 2.0, or
; wherever it is in v3.0."
;    unicode-type utf-7))