changeset 3707:f6f6fc9eb269

[xemacs-hg @ 2006-11-28 21:20:22 by aidan] Better language behaviour on startup.
author aidan
date Tue, 28 Nov 2006 21:20:37 +0000
parents 4ca1ef2bdb6a
children e4e19e557673
files lisp/ChangeLog lisp/dumped-lisp.el lisp/mule/cyrillic.el lisp/mule/general-late.el lisp/mule/mule-cmds.el src/ChangeLog src/device-x.c src/faces.c src/window.c
diffstat 9 files changed, 272 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Tue Nov 28 16:09:47 2006 +0000
+++ b/lisp/ChangeLog	Tue Nov 28 21:20:37 2006 +0000
@@ -1,3 +1,46 @@
+2006-11-28  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* mule/cyrillic.el ("Cyrillic-KOI8"):
+	* mule/cyrillic.el ("Cyrillic-ALT"):
+	Add information on the native coding system of the machine to the
+	language environment definition for Cyrillic. 
+
+	* mule/general-late.el:
+	New file, for dumped Mule code that needs to be run after the
+	language support has been loaded. 
+
+	* mule/mule-cmds.el:
+	* mule/mule-cmds.el (set-language-info-alist):
+	Return the new language environment name instead of nil. 
+
+	* mule/mule-cmds.el (langenv-to-locale-hash): Removed.
+	This was relevant because coding_system_of_xrm_database called
+	get-language-environment-from-locale 1307 times on startup, so the
+	hash table made a difference. I've changed c_s_o_x_d to normally
+	not call Lisp, and that makes this caching unnecessary.
+
+	* mule/mule-cmds.el (posix-charset-to-coding-system-hash): New.
+	A map from charsets as found in POSIX locales, with
+	non-alphanumeric character stripped, to XEmacs coding systems. 
+	* mule/mule-cmds.el (parse-posix-locale-string): New.
+	Parse a POSIX locale string into a language, region, charset,
+	modifiers quad. 
+	* mule/mule-cmds.el (create-variant-language-environment): New.
+	Create a version of a language environment which differs in its
+	name and in the associated coding systems from a given language
+	environment. 
+	* mule/mule-cmds.el (get-language-environment-from-locale):
+	Rework to better pay attention to the POSIX locale, and to create
+	language environments on the fly if the coding system of a given
+	language differs from that available in the environment.
+	* mule/mule-cmds.el (set-language-environment-coding-systems):
+	Update a comment. 
+
+2006-11-28  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* dumped-lisp.el (preloaded-file-list):
+	Load mule/general-late when we're in a Mule build. 
+
 2004-06-28  Nix  <nix@esperi.org.uk>
 
 	* cmdloop.el (truncate-command-history-for-gc): Delay
--- a/lisp/dumped-lisp.el	Tue Nov 28 16:09:47 2006 +0000
+++ b/lisp/dumped-lisp.el	Tue Nov 28 21:20:37 2006 +0000
@@ -239,6 +239,9 @@
        (when (and (featurep 'mule) (valid-console-type-p 'mswindows))
 	 "mule/mule-msw-init-late")
 
+       (when (featurep 'mule)
+	 "mule/general-late")
+
 ;;; mule-load.el ends here
 
 ;; preload InfoDock stuff.  should almost certainly not be here if
--- a/lisp/mule/cyrillic.el	Tue Nov 28 16:09:47 2006 +0000
+++ b/lisp/mule/cyrillic.el	Tue Nov 28 21:20:37 2006 +0000
@@ -176,6 +176,7 @@
 (set-language-info-alist
  "Cyrillic-KOI8" '((charset cyrillic-iso8859-5)
 		   (coding-system koi8-r)
+		   (native-coding-system koi8-r)
 		   (coding-priority koi8-r)
 		   (input-method . "cyrillic-yawerty")
 		   (features cyril-util)
@@ -282,6 +283,7 @@
 (set-language-info-alist
  "Cyrillic-ALT" '((charset cyrillic-iso8859-5)
 		  (coding-system alternativnyj)
+		  (native-coding-system alternativnyj)
 		  (coding-priority alternativnyj)
 		  (input-method . "cyrillic-yawerty")
 		  (features cyril-util)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lisp/mule/general-late.el	Tue Nov 28 21:20:37 2006 +0000
@@ -0,0 +1,55 @@
+;;; general-late.el --- General Mule code that needs to be run late when
+;;                      dumping.
+;; Copyright (C) 2006 Free Software Foundation
+
+;; Author: Aidan Kehoe
+
+;; This file is part of XEmacs.
+
+;; XEmacs is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; XEmacs is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with XEmacs; see the file COPYING.  If not, write to the
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;;; Commentary:
+
+;;; Code:
+
+;; The variable is declared in mule-cmds.el; it's initialised here, to give
+;; the language-specific code a chance to create its coding systems.
+
+(setq posix-charset-to-coding-system-hash
+      (eval-when-compile
+	(let ((res (make-hash-table :test 'equal)))
+	  (dolist (coding-system (coding-system-list) res)
+	    (setq coding-system
+		  (symbol-name (coding-system-name coding-system)))
+	    (unless (string-match #r"\(-unix\|-mac\|-dos\)$" coding-system)
+	      (puthash 
+	       (replace-in-string (downcase coding-system) "[^a-z0-9]" "")
+	       (intern coding-system) res)))))
+
+      ;; In a thoughtless act of cultural imperialism, move English, German
+      ;; and Japanese to the front of language-info-alist to make start-up a
+      ;; fraction faster for those languages.
+      language-info-alist
+      (cons (assoc "Japanese" language-info-alist)
+	    (remassoc "Japanese" language-info-alist))
+      language-info-alist 
+      (cons (assoc "German" language-info-alist)
+	    (remassoc "German" language-info-alist))
+      language-info-alist
+      (cons (assoc "English" language-info-alist)
+	    (remassoc "English" language-info-alist)))
+
+;;; general-late.el ends here
\ No newline at end of file
--- a/lisp/mule/mule-cmds.el	Tue Nov 28 16:09:47 2006 +0000
+++ b/lisp/mule/mule-cmds.el	Tue Nov 28 21:20:37 2006 +0000
@@ -242,7 +242,8 @@
   ;; appropriately.  We just use a filter.
   (while alist
     (set-language-info lang-env (car (car alist)) (cdr (car alist)))
-    (setq alist (cdr alist))))
+    (setq alist (cdr alist)))
+  lang-env)
 
 (defun read-language-name (key prompt &optional default)
   "Read a language environment name which has information for KEY.
@@ -1012,29 +1013,115 @@
 ;; auto-language-alist deleted.  We have a more sophisticated system,
 ;; with the locales stored in the language data.
 
-(defconst langenv-to-locale-hash (make-hash-table :test 'equal))
+;; Initialised with an eval-when-compile in mule/general-late.el, which is
+;; compiled after all the language support--and, thus, minority Chinese
+;; coding systems and so on--has been loaded.
+(defvar posix-charset-to-coding-system-hash nil
+  "A map from the POSIX locale charset versions of the defined coding
+systems' names, with all alpha-numeric characters removed, to the actual
+coding system names.  Used at startup when working out which coding system
+should be the default for the locale.  ")
+
+(defun parse-posix-locale-string (locale-string)
+  "Return values \(LANGUAGE REGION CHARSET MODIFIERS\) given LOCALE-STRING.
+
+LOCALE-STRING should be a POSIX locale.  If it cannot be parsed as such, this
+function returns nil. "
+  (let (language region charset modifiers locinfo)
+    (setq locale-string (downcase locale-string))
+    (cond ((string-match
+	    #r"^\([a-z0-9]\{2,2\}\)\(_[a-z0-9]\{2,2\}\)?\(\.[^@]*\)?\(@.*\)?$"
+	    locale-string)
+	   (setq language (match-string 1 locale-string)
+		 region (match-string 2 locale-string)
+		 charset (match-string 3 locale-string)
+		 modifiers (match-string 4 locale-string)
+		 region (and region (replace-in-string region "^_" ""))
+		 charset (and charset (replace-in-string charset #r"^\." ""))
+		 modifiers (and modifiers
+				(replace-in-string modifiers "^@" "")))
+	   (when (and modifiers (equal modifiers "euro") (null charset))
+	     ;; Not ideal for Latvian, say, but I don't have any locales
+	     ;; where the @euro modifier doesn't mean ISO-8859-15 in the 956
+	     ;; I have.
+	     (setq charset "iso-8859-15"))
+	   (values language region charset modifiers))
+	  ((and (string-match "^[a-z0-9]+$" locale-string)
+		(assoc-ignore-case locale-string language-info-alist))
+	   (setq language (get-language-info locale-string 'locale)
+		 language (if (listp language) (car language) language))
+	   (values language region charset modifiers))
+	  ((string-match #r"^\([a-z0-9]+\)\.\([a-z0-9]+\)$" locale-string)
+	   (when (assoc-ignore-case
+		  (setq locinfo (match-string 1 locale-string))
+		  language-info-alist)
+	     (setq language (get-language-info locinfo 'locale)
+		   language (if (listp language) (car language) language)))
+	   (setq charset (match-string 2 locale-string))
+	   (values language region charset modifiers)))))
+
+(defun create-variant-language-environment (langenv coding-system)
+  "Create a variant of LANGENV with CODING-SYSTEM as its coding systems.
+
+The coding systems in question are those described in the
+`set-language-info' docstring with the property names of
+`native-coding-system' and `coding-system'.  The name of the new language
+environment is the name of the old language environment, followed by
+CODING-SYSTEM in parentheses.  Returns the name of the new language
+environment.  "
+  (check-coding-system coding-system)
+  (if (symbolp langenv) (setq langenv (symbol-name langenv)))
+  (unless (setq langenv
+		(assoc-ignore-case langenv language-info-alist))
+    (error 'wrong-type-argument "Not a known language environment"))
+  (set-language-info-alist
+   (if (string-match " ([^)]+)$" (car langenv))
+       (replace-match (format " (%s)" 
+                              (upcase (symbol-name
+                                       (coding-system-name coding-system))))
+                      nil nil langenv)
+     (format "%s (%s)" (car langenv)
+             (upcase (symbol-name (coding-system-name coding-system)))))
+   (destructive-plist-to-alist 
+    (plist-put (plist-put (alist-to-plist (cdr langenv)) 'native-coding-system
+			  coding-system) 'coding-system
+			  (cons coding-system
+				(cdr (assoc 'coding-system (cdr langenv))))))))
 
 (defun get-language-environment-from-locale (locale)
   "Convert LOCALE into a language environment.
 LOCALE is a C library locale string, as returned by `current-locale'.
 Uses the `locale' property of the language environment."
-  (or (gethash locale langenv-to-locale-hash)
-      (let ((retval
-	     (block langenv
-	       (dolist (langcons language-info-alist)
-		 (let* ((lang (car langcons))
-			(locs (get-language-info lang 'locale))
-			(case-fold-search t))
-		   (dolist (loc (if (listp locs) locs (list locs)))
-		     (if (cond ((functionp loc)
-				(funcall loc locale))
-			       ((stringp loc)
-				(string-match
-				 (concat "^" loc "\\([^A-Za-z0-9]\\|$\\)")
-				 locale)))
-			 (return-from langenv lang))))))))
-	(puthash locale retval langenv-to-locale-hash)
-	retval)))
+  (block langenv
+    (multiple-value-bind (language region charset modifiers)
+	(parse-posix-locale-string locale)
+      (let ((case-fold-search t)
+	    (desired-coding-system
+	     (and charset (gethash (replace-in-string charset "[^a-z0-9]" "")
+                                   posix-charset-to-coding-system-hash)))
+	    lang locs)
+	(dolist (langcons language-info-alist)
+	  (setq lang (car langcons)
+		locs (get-language-info lang 'locale))
+	  (dolist (loc (if (listp locs) locs (list locs)))
+	    (cond ((functionp loc)
+		   (if (funcall loc locale)
+		       (return-from langenv lang)))
+		  ((stringp loc)
+		   (when (or (equal loc language)
+			     (string-match
+			      (format "^%s\\([^A-Za-z0-9]\\|$\\)" loc)
+			      locale))
+		     (if (or (null desired-coding-system)
+			     (and desired-coding-system
+				  (eq desired-coding-system
+				      (get-language-info
+				       lang
+				       'native-coding-system))))
+			 (return-from langenv lang)
+		       (return-from langenv
+			 (create-variant-language-environment
+			  lang desired-coding-system))))))))))))
 
 (defun mswindows-get-language-environment-from-locale (ms-locale)
   "Convert MS-LOCALE (an MS Windows locale) into a language environment.
@@ -1250,11 +1337,19 @@
       ;; set the default buffer coding system from the first element of the
       ;; list in the `coding-priority' property, under Unix.  Under Windows, it
       ;; should stay at `mswindows-multibyte', which will reference the current
-      ;; code page. (#### Does it really make sense the set the Unix default
+      ;; code page. ([Does it really make sense to set the Unix default
       ;; that way?  NOTE also that it's not the same as the native coding
       ;; system for the locale, which is correct -- the form we choose for text
-      ;; files should not necessarily have any relevant to whether we're in a
-      ;; Shift-JIS, EUC-JP, JIS, or other Japanese locale.)
+      ;; files should not necessarily have any relevance to whether we're in a
+      ;; Shift-JIS, EUC-JP, JIS, or other Japanese locale.])
+      ;;
+      ;;     On Unix--with the exception of Mac OS X--there is no way to
+      ;;     know for certain what coding system to use for file names, and
+      ;;     the environment is the best guess. If a particular user's
+      ;;     preferences differ from this, then that particular user needs
+      ;;     to edit ~/.xemacs/init.el. Aidan Kehoe, Sun Nov 26 18:11:31 CET
+      ;;     2006. OS X uses an almost-normal-form version of UTF-8. 
+
       (unless (memq system-type '(windows-nt cygwin32))
 	(set-default-buffer-file-coding-system
 	 (maybe-change-coding-system-with-eol default-coding eol-type))))
--- a/src/ChangeLog	Tue Nov 28 16:09:47 2006 +0000
+++ b/src/ChangeLog	Tue Nov 28 21:20:37 2006 +0000
@@ -1,3 +1,15 @@
+2006-11-28  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* device-x.c:
+	* device-x.c (coding_system_of_xrm_database):
+	Cache the last db argument and resulting coding system, and return
+	them--instead of calling Lisp--if the DB is the same pointer
+	arument as last time. 
+	* faces.c (default_face_font_info):
+	* window.c (window_displayed_height):
+	Behave more gracefully if called when we have no information about
+	the dimensions of the default face and window. 
+
 2006-11-28  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* doprnt.c (emacs_doprnt_1):
--- a/src/device-x.c	Tue Nov 28 16:09:47 2006 +0000
+++ b/src/device-x.c	Tue Nov 28 21:20:37 2006 +0000
@@ -35,6 +35,7 @@
 #include "elhash.h"
 #include "events.h"
 #include "faces.h"
+#include "file-coding.h"
 #include "frame-impl.h"
 #include "process.h"		/* for egetenv */
 #include "redisplay.h"
@@ -192,9 +193,27 @@
 coding_system_of_xrm_database (XrmDatabase USED_IF_MULE (db))
 {
 #ifdef MULE
-  const Extbyte *locale = XrmLocaleOfDatabase (db);
-  Lisp_Object localestr = build_ext_string (locale, Qbinary);
-  return call1 (Qget_coding_system_from_locale, localestr);
+  const Extbyte *locale;
+  Lisp_Object localestr;
+  static XrmDatabase last_xrm_db; 
+
+  /* This will always be zero, nil or an actual coding system object, so no
+     need to worry about GCPROing it--it'll be protected from garbage
+     collection by means of Vcoding_system_hash_table in file-coding.c. */
+  static Lisp_Object last_coding_system; 
+
+  if (db == last_xrm_db)
+    {
+      return last_coding_system; 
+    }
+
+  last_xrm_db = db;
+
+  locale = XrmLocaleOfDatabase (db);
+  localestr = build_ext_string (locale, Qbinary);
+  last_coding_system = call1 (Qget_coding_system_from_locale, localestr);
+
+  return last_coding_system;
 #else
   return Qbinary;
 #endif
--- a/src/faces.c	Tue Nov 28 16:09:47 2006 +0000
+++ b/src/faces.c	Tue Nov 28 21:20:37 2006 +0000
@@ -719,6 +719,8 @@
 			int *height, int *width, int *proportional_p)
 {
   Lisp_Object font_instance;
+  struct face_cachel *cachel;
+  struct window *w = NULL;
 
   if (noninteractive)
     {
@@ -735,16 +737,16 @@
       return;
     }
 
-  /* We use ASCII here.  This is probably reasonable because the
-     people calling this function are using the resulting values to
-     come up with overall sizes for windows and frames. */
-  if (WINDOWP (domain))
+  /* We use ASCII here.  This is reasonable because the people calling this
+     function are using the resulting values to come up with overall sizes
+     for windows and frames. 
+
+     It's possible for this function to get called when the face cachels
+     have not been initialized--put a call to debug-print in
+     init-locale-at-early-startup to see it happen. */
+
+  if (WINDOWP (domain) && (w = XWINDOW (domain)) && w->face_cachels)
     {
-      struct face_cachel *cachel;
-      struct window *w = XWINDOW (domain);
-
-      /* #### It's possible for this function to get called when the
-	 face cachels have not been initialized.  I don't know why. */
       if (!Dynarr_length (w->face_cachels))
         reset_face_cachels (w);
       cachel = WINDOW_FACE_CACHEL (w, DEFAULT_INDEX);
@@ -755,6 +757,11 @@
       font_instance = FACE_FONT (Vdefault_face, domain, Vcharset_ascii);
     }
 
+  if (UNBOUNDP (font_instance))
+    {
+      return;
+    }
+
   if (height)
     *height = XFONT_INSTANCE (font_instance)->height;
   if (width)
--- a/src/window.c	Tue Nov 28 16:09:47 2006 +0000
+++ b/src/window.c	Tue Nov 28 21:20:37 2006 +0000
@@ -4223,7 +4223,8 @@
       default_face_height_and_width (window, &defheight, &defwidth);
       /* #### This probably needs to know about the clipping area once a
          final definition is decided on. */
-      num_lines += ((ypos2 - ypos1) / defheight);
+      if (defheight)
+        num_lines += ((ypos2 - ypos1) / defheight);
     }
   else
     {