changeset 4468:a78d697ccd2c

Import and extend GNU's descr-text.el, supporting prefix argument for C-x = 2008-05-25 Aidan Kehoe <kehoea@parhasard.net> * descr-text.el: New. Taken from GNU's GPLV2 version of 2007-02-14, with modifications for XEmacs support and extensions for Unihan.txt support and db/dbm caches. * simple.el (what-cursor-position): Support an optional prefix argument, as does GNU, calling #'describe-char to giving more detail on the character at point, notably from UnicodeData and (in our case, optionally) Unihan.txt. * syntax.el (syntax-after): Make this available for the sake of #'describe-char. * mule/mule-cmds.el (iso-2022-control-alist): Make this available, for the sake of #'encoded-string-description and #'describe-char. * mule/mule-cmds.el (encoded-string-description): Make this available, for the sake of #'describe-char. * unicode.el (unicode-error-default-translation-table): Make this a char table of type generic, not of type char. Makes it possible to have the relevant logic in #'describe-char reasonably clear; also, and this is undocumented, makes it much easier to implement #'frob-unicode-errors-region. I should document this, and revise #'frob-unicode-errors-region.
author Aidan Kehoe <kehoea@parhasard.net>
date Sun, 25 May 2008 21:11:35 +0200
parents 23ef20edf6ba
children c661944aa259 017044266245
files lisp/ChangeLog lisp/descr-text.el lisp/mule/mule-cmds.el lisp/simple.el lisp/syntax.el lisp/unicode.el
diffstat 6 files changed, 1343 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Wed May 21 21:09:20 2008 +0200
+++ b/lisp/ChangeLog	Sun May 25 21:11:35 2008 +0200
@@ -1,3 +1,27 @@
+2008-05-25  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* descr-text.el: New.
+	Taken from GNU's GPLV2 version of 2007-02-14, with modifications
+	for XEmacs support and extensions for Unihan.txt support and
+	db/dbm caches.
+	* simple.el (what-cursor-position): 
+	Support an optional prefix argument, as does GNU, calling
+	#'describe-char to giving more detail on the character at point,
+	notably from UnicodeData and (in our case, optionally) Unihan.txt.
+	* syntax.el (syntax-after): 
+	Make this available for the sake of #'describe-char. 
+	* mule/mule-cmds.el (iso-2022-control-alist):
+	Make this available, for the sake of #'encoded-string-description
+	and #'describe-char. 
+	* mule/mule-cmds.el (encoded-string-description):
+	Make this available, for the sake of #'describe-char.
+	* unicode.el (unicode-error-default-translation-table): 
+	Make this a char table of type generic, not of type char. Makes it
+	possible to have the relevant logic in #'describe-char reasonably
+	clear; also, and this is undocumented, makes it much easier to
+	implement #'frob-unicode-errors-region. I should document this,
+	and revise #'frob-unicode-errors-region. 
+
 2008-05-14  Stephen J. Turnbull  <stephen@xemacs.org>
 
 	* subr.el (add-to-list): Fix Aidan's last commit.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lisp/descr-text.el	Sun May 25 21:11:35 2008 +0200
@@ -0,0 +1,1284 @@
+;;; descr-text.el --- describe text mode
+
+;; Copyright (C) 1994, 1995, 1996, 2001, 2002, 2003, 2004,
+;;   2005, 2006, 2007 Free Software Foundation, Inc.
+
+;; Author: Boris Goldowsky <boris@gnu.org>
+;; Maintainer: FSF
+;; Keywords: faces, i18n, Unicode, multilingual
+
+;; This file is part of XEmacs.
+
+;; XEmacs is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; XEmacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with XEmacs; see the file COPYING.  If not, write to the
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
+
+;;; Commentary:
+
+;;; Describe-Text Mode.
+
+;;; Code:
+
+(eval-when-compile (require 'wid-edit))
+
+;;; Describe-Text Utilities.
+
+(defun describe-text-widget (widget)
+  "Insert text to describe WIDGET in the current buffer."
+  ;; XEmacs change; use the widget function.
+  (widget-create 'push-button
+                 :notify `(lambda (&rest ignore)
+                            (widget-browse ',widget))
+                 :help-echo
+                 "mouse-2, RET: browse this widget"
+                 (symbol-name (if (symbolp widget)
+                                  widget
+                                (car widget))))
+  (widget-insert " ")
+  (widget-create 'info-link
+                 :tag "Widget help"
+                 :help-echo
+                 "Read widget documentation"
+                 "(widget)Top"))
+
+(defun describe-text-sexp (sexp)
+  "Insert a short description of SEXP in the current buffer."
+  ;; XEmacs change; use the widget functions. 
+  (let ((pp (condition-case signal
+		(pp-to-string sexp)
+	      (error (prin1-to-string signal)))))
+    (when (string-match "\n\\'" pp)
+      (setq pp (substring pp 0 (1- (length pp)))))
+    (if (cond ((string-match "\n" pp)
+	       nil)
+	      ((> (length pp) (- (window-width) (current-column)))
+	       nil)
+	      (t t))
+	(widget-insert pp)
+      (widget-create 'push-button
+                     :notify `(lambda (&rest ignore)
+                                (with-output-to-temp-buffer
+                                    "*Pp Eval Output*"
+                                  (princ ',pp)))
+                     :help-echo
+                     "mouse-2, RET: pretty print value in another buffer"
+                     "[Show]"))))
+
+(defun describe-property-list (properties)
+  "Insert a description of PROPERTIES in the current buffer.
+PROPERTIES should be a list of overlay or text properties.
+The `category', `face' and `font-lock-face' properties are made
+into help buttons that call `describe-text-category' or
+`describe-face' when pushed."
+  ;; Sort the properties by the size of their value.
+  (dolist (elt (sort (let (ret)
+		       (while properties
+			 (push (list (pop properties) (pop properties)) ret))
+		       ret)
+		     (lambda (a b) (string< (prin1-to-string (nth 0 a) t)
+					    (prin1-to-string (nth 0 b) t)))))
+    (let ((key (nth 0 elt))
+	  (value (nth 1 elt)))
+      ;; XEmacs change; use #'widget-insert, #'widget-create
+      (widget-insert (propertize (format "  %-20s " key)
+                                 'face 'hyper-apropos-heading))
+      (cond ((eq key 'category)
+             (widget-create
+              'push-button
+	      :notify `(lambda (&rest ignore)
+			 (describe-text-category ',value))
+	      :help-echo "mouse-2, RET: describe this category"
+              (symbol-name value)))
+            ((memq key '(face font-lock-face mouse-face))
+             (widget-create
+              'push-button
+              :notify (lexical-let
+                          ((value-name (symbol-name value)))
+                        (lambda (&rest ignore)
+                          (hyper-describe-face (intern value-name))))
+	      :help-echo "mouse-2, RET: describe this face"
+              (format "%S" value)))
+            ((widgetp value)
+	     (describe-text-widget value))
+	    (t
+	     (describe-text-sexp value))))
+    (insert "\n")))
+
+;;; Describe-Text Commands.
+
+(defun describe-text-category (category)
+  "Describe a text property category."
+  (interactive "SCategory: ")
+  ; (help-setup-xref (list #'describe-text-category category) (interactive-p))
+  (save-excursion
+    (with-output-to-temp-buffer "*Help*"
+      (set-buffer standard-output)
+      (insert "Category " (format "%S" category) ":\n\n")
+      (describe-property-list (symbol-plist category))
+      (goto-char (point-min)))))
+
+;;;###autoload
+(defun describe-text-properties (pos &optional output-buffer)
+  "Describe widgets, buttons, overlays and text properties at POS.
+Interactively, describe them for the character after point.
+If optional second argument OUTPUT-BUFFER is non-nil,
+insert the output into that buffer, and don't initialize or clear it
+otherwise."
+  (interactive "d")
+  (if (>= pos (point-max))
+      (error "No character follows specified position"))
+  (if output-buffer
+      (describe-text-properties-1 pos output-buffer)
+    (if (not (or (text-properties-at pos) ; (overlays-at pos)))
+                 ;; XEmacs change. 
+                 (extents-at pos)))
+	(message "This is plain text.")
+      (let ((buffer (current-buffer))
+	    (target-buffer "*Help*"))
+	(when (eq buffer (get-buffer target-buffer))
+	  (setq target-buffer "*Help*<2>"))
+	(save-excursion
+	  (with-output-to-temp-buffer target-buffer
+	    (set-buffer standard-output)
+	    (setq output-buffer (current-buffer))
+	    (insert "Text content at position " (format "%d" pos) ":\n\n")
+	    (with-current-buffer buffer
+	      (describe-text-properties-1 pos output-buffer))
+	    (goto-char (point-min))))))))
+
+(defun describe-text-properties-1 (pos output-buffer)
+  (let* ((properties (text-properties-at pos))
+         ;; XEmacs change; extents, not overlays. 
+	 (extents (extents-at pos))
+	 (wid-field (get-char-property pos 'field))
+	 (wid-button (get-char-property pos 'button))
+	 (wid-doc (get-char-property pos 'widget-doc))
+	 ;; If button.el is not loaded, we have no buttons in the text.
+         ;; XEmacs change; use the #'and-fboundp, #'declare-fboundp macros.
+	 (button (and-fboundp 'button-at (button-at pos)))
+	 (button-type (and button 
+                           (declare-fboundp (button-type button))))
+	 (button-label (and button 
+                            (declare-fboundp (button-label button))))
+	 (widget (or wid-field wid-button wid-doc)))
+    (with-current-buffer output-buffer
+      ;; Widgets
+      (when (widgetp widget)
+	(newline)
+	(insert (cond (wid-field "This is an editable text area")
+		      (wid-button "This is an active area")
+		      (wid-doc "This is documentation text")))
+	(insert " of a ")
+	(describe-text-widget widget)
+	(insert ".\n\n"))
+      ;; Buttons
+      (when (and button (not (widgetp wid-button)))
+	(newline)
+	(insert "Here is a `" (format "%S" button-type)
+		"' button labeled `" button-label "'.\n\n"))
+      ;; Overlays
+      (when extents
+	(newline)
+	(if (eq (length extents) 1)
+	    (insert "There is an extent here:\n")
+	  (insert "There are " (format "%d" (length extents))
+			 " overlays here:\n"))
+	(dolist (extent extents)
+	  (insert " From " (format "%d" (extent-start-position extent))
+                  " to " (format "%d" (extent-end-position extent)) "\n")
+	  (describe-property-list (extent-properties extent)))
+	(insert "\n"))
+      ;; Text properties
+      (when properties
+	(newline)
+	(insert "There are text properties here:\n")
+	(describe-property-list properties)))))
+
+(defcustom describe-char-unicodedata-file
+  ;; XEmacs change; initialise this by default, using Perl. 
+  (let ((have-perl
+         (member-if 
+          #'(lambda (path) 
+              (file-exists-p (format "%s%cperl" path directory-sep-char))) 
+          exec-path)) 
+        installprivlib res)
+    (when have-perl 
+      (setq installprivlib  
+            (with-string-as-buffer-contents ""
+              (shell-command "perl -V:installprivlib" t) 
+              ;; 1+ because buffer offsets start at one. 
+              (delete-region 1 (1+ (length "installprivlib='"))) 
+              ;; Delete the final newline, semicolon and quotation mark. 
+              (delete-region (- (point-max) 3) (point-max)))) 
+      (cond 
+       ((file-exists-p 
+         (setq res
+               (format "%s%cunicore%cUnicodeData.txt" 
+                       installprivlib directory-sep-char directory-sep-char)))) 
+       ((file-exists-p 
+         (setq res
+               (format "%s%cunicode%cUnicodeData.txt" 
+                       installprivlib directory-sep-char directory-sep-char)))))
+      res))
+  "Location of Unicode data file.
+This is the UnicodeData.txt file from the Unicode Consortium, used for
+diagnostics.  If it is non-nil `describe-char' will print data
+looked up from it.  This facility is mostly of use to people doing
+multilingual development.
+
+This is a fairly large file, typically installed with Perl.
+At the time of writing it is at the URL
+`http://www.unicode.org/Public/UNIDATA/UnicodeData.txt'.
+
+It is possible to build a DBM or Berkeley index cache for this file, so that
+it is not necessary to parse the whole file at run time.  See
+`unidata-initialize-unicodedata-database'.
+
+See also `describe-char-unihan-file' for the complementary file describing
+East Asian Han characters and their associated information."
+
+  :group 'mule
+  :type '(choice (const :tag "None" nil)
+		 file))
+
+;; XEmacs additions, from here until `describe-char-unicode-data'
+(defcustom describe-char-use-cache t
+  "Whether `describe-char' should use a DBM or Berkeley DB cache.
+This speeds up navigation of `describe-char-unicodedata-file', and makes
+navigation of `describe-char-unihan-file' reasonable."
+  :group 'mule
+  :type '(choice (const :tag "None" nil)
+		 file))
+
+(defcustom describe-char-unihan-file nil
+  "Location of Unihan file.
+This the Unihan.txt file from the Unicode Consortium, used for diagnostics.
+If it is non-nil `describe-char' can print data looked up from it.  This
+facility is of use to people doing multilingual development, to those
+learning Chinese or Japanese, and to a lesser extent to those learning
+Korean or Vietnamese.
+
+This is large file, typically not installed with the operating system.  At
+the time of writing it is at the URL
+`http://www.unicode.org/Public/UNIDATA/UniHan.txt'.
+
+In contrast with `describe-char-unicodedata-file', `describe-char' will not
+load this entire file and parse it if it is available.  It requires a
+pre-initialized cache; see `unidata-initialize-unihan-database'.  "
+  :group 'mule
+  :type '(choice (const :tag "None" nil)
+		 file))
+ 
+;; XEmacs addition
+(defvar unidata-database-format
+  (or (and (featurep 'dbm) 'dbm)
+      (and (featurep 'berkeley-db) 'berkeley-db))
+  "The DB format to use for the `describe-char' cache, or nil if no cache.")
+
+(defvar describe-char-unihan-field-descriptions
+  #s(hash-table test equal data 
+                ("kAccountingNumeric"
+                     "Value as an an accounting numeral"
+                 "kBigFive"
+                     "Big Five mapping (excluding ETEN, etc. extensions)"
+                 "kCCCII"
+                     "Hex CCCII code, for libraries in the Republic of China"
+                 "kCNS1986"
+                     "Hex CNS 11643-1986 mapping, for the Republic of China"
+                 "kCNS1992"
+                     "Hex CNS 11643-1986 mapping, for the Republic of China"
+                 "kCangjie"
+                     "Cangjie input code for the character"
+                 "kCantonese"
+                     "Cantonese pronunciation, using jyutping"
+                 "kCheungBauer"
+                     "Radical-stroke index, cangjie input code, \
+and Cantonese readings"
+                 "kCheungBauerIndex"
+                     "Index of information about this character \
+in Cheung & Bauer, 2002"
+                 "kCihaiT"
+                     "Lookup information for this character in the \
+Cihai dictionary ISBN 962-231-005-2."
+                 "kCompatibilityVariant"
+                     "Compatibility decomposition for this character"
+                 "kCowles"
+                     "Lookup information for this character in the \
+Cowles dictionary ISBN 962-231-005-2."
+                 "kDaeJaweon"
+                     "Lookup information for this character in the \
+Dae Jaweon (Korean) dictionary, 1988"
+                 "kDefinition"
+                     "Definition for this character in modern written Chinese"
+                 "kEACC"
+                     "The EACC (= CCCII, as used by the \
+US library of congress) code for this character"
+                 "kFenn"
+                     "Frequency information for this character from \
+Fenn's Chinese-English dictionary, 1979"
+                 "kFennIndex"
+                     "Lookup information for this character in \
+Fenn's Chinese-English dictionary, 1979"
+                 "kFourCornerCode"
+                     "Four-corner lookup code for this character"
+                 "kFrequency"
+                     "Frequency information from traditional \
+Chinese USENET postings"
+                 "kGB0" "GB 2312-80 mapping, ku/ten"
+                 "kGB1" "GB 12345-90 mapping, ku/ten"
+                 "kGB3" "GB 7589-87 mapping, ku/ten"
+                 "kGB5" "GB 7590-87 mapping, ku/ten"
+                 "kGB7" "GB 8565-89 mapping, ku/ten"
+                 ;; Identical to the previous information?!
+                 "kGB8" "GB 8565-89 mapping, ku/ten" 
+                 "kGSR"
+                     "Lookup information for this character in \
+Karlgern's Grammata Serica Recensa"
+                 "kGradeLevel"
+                     "The first grade in the HK school system \
+where knowledge of this character is expected"
+                 "kHDZRadBreak" "Whether Hanyu Da Zidian has a radical break \
+beginning with this character"
+                 "kHKGlyph" "Lookup information for this character in the HK \
+glyph reference, ISBN 962-949-040-4"
+                 "kHKSCS" "Mapping to the HK Supplementary Character Set for \
+Big Five."
+                 "kHanYu" "Character lookup information for Hanyu Da Zidian, \
+`Great Chinese Character Dictionary'"
+                 "kHangul" "Korean pronunciation"
+                 "kHanyuPinlu" "Pronunciation and frequency info, from Xiandai\
+ Hanyu Pinlu Cidian"
+                 "kIBMJapan" "IBM Japanese mapping"
+                 "kIICore" "Is this character in the core East Asian \
+ideograph set from the IRG?"
+                 "kIRGDaeJaweon" "Lookup information for this character \
+in the Dae Jaweon (Korean) dictionary"
+                 "kIRGDaiKanwaZiten" "Lookup information for this character \
+in the Morohashi (Japanese) dictionary"
+                 "kIRGHanyuDaZidian" "Lookup information for this character \
+in the Hanyu Da Zidian (Chinese) dictionary"
+                 "kIRGKangXi" "Lookup information for this character \
+in the KangXi dictionary"
+                 "kIRG_GSource" "PRC character source information"
+                 "kIRG_HSource" "Hong Kong character source information"
+                 "kIRG_JSource" "Japanese character source information"
+                 "kIRG_KPSource" "Korean character source information"
+                 "kIRG_KSource" "Republic of Korean character source\
+ information"
+                 "kIRG_TSource" "Republic of China character source \
+information"
+                 "kIRG_USource" "Unicode (standards body) source information"
+                 "kIRG_VSource" "Vietnamese character source information"
+                 "kJIS0213" "JIS X 0213-2000 mapping in min,ku,ten form"
+                 "kJapaneseKun" "Native Japanese pronunciation"
+                 "kJapaneseOn" "Sino-Japanese pronunciation"
+                 "kJis0" "JIS X 0208-1990 mapping in ku/ten form"
+                 "kJis1" "JIS X 0212-1990 mapping in ku/ten form"
+                 "kKPS0" "KPS 9566-97 mapping in hexadecimal"
+                 "kKPS1" "KPS 10721-2000 mapping in hexadecimal"
+                 "kKSC0" "KS X 1001:1992 (KS C 5601-1989) mapping \
+in ku/ten form"
+                 "kKSC1" "KS X 1002:1991 (KS C 5657-1991) mapping \
+in ku/ten form"
+                 "kKangXi" "Lookup information for this character \
+in the KangXi (Chinese) dictionary"
+                 "kKarlgren" "Lookup information for this character \
+in Karlgren's dictionary, 1974"
+                 "kKorean" "Pronunciation in Korean"
+                 "kLau" "Lookup information for this character \
+in Lau's Cantonese-English dictionary"
+                 "kMainlandTelegraph" "PRC telegraph code"
+                 "kMandarin" "Mandarin pronunciation in Pinyin"
+                 "kMatthews" "Lookup information for Robert Mathews' \
+Chinese-English dictionary"
+                 "kMeyerWempe" "Lookup information for Bernard Meyer and \
+Theodore Wempe's dictionary"
+                 ;; Identical to kIRGDaiKanwaZiten?!?
+                 "kMorohashi" "Lookup information for this character \
+in the Morohashi (Japanese) dictionary"
+                 "kNelson" "Lookup information for this character in \
+Nelson's Japanese-English dictionary"
+                 "kOtherNumeric" "Esoteric numeric value"
+                 "kPhonetic" "Phonetic index data"
+                 "kPrimaryNumeric" "Standard numeric value"
+                 "kPseudoGB1" "Fake GB 12345-90, for the purposes of \
+Unicode inclusion"
+                 "kRSAdobe_Japan1_6" "Adobe-Japan1-6 information for \
+the character"
+                 "kRSJapanese" "Radical/stroke count for Japanese"
+                 "kRSKanWa" "Morohashi radical/stroke count"
+                 "kRSKangXi" "KangXi radical/stroke count"
+                 "kRSKorean" "Korean radical/stroke count"
+                 "kRSUnicode" "Unicode radical/stroke count"
+                 "kSBGY" "Lookup information for this character in the Song \
+Ben Guang Yun Chinese dictionary"
+                 "kSemanticVariant" "Semantic variant character"
+                 "kSimplifiedVariant" "Simplified variant character"
+                 "kSpecializedSemanticVariant" "Specialized semantic variant"
+                 "kTaiwanTelegraph" "Taiwanese telegraph code"
+                 "kTang" "Tang dynasty pronunciation"
+                 "kTotalStrokes" "Total number of strokes"
+                 "kTraditionalVariant" "Traditional variant character"
+                 "kVietnamese" "Vietnamese pronunciation"
+                 "kXerox" "Xerox code"
+                 "kZVariant" "Z-variant code(s)"))
+  "A map from symbolic Unihan field names to English-language descriptions.")
+
+(defun unidata-generate-database-file-name (unidata-file-name size
+                                            database-format)
+  "Return a filename suitable for storing the cache for UNIDATA-FILE-NAME."
+  (expand-file-name
+   (format "~%c.xemacs%c%s-%s" directory-sep-char directory-sep-char
+           (md5 (format "%s-%d" unidata-file-name size))
+           database-format)))
+
+(defun unidata-initialize-unicodedata-database (unidata-file-name)
+  "Init the berkeley or gdbm lookup table for UNIDATA-FILE-NAME.
+
+The table is a (non-SQL) database with information on the file offset of
+each Unicode code point described in UNIDATA-FILE-NAME.  In the normal
+course of events UNIDATA-FILE-NAME is the value of
+`unidata-default-file-name', which see.  "
+  (check-argument-type #'file-readable-p unidata-file-name)
+  (unless unidata-database-format
+    (error 'unimplemented "No (non-SQL) DB support available"))
+  (let* ((database-format unidata-database-format)
+         (size (eighth (file-attributes unidata-file-name)))
+         (database-file-name
+          (unidata-generate-database-file-name unidata-file-name
+                                               size database-format))
+         (database-handle (open-database database-file-name database-format 
+                                         nil "rw+" #o644 'no-conversion-unix))
+         (coding-system-for-read 'no-conversion-unix)
+         (buffer-size 32768)
+         (offset-start 0)
+         (offset-end buffer-size)
+         (range-information (make-range-table 'start-closed-end-closed))
+         (range-staging (make-hash-table :test 'equal))
+         (message "Initializing UnicodeData database cache: ")
+         (loop-count 1)
+         range-startinfo)
+    (with-temp-buffer
+      (progress-feedback-with-label 'describe-char-unicodedata-file
+                                    "%s" 0 message)
+      (while (progn
+               (delete-region (point-min) (point-max))
+               (insert-file-contents unidata-file-name nil
+                                     offset-start offset-end)
+               ;; If we've reached the end of the data, pass nil back to
+               ;; the while loop test.
+               (not (= (point-min) (point-max))))
+
+        (when (= buffer-size (- (point-max) (point-min)))
+          ;; If we're in the body of the file, and there's a trailing
+          ;; incomplete end-line, delete it, and adjust offset-end
+          ;; appropriately.
+          (goto-char (point-max))
+          (search-backward "\n")
+          (forward-char)
+          (delete-region (point) (point-max))
+          (setq offset-end (+ offset-start (- (point) (point-min)))))
+
+        (progress-feedback-with-label 'describe-char-unicodedata-file
+                                      "%s" (truncate 
+                                            (* (/ offset-start size) 100))
+                                      (concat message
+                                              (make-string
+                                               (mod loop-count 39) ?.)))
+        (incf loop-count)
+        (goto-char (point-min))
+        (while (re-search-forward 
+                #r"^\([0-9A-F]\{4,6\}\);\([^;]*\);.*$" nil t)
+          (cond
+           ((and (> (- (match-end 2) (match-beginning 2)) 7)
+                 (equal (substring (match-string 2) -7)
+                        " First>"))
+            ;; Start of a range. Save the start info in range-staging.
+            (puthash (substring (match-string 2) 0 -7)
+                     (list (string-to-int (match-string 1) 16)
+                           (+ offset-start (1- (match-beginning 0))))
+                     range-staging))
+           ((and (> (- (match-end 2) (match-beginning 2)) 7)
+                 (equal (substring (match-string 2) -6)
+                        " Last>"))
+            ;; End of a range. Combine with the start info, save it to the
+            ;; range-information range table. 
+            (setq range-startinfo
+                  (gethash (substring (match-string 2) 0 -6) range-staging))
+            (assert range-startinfo nil
+                    "Unexpected order for range information.")
+            (put-range-table 
+             (first range-startinfo)
+             (string-to-int (match-string 1) 16)
+             (list (second range-startinfo) 
+                   (+ offset-start (1- (match-end 0))))
+             range-information)
+            (remhash (substring (match-string 2) 0 -6) range-staging))
+           (t
+            ;; Normal character. Save the associated information in the
+            ;; database directly.
+            (put-database (match-string 1)
+                          (format "(%d %d)"
+                                  (+ offset-start (1- (match-beginning 0)))
+                                  (+ offset-start (1- (match-end 0))))
+                          database-handle))))
+        (goto-char (point-min))
+        (setq offset-start offset-end
+              offset-end (+ buffer-size offset-end))))
+    ;; Save the range information as such in the database. 
+    (put-database "range-information"
+                  (let ((print-readably t))
+                    (prin1-to-string range-information))
+                  database-handle) 
+    (close-database database-handle)
+    (progress-feedback-with-label 'describe-char-unicodedata-file
+                                  "%s" 100 message)
+    database-file-name))
+
+(defun unidata-initialize-unihan-database (unihan-file-name)
+  "Init the berkeley or gdbm lookup table for UNIHAN-FILE-NAME.
+
+The table is a (non-SQL) database with information on the file offset of
+each Unicode code point described in Unicode.org's Han character repository.
+Unihan.txt (see `describe-char-unihan-file', the usual argument to this
+function) is very large, and manipulating it directly can be tedious and
+slow, so creating this cache makes it reasonable to display Unihan info in
+the output of \\[universal-argument] \\[what-cursor-position] .  "
+  (check-argument-type #'file-readable-p unihan-file-name)
+  (unless unidata-database-format
+    (error 'unimplemented "No (non-SQL) DB support available"))
+  (let* ((database-format unidata-database-format)
+         (size (eighth (file-attributes unihan-file-name)))
+         (database-file-name
+          (unidata-generate-database-file-name unihan-file-name
+                                               size database-format))
+         (database-handle (open-database database-file-name database-format 
+                                         nil "rw+" #o644 'no-conversion-unix))
+         (coding-system-for-read 'no-conversion-unix)
+         (buffer-size 65536)
+         (offset-start 0)
+         (offset-end buffer-size)
+         (message "Initializing Unihan database cache: ")
+         (loop-count 1)
+         trailing-unicode leading-unicode character-start character-end)
+    (with-temp-buffer
+      (progress-feedback-with-label 'describe-char-unihan-file
+                                    "%s" 0 message)
+      (while (progn
+               (delete-region (point-min) (point-max))
+               (insert-file-contents unihan-file-name nil
+                                     offset-start offset-end)
+               ;; If we've reached the end of the data, return nil to the
+               ;; while.
+               (not (= (point-min) (point-max))))
+
+        (incf loop-count)
+        (progress-feedback-with-label 'describe-char-unihan-file
+                                      "%s" (truncate
+                                            (* (/ offset-start size) 100))
+                                      (concat message
+                                              (make-string
+                                               (mod loop-count 44) ?.)))
+        (block 'dealing-with-chars
+          (when (= buffer-size (- (point-max) (point-min)))
+            ;; If we're in the body of the file, we need to delete the
+            ;; character info for the last character, and set offset-end
+            ;; appropriately. Otherwise, we may not be able to pick where
+            ;; the actual description of a character ends and
+            ;; begins. 
+            ;;
+            ;; This breaks if any single Unihan character description is
+            ;; greater than the buffer size in length.
+            (goto-char (point-max))
+            (beginning-of-line)
+
+            (when (< (- (point-max) (point)) (eval-when-compile
+                                               (length "U+ABCDEF\t")))
+              ;; If the character ID of the last line may have been cut off,
+              ;; we need to delete all of that line here.
+              (delete-region (point) (point-max))
+              (forward-line -1))
+
+            (when (looking-at "^\\(U\\+[0-9A-F]\\{4,6\\}\\)\t")
+              (setq trailing-unicode (match-string 1)
+                    trailing-unicode
+                    (format "^%s\t" (regexp-quote trailing-unicode)))
+
+              (end-of-line)
+
+              ;; Go back until we hit a line that doesn't start with this
+              ;; character info.
+              (while (re-search-backward trailing-unicode nil t))
+
+              ;; The re-search-backward failed, so point is still at the end
+              ;; of the last match. Move to its beginning.
+              (beginning-of-line)
+              (delete-region (point) (point-max))
+              (setq offset-end (+ offset-start (- (point) (point-min))))))
+          (goto-char (point-min))
+          (while t
+            (when (= (point) (point-max))
+              ;; We're at the end of this part of the file.
+              (return-from 'dealing-with-chars))
+
+            (unless (re-search-forward "^\\(U\\+[0-9A-F]\\{4,6\\}\\)\t"
+                                     nil t)
+              ;; We're probably in the comments at the start of the file. No
+              ;; need to look for character info.
+              (return-from 'dealing-with-chars))
+
+            ;; Store where the character started. 
+            (beginning-of-line)
+            (setq character-start (point))
+
+            (setq leading-unicode 
+                  (format "^%s\t" (regexp-quote (match-string 1))))
+
+            ;; Loop until we get past this entry.
+            (while (re-search-forward leading-unicode nil t))
+
+            ;; Now, store the information.
+            (setq leading-unicode
+                  (string-to-number (substring leading-unicode 3) 16)
+                  leading-unicode (format "%04X" leading-unicode)
+                  character-end (prog2 (end-of-line) (point)))
+            (put-database leading-unicode
+                          (format "(%d %d)"
+                                  (+ offset-start (1- character-start))
+                                  (+ offset-start (1- character-end)))
+                          database-handle)
+            (forward-line)))
+        (setq offset-start offset-end
+              offset-end (+ buffer-size offset-end))))
+    (close-database database-handle)
+    (progress-feedback-with-label 'describe-char-unihan-file
+                                  "%s" 100
+                                  message)
+    database-file-name))
+;; End XEmacs additions.
+
+(defun describe-char-unicode-data (char)
+  "Return a list of Unicode data for unicode CHAR.
+Each element is a list of a property description and the property value.
+The list is null if CHAR isn't found in `describe-char-unicodedata-file'."
+  (when describe-char-unicodedata-file
+    (unless (file-exists-p describe-char-unicodedata-file)
+      (error 'file-error
+             "`unicodedata-file' %s not found" describe-char-unicodedata-file))
+    ;; XEmacs change; accept a character argument, use the cache if
+    ;; appropriate.
+    (when (characterp char)
+      (setq char (encode-char char 'ucs)))
+    (with-temp-buffer
+      (if describe-char-use-cache
+          ;; Use the database info.
+          (let ((database-handle (open-database
+                                  (unidata-generate-database-file-name
+                                   describe-char-unicodedata-file
+                                   (eighth (file-attributes
+                                            describe-char-unicodedata-file))
+                                   unidata-database-format)
+                                  unidata-database-format
+                                  nil "r"
+                                  #o644 'no-conversion-unix))
+                (coding-system-for-read 'no-conversion-unix)
+                key lookup)
+            (unless database-handle
+              (error 'io-error "Could not open %s as a %s database"
+                     (unidata-generate-database-file-name
+                      describe-char-unicodedata-file
+                      (eighth (file-attributes
+                               describe-char-unicodedata-file))
+                      unidata-database-format)
+                     unidata-database-format))
+            (setq key (format "%04X" char)
+                  lookup (get-database key database-handle))
+            (if lookup
+                ;; Okay, we have information on that character in particular.
+                (progn (setq lookup (read lookup))
+                       (insert-file-contents describe-char-unicodedata-file nil
+                                             (first lookup) (second lookup)))
+              ;; No information on that character in particular. Do we have
+              ;; range information? If so, load and check for our desired
+              ;; character.
+              (setq lookup (get-database "range-information" database-handle)
+                    lookup (if lookup (read lookup))
+                    lookup (if lookup (get-range-table char lookup)))
+              (when lookup 
+                (insert-file-contents describe-char-unicodedata-file nil
+                                      (first lookup) (second lookup))))
+            (close-database database-handle))
+
+        ;; Otherwise, insert the whole file (the FSF approach).
+        (set-buffer (get-buffer-create " *Unicode Data*"))
+        (when (zerop (buffer-size))
+          ;; Don't use -literally in case of DOS line endings.
+          (insert-file-contents describe-char-unicodedata-file)))
+
+      (goto-char (point-min))
+      (let ((hex (format "%04X" char))
+            found first last unihan-match unihan-info
+            (unihan-database-handle
+	     (and describe-char-unihan-file
+		  (open-database (unidata-generate-database-file-name
+				  describe-char-unihan-file
+				  (eighth (file-attributes
+					   describe-char-unihan-file))
+				  unidata-database-format)
+				 unidata-database-format
+				 nil "r" #o644 'no-conversion-unix)))
+            (coding-system-for-read 'no-conversion-unix))
+        (if (re-search-forward (concat "^" hex) nil t)
+            (setq found t)
+	  ;; It's not listed explicitly.  Look for ranges, e.g. CJK
+	  ;; ideographs, and check whether it's in one of them.
+	  (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t)
+		      (>= char (setq first
+				     (string-to-number (match-string 1) 16)))
+		      (progn
+			(forward-line 1)
+			(looking-at "^\\([^;]+\\);[^;]+Last>;")
+			(> char
+			   (setq last
+				 (string-to-number (match-string 1) 16))))))
+	  (if (and first (>= char first)
+		   last (<= char last))
+	      (setq found t)))
+	(if found
+	    (let ((fields (mapcar (lambda (elt)
+				    (if (> (length elt) 0)
+					elt))
+				  (cdr (split-string
+					(buffer-substring
+					 (line-beginning-position)
+					 (line-end-position))
+					";")))))
+	      ;; The length depends on whether the last field was empty.
+	      (unless (or (= 13 (length fields))
+			  (= 14 (length fields)))
+		(error 'invalid-argument
+                       "Invalid contents in %s" describe-char-unicodedata-file))
+	      ;; The field names and values lists are slightly
+	      ;; modified from Mule-UCS unidata.el.
+	      (apply #'list
+	       (list "Name" (let ((name (nth 0 fields)))
+			      ;; Check for <..., First>, <..., Last>
+			      (if (string-match "\\`\\(<[^,]+\\)," name)
+				  (concat (match-string 1 name) ">")
+				name)))
+	       (list "Category"
+		     (cdr (assoc
+			   (nth 1 fields)
+			   '(("Lu" . "uppercase letter")
+			     ("Ll" . "lowercase letter")
+			     ("Lt" . "titlecase letter")
+			     ("Mn" . "non-spacing mark")
+			     ("Mc" . "spacing-combining mark")
+			     ("Me" . "enclosing mark")
+			     ("Nd" . "decimal digit")
+			     ("Nl" . "letter number")
+			     ("No" . "other number")
+			     ("Zs" . "space separator")
+			     ("Zl" . "line separator")
+			     ("Zp" . "paragraph separator")
+			     ("Cc" . "other control")
+			     ("Cf" . "other format")
+			     ("Cs" . "surrogate")
+			     ("Co" . "private use")
+			     ("Cn" . "not assigned")
+			     ("Lm" . "modifier letter")
+			     ("Lo" . "other letter")
+			     ("Pc" . "connector punctuation")
+			     ("Pd" . "dash punctuation")
+			     ("Ps" . "open punctuation")
+			     ("Pe" . "close punctuation")
+			     ("Pi" . "initial-quotation punctuation")
+			     ("Pf" . "final-quotation punctuation")
+			     ("Po" . "other punctuation")
+			     ("Sm" . "math symbol")
+			     ("Sc" . "currency symbol")
+			     ("Sk" . "modifier symbol")
+			     ("So" . "other symbol")))))
+	       (list "Combining class"
+		     (cdr (assoc
+			   (string-to-number (nth 2 fields))
+			   '((0 . "Spacing")
+			     (1 . "Overlays and interior")
+			     (7 . "Nuktas")
+			     (8 . "Hiragana/Katakana voicing marks")
+			     (9 . "Viramas")
+			     (10 . "Start of fixed position classes")
+			     (199 . "End of fixed position classes")
+			     (200 . "Below left attached")
+			     (202 . "Below attached")
+			     (204 . "Below right attached")
+			     (208 . "Left attached (reordrant around \
+single base character)")
+			     (210 . "Right attached")
+			     (212 . "Above left attached")
+			     (214 . "Above attached")
+			     (216 . "Above right attached")
+			     (218 . "Below left")
+			     (220 . "Below")
+			     (222 . "Below right")
+			     (224 . "Left (reordrant around single base \
+character)")
+			     (226 . "Right")
+			     (228 . "Above left")
+			     (230 . "Above")
+			     (232 . "Above right")
+			     (233 . "Double below")
+			     (234 . "Double above")
+			     (240 . "Below (iota subscript)")))))
+	       (list "Bidi category"
+		     (cdr (assoc
+			   (nth 3 fields)
+			   '(("L" . "Left-to-Right")
+			     ("LRE" . "Left-to-Right Embedding")
+			     ("LRO" . "Left-to-Right Override")
+			     ("R" . "Right-to-Left")
+			     ("AL" . "Right-to-Left Arabic")
+			     ("RLE" . "Right-to-Left Embedding")
+			     ("RLO" . "Right-to-Left Override")
+			     ("PDF" . "Pop Directional Format")
+			     ("EN" . "European Number")
+			     ("ES" . "European Number Separator")
+			     ("ET" . "European Number Terminator")
+			     ("AN" . "Arabic Number")
+			     ("CS" . "Common Number Separator")
+			     ("NSM" . "Non-Spacing Mark")
+			     ("BN" . "Boundary Neutral")
+			     ("B" . "Paragraph Separator")
+			     ("S" . "Segment Separator")
+			     ("WS" . "Whitespace")
+			     ("ON" . "Other Neutrals")))))
+	       (list
+		"Decomposition"
+		(if (nth 4 fields)
+		    (let* ((parts (split-string (nth 4 fields)))
+			   (info (car parts)))
+		      (if (string-match "\\`<\\(.+\\)>\\'" info)
+			  (setq info (match-string 1 info))
+			(setq info nil))
+		      (if info (setq parts (cdr parts)))
+		      ;; Maybe printing ? for unrepresentable unicodes
+		      ;; here and below should be changed?
+		      (setq parts (mapconcat
+				   (lambda (arg)
+				     (string (or (decode-char
+						  'ucs
+						  (string-to-number arg 16))
+						 ??)))
+				   parts " "))
+		      (concat info parts))))
+	       (list "Decimal digit value"
+		     (nth 5 fields))
+	       (list "Digit value"
+		     (nth 6 fields))
+	       (list "Numeric value"
+		     (nth 7 fields))
+	       (list "Mirrored"
+		     (if (equal "Y" (nth 8 fields))
+			 "yes"))
+	       (list "Old name" (nth 9 fields))
+	       (list "ISO 10646 comment" (nth 10 fields))
+	       (list "Uppercase" (and (nth 11 fields)
+				      (string (or (decode-char
+						   'ucs
+						   (string-to-number
+						    (nth 11 fields) 16))
+						  ??))))
+	       (list "Lowercase" (and (nth 12 fields)
+				      (string (or (decode-char
+						   'ucs
+						   (string-to-number
+						    (nth 12 fields) 16))
+						  ??))))
+	       (list "Titlecase" (and (nth 13 fields)
+				      (string (or (decode-char
+						   'ucs
+						   (string-to-number
+						    (nth 13 fields) 16))
+						  ??))))
+
+               ;; XEmacs addition.
+               ;; If we're aware the character is a Han character, provide
+               ;; the Unihan information, or tell the user that it's not
+               ;; available.
+               (if (and (> (length (nth 0 fields)) 13)
+                        (equal "<CJK Ideograph"
+                               (substring (nth 0 fields) 0 14)))
+                   (if (and unihan-database-handle
+                            (setq unihan-match
+                                  (get-database (format "%04X" char)
+                                                unihan-database-handle)
+                                  unihan-match
+                                  (and unihan-match (read unihan-match))))
+                       (with-temp-buffer
+                         (insert-file-contents describe-char-unihan-file
+                                               nil (first unihan-match)
+                                               (second unihan-match))
+                         (goto-char (point-min))
+                         (while (re-search-forward
+                                 "^U\\+[0-9A-F]+\t\\(k[^\t]+\\)\t\\(.*\\)$"
+                                 nil t)
+                           (push
+                            (list
+                             (or (gethash
+                                  (match-string 1)
+                                  describe-char-unihan-field-descriptions)
+                                 (match-string 1))
+                             (decode-coding-string (match-string 2) 'utf-8))
+                            unihan-info))
+                         (close-database unihan-database-handle)
+                         unihan-info)
+                     ;; It's a Han character, but Unihan.txt is not
+                     ;; available. Tell the user.
+                     (list
+                      '("Unihan"
+                        "No Unihan information available; is \
+`describe-char-unihan-file' set, and its cache initialized?")))))))))))
+
+;; Return information about how CHAR is displayed at the buffer
+;; position POS.  If the selected frame is on a graphic display,
+;; return a cons (FONTNAME . GLYPH-CODE).  Otherwise, return a string
+;; describing the terminal codes for the character.
+(defun describe-char-display (pos char)
+  (let* ((frame (selected-frame))
+         (charset (char-charset char))
+         (ccl (or (and (charset-property charset 'encode-as-utf-8)
+                       ccl-encode-to-ucs-2)
+                  (charset-property charset 'ccl-program)))
+         (ccl-vector (make-vector 8 0)))
+    (if (display-graphic-p (selected-frame))
+        (list
+         (font-instance-name
+          (face-font-instance (or (get-char-property pos 'face)
+                                  'default)
+                              (selected-window)
+                              charset))
+         (cond 
+          ((and ccl (eq 'x (frame-type frame)))
+           (setq char (split-char char))
+           (aset ccl-vector 0 (charset-id charset))
+           (aset ccl-vector 1 (second char))
+           (if (= 2 (charset-dimension charset))
+               (aset ccl-vector 2 (third char)))
+           (ccl-execute ccl ccl-vector)
+           (if (= 2 (charset-dimension charset))
+               (logior (lsh (aref ccl-vector 1) 8)
+                       (aref ccl-vector 2))
+             (aref ccl-vector 1)))
+          ;; #### We don't handle the X case where redisplay falls back to an
+          ;; ISO 10646-1 font at runtime.
+          ((eq 'x (frame-type frame))
+           (if (= 2 (charset-dimension charset))
+               (prog2
+                   (setq char (split-char char)) 
+                   (logior (lsh (second char) 8)
+                           (third char)))
+             (second (split-char char))))
+          ;; Otherwise we assume we're using Unicode.
+          (t
+           (encode-char char 'ucs))))
+      (let* ((coding (console-tty-output-coding-system (device-console)))
+             (encoded (encode-coding-string char coding)))
+        (if encoded
+            (format "%s, coding system %s"
+                    (encoded-string-description encoded coding)
+                    (coding-system-name coding)))))))
+
+
+;;;###autoload
+(defun describe-char (pos)
+  "Describe the character after POS (interactively, the character after point).
+The information includes character code, charset and code points in it,
+syntax, category, how the character is encoded in a file,
+character composition information (if relevant),
+as well as widgets, buttons, overlays, and text properties."
+  (interactive "d")
+  (if (>= pos (point-max))
+      (error "No character follows specified position"))
+  (let* ((char (char-after pos))
+	 (charset (char-charset char))
+	 (composition (find-composition pos nil nil t))
+	 (component-chars nil)
+	 (display-table
+          (specifier-instance current-display-table (selected-window)))
+	 (disp-table-entry (and display-table
+                                (get-display-table char display-table)))
+	 (extents (mapcar #'(lambda (o) (extent-properties o))
+			   (extents-at pos)))
+	 (char-description (single-key-description char))
+	 (text-props-desc
+	  (let ((tmp-buf (generate-new-buffer " *text-props*")))
+	    (unwind-protect
+		(progn
+		  (describe-text-properties pos tmp-buf)
+		  (with-current-buffer tmp-buf (buffer-string)))
+	      (kill-buffer tmp-buf))))
+	 item-list max-width unicode unicode-formatted unicode-error)
+
+
+    (setq unicode-error
+          ;; XEmacs change, check does the character represent a Unicode
+          ;; error sequence.
+          (get-char-table char unicode-error-default-translation-table)
+          unicode (and (not unicode-error) (encode-char char 'ucs))
+          unicode-formatted (if unicode-error
+                                (format
+                                 "Invalid Unicode sequence, ?\x%02x on disk"
+                                 unicode-error)
+                              (if (and unicode (natnump unicode))
+                                  (format (if (> unicode #xFFFF)
+                                              "U+%06X" "U+%04X")
+                                          unicode)
+                                ""))
+          item-list
+	  `(("character"
+	     ,(format "%s (%s, %d, #o%o, #x%x)"
+		      (apply 'propertize char-description
+			     (text-properties-at pos))
+                      unicode-formatted
+                      char 
+                      char 
+		      char))
+ 	    ("charset"
+ 	     ,(lexical-let 
+                   ((charset-name (symbol-name charset)))
+                `(progn
+                  (widget-create 'push-button
+                                 :notify ,(lambda (&rest ignored-arg)
+                                            (with-displaying-help-buffer
+                                             (lambda nil
+                                               (charset-description
+                                                (intern charset-name)))
+                                             charset-name))
+                                 ,charset-name)
+                  (widget-insert (format " (%s)" (charset-description
+                                                 ',charset))))))
+	    ("code point"
+	     ,(let ((split (split-char char)))
+		`(widget-create 'push-button
+;                  :notify
+;                  ,(lambda (&rest ignored-arg)
+;                     (with-selected-wind
+; insert-gui-button
+;                  (make-gui-button
+                  ,(if (= (charset-dimension charset) 1)
+                       (format "#x%02X" (nth 1 split))
+                     (format "#x%02X #x%02X" (nth 1 split)
+                             (nth 2 split))))))
+	    ("syntax"
+	     ,(let ((syntax
+                     (syntax-string-to-code (string (syntax-after pos)))))
+		(with-temp-buffer
+		  (describe-syntax-code syntax (current-buffer))
+                  ;; Remove the newline. 
+                  (delete-backward-char)
+		  (buffer-string))))
+            ;; XEmacs; #### add category support. 
+; 	    ("category"
+; 	     ,@(let ((category-set (char-category-set char)))
+; 		 (if (not category-set)
+; 		     '("-- none --")
+; 		   (mapcar #'(lambda (x) (format "%c:%s"
+; 						 x (category-docstring x)))
+; 			   (category-set-mnemonics category-set)))))
+; 	    ,@(let ((props (get-char-table char char-code-property-table))
+; 		    ps)
+; 		(when props
+; 		  (while props
+; 		    (push (format "%s:" (pop props)) ps)
+; 		    (push (format "%s;" (pop props)) ps))
+; 		  (list (cons "Properties" (nreverse ps)))))
+	    ("to input"
+	     ,@(let ((key-list (and-fboundp #'quail-find-key
+                                 current-input-method
+                                 (quail-find-key char))))
+		 (if (consp key-list)
+		     (list "type"
+			   (mapconcat #'(lambda (x) (concat "\"" x "\""))
+				      key-list " or ")
+			   "with"
+			   `(insert-text-button
+			     ,current-input-method
+			     'type 'help-input-method
+			     'help-args '(,current-input-method))))))
+;	    ("buffer code"
+;	     ,(encoded-string-description
+;	       (string-as-unibyte (char-to-string char) nil))
+	    ("file code"
+	     ,@(let* ((coding buffer-file-coding-system)
+                      ;; ### XEmacs; use encode-coding-char once
+                      ;; merged. 
+		      (encoded (encode-coding-string char coding)))
+		 (if encoded
+		     (list (encoded-string-description encoded coding)
+			   (format "(encoded by coding system %S)"
+                                   (coding-system-name coding)))
+		   (list "not encodable by coding system"
+			 (coding-system-name coding)))))
+	    ("display"
+	     ,(cond
+	       (disp-table-entry
+                ;; XEmacs change; just use the print syntax of the display
+                ;; table entry. Might be possible to improve this, but
+                ;; nothing occurs to me right now.
+		(format "by display table entry [%S] " disp-table-entry))
+	       (composition
+		(let ((from (car composition))
+		      (to (nth 1 composition))
+		      (next (1+ pos))
+		      (components (nth 2 composition))
+		      ch)
+		  (setcar composition
+			  (and (< from pos) (buffer-substring from pos)))
+		  (setcar (cdr composition)
+			  (and (< next to) (buffer-substring next to)))
+		  (dotimes (i (length components))
+		    (if (integerp (setq ch (aref components i)))
+			(push (cons ch (describe-char-display pos ch))
+			      component-chars)))
+		  (setq component-chars (nreverse component-chars))
+		  (format "composed to form \"%s\" (see below)"
+			  (buffer-substring from to))))
+	       (t
+		(let ((display (describe-char-display pos char)))
+		  (if (display-graphic-p (selected-frame))
+		      (if display
+			  (concat
+			   "by this font (glyph code)\n"
+			   (format "     %s (#x%02X)"
+				   (first display) (second display)))
+			"no font available")
+		    (if display
+			(format "terminal code %s" display)
+		      "not encodable for terminal"))))))
+	    ,@(let ((face
+		     (if (not (or disp-table-entry composition))
+			 (cond
+                          ;; XEmacs #### Implement this. 
+;			  ((and show-trailing-whitespace
+;				(save-excursion (goto-char pos)
+;						(looking-at "[ \t]+$")))
+;			   'trailing-whitespace)
+;			  ((and nobreak-char-display unicode (eq unicode '#xa0))
+;			   'nobreak-space)
+;			  ((and nobreak-char-display unicode (eq unicode '#xad))
+;			   'escape-glyph)
+			  ((and (< char 32) (not (memq char '(9 10))))
+			   'escape-glyph)))))
+		(if face (list (list "hardcoded face"
+				     `(insert-gui-button
+                                       (make-gui-button
+                                        ,(symbol-name face)))))))
+	    ,@(let ((unicodedata (and unicode
+				      (describe-char-unicode-data unicode))))
+		(if unicodedata
+		    (cons (list "Unicode data" " ") unicodedata)))))
+    (setq max-width (apply #'max (mapcar #'(lambda (x)
+					     (if (cadr x) (length (car x)) 0))
+					 item-list)))
+    ; (help-setup-xref nil (interactive-p))
+    (with-displaying-help-buffer
+     (lambda ()
+       (with-current-buffer standard-output
+         ; (set-buffer-multibyte multibyte-p)
+         (let ((formatter (format "%%%ds:" max-width)))
+           (dolist (elt item-list)
+             (when (cadr elt)
+               (insert (format formatter (car elt)))
+               (dolist (clm (cdr elt))
+                 (if (consp clm)
+                     (progn (insert " ") (eval clm))
+                   (when (>= (+ (current-column)
+                                (or (string-match "\n" clm)
+                                    (string-width clm))
+                                1)
+                             (window-width))
+                     (insert "\n")
+                     (indent-to (1+ max-width)))
+                   (insert " " clm)))
+               (insert "\n"))))
+
+         (when extents
+           (save-excursion
+             (goto-char (point-min))
+             (re-search-forward "character:[ \t\n]+")
+             (let* ((end (+ (point) (length char-description))))
+               (mapc #'(lambda (props)
+                         (let ((o (make-extent (point) end)))
+                           (while props
+                             (set-extent-property o (car props) (nth 1 props))
+                             (setq props (cddr props)))))
+                     extents))))
+
+         ;; XEmacs change; don't give GUI- or TTY-specific detail about the
+         ;; display table entry, the #'specifier-instance call above dealt
+         ;; with that.
+         ; (when disp-table-entry ...)
+
+         ;; XEmacs; this doesn't work now. 
+         (when composition
+           (insert "\nComposed")
+           (if (car composition)
+               (if (cadr composition)
+                   (insert " with the surrounding characters \""
+                           (car composition) "\" and \""
+                           (cadr composition) "\"")
+                 (insert " with the preceding character(s) \""
+                         (car composition) "\""))
+             (if (cadr composition)
+                 (insert " with the following character(s) \""
+                         (cadr composition) "\"")))
+           (insert " by the rule:\n\t("
+                   (mapconcat (lambda (x)
+                                (format (if (consp x) "%S" "?%c") x))
+                              (nth 2 composition)
+                              " ")
+                   ")")
+           (insert  "\nThe component character(s) are displayed by ")
+           ;; XEmacs #### Once composition is in place, this should be 
+           ;; a (font-instance-name (face-font-instance [...])) call. 
+           (if (display-graphic-p (selected-frame))
+               (progn
+                 (insert "these fonts (glyph codes):")
+                 (dolist (elt component-chars)
+                   (insert "\n " (car elt) ?:
+                           (propertize " " 'display '(space :align-to 5))
+                           (if (cdr elt)
+                               (format "%s (#x%02X)" (cadr elt) (cddr elt))
+                             "-- no font --"))))
+             (insert "these terminal codes:")
+             (dolist (elt component-chars)
+               (insert "\n  " (car elt) ":"
+                       (propertize " " 'display '(space :align-to 5))
+                       (or (cdr elt) "-- not encodable --"))))
+           (insert "\nSee the variable `reference-point-alist' for "
+                   "the meaning of the rule.\n"))
+
+         (if text-props-desc (insert text-props-desc))
+;        (setq help-xref-stack-item (list 'help-insert-string (buffer-string)))
+         (toggle-read-only 1)
+         (print-help-return-message)))
+     (format "Describe %c" (char-after pos)))))
+
+(defalias 'describe-char-after 'describe-char)
+(make-obsolete 'describe-char-after 'describe-char "22.1")
+
+(provide 'descr-text)
+
+;; arch-tag: fc55a498-f3e9-4312-b5bd-98cc02480af1
+;;; descr-text.el ends here
--- a/lisp/mule/mule-cmds.el	Wed May 21 21:09:20 2008 +0200
+++ b/lisp/mule/mule-cmds.el	Sun May 25 21:11:35 2008 +0200
@@ -901,34 +901,24 @@
 ;; Pretty description of encoded string
 
 ;; Alist of ISO 2022 control code vs the corresponding mnemonic string.
-;; (defvar iso-2022-control-alist
-;;   '((?\x1b . "ESC")
-;;     (?\x0e . "SO")
-;;     (?\x0f . "SI")
-;;     (?\x8e . "SS2")
-;;     (?\x8f . "SS3")
-;;     (?\x9b . "CSI")))
+(defvar iso-2022-control-alist
+  '((?\x1b . "ESC")
+    (?\x0e . "SO")
+    (?\x0f . "SI")
+    (?\x8e . "SS2")
+    (?\x8f . "SS3")
+    (?\x9b . "CSI")))
 
-;; (defun encoded-string-description (str coding-system)
-;;   "Return a pretty description of STR that is encoded by CODING-SYSTEM."
-;;   (setq str (string-as-unibyte str))
-;;   (let ((char (aref str 0))
-;;         desc)
-;;     (when (< char 128)
-;;       (setq desc (or (cdr (assq char iso-2022-control-alist))
-;;                      (char-to-string char)))
-;;       (let ((i 1)
-;;             (len (length str))) 
-;;         (while (< i len)
-;;           (setq char (aref str i))
-;;           (if (>= char 128)
-;;               (setq desc nil i len)
-;;             (setq desc (concat desc " "
-;;                                (or (cdr (assq char iso-2022-control-alist))
-;;                                    (char-to-string char)))
-;;                   i (1+ i))))))
-;;     (or desc
-;;         (mapconcat (function (lambda (x) (format "0x%02x" x))) str " "))))
+(defun encoded-string-description (str coding-system)
+  "Return a pretty description of STR that is encoded by CODING-SYSTEM."
+;  (setq str (string-as-unibyte str))
+  (mapconcat
+   (if (and coding-system (eq (coding-system-type coding-system) 'iso2022))
+       ;; Try to get a pretty description for ISO 2022 escape sequences.
+       (function (lambda (x) (or (cdr (assq x iso-2022-control-alist))
+				 (format "#x%02X" x))))
+     (function (lambda (x) (format "#x%02X" x))))
+   str " "))
 
 ;; (defun encode-coding-char (char coding-system)
 ;;   "Encode CHAR by CODING-SYSTEM and return the resulting string.
--- a/lisp/simple.el	Wed May 21 21:09:20 2008 +0200
+++ b/lisp/simple.el	Sun May 25 21:11:35 2008 +0200
@@ -782,13 +782,16 @@
 		done)))
 	(- (buffer-size) (forward-line (buffer-size)))))))
 
-(defun what-cursor-position ()
+(defun what-cursor-position (&optional detail)
   "Print info on cursor position (on screen and within buffer).
 Also describe the character after point, giving its UCS code point and Mule
 charset and codes; for ASCII characters, give its code in octal, decimal and
-hex."
-  ;; XEmacs change
-  (interactive "_")
+hex.
+
+With prefix argument, show extended details about the character in a
+separate buffer.  See also the command `describe-char'."
+  ;; XEmacs change "_"
+  (interactive "_P")
   (let* ((char (char-after (point))) ; XEmacs
 	 (beg (point-min))
 	 (end (point-max))
@@ -813,6 +816,8 @@
     (if (= pos end)
         (message "point=%d of %d(%d%%)%s column %d %s"
                  pos total percent narrowed-details col hscroll)
+      (if detail
+          (describe-char (point)))
       ;; XEmacs: don't use single-key-description, treat non-ASCII
       ;; characters differently.
       (if (< char ?\x80)
--- a/lisp/syntax.el	Wed May 21 21:09:20 2008 +0200
+++ b/lisp/syntax.el	Sun May 25 21:11:35 2008 +0200
@@ -39,6 +39,14 @@
 It inherits all characters from the standard syntax table."
   (make-char-table 'syntax))
 
+(defun syntax-after (pos)
+  "Return the raw syntax of the char after POS.
+If POS is outside the buffer's accessible portion, return nil."
+  (unless (or (< pos (point-min)) (>= pos (point-max)))
+    (let ((st (if lookup-syntax-properties
+		  (get-char-property pos 'syntax-table))))
+      (char-syntax (char-after pos) (or st (syntax-table))))))
+
 (defun simple-set-syntax-entry (char spec table)
   (put-char-table char spec table))
 
--- a/lisp/unicode.el	Wed May 21 21:09:20 2008 +0200
+++ b/lisp/unicode.el	Sun May 25 21:11:35 2008 +0200
@@ -506,7 +506,7 @@
 ;; point). Make them available to user code. 
 (defvar unicode-error-default-translation-table
   (loop 
-    with char-table = (make-char-table 'char)
+    with char-table = (make-char-table 'generic)
     for i from ?\x00 to ?\xFF
     initially (unless (featurep 'mule) (return))
     do