Mercurial > hg > xemacs-beta
comparison lisp/descr-text.el @ 4468:a78d697ccd2c
Import and extend GNU's descr-text.el, supporting prefix argument for C-x =
2008-05-25 Aidan Kehoe <kehoea@parhasard.net>
* descr-text.el: New.
Taken from GNU's GPLV2 version of 2007-02-14, with modifications
for XEmacs support and extensions for Unihan.txt support and
db/dbm caches.
* simple.el (what-cursor-position):
Support an optional prefix argument, as does GNU, calling
#'describe-char to giving more detail on the character at point,
notably from UnicodeData and (in our case, optionally) Unihan.txt.
* syntax.el (syntax-after):
Make this available for the sake of #'describe-char.
* mule/mule-cmds.el (iso-2022-control-alist):
Make this available, for the sake of #'encoded-string-description
and #'describe-char.
* mule/mule-cmds.el (encoded-string-description):
Make this available, for the sake of #'describe-char.
* unicode.el (unicode-error-default-translation-table):
Make this a char table of type generic, not of type char. Makes it
possible to have the relevant logic in #'describe-char reasonably
clear; also, and this is undocumented, makes it much easier to
implement #'frob-unicode-errors-region. I should document this,
and revise #'frob-unicode-errors-region.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Sun, 25 May 2008 21:11:35 +0200 |
parents | |
children | 0204391fc17c |
comparison
equal
deleted
inserted
replaced
4467:23ef20edf6ba | 4468:a78d697ccd2c |
---|---|
1 ;;; descr-text.el --- describe text mode | |
2 | |
3 ;; Copyright (C) 1994, 1995, 1996, 2001, 2002, 2003, 2004, | |
4 ;; 2005, 2006, 2007 Free Software Foundation, Inc. | |
5 | |
6 ;; Author: Boris Goldowsky <boris@gnu.org> | |
7 ;; Maintainer: FSF | |
8 ;; Keywords: faces, i18n, Unicode, multilingual | |
9 | |
10 ;; This file is part of XEmacs. | |
11 | |
12 ;; XEmacs is free software; you can redistribute it and/or modify | |
13 ;; it under the terms of the GNU General Public License as published by | |
14 ;; the Free Software Foundation; either version 2, or (at your option) | |
15 ;; any later version. | |
16 | |
17 ;; XEmacs is distributed in the hope that it will be useful, | |
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 ;; GNU General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
23 ;; along with XEmacs; see the file COPYING. If not, write to the | |
24 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
25 ;; Boston, MA 02110-1301, USA. | |
26 | |
27 ;;; Commentary: | |
28 | |
29 ;;; Describe-Text Mode. | |
30 | |
31 ;;; Code: | |
32 | |
33 (eval-when-compile (require 'wid-edit)) | |
34 | |
35 ;;; Describe-Text Utilities. | |
36 | |
37 (defun describe-text-widget (widget) | |
38 "Insert text to describe WIDGET in the current buffer." | |
39 ;; XEmacs change; use the widget function. | |
40 (widget-create 'push-button | |
41 :notify `(lambda (&rest ignore) | |
42 (widget-browse ',widget)) | |
43 :help-echo | |
44 "mouse-2, RET: browse this widget" | |
45 (symbol-name (if (symbolp widget) | |
46 widget | |
47 (car widget)))) | |
48 (widget-insert " ") | |
49 (widget-create 'info-link | |
50 :tag "Widget help" | |
51 :help-echo | |
52 "Read widget documentation" | |
53 "(widget)Top")) | |
54 | |
55 (defun describe-text-sexp (sexp) | |
56 "Insert a short description of SEXP in the current buffer." | |
57 ;; XEmacs change; use the widget functions. | |
58 (let ((pp (condition-case signal | |
59 (pp-to-string sexp) | |
60 (error (prin1-to-string signal))))) | |
61 (when (string-match "\n\\'" pp) | |
62 (setq pp (substring pp 0 (1- (length pp))))) | |
63 (if (cond ((string-match "\n" pp) | |
64 nil) | |
65 ((> (length pp) (- (window-width) (current-column))) | |
66 nil) | |
67 (t t)) | |
68 (widget-insert pp) | |
69 (widget-create 'push-button | |
70 :notify `(lambda (&rest ignore) | |
71 (with-output-to-temp-buffer | |
72 "*Pp Eval Output*" | |
73 (princ ',pp))) | |
74 :help-echo | |
75 "mouse-2, RET: pretty print value in another buffer" | |
76 "[Show]")))) | |
77 | |
78 (defun describe-property-list (properties) | |
79 "Insert a description of PROPERTIES in the current buffer. | |
80 PROPERTIES should be a list of overlay or text properties. | |
81 The `category', `face' and `font-lock-face' properties are made | |
82 into help buttons that call `describe-text-category' or | |
83 `describe-face' when pushed." | |
84 ;; Sort the properties by the size of their value. | |
85 (dolist (elt (sort (let (ret) | |
86 (while properties | |
87 (push (list (pop properties) (pop properties)) ret)) | |
88 ret) | |
89 (lambda (a b) (string< (prin1-to-string (nth 0 a) t) | |
90 (prin1-to-string (nth 0 b) t))))) | |
91 (let ((key (nth 0 elt)) | |
92 (value (nth 1 elt))) | |
93 ;; XEmacs change; use #'widget-insert, #'widget-create | |
94 (widget-insert (propertize (format " %-20s " key) | |
95 'face 'hyper-apropos-heading)) | |
96 (cond ((eq key 'category) | |
97 (widget-create | |
98 'push-button | |
99 :notify `(lambda (&rest ignore) | |
100 (describe-text-category ',value)) | |
101 :help-echo "mouse-2, RET: describe this category" | |
102 (symbol-name value))) | |
103 ((memq key '(face font-lock-face mouse-face)) | |
104 (widget-create | |
105 'push-button | |
106 :notify (lexical-let | |
107 ((value-name (symbol-name value))) | |
108 (lambda (&rest ignore) | |
109 (hyper-describe-face (intern value-name)))) | |
110 :help-echo "mouse-2, RET: describe this face" | |
111 (format "%S" value))) | |
112 ((widgetp value) | |
113 (describe-text-widget value)) | |
114 (t | |
115 (describe-text-sexp value)))) | |
116 (insert "\n"))) | |
117 | |
118 ;;; Describe-Text Commands. | |
119 | |
120 (defun describe-text-category (category) | |
121 "Describe a text property category." | |
122 (interactive "SCategory: ") | |
123 ; (help-setup-xref (list #'describe-text-category category) (interactive-p)) | |
124 (save-excursion | |
125 (with-output-to-temp-buffer "*Help*" | |
126 (set-buffer standard-output) | |
127 (insert "Category " (format "%S" category) ":\n\n") | |
128 (describe-property-list (symbol-plist category)) | |
129 (goto-char (point-min))))) | |
130 | |
131 ;;;###autoload | |
132 (defun describe-text-properties (pos &optional output-buffer) | |
133 "Describe widgets, buttons, overlays and text properties at POS. | |
134 Interactively, describe them for the character after point. | |
135 If optional second argument OUTPUT-BUFFER is non-nil, | |
136 insert the output into that buffer, and don't initialize or clear it | |
137 otherwise." | |
138 (interactive "d") | |
139 (if (>= pos (point-max)) | |
140 (error "No character follows specified position")) | |
141 (if output-buffer | |
142 (describe-text-properties-1 pos output-buffer) | |
143 (if (not (or (text-properties-at pos) ; (overlays-at pos))) | |
144 ;; XEmacs change. | |
145 (extents-at pos))) | |
146 (message "This is plain text.") | |
147 (let ((buffer (current-buffer)) | |
148 (target-buffer "*Help*")) | |
149 (when (eq buffer (get-buffer target-buffer)) | |
150 (setq target-buffer "*Help*<2>")) | |
151 (save-excursion | |
152 (with-output-to-temp-buffer target-buffer | |
153 (set-buffer standard-output) | |
154 (setq output-buffer (current-buffer)) | |
155 (insert "Text content at position " (format "%d" pos) ":\n\n") | |
156 (with-current-buffer buffer | |
157 (describe-text-properties-1 pos output-buffer)) | |
158 (goto-char (point-min)))))))) | |
159 | |
160 (defun describe-text-properties-1 (pos output-buffer) | |
161 (let* ((properties (text-properties-at pos)) | |
162 ;; XEmacs change; extents, not overlays. | |
163 (extents (extents-at pos)) | |
164 (wid-field (get-char-property pos 'field)) | |
165 (wid-button (get-char-property pos 'button)) | |
166 (wid-doc (get-char-property pos 'widget-doc)) | |
167 ;; If button.el is not loaded, we have no buttons in the text. | |
168 ;; XEmacs change; use the #'and-fboundp, #'declare-fboundp macros. | |
169 (button (and-fboundp 'button-at (button-at pos))) | |
170 (button-type (and button | |
171 (declare-fboundp (button-type button)))) | |
172 (button-label (and button | |
173 (declare-fboundp (button-label button)))) | |
174 (widget (or wid-field wid-button wid-doc))) | |
175 (with-current-buffer output-buffer | |
176 ;; Widgets | |
177 (when (widgetp widget) | |
178 (newline) | |
179 (insert (cond (wid-field "This is an editable text area") | |
180 (wid-button "This is an active area") | |
181 (wid-doc "This is documentation text"))) | |
182 (insert " of a ") | |
183 (describe-text-widget widget) | |
184 (insert ".\n\n")) | |
185 ;; Buttons | |
186 (when (and button (not (widgetp wid-button))) | |
187 (newline) | |
188 (insert "Here is a `" (format "%S" button-type) | |
189 "' button labeled `" button-label "'.\n\n")) | |
190 ;; Overlays | |
191 (when extents | |
192 (newline) | |
193 (if (eq (length extents) 1) | |
194 (insert "There is an extent here:\n") | |
195 (insert "There are " (format "%d" (length extents)) | |
196 " overlays here:\n")) | |
197 (dolist (extent extents) | |
198 (insert " From " (format "%d" (extent-start-position extent)) | |
199 " to " (format "%d" (extent-end-position extent)) "\n") | |
200 (describe-property-list (extent-properties extent))) | |
201 (insert "\n")) | |
202 ;; Text properties | |
203 (when properties | |
204 (newline) | |
205 (insert "There are text properties here:\n") | |
206 (describe-property-list properties))))) | |
207 | |
208 (defcustom describe-char-unicodedata-file | |
209 ;; XEmacs change; initialise this by default, using Perl. | |
210 (let ((have-perl | |
211 (member-if | |
212 #'(lambda (path) | |
213 (file-exists-p (format "%s%cperl" path directory-sep-char))) | |
214 exec-path)) | |
215 installprivlib res) | |
216 (when have-perl | |
217 (setq installprivlib | |
218 (with-string-as-buffer-contents "" | |
219 (shell-command "perl -V:installprivlib" t) | |
220 ;; 1+ because buffer offsets start at one. | |
221 (delete-region 1 (1+ (length "installprivlib='"))) | |
222 ;; Delete the final newline, semicolon and quotation mark. | |
223 (delete-region (- (point-max) 3) (point-max)))) | |
224 (cond | |
225 ((file-exists-p | |
226 (setq res | |
227 (format "%s%cunicore%cUnicodeData.txt" | |
228 installprivlib directory-sep-char directory-sep-char)))) | |
229 ((file-exists-p | |
230 (setq res | |
231 (format "%s%cunicode%cUnicodeData.txt" | |
232 installprivlib directory-sep-char directory-sep-char))))) | |
233 res)) | |
234 "Location of Unicode data file. | |
235 This is the UnicodeData.txt file from the Unicode Consortium, used for | |
236 diagnostics. If it is non-nil `describe-char' will print data | |
237 looked up from it. This facility is mostly of use to people doing | |
238 multilingual development. | |
239 | |
240 This is a fairly large file, typically installed with Perl. | |
241 At the time of writing it is at the URL | |
242 `http://www.unicode.org/Public/UNIDATA/UnicodeData.txt'. | |
243 | |
244 It is possible to build a DBM or Berkeley index cache for this file, so that | |
245 it is not necessary to parse the whole file at run time. See | |
246 `unidata-initialize-unicodedata-database'. | |
247 | |
248 See also `describe-char-unihan-file' for the complementary file describing | |
249 East Asian Han characters and their associated information." | |
250 | |
251 :group 'mule | |
252 :type '(choice (const :tag "None" nil) | |
253 file)) | |
254 | |
255 ;; XEmacs additions, from here until `describe-char-unicode-data' | |
256 (defcustom describe-char-use-cache t | |
257 "Whether `describe-char' should use a DBM or Berkeley DB cache. | |
258 This speeds up navigation of `describe-char-unicodedata-file', and makes | |
259 navigation of `describe-char-unihan-file' reasonable." | |
260 :group 'mule | |
261 :type '(choice (const :tag "None" nil) | |
262 file)) | |
263 | |
264 (defcustom describe-char-unihan-file nil | |
265 "Location of Unihan file. | |
266 This the Unihan.txt file from the Unicode Consortium, used for diagnostics. | |
267 If it is non-nil `describe-char' can print data looked up from it. This | |
268 facility is of use to people doing multilingual development, to those | |
269 learning Chinese or Japanese, and to a lesser extent to those learning | |
270 Korean or Vietnamese. | |
271 | |
272 This is large file, typically not installed with the operating system. At | |
273 the time of writing it is at the URL | |
274 `http://www.unicode.org/Public/UNIDATA/UniHan.txt'. | |
275 | |
276 In contrast with `describe-char-unicodedata-file', `describe-char' will not | |
277 load this entire file and parse it if it is available. It requires a | |
278 pre-initialized cache; see `unidata-initialize-unihan-database'. " | |
279 :group 'mule | |
280 :type '(choice (const :tag "None" nil) | |
281 file)) | |
282 | |
283 ;; XEmacs addition | |
284 (defvar unidata-database-format | |
285 (or (and (featurep 'dbm) 'dbm) | |
286 (and (featurep 'berkeley-db) 'berkeley-db)) | |
287 "The DB format to use for the `describe-char' cache, or nil if no cache.") | |
288 | |
289 (defvar describe-char-unihan-field-descriptions | |
290 #s(hash-table test equal data | |
291 ("kAccountingNumeric" | |
292 "Value as an an accounting numeral" | |
293 "kBigFive" | |
294 "Big Five mapping (excluding ETEN, etc. extensions)" | |
295 "kCCCII" | |
296 "Hex CCCII code, for libraries in the Republic of China" | |
297 "kCNS1986" | |
298 "Hex CNS 11643-1986 mapping, for the Republic of China" | |
299 "kCNS1992" | |
300 "Hex CNS 11643-1986 mapping, for the Republic of China" | |
301 "kCangjie" | |
302 "Cangjie input code for the character" | |
303 "kCantonese" | |
304 "Cantonese pronunciation, using jyutping" | |
305 "kCheungBauer" | |
306 "Radical-stroke index, cangjie input code, \ | |
307 and Cantonese readings" | |
308 "kCheungBauerIndex" | |
309 "Index of information about this character \ | |
310 in Cheung & Bauer, 2002" | |
311 "kCihaiT" | |
312 "Lookup information for this character in the \ | |
313 Cihai dictionary ISBN 962-231-005-2." | |
314 "kCompatibilityVariant" | |
315 "Compatibility decomposition for this character" | |
316 "kCowles" | |
317 "Lookup information for this character in the \ | |
318 Cowles dictionary ISBN 962-231-005-2." | |
319 "kDaeJaweon" | |
320 "Lookup information for this character in the \ | |
321 Dae Jaweon (Korean) dictionary, 1988" | |
322 "kDefinition" | |
323 "Definition for this character in modern written Chinese" | |
324 "kEACC" | |
325 "The EACC (= CCCII, as used by the \ | |
326 US library of congress) code for this character" | |
327 "kFenn" | |
328 "Frequency information for this character from \ | |
329 Fenn's Chinese-English dictionary, 1979" | |
330 "kFennIndex" | |
331 "Lookup information for this character in \ | |
332 Fenn's Chinese-English dictionary, 1979" | |
333 "kFourCornerCode" | |
334 "Four-corner lookup code for this character" | |
335 "kFrequency" | |
336 "Frequency information from traditional \ | |
337 Chinese USENET postings" | |
338 "kGB0" "GB 2312-80 mapping, ku/ten" | |
339 "kGB1" "GB 12345-90 mapping, ku/ten" | |
340 "kGB3" "GB 7589-87 mapping, ku/ten" | |
341 "kGB5" "GB 7590-87 mapping, ku/ten" | |
342 "kGB7" "GB 8565-89 mapping, ku/ten" | |
343 ;; Identical to the previous information?! | |
344 "kGB8" "GB 8565-89 mapping, ku/ten" | |
345 "kGSR" | |
346 "Lookup information for this character in \ | |
347 Karlgern's Grammata Serica Recensa" | |
348 "kGradeLevel" | |
349 "The first grade in the HK school system \ | |
350 where knowledge of this character is expected" | |
351 "kHDZRadBreak" "Whether Hanyu Da Zidian has a radical break \ | |
352 beginning with this character" | |
353 "kHKGlyph" "Lookup information for this character in the HK \ | |
354 glyph reference, ISBN 962-949-040-4" | |
355 "kHKSCS" "Mapping to the HK Supplementary Character Set for \ | |
356 Big Five." | |
357 "kHanYu" "Character lookup information for Hanyu Da Zidian, \ | |
358 `Great Chinese Character Dictionary'" | |
359 "kHangul" "Korean pronunciation" | |
360 "kHanyuPinlu" "Pronunciation and frequency info, from Xiandai\ | |
361 Hanyu Pinlu Cidian" | |
362 "kIBMJapan" "IBM Japanese mapping" | |
363 "kIICore" "Is this character in the core East Asian \ | |
364 ideograph set from the IRG?" | |
365 "kIRGDaeJaweon" "Lookup information for this character \ | |
366 in the Dae Jaweon (Korean) dictionary" | |
367 "kIRGDaiKanwaZiten" "Lookup information for this character \ | |
368 in the Morohashi (Japanese) dictionary" | |
369 "kIRGHanyuDaZidian" "Lookup information for this character \ | |
370 in the Hanyu Da Zidian (Chinese) dictionary" | |
371 "kIRGKangXi" "Lookup information for this character \ | |
372 in the KangXi dictionary" | |
373 "kIRG_GSource" "PRC character source information" | |
374 "kIRG_HSource" "Hong Kong character source information" | |
375 "kIRG_JSource" "Japanese character source information" | |
376 "kIRG_KPSource" "Korean character source information" | |
377 "kIRG_KSource" "Republic of Korean character source\ | |
378 information" | |
379 "kIRG_TSource" "Republic of China character source \ | |
380 information" | |
381 "kIRG_USource" "Unicode (standards body) source information" | |
382 "kIRG_VSource" "Vietnamese character source information" | |
383 "kJIS0213" "JIS X 0213-2000 mapping in min,ku,ten form" | |
384 "kJapaneseKun" "Native Japanese pronunciation" | |
385 "kJapaneseOn" "Sino-Japanese pronunciation" | |
386 "kJis0" "JIS X 0208-1990 mapping in ku/ten form" | |
387 "kJis1" "JIS X 0212-1990 mapping in ku/ten form" | |
388 "kKPS0" "KPS 9566-97 mapping in hexadecimal" | |
389 "kKPS1" "KPS 10721-2000 mapping in hexadecimal" | |
390 "kKSC0" "KS X 1001:1992 (KS C 5601-1989) mapping \ | |
391 in ku/ten form" | |
392 "kKSC1" "KS X 1002:1991 (KS C 5657-1991) mapping \ | |
393 in ku/ten form" | |
394 "kKangXi" "Lookup information for this character \ | |
395 in the KangXi (Chinese) dictionary" | |
396 "kKarlgren" "Lookup information for this character \ | |
397 in Karlgren's dictionary, 1974" | |
398 "kKorean" "Pronunciation in Korean" | |
399 "kLau" "Lookup information for this character \ | |
400 in Lau's Cantonese-English dictionary" | |
401 "kMainlandTelegraph" "PRC telegraph code" | |
402 "kMandarin" "Mandarin pronunciation in Pinyin" | |
403 "kMatthews" "Lookup information for Robert Mathews' \ | |
404 Chinese-English dictionary" | |
405 "kMeyerWempe" "Lookup information for Bernard Meyer and \ | |
406 Theodore Wempe's dictionary" | |
407 ;; Identical to kIRGDaiKanwaZiten?!? | |
408 "kMorohashi" "Lookup information for this character \ | |
409 in the Morohashi (Japanese) dictionary" | |
410 "kNelson" "Lookup information for this character in \ | |
411 Nelson's Japanese-English dictionary" | |
412 "kOtherNumeric" "Esoteric numeric value" | |
413 "kPhonetic" "Phonetic index data" | |
414 "kPrimaryNumeric" "Standard numeric value" | |
415 "kPseudoGB1" "Fake GB 12345-90, for the purposes of \ | |
416 Unicode inclusion" | |
417 "kRSAdobe_Japan1_6" "Adobe-Japan1-6 information for \ | |
418 the character" | |
419 "kRSJapanese" "Radical/stroke count for Japanese" | |
420 "kRSKanWa" "Morohashi radical/stroke count" | |
421 "kRSKangXi" "KangXi radical/stroke count" | |
422 "kRSKorean" "Korean radical/stroke count" | |
423 "kRSUnicode" "Unicode radical/stroke count" | |
424 "kSBGY" "Lookup information for this character in the Song \ | |
425 Ben Guang Yun Chinese dictionary" | |
426 "kSemanticVariant" "Semantic variant character" | |
427 "kSimplifiedVariant" "Simplified variant character" | |
428 "kSpecializedSemanticVariant" "Specialized semantic variant" | |
429 "kTaiwanTelegraph" "Taiwanese telegraph code" | |
430 "kTang" "Tang dynasty pronunciation" | |
431 "kTotalStrokes" "Total number of strokes" | |
432 "kTraditionalVariant" "Traditional variant character" | |
433 "kVietnamese" "Vietnamese pronunciation" | |
434 "kXerox" "Xerox code" | |
435 "kZVariant" "Z-variant code(s)")) | |
436 "A map from symbolic Unihan field names to English-language descriptions.") | |
437 | |
438 (defun unidata-generate-database-file-name (unidata-file-name size | |
439 database-format) | |
440 "Return a filename suitable for storing the cache for UNIDATA-FILE-NAME." | |
441 (expand-file-name | |
442 (format "~%c.xemacs%c%s-%s" directory-sep-char directory-sep-char | |
443 (md5 (format "%s-%d" unidata-file-name size)) | |
444 database-format))) | |
445 | |
446 (defun unidata-initialize-unicodedata-database (unidata-file-name) | |
447 "Init the berkeley or gdbm lookup table for UNIDATA-FILE-NAME. | |
448 | |
449 The table is a (non-SQL) database with information on the file offset of | |
450 each Unicode code point described in UNIDATA-FILE-NAME. In the normal | |
451 course of events UNIDATA-FILE-NAME is the value of | |
452 `unidata-default-file-name', which see. " | |
453 (check-argument-type #'file-readable-p unidata-file-name) | |
454 (unless unidata-database-format | |
455 (error 'unimplemented "No (non-SQL) DB support available")) | |
456 (let* ((database-format unidata-database-format) | |
457 (size (eighth (file-attributes unidata-file-name))) | |
458 (database-file-name | |
459 (unidata-generate-database-file-name unidata-file-name | |
460 size database-format)) | |
461 (database-handle (open-database database-file-name database-format | |
462 nil "rw+" #o644 'no-conversion-unix)) | |
463 (coding-system-for-read 'no-conversion-unix) | |
464 (buffer-size 32768) | |
465 (offset-start 0) | |
466 (offset-end buffer-size) | |
467 (range-information (make-range-table 'start-closed-end-closed)) | |
468 (range-staging (make-hash-table :test 'equal)) | |
469 (message "Initializing UnicodeData database cache: ") | |
470 (loop-count 1) | |
471 range-startinfo) | |
472 (with-temp-buffer | |
473 (progress-feedback-with-label 'describe-char-unicodedata-file | |
474 "%s" 0 message) | |
475 (while (progn | |
476 (delete-region (point-min) (point-max)) | |
477 (insert-file-contents unidata-file-name nil | |
478 offset-start offset-end) | |
479 ;; If we've reached the end of the data, pass nil back to | |
480 ;; the while loop test. | |
481 (not (= (point-min) (point-max)))) | |
482 | |
483 (when (= buffer-size (- (point-max) (point-min))) | |
484 ;; If we're in the body of the file, and there's a trailing | |
485 ;; incomplete end-line, delete it, and adjust offset-end | |
486 ;; appropriately. | |
487 (goto-char (point-max)) | |
488 (search-backward "\n") | |
489 (forward-char) | |
490 (delete-region (point) (point-max)) | |
491 (setq offset-end (+ offset-start (- (point) (point-min))))) | |
492 | |
493 (progress-feedback-with-label 'describe-char-unicodedata-file | |
494 "%s" (truncate | |
495 (* (/ offset-start size) 100)) | |
496 (concat message | |
497 (make-string | |
498 (mod loop-count 39) ?.))) | |
499 (incf loop-count) | |
500 (goto-char (point-min)) | |
501 (while (re-search-forward | |
502 #r"^\([0-9A-F]\{4,6\}\);\([^;]*\);.*$" nil t) | |
503 (cond | |
504 ((and (> (- (match-end 2) (match-beginning 2)) 7) | |
505 (equal (substring (match-string 2) -7) | |
506 " First>")) | |
507 ;; Start of a range. Save the start info in range-staging. | |
508 (puthash (substring (match-string 2) 0 -7) | |
509 (list (string-to-int (match-string 1) 16) | |
510 (+ offset-start (1- (match-beginning 0)))) | |
511 range-staging)) | |
512 ((and (> (- (match-end 2) (match-beginning 2)) 7) | |
513 (equal (substring (match-string 2) -6) | |
514 " Last>")) | |
515 ;; End of a range. Combine with the start info, save it to the | |
516 ;; range-information range table. | |
517 (setq range-startinfo | |
518 (gethash (substring (match-string 2) 0 -6) range-staging)) | |
519 (assert range-startinfo nil | |
520 "Unexpected order for range information.") | |
521 (put-range-table | |
522 (first range-startinfo) | |
523 (string-to-int (match-string 1) 16) | |
524 (list (second range-startinfo) | |
525 (+ offset-start (1- (match-end 0)))) | |
526 range-information) | |
527 (remhash (substring (match-string 2) 0 -6) range-staging)) | |
528 (t | |
529 ;; Normal character. Save the associated information in the | |
530 ;; database directly. | |
531 (put-database (match-string 1) | |
532 (format "(%d %d)" | |
533 (+ offset-start (1- (match-beginning 0))) | |
534 (+ offset-start (1- (match-end 0)))) | |
535 database-handle)))) | |
536 (goto-char (point-min)) | |
537 (setq offset-start offset-end | |
538 offset-end (+ buffer-size offset-end)))) | |
539 ;; Save the range information as such in the database. | |
540 (put-database "range-information" | |
541 (let ((print-readably t)) | |
542 (prin1-to-string range-information)) | |
543 database-handle) | |
544 (close-database database-handle) | |
545 (progress-feedback-with-label 'describe-char-unicodedata-file | |
546 "%s" 100 message) | |
547 database-file-name)) | |
548 | |
549 (defun unidata-initialize-unihan-database (unihan-file-name) | |
550 "Init the berkeley or gdbm lookup table for UNIHAN-FILE-NAME. | |
551 | |
552 The table is a (non-SQL) database with information on the file offset of | |
553 each Unicode code point described in Unicode.org's Han character repository. | |
554 Unihan.txt (see `describe-char-unihan-file', the usual argument to this | |
555 function) is very large, and manipulating it directly can be tedious and | |
556 slow, so creating this cache makes it reasonable to display Unihan info in | |
557 the output of \\[universal-argument] \\[what-cursor-position] . " | |
558 (check-argument-type #'file-readable-p unihan-file-name) | |
559 (unless unidata-database-format | |
560 (error 'unimplemented "No (non-SQL) DB support available")) | |
561 (let* ((database-format unidata-database-format) | |
562 (size (eighth (file-attributes unihan-file-name))) | |
563 (database-file-name | |
564 (unidata-generate-database-file-name unihan-file-name | |
565 size database-format)) | |
566 (database-handle (open-database database-file-name database-format | |
567 nil "rw+" #o644 'no-conversion-unix)) | |
568 (coding-system-for-read 'no-conversion-unix) | |
569 (buffer-size 65536) | |
570 (offset-start 0) | |
571 (offset-end buffer-size) | |
572 (message "Initializing Unihan database cache: ") | |
573 (loop-count 1) | |
574 trailing-unicode leading-unicode character-start character-end) | |
575 (with-temp-buffer | |
576 (progress-feedback-with-label 'describe-char-unihan-file | |
577 "%s" 0 message) | |
578 (while (progn | |
579 (delete-region (point-min) (point-max)) | |
580 (insert-file-contents unihan-file-name nil | |
581 offset-start offset-end) | |
582 ;; If we've reached the end of the data, return nil to the | |
583 ;; while. | |
584 (not (= (point-min) (point-max)))) | |
585 | |
586 (incf loop-count) | |
587 (progress-feedback-with-label 'describe-char-unihan-file | |
588 "%s" (truncate | |
589 (* (/ offset-start size) 100)) | |
590 (concat message | |
591 (make-string | |
592 (mod loop-count 44) ?.))) | |
593 (block 'dealing-with-chars | |
594 (when (= buffer-size (- (point-max) (point-min))) | |
595 ;; If we're in the body of the file, we need to delete the | |
596 ;; character info for the last character, and set offset-end | |
597 ;; appropriately. Otherwise, we may not be able to pick where | |
598 ;; the actual description of a character ends and | |
599 ;; begins. | |
600 ;; | |
601 ;; This breaks if any single Unihan character description is | |
602 ;; greater than the buffer size in length. | |
603 (goto-char (point-max)) | |
604 (beginning-of-line) | |
605 | |
606 (when (< (- (point-max) (point)) (eval-when-compile | |
607 (length "U+ABCDEF\t"))) | |
608 ;; If the character ID of the last line may have been cut off, | |
609 ;; we need to delete all of that line here. | |
610 (delete-region (point) (point-max)) | |
611 (forward-line -1)) | |
612 | |
613 (when (looking-at "^\\(U\\+[0-9A-F]\\{4,6\\}\\)\t") | |
614 (setq trailing-unicode (match-string 1) | |
615 trailing-unicode | |
616 (format "^%s\t" (regexp-quote trailing-unicode))) | |
617 | |
618 (end-of-line) | |
619 | |
620 ;; Go back until we hit a line that doesn't start with this | |
621 ;; character info. | |
622 (while (re-search-backward trailing-unicode nil t)) | |
623 | |
624 ;; The re-search-backward failed, so point is still at the end | |
625 ;; of the last match. Move to its beginning. | |
626 (beginning-of-line) | |
627 (delete-region (point) (point-max)) | |
628 (setq offset-end (+ offset-start (- (point) (point-min)))))) | |
629 (goto-char (point-min)) | |
630 (while t | |
631 (when (= (point) (point-max)) | |
632 ;; We're at the end of this part of the file. | |
633 (return-from 'dealing-with-chars)) | |
634 | |
635 (unless (re-search-forward "^\\(U\\+[0-9A-F]\\{4,6\\}\\)\t" | |
636 nil t) | |
637 ;; We're probably in the comments at the start of the file. No | |
638 ;; need to look for character info. | |
639 (return-from 'dealing-with-chars)) | |
640 | |
641 ;; Store where the character started. | |
642 (beginning-of-line) | |
643 (setq character-start (point)) | |
644 | |
645 (setq leading-unicode | |
646 (format "^%s\t" (regexp-quote (match-string 1)))) | |
647 | |
648 ;; Loop until we get past this entry. | |
649 (while (re-search-forward leading-unicode nil t)) | |
650 | |
651 ;; Now, store the information. | |
652 (setq leading-unicode | |
653 (string-to-number (substring leading-unicode 3) 16) | |
654 leading-unicode (format "%04X" leading-unicode) | |
655 character-end (prog2 (end-of-line) (point))) | |
656 (put-database leading-unicode | |
657 (format "(%d %d)" | |
658 (+ offset-start (1- character-start)) | |
659 (+ offset-start (1- character-end))) | |
660 database-handle) | |
661 (forward-line))) | |
662 (setq offset-start offset-end | |
663 offset-end (+ buffer-size offset-end)))) | |
664 (close-database database-handle) | |
665 (progress-feedback-with-label 'describe-char-unihan-file | |
666 "%s" 100 | |
667 message) | |
668 database-file-name)) | |
669 ;; End XEmacs additions. | |
670 | |
671 (defun describe-char-unicode-data (char) | |
672 "Return a list of Unicode data for unicode CHAR. | |
673 Each element is a list of a property description and the property value. | |
674 The list is null if CHAR isn't found in `describe-char-unicodedata-file'." | |
675 (when describe-char-unicodedata-file | |
676 (unless (file-exists-p describe-char-unicodedata-file) | |
677 (error 'file-error | |
678 "`unicodedata-file' %s not found" describe-char-unicodedata-file)) | |
679 ;; XEmacs change; accept a character argument, use the cache if | |
680 ;; appropriate. | |
681 (when (characterp char) | |
682 (setq char (encode-char char 'ucs))) | |
683 (with-temp-buffer | |
684 (if describe-char-use-cache | |
685 ;; Use the database info. | |
686 (let ((database-handle (open-database | |
687 (unidata-generate-database-file-name | |
688 describe-char-unicodedata-file | |
689 (eighth (file-attributes | |
690 describe-char-unicodedata-file)) | |
691 unidata-database-format) | |
692 unidata-database-format | |
693 nil "r" | |
694 #o644 'no-conversion-unix)) | |
695 (coding-system-for-read 'no-conversion-unix) | |
696 key lookup) | |
697 (unless database-handle | |
698 (error 'io-error "Could not open %s as a %s database" | |
699 (unidata-generate-database-file-name | |
700 describe-char-unicodedata-file | |
701 (eighth (file-attributes | |
702 describe-char-unicodedata-file)) | |
703 unidata-database-format) | |
704 unidata-database-format)) | |
705 (setq key (format "%04X" char) | |
706 lookup (get-database key database-handle)) | |
707 (if lookup | |
708 ;; Okay, we have information on that character in particular. | |
709 (progn (setq lookup (read lookup)) | |
710 (insert-file-contents describe-char-unicodedata-file nil | |
711 (first lookup) (second lookup))) | |
712 ;; No information on that character in particular. Do we have | |
713 ;; range information? If so, load and check for our desired | |
714 ;; character. | |
715 (setq lookup (get-database "range-information" database-handle) | |
716 lookup (if lookup (read lookup)) | |
717 lookup (if lookup (get-range-table char lookup))) | |
718 (when lookup | |
719 (insert-file-contents describe-char-unicodedata-file nil | |
720 (first lookup) (second lookup)))) | |
721 (close-database database-handle)) | |
722 | |
723 ;; Otherwise, insert the whole file (the FSF approach). | |
724 (set-buffer (get-buffer-create " *Unicode Data*")) | |
725 (when (zerop (buffer-size)) | |
726 ;; Don't use -literally in case of DOS line endings. | |
727 (insert-file-contents describe-char-unicodedata-file))) | |
728 | |
729 (goto-char (point-min)) | |
730 (let ((hex (format "%04X" char)) | |
731 found first last unihan-match unihan-info | |
732 (unihan-database-handle | |
733 (and describe-char-unihan-file | |
734 (open-database (unidata-generate-database-file-name | |
735 describe-char-unihan-file | |
736 (eighth (file-attributes | |
737 describe-char-unihan-file)) | |
738 unidata-database-format) | |
739 unidata-database-format | |
740 nil "r" #o644 'no-conversion-unix))) | |
741 (coding-system-for-read 'no-conversion-unix)) | |
742 (if (re-search-forward (concat "^" hex) nil t) | |
743 (setq found t) | |
744 ;; It's not listed explicitly. Look for ranges, e.g. CJK | |
745 ;; ideographs, and check whether it's in one of them. | |
746 (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t) | |
747 (>= char (setq first | |
748 (string-to-number (match-string 1) 16))) | |
749 (progn | |
750 (forward-line 1) | |
751 (looking-at "^\\([^;]+\\);[^;]+Last>;") | |
752 (> char | |
753 (setq last | |
754 (string-to-number (match-string 1) 16)))))) | |
755 (if (and first (>= char first) | |
756 last (<= char last)) | |
757 (setq found t))) | |
758 (if found | |
759 (let ((fields (mapcar (lambda (elt) | |
760 (if (> (length elt) 0) | |
761 elt)) | |
762 (cdr (split-string | |
763 (buffer-substring | |
764 (line-beginning-position) | |
765 (line-end-position)) | |
766 ";"))))) | |
767 ;; The length depends on whether the last field was empty. | |
768 (unless (or (= 13 (length fields)) | |
769 (= 14 (length fields))) | |
770 (error 'invalid-argument | |
771 "Invalid contents in %s" describe-char-unicodedata-file)) | |
772 ;; The field names and values lists are slightly | |
773 ;; modified from Mule-UCS unidata.el. | |
774 (apply #'list | |
775 (list "Name" (let ((name (nth 0 fields))) | |
776 ;; Check for <..., First>, <..., Last> | |
777 (if (string-match "\\`\\(<[^,]+\\)," name) | |
778 (concat (match-string 1 name) ">") | |
779 name))) | |
780 (list "Category" | |
781 (cdr (assoc | |
782 (nth 1 fields) | |
783 '(("Lu" . "uppercase letter") | |
784 ("Ll" . "lowercase letter") | |
785 ("Lt" . "titlecase letter") | |
786 ("Mn" . "non-spacing mark") | |
787 ("Mc" . "spacing-combining mark") | |
788 ("Me" . "enclosing mark") | |
789 ("Nd" . "decimal digit") | |
790 ("Nl" . "letter number") | |
791 ("No" . "other number") | |
792 ("Zs" . "space separator") | |
793 ("Zl" . "line separator") | |
794 ("Zp" . "paragraph separator") | |
795 ("Cc" . "other control") | |
796 ("Cf" . "other format") | |
797 ("Cs" . "surrogate") | |
798 ("Co" . "private use") | |
799 ("Cn" . "not assigned") | |
800 ("Lm" . "modifier letter") | |
801 ("Lo" . "other letter") | |
802 ("Pc" . "connector punctuation") | |
803 ("Pd" . "dash punctuation") | |
804 ("Ps" . "open punctuation") | |
805 ("Pe" . "close punctuation") | |
806 ("Pi" . "initial-quotation punctuation") | |
807 ("Pf" . "final-quotation punctuation") | |
808 ("Po" . "other punctuation") | |
809 ("Sm" . "math symbol") | |
810 ("Sc" . "currency symbol") | |
811 ("Sk" . "modifier symbol") | |
812 ("So" . "other symbol"))))) | |
813 (list "Combining class" | |
814 (cdr (assoc | |
815 (string-to-number (nth 2 fields)) | |
816 '((0 . "Spacing") | |
817 (1 . "Overlays and interior") | |
818 (7 . "Nuktas") | |
819 (8 . "Hiragana/Katakana voicing marks") | |
820 (9 . "Viramas") | |
821 (10 . "Start of fixed position classes") | |
822 (199 . "End of fixed position classes") | |
823 (200 . "Below left attached") | |
824 (202 . "Below attached") | |
825 (204 . "Below right attached") | |
826 (208 . "Left attached (reordrant around \ | |
827 single base character)") | |
828 (210 . "Right attached") | |
829 (212 . "Above left attached") | |
830 (214 . "Above attached") | |
831 (216 . "Above right attached") | |
832 (218 . "Below left") | |
833 (220 . "Below") | |
834 (222 . "Below right") | |
835 (224 . "Left (reordrant around single base \ | |
836 character)") | |
837 (226 . "Right") | |
838 (228 . "Above left") | |
839 (230 . "Above") | |
840 (232 . "Above right") | |
841 (233 . "Double below") | |
842 (234 . "Double above") | |
843 (240 . "Below (iota subscript)"))))) | |
844 (list "Bidi category" | |
845 (cdr (assoc | |
846 (nth 3 fields) | |
847 '(("L" . "Left-to-Right") | |
848 ("LRE" . "Left-to-Right Embedding") | |
849 ("LRO" . "Left-to-Right Override") | |
850 ("R" . "Right-to-Left") | |
851 ("AL" . "Right-to-Left Arabic") | |
852 ("RLE" . "Right-to-Left Embedding") | |
853 ("RLO" . "Right-to-Left Override") | |
854 ("PDF" . "Pop Directional Format") | |
855 ("EN" . "European Number") | |
856 ("ES" . "European Number Separator") | |
857 ("ET" . "European Number Terminator") | |
858 ("AN" . "Arabic Number") | |
859 ("CS" . "Common Number Separator") | |
860 ("NSM" . "Non-Spacing Mark") | |
861 ("BN" . "Boundary Neutral") | |
862 ("B" . "Paragraph Separator") | |
863 ("S" . "Segment Separator") | |
864 ("WS" . "Whitespace") | |
865 ("ON" . "Other Neutrals"))))) | |
866 (list | |
867 "Decomposition" | |
868 (if (nth 4 fields) | |
869 (let* ((parts (split-string (nth 4 fields))) | |
870 (info (car parts))) | |
871 (if (string-match "\\`<\\(.+\\)>\\'" info) | |
872 (setq info (match-string 1 info)) | |
873 (setq info nil)) | |
874 (if info (setq parts (cdr parts))) | |
875 ;; Maybe printing ? for unrepresentable unicodes | |
876 ;; here and below should be changed? | |
877 (setq parts (mapconcat | |
878 (lambda (arg) | |
879 (string (or (decode-char | |
880 'ucs | |
881 (string-to-number arg 16)) | |
882 ??))) | |
883 parts " ")) | |
884 (concat info parts)))) | |
885 (list "Decimal digit value" | |
886 (nth 5 fields)) | |
887 (list "Digit value" | |
888 (nth 6 fields)) | |
889 (list "Numeric value" | |
890 (nth 7 fields)) | |
891 (list "Mirrored" | |
892 (if (equal "Y" (nth 8 fields)) | |
893 "yes")) | |
894 (list "Old name" (nth 9 fields)) | |
895 (list "ISO 10646 comment" (nth 10 fields)) | |
896 (list "Uppercase" (and (nth 11 fields) | |
897 (string (or (decode-char | |
898 'ucs | |
899 (string-to-number | |
900 (nth 11 fields) 16)) | |
901 ??)))) | |
902 (list "Lowercase" (and (nth 12 fields) | |
903 (string (or (decode-char | |
904 'ucs | |
905 (string-to-number | |
906 (nth 12 fields) 16)) | |
907 ??)))) | |
908 (list "Titlecase" (and (nth 13 fields) | |
909 (string (or (decode-char | |
910 'ucs | |
911 (string-to-number | |
912 (nth 13 fields) 16)) | |
913 ??)))) | |
914 | |
915 ;; XEmacs addition. | |
916 ;; If we're aware the character is a Han character, provide | |
917 ;; the Unihan information, or tell the user that it's not | |
918 ;; available. | |
919 (if (and (> (length (nth 0 fields)) 13) | |
920 (equal "<CJK Ideograph" | |
921 (substring (nth 0 fields) 0 14))) | |
922 (if (and unihan-database-handle | |
923 (setq unihan-match | |
924 (get-database (format "%04X" char) | |
925 unihan-database-handle) | |
926 unihan-match | |
927 (and unihan-match (read unihan-match)))) | |
928 (with-temp-buffer | |
929 (insert-file-contents describe-char-unihan-file | |
930 nil (first unihan-match) | |
931 (second unihan-match)) | |
932 (goto-char (point-min)) | |
933 (while (re-search-forward | |
934 "^U\\+[0-9A-F]+\t\\(k[^\t]+\\)\t\\(.*\\)$" | |
935 nil t) | |
936 (push | |
937 (list | |
938 (or (gethash | |
939 (match-string 1) | |
940 describe-char-unihan-field-descriptions) | |
941 (match-string 1)) | |
942 (decode-coding-string (match-string 2) 'utf-8)) | |
943 unihan-info)) | |
944 (close-database unihan-database-handle) | |
945 unihan-info) | |
946 ;; It's a Han character, but Unihan.txt is not | |
947 ;; available. Tell the user. | |
948 (list | |
949 '("Unihan" | |
950 "No Unihan information available; is \ | |
951 `describe-char-unihan-file' set, and its cache initialized?"))))))))))) | |
952 | |
953 ;; Return information about how CHAR is displayed at the buffer | |
954 ;; position POS. If the selected frame is on a graphic display, | |
955 ;; return a cons (FONTNAME . GLYPH-CODE). Otherwise, return a string | |
956 ;; describing the terminal codes for the character. | |
957 (defun describe-char-display (pos char) | |
958 (let* ((frame (selected-frame)) | |
959 (charset (char-charset char)) | |
960 (ccl (or (and (charset-property charset 'encode-as-utf-8) | |
961 ccl-encode-to-ucs-2) | |
962 (charset-property charset 'ccl-program))) | |
963 (ccl-vector (make-vector 8 0))) | |
964 (if (display-graphic-p (selected-frame)) | |
965 (list | |
966 (font-instance-name | |
967 (face-font-instance (or (get-char-property pos 'face) | |
968 'default) | |
969 (selected-window) | |
970 charset)) | |
971 (cond | |
972 ((and ccl (eq 'x (frame-type frame))) | |
973 (setq char (split-char char)) | |
974 (aset ccl-vector 0 (charset-id charset)) | |
975 (aset ccl-vector 1 (second char)) | |
976 (if (= 2 (charset-dimension charset)) | |
977 (aset ccl-vector 2 (third char))) | |
978 (ccl-execute ccl ccl-vector) | |
979 (if (= 2 (charset-dimension charset)) | |
980 (logior (lsh (aref ccl-vector 1) 8) | |
981 (aref ccl-vector 2)) | |
982 (aref ccl-vector 1))) | |
983 ;; #### We don't handle the X case where redisplay falls back to an | |
984 ;; ISO 10646-1 font at runtime. | |
985 ((eq 'x (frame-type frame)) | |
986 (if (= 2 (charset-dimension charset)) | |
987 (prog2 | |
988 (setq char (split-char char)) | |
989 (logior (lsh (second char) 8) | |
990 (third char))) | |
991 (second (split-char char)))) | |
992 ;; Otherwise we assume we're using Unicode. | |
993 (t | |
994 (encode-char char 'ucs)))) | |
995 (let* ((coding (console-tty-output-coding-system (device-console))) | |
996 (encoded (encode-coding-string char coding))) | |
997 (if encoded | |
998 (format "%s, coding system %s" | |
999 (encoded-string-description encoded coding) | |
1000 (coding-system-name coding))))))) | |
1001 | |
1002 | |
1003 ;;;###autoload | |
1004 (defun describe-char (pos) | |
1005 "Describe the character after POS (interactively, the character after point). | |
1006 The information includes character code, charset and code points in it, | |
1007 syntax, category, how the character is encoded in a file, | |
1008 character composition information (if relevant), | |
1009 as well as widgets, buttons, overlays, and text properties." | |
1010 (interactive "d") | |
1011 (if (>= pos (point-max)) | |
1012 (error "No character follows specified position")) | |
1013 (let* ((char (char-after pos)) | |
1014 (charset (char-charset char)) | |
1015 (composition (find-composition pos nil nil t)) | |
1016 (component-chars nil) | |
1017 (display-table | |
1018 (specifier-instance current-display-table (selected-window))) | |
1019 (disp-table-entry (and display-table | |
1020 (get-display-table char display-table))) | |
1021 (extents (mapcar #'(lambda (o) (extent-properties o)) | |
1022 (extents-at pos))) | |
1023 (char-description (single-key-description char)) | |
1024 (text-props-desc | |
1025 (let ((tmp-buf (generate-new-buffer " *text-props*"))) | |
1026 (unwind-protect | |
1027 (progn | |
1028 (describe-text-properties pos tmp-buf) | |
1029 (with-current-buffer tmp-buf (buffer-string))) | |
1030 (kill-buffer tmp-buf)))) | |
1031 item-list max-width unicode unicode-formatted unicode-error) | |
1032 | |
1033 | |
1034 (setq unicode-error | |
1035 ;; XEmacs change, check does the character represent a Unicode | |
1036 ;; error sequence. | |
1037 (get-char-table char unicode-error-default-translation-table) | |
1038 unicode (and (not unicode-error) (encode-char char 'ucs)) | |
1039 unicode-formatted (if unicode-error | |
1040 (format | |
1041 "Invalid Unicode sequence, ?\x%02x on disk" | |
1042 unicode-error) | |
1043 (if (and unicode (natnump unicode)) | |
1044 (format (if (> unicode #xFFFF) | |
1045 "U+%06X" "U+%04X") | |
1046 unicode) | |
1047 "")) | |
1048 item-list | |
1049 `(("character" | |
1050 ,(format "%s (%s, %d, #o%o, #x%x)" | |
1051 (apply 'propertize char-description | |
1052 (text-properties-at pos)) | |
1053 unicode-formatted | |
1054 char | |
1055 char | |
1056 char)) | |
1057 ("charset" | |
1058 ,(lexical-let | |
1059 ((charset-name (symbol-name charset))) | |
1060 `(progn | |
1061 (widget-create 'push-button | |
1062 :notify ,(lambda (&rest ignored-arg) | |
1063 (with-displaying-help-buffer | |
1064 (lambda nil | |
1065 (charset-description | |
1066 (intern charset-name))) | |
1067 charset-name)) | |
1068 ,charset-name) | |
1069 (widget-insert (format " (%s)" (charset-description | |
1070 ',charset)))))) | |
1071 ("code point" | |
1072 ,(let ((split (split-char char))) | |
1073 `(widget-create 'push-button | |
1074 ; :notify | |
1075 ; ,(lambda (&rest ignored-arg) | |
1076 ; (with-selected-wind | |
1077 ; insert-gui-button | |
1078 ; (make-gui-button | |
1079 ,(if (= (charset-dimension charset) 1) | |
1080 (format "#x%02X" (nth 1 split)) | |
1081 (format "#x%02X #x%02X" (nth 1 split) | |
1082 (nth 2 split)))))) | |
1083 ("syntax" | |
1084 ,(let ((syntax | |
1085 (syntax-string-to-code (string (syntax-after pos))))) | |
1086 (with-temp-buffer | |
1087 (describe-syntax-code syntax (current-buffer)) | |
1088 ;; Remove the newline. | |
1089 (delete-backward-char) | |
1090 (buffer-string)))) | |
1091 ;; XEmacs; #### add category support. | |
1092 ; ("category" | |
1093 ; ,@(let ((category-set (char-category-set char))) | |
1094 ; (if (not category-set) | |
1095 ; '("-- none --") | |
1096 ; (mapcar #'(lambda (x) (format "%c:%s" | |
1097 ; x (category-docstring x))) | |
1098 ; (category-set-mnemonics category-set))))) | |
1099 ; ,@(let ((props (get-char-table char char-code-property-table)) | |
1100 ; ps) | |
1101 ; (when props | |
1102 ; (while props | |
1103 ; (push (format "%s:" (pop props)) ps) | |
1104 ; (push (format "%s;" (pop props)) ps)) | |
1105 ; (list (cons "Properties" (nreverse ps))))) | |
1106 ("to input" | |
1107 ,@(let ((key-list (and-fboundp #'quail-find-key | |
1108 current-input-method | |
1109 (quail-find-key char)))) | |
1110 (if (consp key-list) | |
1111 (list "type" | |
1112 (mapconcat #'(lambda (x) (concat "\"" x "\"")) | |
1113 key-list " or ") | |
1114 "with" | |
1115 `(insert-text-button | |
1116 ,current-input-method | |
1117 'type 'help-input-method | |
1118 'help-args '(,current-input-method)))))) | |
1119 ; ("buffer code" | |
1120 ; ,(encoded-string-description | |
1121 ; (string-as-unibyte (char-to-string char) nil)) | |
1122 ("file code" | |
1123 ,@(let* ((coding buffer-file-coding-system) | |
1124 ;; ### XEmacs; use encode-coding-char once | |
1125 ;; merged. | |
1126 (encoded (encode-coding-string char coding))) | |
1127 (if encoded | |
1128 (list (encoded-string-description encoded coding) | |
1129 (format "(encoded by coding system %S)" | |
1130 (coding-system-name coding))) | |
1131 (list "not encodable by coding system" | |
1132 (coding-system-name coding))))) | |
1133 ("display" | |
1134 ,(cond | |
1135 (disp-table-entry | |
1136 ;; XEmacs change; just use the print syntax of the display | |
1137 ;; table entry. Might be possible to improve this, but | |
1138 ;; nothing occurs to me right now. | |
1139 (format "by display table entry [%S] " disp-table-entry)) | |
1140 (composition | |
1141 (let ((from (car composition)) | |
1142 (to (nth 1 composition)) | |
1143 (next (1+ pos)) | |
1144 (components (nth 2 composition)) | |
1145 ch) | |
1146 (setcar composition | |
1147 (and (< from pos) (buffer-substring from pos))) | |
1148 (setcar (cdr composition) | |
1149 (and (< next to) (buffer-substring next to))) | |
1150 (dotimes (i (length components)) | |
1151 (if (integerp (setq ch (aref components i))) | |
1152 (push (cons ch (describe-char-display pos ch)) | |
1153 component-chars))) | |
1154 (setq component-chars (nreverse component-chars)) | |
1155 (format "composed to form \"%s\" (see below)" | |
1156 (buffer-substring from to)))) | |
1157 (t | |
1158 (let ((display (describe-char-display pos char))) | |
1159 (if (display-graphic-p (selected-frame)) | |
1160 (if display | |
1161 (concat | |
1162 "by this font (glyph code)\n" | |
1163 (format " %s (#x%02X)" | |
1164 (first display) (second display))) | |
1165 "no font available") | |
1166 (if display | |
1167 (format "terminal code %s" display) | |
1168 "not encodable for terminal")))))) | |
1169 ,@(let ((face | |
1170 (if (not (or disp-table-entry composition)) | |
1171 (cond | |
1172 ;; XEmacs #### Implement this. | |
1173 ; ((and show-trailing-whitespace | |
1174 ; (save-excursion (goto-char pos) | |
1175 ; (looking-at "[ \t]+$"))) | |
1176 ; 'trailing-whitespace) | |
1177 ; ((and nobreak-char-display unicode (eq unicode '#xa0)) | |
1178 ; 'nobreak-space) | |
1179 ; ((and nobreak-char-display unicode (eq unicode '#xad)) | |
1180 ; 'escape-glyph) | |
1181 ((and (< char 32) (not (memq char '(9 10)))) | |
1182 'escape-glyph))))) | |
1183 (if face (list (list "hardcoded face" | |
1184 `(insert-gui-button | |
1185 (make-gui-button | |
1186 ,(symbol-name face))))))) | |
1187 ,@(let ((unicodedata (and unicode | |
1188 (describe-char-unicode-data unicode)))) | |
1189 (if unicodedata | |
1190 (cons (list "Unicode data" " ") unicodedata))))) | |
1191 (setq max-width (apply #'max (mapcar #'(lambda (x) | |
1192 (if (cadr x) (length (car x)) 0)) | |
1193 item-list))) | |
1194 ; (help-setup-xref nil (interactive-p)) | |
1195 (with-displaying-help-buffer | |
1196 (lambda () | |
1197 (with-current-buffer standard-output | |
1198 ; (set-buffer-multibyte multibyte-p) | |
1199 (let ((formatter (format "%%%ds:" max-width))) | |
1200 (dolist (elt item-list) | |
1201 (when (cadr elt) | |
1202 (insert (format formatter (car elt))) | |
1203 (dolist (clm (cdr elt)) | |
1204 (if (consp clm) | |
1205 (progn (insert " ") (eval clm)) | |
1206 (when (>= (+ (current-column) | |
1207 (or (string-match "\n" clm) | |
1208 (string-width clm)) | |
1209 1) | |
1210 (window-width)) | |
1211 (insert "\n") | |
1212 (indent-to (1+ max-width))) | |
1213 (insert " " clm))) | |
1214 (insert "\n")))) | |
1215 | |
1216 (when extents | |
1217 (save-excursion | |
1218 (goto-char (point-min)) | |
1219 (re-search-forward "character:[ \t\n]+") | |
1220 (let* ((end (+ (point) (length char-description)))) | |
1221 (mapc #'(lambda (props) | |
1222 (let ((o (make-extent (point) end))) | |
1223 (while props | |
1224 (set-extent-property o (car props) (nth 1 props)) | |
1225 (setq props (cddr props))))) | |
1226 extents)))) | |
1227 | |
1228 ;; XEmacs change; don't give GUI- or TTY-specific detail about the | |
1229 ;; display table entry, the #'specifier-instance call above dealt | |
1230 ;; with that. | |
1231 ; (when disp-table-entry ...) | |
1232 | |
1233 ;; XEmacs; this doesn't work now. | |
1234 (when composition | |
1235 (insert "\nComposed") | |
1236 (if (car composition) | |
1237 (if (cadr composition) | |
1238 (insert " with the surrounding characters \"" | |
1239 (car composition) "\" and \"" | |
1240 (cadr composition) "\"") | |
1241 (insert " with the preceding character(s) \"" | |
1242 (car composition) "\"")) | |
1243 (if (cadr composition) | |
1244 (insert " with the following character(s) \"" | |
1245 (cadr composition) "\""))) | |
1246 (insert " by the rule:\n\t(" | |
1247 (mapconcat (lambda (x) | |
1248 (format (if (consp x) "%S" "?%c") x)) | |
1249 (nth 2 composition) | |
1250 " ") | |
1251 ")") | |
1252 (insert "\nThe component character(s) are displayed by ") | |
1253 ;; XEmacs #### Once composition is in place, this should be | |
1254 ;; a (font-instance-name (face-font-instance [...])) call. | |
1255 (if (display-graphic-p (selected-frame)) | |
1256 (progn | |
1257 (insert "these fonts (glyph codes):") | |
1258 (dolist (elt component-chars) | |
1259 (insert "\n " (car elt) ?: | |
1260 (propertize " " 'display '(space :align-to 5)) | |
1261 (if (cdr elt) | |
1262 (format "%s (#x%02X)" (cadr elt) (cddr elt)) | |
1263 "-- no font --")))) | |
1264 (insert "these terminal codes:") | |
1265 (dolist (elt component-chars) | |
1266 (insert "\n " (car elt) ":" | |
1267 (propertize " " 'display '(space :align-to 5)) | |
1268 (or (cdr elt) "-- not encodable --")))) | |
1269 (insert "\nSee the variable `reference-point-alist' for " | |
1270 "the meaning of the rule.\n")) | |
1271 | |
1272 (if text-props-desc (insert text-props-desc)) | |
1273 ; (setq help-xref-stack-item (list 'help-insert-string (buffer-string))) | |
1274 (toggle-read-only 1) | |
1275 (print-help-return-message))) | |
1276 (format "Describe %c" (char-after pos))))) | |
1277 | |
1278 (defalias 'describe-char-after 'describe-char) | |
1279 (make-obsolete 'describe-char-after 'describe-char "22.1") | |
1280 | |
1281 (provide 'descr-text) | |
1282 | |
1283 ;; arch-tag: fc55a498-f3e9-4312-b5bd-98cc02480af1 | |
1284 ;;; descr-text.el ends here |