Mercurial > hg > xemacs-beta
comparison lisp/unicode.el @ 3659:98af8a976fc3
[xemacs-hg @ 2006-11-05 22:31:31 by aidan]
Support specifying fonts for particular character sets in Mule; support
translation to ISO 10646-1 for Mule character sets without an otherwise
matching font; move to a vector of X11-charset-X11-registry instead of a
regex for the charset-registry property.
author | aidan |
---|---|
date | Sun, 05 Nov 2006 22:31:46 +0000 |
parents | bcc2611d4cfc |
children | 5724b7632db3 |
comparison (legend: equal | deleted | inserted | replaced)
3658:0db1aaedbbef | 3659:98af8a976fc3 |
---|---|
27 | 27 |
28 ;; Lisp support for Unicode, e.g. initialize the translation tables. | 28 ;; Lisp support for Unicode, e.g. initialize the translation tables. |
29 | 29 |
30 ;;; Code: | 30 ;;; Code: |
31 | 31 |
32 ; ;; Subsets of Unicode. | 32 ;; GNU Emacs has the charsets: |
33 | 33 |
34 ; #### what is this bogosity ... "chars 96, final ?2" !!?! | 34 ;; mule-unicode-2500-33ff |
35 ; (make-charset 'mule-unicode-2500-33ff | 35 ;; mule-unicode-e000-ffff |
36 ; "Unicode characters of the range U+2500..U+33FF." | 36 ;; mule-unicode-0100-24ff |
37 ; '(dimension | 37 |
38 ; 2 | 38 ;; built-in. This is a hack--and an incomplete hack at that--against the |
39 ; registry "ISO10646-1" | 39 ;; spirit and the letter of standard ISO 2022 character sets. Instead of |
40 ; chars 96 | 40 ;; this, we have the jit-ucs-charset-N Mule character sets, created in |
41 ; columns 1 | 41 ;; unicode.c on encountering a Unicode code point that we don't recognise, |
42 ; direction l2r | 42 ;; and saved in ISO 2022 coding systems using the UTF-8 escape described in |
43 ; final ?2 | 43 ;; ISO-IR 196. |
44 ; graphic 0 | |
45 ; short-name "Unicode subset 2" | |
46 ; long-name "Unicode subset (U+2500..U+33FF)" | |
47 ; )) | |
48 | |
49 | |
50 ; (make-charset 'mule-unicode-e000-ffff | |
51 ; "Unicode characters of the range U+E000..U+FFFF." | |
52 ; '(dimension | |
53 ; 2 | |
54 ; registry "ISO10646-1" | |
55 ; chars 96 | |
56 ; columns 1 | |
57 ; direction l2r | |
58 ; final ?3 | |
59 ; graphic 0 | |
60 ; short-name "Unicode subset 3" | |
61 ; long-name "Unicode subset (U+E000+FFFF)" | |
62 ; )) | |
63 | |
64 | |
65 ; (make-charset 'mule-unicode-0100-24ff | |
66 ; "Unicode characters of the range U+0100..U+24FF." | |
67 ; '(dimension | |
68 ; 2 | |
69 ; registry "ISO10646-1" | |
70 ; chars 96 | |
71 ; columns 1 | |
72 ; direction l2r | |
73 ; final ?1 | |
74 ; graphic 0 | |
75 ; short-name "Unicode subset" | |
76 ; long-name "Unicode subset (U+0100..U+24FF)" | |
77 ; )) | |
78 | |
79 | 44 |
80 ;; accessed in loadup.el, mule-cmds.el; see discussion in unicode.c | 45 ;; accessed in loadup.el, mule-cmds.el; see discussion in unicode.c |
81 (defvar load-unicode-tables-at-dump-time (eq system-type 'windows-nt) | 46 (defvar load-unicode-tables-at-dump-time (eq system-type 'windows-nt) |
82 "[INTERNAL] Whether to load the Unicode tables at dump time. | 47 "[INTERNAL] Whether to load the Unicode tables at dump time. |
83 Setting this at run-time does nothing.") | 48 Setting this at run-time does nothing.") |
303 type utf-8 | 268 type utf-8 |
304 little-endian t | 269 little-endian t |
305 need-bom t)) | 270 need-bom t)) |
306 | 271 |
307 (defun decode-char (quote-ucs code &optional restriction) | 272 (defun decode-char (quote-ucs code &optional restriction) |
308 "FSF compatibility--return Mule character with Unicode codepoint `code'. | 273 "FSF compatibility--return Mule character with Unicode codepoint CODE. |
309 The second argument must be 'ucs, the third argument is ignored. " | 274 The second argument must be 'ucs, the third argument is ignored. " |
310 (assert (eq quote-ucs 'ucs) t | 275 (assert (eq quote-ucs 'ucs) t |
311 "Sorry, decode-char doesn't yet support anything but the UCS. ") | 276 "Sorry, decode-char doesn't yet support anything but the UCS. ") |
312 (unicode-to-char code)) | 277 (unicode-to-char code)) |
313 | 278 |
314 (defun encode-char (char quote-ucs &optional restriction) | 279 (defun encode-char (char quote-ucs &optional restriction) |
315 "FSF compatibility--return the Unicode code point of `char'. | 280 "FSF compatibility--return the Unicode code point of CHAR. |
316 The second argument must be 'ucs, the third argument is ignored. " | 281 The second argument must be 'ucs, the third argument is ignored. " |
317 (assert (eq quote-ucs 'ucs) t | 282 (assert (eq quote-ucs 'ucs) t |
318 "Sorry, encode-char doesn't yet support anything but the UCS. ") | 283 "Sorry, encode-char doesn't yet support anything but the UCS. ") |
319 (char-to-unicode char)) | 284 (char-to-unicode char)) |
320 | 285 |
321 (when (featurep 'mule) | |
322 ;; This CCL program is used for displaying the fallback UCS character set, | |
323 ;; and can be repurposed to lao and the IPA, all going well. | |
324 ;; | |
325 ;; define-ccl-program is available after mule-ccl is loaded, much later | |
326 ;; than this file in the build process. The below is the result of | |
327 ;; | |
328 ;; (macroexpand | |
329 ;; '(define-ccl-program ccl-encode-to-ucs-2 | |
330 ;; `(1 | |
331 ;; ((r1 = (r1 << 8)) | |
332 ;; (r1 = (r1 | r2)) | |
333 ;; (mule-to-unicode r0 r1) | |
334 ;; (r1 = (r0 >> 8)) | |
335 ;; (r2 = (r0 & 255)))) | |
336 ;; "CCL program to transform Mule characters to UCS-2.")) | |
337 ;; | |
338 ;; and it should occasionally be confirmed that the correspondence still | |
339 ;; holds. | |
340 | |
341 (let ((prog [1 10 131127 8 98872 65823 147513 8 82009 255 22])) | |
342 (defconst ccl-encode-to-ucs-2 prog | |
343 "CCL program to transform Mule characters to UCS-2.") | |
344 (put (quote ccl-encode-to-ucs-2) (quote ccl-program-idx) | |
345 (register-ccl-program (quote ccl-encode-to-ucs-2) prog)) nil)) | |
346 | |
347 ;; Won't do this just yet, though. | |
348 ;; (set-charset-registry 'lao "iso10646-1") | |
349 ;; (set-charset-ccl-program 'lao 'ccl-encode-to-ucs-2) | |
350 ;; (set-charset-registry 'ipa "iso10646-1") | |
351 ;; (set-charset-ccl-program 'ipa 'ccl-encode-to-ucs-2) | |
352 | |
353 ;; #### UTF-7 is not yet implemented, and it's tricky to do. There's | 286 ;; #### UTF-7 is not yet implemented, and it's tricky to do. There's |
354 ;; an implementation in appendix A.1 of the Unicode Standard, Version | 287 ;; an implementation in appendix A.1 of the Unicode Standard, Version |
355 ;; 2.0, but I don't know its licensing characteristics. | 288 ;; 2.0, but I don't know its licensing characteristics. |
356 | 289 |
357 ; (make-coding-system | 290 ; (make-coding-system |
358 ; 'utf-7 'unicode | 291 ; 'utf-7 'unicode |
359 ; "UTF-7" | 292 ; "UTF-7" |
360 ; '(mnemonic "UTF7" | 293 ; '(mnemonic "UTF7" |
361 ; documentation | 294 ; documentation; "UTF-7 Unicode encoding -- 7-bit-ASCII modal Internet-mail-compatible |
362 ; "UTF-7 Unicode encoding -- 7-bit-ASCII modal Internet-mail-compatible | |
363 ; encoding especially designed for headers, with the following | 295 ; encoding especially designed for headers, with the following |
364 ; properties: | 296 ; properties: |
365 | 297 |
366 ; -- Only characters that are considered safe for passing through any mail | 298 ; -- Only characters that are considered safe for passing through any mail |
367 ; gateway without damage are used. | 299 ; gateway without damage are used. |