comparison lisp/unicode.el @ 3659:98af8a976fc3

[xemacs-hg @ 2006-11-05 22:31:31 by aidan] Support specifying fonts for particular character sets in Mule; support translation to ISO 10646-1 for Mule character sets without an otherwise matching font; move to a vector of X11-charset-X11-registry instead of a regex for the charset-registry property.
author aidan
date Sun, 05 Nov 2006 22:31:46 +0000
parents bcc2611d4cfc
children 5724b7632db3
comparison
equal deleted inserted replaced
3658:0db1aaedbbef 3659:98af8a976fc3
27 27
28 ;; Lisp support for Unicode, e.g. initialize the translation tables. 28 ;; Lisp support for Unicode, e.g. initialize the translation tables.
29 29
30 ;;; Code: 30 ;;; Code:
31 31
32 ; ;; Subsets of Unicode. 32 ;; GNU Emacs has the charsets:
33 33
34 ; #### what is this bogosity ... "chars 96, final ?2" !!?! 34 ;; mule-unicode-2500-33ff
35 ; (make-charset 'mule-unicode-2500-33ff 35 ;; mule-unicode-e000-ffff
36 ; "Unicode characters of the range U+2500..U+33FF." 36 ;; mule-unicode-0100-24ff
37 ; '(dimension 37
38 ; 2 38 ;; built-in. This is a hack--and an incomplete hack at that--against the
39 ; registry "ISO10646-1" 39 ;; spirit and the letter of standard ISO 2022 character sets. Instead of
40 ; chars 96 40 ;; this, we have the jit-ucs-charset-N Mule character sets, created in
41 ; columns 1 41 ;; unicode.c on encountering a Unicode code point that we don't recognise,
42 ; direction l2r 42 ;; and saved in ISO 2022 coding systems using the UTF-8 escape described in
43 ; final ?2 43 ;; ISO-IR 196.
44 ; graphic 0
45 ; short-name "Unicode subset 2"
46 ; long-name "Unicode subset (U+2500..U+33FF)"
47 ; ))
48
49
50 ; (make-charset 'mule-unicode-e000-ffff
51 ; "Unicode characters of the range U+E000..U+FFFF."
52 ; '(dimension
53 ; 2
54 ; registry "ISO10646-1"
55 ; chars 96
56 ; columns 1
57 ; direction l2r
58 ; final ?3
59 ; graphic 0
60 ; short-name "Unicode subset 3"
61 ; long-name "Unicode subset (U+E000..U+FFFF)"
62 ; ))
63
64
65 ; (make-charset 'mule-unicode-0100-24ff
66 ; "Unicode characters of the range U+0100..U+24FF."
67 ; '(dimension
68 ; 2
69 ; registry "ISO10646-1"
70 ; chars 96
71 ; columns 1
72 ; direction l2r
73 ; final ?1
74 ; graphic 0
75 ; short-name "Unicode subset"
76 ; long-name "Unicode subset (U+0100..U+24FF)"
77 ; ))
78
79 44
80 ;; accessed in loadup.el, mule-cmds.el; see discussion in unicode.c 45 ;; accessed in loadup.el, mule-cmds.el; see discussion in unicode.c
81 (defvar load-unicode-tables-at-dump-time (eq system-type 'windows-nt) 46 (defvar load-unicode-tables-at-dump-time (eq system-type 'windows-nt)
82 "[INTERNAL] Whether to load the Unicode tables at dump time. 47 "[INTERNAL] Whether to load the Unicode tables at dump time.
83 Setting this at run-time does nothing.") 48 Setting this at run-time does nothing.")
303 type utf-8 268 type utf-8
304 little-endian t 269 little-endian t
305 need-bom t)) 270 need-bom t))
306 271
307 (defun decode-char (quote-ucs code &optional restriction) 272 (defun decode-char (quote-ucs code &optional restriction)
308 "FSF compatibility--return Mule character with Unicode codepoint `code'. 273 "FSF compatibility--return Mule character with Unicode codepoint CODE.
309 The second argument must be 'ucs, the third argument is ignored. " 274 The second argument must be 'ucs, the third argument is ignored. "
310 (assert (eq quote-ucs 'ucs) t 275 (assert (eq quote-ucs 'ucs) t
311 "Sorry, decode-char doesn't yet support anything but the UCS. ") 276 "Sorry, decode-char doesn't yet support anything but the UCS. ")
312 (unicode-to-char code)) 277 (unicode-to-char code))
313 278
314 (defun encode-char (char quote-ucs &optional restriction) 279 (defun encode-char (char quote-ucs &optional restriction)
315 "FSF compatibility--return the Unicode code point of `char'. 280 "FSF compatibility--return the Unicode code point of CHAR.
316 The second argument must be 'ucs, the third argument is ignored. " 281 The second argument must be 'ucs, the third argument is ignored. "
317 (assert (eq quote-ucs 'ucs) t 282 (assert (eq quote-ucs 'ucs) t
318 "Sorry, encode-char doesn't yet support anything but the UCS. ") 283 "Sorry, encode-char doesn't yet support anything but the UCS. ")
319 (char-to-unicode char)) 284 (char-to-unicode char))
320 285
321 (when (featurep 'mule)
322 ;; This CCL program is used for displaying the fallback UCS character set,
323 ;; and can be repurposed to lao and the IPA, all going well.
324 ;;
325 ;; define-ccl-program is available after mule-ccl is loaded, much later
326 ;; than this file in the build process. The below is the result of
327 ;;
328 ;; (macroexpand
329 ;; '(define-ccl-program ccl-encode-to-ucs-2
330 ;; `(1
331 ;; ((r1 = (r1 << 8))
332 ;; (r1 = (r1 | r2))
333 ;; (mule-to-unicode r0 r1)
334 ;; (r1 = (r0 >> 8))
335 ;; (r2 = (r0 & 255))))
336 ;; "CCL program to transform Mule characters to UCS-2."))
337 ;;
338 ;; and it should occasionally be confirmed that the correspondence still
339 ;; holds.
340
341 (let ((prog [1 10 131127 8 98872 65823 147513 8 82009 255 22]))
342 (defconst ccl-encode-to-ucs-2 prog
343 "CCL program to transform Mule characters to UCS-2.")
344 (put (quote ccl-encode-to-ucs-2) (quote ccl-program-idx)
345 (register-ccl-program (quote ccl-encode-to-ucs-2) prog)) nil))
346
347 ;; Won't do this just yet, though.
348 ;; (set-charset-registry 'lao "iso10646-1")
349 ;; (set-charset-ccl-program 'lao 'ccl-encode-to-ucs-2)
350 ;; (set-charset-registry 'ipa "iso10646-1")
351 ;; (set-charset-ccl-program 'ipa 'ccl-encode-to-ucs-2)
352
353 ;; #### UTF-7 is not yet implemented, and it's tricky to do. There's 286 ;; #### UTF-7 is not yet implemented, and it's tricky to do. There's
354 ;; an implementation in appendix A.1 of the Unicode Standard, Version 287 ;; an implementation in appendix A.1 of the Unicode Standard, Version
355 ;; 2.0, but I don't know its licensing characteristics. 288 ;; 2.0, but I don't know its licensing characteristics.
356 289
357 ; (make-coding-system 290 ; (make-coding-system
358 ; 'utf-7 'unicode 291 ; 'utf-7 'unicode
359 ; "UTF-7" 292 ; "UTF-7"
360 ; '(mnemonic "UTF7" 293 ; '(mnemonic "UTF7"
361 ; documentation 294 ; documentation; "UTF-7 Unicode encoding -- 7-bit-ASCII modal Internet-mail-compatible
362 ; "UTF-7 Unicode encoding -- 7-bit-ASCII modal Internet-mail-compatible
363 ; encoding especially designed for headers, with the following 295 ; encoding especially designed for headers, with the following
364 ; properties: 296 ; properties:
365 297
366 ; -- Only characters that are considered safe for passing through any mail 298 ; -- Only characters that are considered safe for passing through any mail
367 ; gateway without damage are used. 299 ; gateway without damage are used.