Mercurial > hg > xemacs-beta
diff lisp/unicode.el @ 3659:98af8a976fc3
[xemacs-hg @ 2006-11-05 22:31:31 by aidan]
Support specifying fonts for particular character sets in Mule; support
translation to ISO 10646-1 for Mule character sets without an otherwise
matching font; move to a vector of X11-charset-X11-registry instead of a
regex for the charset-registry property.
author | aidan |
---|---|
date | Sun, 05 Nov 2006 22:31:46 +0000 |
parents | bcc2611d4cfc |
children | 5724b7632db3 |
line wrap: on
line diff
--- a/lisp/unicode.el Sat Nov 04 22:51:03 2006 +0000 +++ b/lisp/unicode.el Sun Nov 05 22:31:46 2006 +0000 @@ -29,53 +29,18 @@ ;;; Code: -; ;; Subsets of Unicode. +;; GNU Emacs has the charsets: -; #### what is this bogosity ... "chars 96, final ?2" !!?! -; (make-charset 'mule-unicode-2500-33ff -; "Unicode characters of the range U+2500..U+33FF." -; '(dimension -; 2 -; registry "ISO10646-1" -; chars 96 -; columns 1 -; direction l2r -; final ?2 -; graphic 0 -; short-name "Unicode subset 2" -; long-name "Unicode subset (U+2500..U+33FF)" -; )) - +;; mule-unicode-2500-33ff +;; mule-unicode-e000-ffff +;; mule-unicode-0100-24ff -; (make-charset 'mule-unicode-e000-ffff -; "Unicode characters of the range U+E000..U+FFFF." -; '(dimension -; 2 -; registry "ISO10646-1" -; chars 96 -; columns 1 -; direction l2r -; final ?3 -; graphic 0 -; short-name "Unicode subset 3" -; long-name "Unicode subset (U+E000+FFFF)" -; )) - - -; (make-charset 'mule-unicode-0100-24ff -; "Unicode characters of the range U+0100..U+24FF." -; '(dimension -; 2 -; registry "ISO10646-1" -; chars 96 -; columns 1 -; direction l2r -; final ?1 -; graphic 0 -; short-name "Unicode subset" -; long-name "Unicode subset (U+0100..U+24FF)" -; )) - +;; built-in. This is hack--and an incomplete hack at that--against the +;; spirit and the letter of standard ISO 2022 character sets. Instead of +;; this, we have the jit-ucs-charset-N Mule character sets, created in +;; unicode.c on encountering a Unicode code point that we don't recognise, +;; and saved in ISO 2022 coding systems using the UTF-8 escape described in +;; ISO-IR 196. ;; accessed in loadup.el, mule-cmds.el; see discussion in unicode.c (defvar load-unicode-tables-at-dump-time (eq system-type 'windows-nt) @@ -305,51 +270,19 @@ need-bom t)) (defun decode-char (quote-ucs code &optional restriction) - "FSF compatibility--return Mule character with Unicode codepoint `code'. + "FSF compatibility--return Mule character with Unicode codepoint CODE. The second argument must be 'ucs, the third argument is ignored. " (assert (eq quote-ucs 'ucs) t "Sorry, decode-char doesn't yet support anything but the UCS. ") (unicode-to-char code)) (defun encode-char (char quote-ucs &optional restriction) - "FSF compatibility--return the Unicode code point of `char'. + "FSF compatibility--return the Unicode code point of CHAR. The second argument must be 'ucs, the third argument is ignored. " (assert (eq quote-ucs 'ucs) t "Sorry, encode-char doesn't yet support anything but the UCS. ") (char-to-unicode char)) -(when (featurep 'mule) - ;; This CCL program is used for displaying the fallback UCS character set, - ;; and can be repurposed to lao and the IPA, all going well. - ;; - ;; define-ccl-program is available after mule-ccl is loaded, much later - ;; than this file in the build process. The below is the result of - ;; - ;; (macroexpand - ;; '(define-ccl-program ccl-encode-to-ucs-2 - ;; `(1 - ;; ((r1 = (r1 << 8)) - ;; (r1 = (r1 | r2)) - ;; (mule-to-unicode r0 r1) - ;; (r1 = (r0 >> 8)) - ;; (r2 = (r0 & 255)))) - ;; "CCL program to transform Mule characters to UCS-2.")) - ;; - ;; and it should occasionally be confirmed that the correspondence still - ;; holds. - - (let ((prog [1 10 131127 8 98872 65823 147513 8 82009 255 22])) - (defconst ccl-encode-to-ucs-2 prog - "CCL program to transform Mule characters to UCS-2.") - (put (quote ccl-encode-to-ucs-2) (quote ccl-program-idx) - (register-ccl-program (quote ccl-encode-to-ucs-2) prog)) nil)) - -;; Won't do this just yet, though. -;; (set-charset-registry 'lao "iso10646-1") -;; (set-charset-ccl-program 'lao 'ccl-encode-to-ucs-2) -;; (set-charset-registry 'ipa "iso10646-1") -;; (set-charset-ccl-program 'ipa 'ccl-encode-to-ucs-2) - ;; #### UTF-7 is not yet implemented, and it's tricky to do. There's ;; an implementation in appendix A.1 of the Unicode Standard, Version ;; 2.0, but I don't know its licensing characteristics. @@ -358,8 +291,7 @@ ; 'utf-7 'unicode ; "UTF-7" ; '(mnemonic "UTF7" -; documentation -; "UTF-7 Unicode encoding -- 7-bit-ASCII modal Internet-mail-compatible +; documentation; "UTF-7 Unicode encoding -- 7-bit-ASCII modal Internet-mail-compatible ; encoding especially designed for headers, with the following ; properties: