comparison lisp/unicode.el @ 4498:f0c81cd2a959

Merge.
author Mats Lidell <matsl@xemacs.org>
date Sun, 10 Aug 2008 23:24:15 +0200
parents 121aadac896e
children 26aae3bacf99
comparison
equal deleted inserted replaced
4497:f863b2ee146f 4498:f0c81cd2a959
71 ("8859-16.TXT" latin-iso8859-16 #xA0 #xFF #x-80) 71 ("8859-16.TXT" latin-iso8859-16 #xA0 #xFF #x-80)
72 ("8859-2.TXT" latin-iso8859-2 #xA0 #xFF #x-80) 72 ("8859-2.TXT" latin-iso8859-2 #xA0 #xFF #x-80)
73 ("8859-3.TXT" latin-iso8859-3 #xA0 #xFF #x-80) 73 ("8859-3.TXT" latin-iso8859-3 #xA0 #xFF #x-80)
74 ("8859-4.TXT" latin-iso8859-4 #xA0 #xFF #x-80) 74 ("8859-4.TXT" latin-iso8859-4 #xA0 #xFF #x-80)
75 ("8859-5.TXT" cyrillic-iso8859-5 #xA0 #xFF #x-80) 75 ("8859-5.TXT" cyrillic-iso8859-5 #xA0 #xFF #x-80)
76 ("8859-6.TXT" arabic-iso8859-6 #xA0 #xFF #x-80)
77 ("8859-7.TXT" greek-iso8859-7 #xA0 #xFF #x-80) 76 ("8859-7.TXT" greek-iso8859-7 #xA0 #xFF #x-80)
78 ("8859-8.TXT" hebrew-iso8859-8 #xA0 #xFF #x-80) 77 ("8859-8.TXT" hebrew-iso8859-8 #xA0 #xFF #x-80)
79 ("8859-9.TXT" latin-iso8859-9 #xA0 #xFF #x-80) 78 ("8859-9.TXT" latin-iso8859-9 #xA0 #xFF #x-80)
80 ;; charset for Big5 does not matter; specifying `big5' will 79 ;; charset for Big5 does not matter; specifying `big5' will
81 ;; automatically make the right thing happen 80 ;; automatically make the right thing happen
152 (declare-fboundp 151 (declare-fboundp
153 (set-default-unicode-precedence-list 152 (set-default-unicode-precedence-list
154 '(ascii control-1 latin-iso8859-1 latin-iso8859-2 latin-iso8859-15 153 '(ascii control-1 latin-iso8859-1 latin-iso8859-2 latin-iso8859-15
155 greek-iso8859-7 hebrew-iso8859-8 ipa cyrillic-iso8859-5 154 greek-iso8859-7 hebrew-iso8859-8 ipa cyrillic-iso8859-5
156 latin-iso8859-16 latin-iso8859-3 latin-iso8859-4 latin-iso8859-9 155 latin-iso8859-16 latin-iso8859-3 latin-iso8859-4 latin-iso8859-9
157 vietnamese-viscii-lower vietnamese-viscii-upper arabic-iso8859-6 156 vietnamese-viscii-lower vietnamese-viscii-upper
158 jit-ucs-charset-0 japanese-jisx0208 japanese-jisx0208-1978 157 jit-ucs-charset-0 japanese-jisx0208 japanese-jisx0208-1978
159 japanese-jisx0212 japanese-jisx0213-1 japanese-jisx0213-2 158 japanese-jisx0212 japanese-jisx0213-1 japanese-jisx0213-2
160 chinese-gb2312 chinese-sisheng chinese-big5-1 chinese-big5-2 159 chinese-gb2312 chinese-sisheng chinese-big5-1 chinese-big5-2
161 indian-is13194 korean-ksc5601 chinese-cns11643-1 chinese-cns11643-2 160 indian-is13194 korean-ksc5601 chinese-cns11643-1 chinese-cns11643-2
162 chinese-isoir165 arabic-1-column arabic-2-column arabic-digit 161 chinese-isoir165
163 composite ethiopic indian-1-column indian-2-column jit-ucs-charset-0 162 composite ethiopic indian-1-column indian-2-column jit-ucs-charset-0
164 katakana-jisx0201 lao thai-tis620 thai-xtis tibetan tibetan-1-column 163 katakana-jisx0201 lao thai-tis620 thai-xtis tibetan tibetan-1-column
165 latin-jisx0201 chinese-cns11643-3 chinese-cns11643-4 164 latin-jisx0201 chinese-cns11643-3 chinese-cns11643-4
166 chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7))))) 165 chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7)))))
167 166
524 "Translation table mapping Unicode error sequences to Latin-1 chars. 523 "Translation table mapping Unicode error sequences to Latin-1 chars.
525 524
526 To transform XEmacs Unicode error sequences to the Latin-1 characters that 525 To transform XEmacs Unicode error sequences to the Latin-1 characters that
527 correspond to the octets on disk, you can use this variable. ") 526 correspond to the octets on disk, you can use this variable. ")
528 527
529 (defvar unicode-error-sequence-regexp-range 528 (defvar unicode-invalid-sequence-regexp-range
530 (and (featurep 'mule) 529 (and (featurep 'mule)
531 (format "%c%c-%c" 530 (format "%c%c-%c"
532 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0) 531 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0)
533 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3) 532 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3)
534 (aref (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be) 3))) 533 (aref (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be) 3)))
562 561
563 (goto-char (point-min)) 562 (goto-char (point-min))
564 ;; Comment out until the issue in 563 ;; Comment out until the issue in
565 ;; 18179.49815.622843.336527@parhasard.net is fixed. 564 ;; 18179.49815.622843.336527@parhasard.net is fixed.
566 (assert t ; (re-search-forward (concat "[" 565 (assert t ; (re-search-forward (concat "["
567 ; unicode-error-sequence-regexp-range 566 ; unicode-invalid-sequence-regexp-range
568 ; "]")) 567 ; "]"))
569 nil 568 nil
570 (format "Could not find char ?\\x%x in buffer" i)))) 569 (format "Could not find char ?\\x%x in buffer" i))))
571 570
572 (defun frob-unicode-errors-region (frob-function begin end &optional buffer) 571 (defun frob-unicode-errors-region (frob-function begin end &optional buffer)
584 (goto-char (point-min)) 583 (goto-char (point-min))
585 (while end 584 (while end
586 (setq begin 585 (setq begin
587 (progn 586 (progn
588 (skip-chars-forward 587 (skip-chars-forward
589 (concat "^" unicode-error-sequence-regexp-range)) 588 (concat "^" unicode-invalid-sequence-regexp-range))
590 (point)) 589 (point))
591 end (and (not (= (point) (point-max))) 590 end (and (not (= (point) (point-max)))
592 (progn 591 (progn
593 (skip-chars-forward 592 (skip-chars-forward
594 unicode-error-sequence-regexp-range) 593 unicode-invalid-sequence-regexp-range)
595 (point)))) 594 (point))))
596 (if end 595 (if end
597 (funcall frob-function begin end)))))) 596 (funcall frob-function begin end))))))
598 597
599 (defun unicode-error-translate-region (begin end &optional buffer table) 598 (defun unicode-error-translate-region (begin end &optional buffer table)
609 (frob-unicode-errors-region 608 (frob-unicode-errors-region
610 (lambda (start finish) 609 (lambda (start finish)
611 (translate-region start finish table)) 610 (translate-region start finish table))
612 begin end buffer)) 611 begin end buffer))
613 612
613 ;; Sure would be nice to be able to use defface here.
614 (copy-face 'highlight 'unicode-invalid-sequence-warning-face)
615
614 (unless (featurep 'mule) 616 (unless (featurep 'mule)
615 ;; We do this in such a roundabout way--instead of having the above defun 617 ;; We do this in such a roundabout way--instead of having the above defun
616 ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have 618 ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have
617 ;; make-docfile.c pick up symbol and function documentation correctly. An 619 ;; make-docfile.c pick up symbol and function documentation correctly. An
618 ;; alternative approach would be to fix make-docfile.c to be able to read 620 ;; alternative approach would be to fix make-docfile.c to be able to read
619 ;; Lisp. 621 ;; Lisp.
620 (mapcar #'unintern 622 (mapcar #'unintern
621 '(ccl-encode-to-ucs-2 unicode-error-default-translation-table 623 '(ccl-encode-to-ucs-2 unicode-error-default-translation-table
622 unicode-error-sequence-regexp-range 624 unicode-invalid-regexp-range frob-unicode-errors-region
623 frob-unicode-errors-region unicode-error-translate-region))) 625 unicode-error-translate-region)))
624 626
625 ;; #### UTF-7 is not yet implemented, and it's tricky to do. There's 627 ;; #### UTF-7 is not yet implemented, and it's tricky to do. There's
626 ;; an implementation in appendix A.1 of the Unicode Standard, Version 628 ;; an implementation in appendix A.1 of the Unicode Standard, Version
627 ;; 2.0, but I don't know its licensing characteristics. 629 ;; 2.0, but I don't know its licensing characteristics.
628 630