Mercurial > hg > xemacs-beta
comparison lisp/unicode.el @ 4498:f0c81cd2a959
Merge.
author | Mats Lidell <matsl@xemacs.org> |
---|---|
date | Sun, 10 Aug 2008 23:24:15 +0200 |
parents | 121aadac896e |
children | 26aae3bacf99 |
comparison
equal
deleted
inserted
replaced
4497:f863b2ee146f | 4498:f0c81cd2a959 |
---|---|
71 ("8859-16.TXT" latin-iso8859-16 #xA0 #xFF #x-80) | 71 ("8859-16.TXT" latin-iso8859-16 #xA0 #xFF #x-80) |
72 ("8859-2.TXT" latin-iso8859-2 #xA0 #xFF #x-80) | 72 ("8859-2.TXT" latin-iso8859-2 #xA0 #xFF #x-80) |
73 ("8859-3.TXT" latin-iso8859-3 #xA0 #xFF #x-80) | 73 ("8859-3.TXT" latin-iso8859-3 #xA0 #xFF #x-80) |
74 ("8859-4.TXT" latin-iso8859-4 #xA0 #xFF #x-80) | 74 ("8859-4.TXT" latin-iso8859-4 #xA0 #xFF #x-80) |
75 ("8859-5.TXT" cyrillic-iso8859-5 #xA0 #xFF #x-80) | 75 ("8859-5.TXT" cyrillic-iso8859-5 #xA0 #xFF #x-80) |
76 ("8859-6.TXT" arabic-iso8859-6 #xA0 #xFF #x-80) | |
77 ("8859-7.TXT" greek-iso8859-7 #xA0 #xFF #x-80) | 76 ("8859-7.TXT" greek-iso8859-7 #xA0 #xFF #x-80) |
78 ("8859-8.TXT" hebrew-iso8859-8 #xA0 #xFF #x-80) | 77 ("8859-8.TXT" hebrew-iso8859-8 #xA0 #xFF #x-80) |
79 ("8859-9.TXT" latin-iso8859-9 #xA0 #xFF #x-80) | 78 ("8859-9.TXT" latin-iso8859-9 #xA0 #xFF #x-80) |
80 ;; charset for Big5 does not matter; specifying `big5' will | 79 ;; charset for Big5 does not matter; specifying `big5' will |
81 ;; automatically make the right thing happen | 80 ;; automatically make the right thing happen |
152 (declare-fboundp | 151 (declare-fboundp |
153 (set-default-unicode-precedence-list | 152 (set-default-unicode-precedence-list |
154 '(ascii control-1 latin-iso8859-1 latin-iso8859-2 latin-iso8859-15 | 153 '(ascii control-1 latin-iso8859-1 latin-iso8859-2 latin-iso8859-15 |
155 greek-iso8859-7 hebrew-iso8859-8 ipa cyrillic-iso8859-5 | 154 greek-iso8859-7 hebrew-iso8859-8 ipa cyrillic-iso8859-5 |
156 latin-iso8859-16 latin-iso8859-3 latin-iso8859-4 latin-iso8859-9 | 155 latin-iso8859-16 latin-iso8859-3 latin-iso8859-4 latin-iso8859-9 |
157 vietnamese-viscii-lower vietnamese-viscii-upper arabic-iso8859-6 | 156 vietnamese-viscii-lower vietnamese-viscii-upper |
158 jit-ucs-charset-0 japanese-jisx0208 japanese-jisx0208-1978 | 157 jit-ucs-charset-0 japanese-jisx0208 japanese-jisx0208-1978 |
159 japanese-jisx0212 japanese-jisx0213-1 japanese-jisx0213-2 | 158 japanese-jisx0212 japanese-jisx0213-1 japanese-jisx0213-2 |
160 chinese-gb2312 chinese-sisheng chinese-big5-1 chinese-big5-2 | 159 chinese-gb2312 chinese-sisheng chinese-big5-1 chinese-big5-2 |
161 indian-is13194 korean-ksc5601 chinese-cns11643-1 chinese-cns11643-2 | 160 indian-is13194 korean-ksc5601 chinese-cns11643-1 chinese-cns11643-2 |
162 chinese-isoir165 arabic-1-column arabic-2-column arabic-digit | 161 chinese-isoir165 |
163 composite ethiopic indian-1-column indian-2-column jit-ucs-charset-0 | 162 composite ethiopic indian-1-column indian-2-column jit-ucs-charset-0 |
164 katakana-jisx0201 lao thai-tis620 thai-xtis tibetan tibetan-1-column | 163 katakana-jisx0201 lao thai-tis620 thai-xtis tibetan tibetan-1-column |
165 latin-jisx0201 chinese-cns11643-3 chinese-cns11643-4 | 164 latin-jisx0201 chinese-cns11643-3 chinese-cns11643-4 |
166 chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7))))) | 165 chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7))))) |
167 | 166 |
524 "Translation table mapping Unicode error sequences to Latin-1 chars. | 523 "Translation table mapping Unicode error sequences to Latin-1 chars. |
525 | 524 |
526 To transform XEmacs Unicode error sequences to the Latin-1 characters that | 525 To transform XEmacs Unicode error sequences to the Latin-1 characters that |
527 correspond to the octets on disk, you can use this variable. ") | 526 correspond to the octets on disk, you can use this variable. ") |
528 | 527 |
529 (defvar unicode-error-sequence-regexp-range | 528 (defvar unicode-invalid-sequence-regexp-range |
530 (and (featurep 'mule) | 529 (and (featurep 'mule) |
531 (format "%c%c-%c" | 530 (format "%c%c-%c" |
532 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0) | 531 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0) |
533 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3) | 532 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3) |
534 (aref (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be) 3))) | 533 (aref (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be) 3))) |
562 | 561 |
563 (goto-char (point-min)) | 562 (goto-char (point-min)) |
564 ;; Comment out until the issue in | 563 ;; Comment out until the issue in |
565 ;; 18179.49815.622843.336527@parhasard.net is fixed. | 564 ;; 18179.49815.622843.336527@parhasard.net is fixed. |
566 (assert t ; (re-search-forward (concat "[" | 565 (assert t ; (re-search-forward (concat "[" |
567 ; unicode-error-sequence-regexp-range | 566 ; unicode-invalid-sequence-regexp-range |
568 ; "]")) | 567 ; "]")) |
569 nil | 568 nil |
570 (format "Could not find char ?\\x%x in buffer" i)))) | 569 (format "Could not find char ?\\x%x in buffer" i)))) |
571 | 570 |
572 (defun frob-unicode-errors-region (frob-function begin end &optional buffer) | 571 (defun frob-unicode-errors-region (frob-function begin end &optional buffer) |
584 (goto-char (point-min)) | 583 (goto-char (point-min)) |
585 (while end | 584 (while end |
586 (setq begin | 585 (setq begin |
587 (progn | 586 (progn |
588 (skip-chars-forward | 587 (skip-chars-forward |
589 (concat "^" unicode-error-sequence-regexp-range)) | 588 (concat "^" unicode-invalid-sequence-regexp-range)) |
590 (point)) | 589 (point)) |
591 end (and (not (= (point) (point-max))) | 590 end (and (not (= (point) (point-max))) |
592 (progn | 591 (progn |
593 (skip-chars-forward | 592 (skip-chars-forward |
594 unicode-error-sequence-regexp-range) | 593 unicode-invalid-sequence-regexp-range) |
595 (point)))) | 594 (point)))) |
596 (if end | 595 (if end |
597 (funcall frob-function begin end)))))) | 596 (funcall frob-function begin end)))))) |
598 | 597 |
599 (defun unicode-error-translate-region (begin end &optional buffer table) | 598 (defun unicode-error-translate-region (begin end &optional buffer table) |
609 (frob-unicode-errors-region | 608 (frob-unicode-errors-region |
610 (lambda (start finish) | 609 (lambda (start finish) |
611 (translate-region start finish table)) | 610 (translate-region start finish table)) |
612 begin end buffer)) | 611 begin end buffer)) |
613 | 612 |
613 ;; Sure would be nice to be able to use defface here. | |
614 (copy-face 'highlight 'unicode-invalid-sequence-warning-face) | |
615 | |
614 (unless (featurep 'mule) | 616 (unless (featurep 'mule) |
615 ;; We do this in such a roundabout way--instead of having the above defun | 617 ;; We do this in such a roundabout way--instead of having the above defun |
616 ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have | 618 ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have |
617 ;; make-docfile.c pick up symbol and function documentation correctly. An | 619 ;; make-docfile.c pick up symbol and function documentation correctly. An |
618 ;; alternative approach would be to fix make-docfile.c to be able to read | 620 ;; alternative approach would be to fix make-docfile.c to be able to read |
619 ;; Lisp. | 621 ;; Lisp. |
620 (mapcar #'unintern | 622 (mapcar #'unintern |
621 '(ccl-encode-to-ucs-2 unicode-error-default-translation-table | 623 '(ccl-encode-to-ucs-2 unicode-error-default-translation-table |
622 unicode-error-sequence-regexp-range | 624 unicode-invalid-regexp-range frob-unicode-errors-region |
623 frob-unicode-errors-region unicode-error-translate-region))) | 625 unicode-error-translate-region))) |
624 | 626 |
625 ;; #### UTF-7 is not yet implemented, and it's tricky to do. There's | 627 ;; #### UTF-7 is not yet implemented, and it's tricky to do. There's |
626 ;; an implementation in appendix A.1 of the Unicode Standard, Version | 628 ;; an implementation in appendix A.1 of the Unicode Standard, Version |
627 ;; 2.0, but I don't know its licensing characteristics. | 629 ;; 2.0, but I don't know its licensing characteristics. |
628 | 630 |