Mercurial > hg > xemacs-beta
comparison lisp/mule/mule-coding.el @ 4569:80e0588fb42f
Merge.
| author | Aidan Kehoe <kehoea@parhasard.net> |
|---|---|
| date | Sun, 28 Dec 2008 14:55:02 +0000 |
| parents | 1d74a1d115ee |
| children | e0a8715fdb1f |
comparison
equal
deleted
inserted
replaced
| 4537:7ca6d57ce12d | 4569:80e0588fb42f |
|---|---|
| 102 'ctext 'iso2022 | 102 'ctext 'iso2022 |
| 103 "Compound Text" | 103 "Compound Text" |
| 104 '(charset-g0 ascii | 104 '(charset-g0 ascii |
| 105 charset-g1 latin-iso8859-1 | 105 charset-g1 latin-iso8859-1 |
| 106 eol-type nil | 106 eol-type nil |
| 107 safe-charsets t ;; Reasonable | |
| 107 mnemonic "CText")) | 108 mnemonic "CText")) |
| 108 | 109 |
| 109 (make-coding-system | 110 (make-coding-system |
| 110 'iso-2022-8bit-ss2 'iso2022 | 111 'iso-2022-8bit-ss2 'iso2022 |
| 111 "ISO-2022 8-bit w/SS2" | 112 "ISO-2022 8-bit w/SS2" |
| 112 '(charset-g0 ascii | 113 '(charset-g0 ascii |
| 113 charset-g1 latin-iso8859-1 | 114 charset-g1 latin-iso8859-1 |
| 114 charset-g2 t ;; unspecified but can be used later. | 115 charset-g2 t ;; unspecified but can be used later. |
| 115 short t | 116 short t |
| 117 safe-charsets (ascii katakana-jisx0201 japanese-jisx0208-1978 | |
| 118 japanese-jisx0208 japanese-jisx0212 japanese-jisx0213-1 | |
| 119 japanese-jisx0213-2) | |
| 116 mnemonic "ISO8/SS" | 120 mnemonic "ISO8/SS" |
| 117 documentation "ISO 2022 based 8-bit encoding using SS2 for 96-charset" | 121 documentation "ISO 2022 based 8-bit encoding using SS2 for 96-charset" |
| 118 )) | 122 )) |
| 119 | 123 |
| 120 (make-coding-system | 124 (make-coding-system |
| 122 "ISO-2022 7-bit w/SS2" | 126 "ISO-2022 7-bit w/SS2" |
| 123 '(charset-g0 ascii | 127 '(charset-g0 ascii |
| 124 charset-g2 t ;; unspecified but can be used later. | 128 charset-g2 t ;; unspecified but can be used later. |
| 125 seven t | 129 seven t |
| 126 short t | 130 short t |
| 131 safe-charsets t | |
| 127 mnemonic "ISO7/SS" | 132 mnemonic "ISO7/SS" |
| 128 documentation "ISO 2022 based 7-bit encoding using SS2 for 96-charset" | 133 documentation "ISO 2022 based 7-bit encoding using SS2 for 96-charset" |
| 129 eol-type nil)) | 134 eol-type nil)) |
| 130 | 135 |
| 131 ;; (copy-coding-system 'iso-2022-7bit-ss2 'iso-2022-jp-2) | 136 ;; (copy-coding-system 'iso-2022-7bit-ss2 'iso-2022-jp-2) |
| 134 "ISO-2022-JP-2" | 139 "ISO-2022-JP-2" |
| 135 '(charset-g0 ascii | 140 '(charset-g0 ascii |
| 136 charset-g2 t ;; unspecified but can be used later. | 141 charset-g2 t ;; unspecified but can be used later. |
| 137 seven t | 142 seven t |
| 138 short t | 143 short t |
| 144 safe-charsets t | |
| 139 mnemonic "ISO7/SS" | 145 mnemonic "ISO7/SS" |
| 140 eol-type nil)) | 146 eol-type nil)) |
| 141 | 147 |
| 142 (make-coding-system | 148 (make-coding-system |
| 143 'iso-2022-7bit 'iso2022 | 149 'iso-2022-7bit 'iso2022 |
| 144 "ISO 2022 7-bit" | 150 "ISO 2022 7-bit" |
| 145 '(charset-g0 ascii | 151 '(charset-g0 ascii |
| 146 seven t | 152 seven t |
| 147 short t | 153 short t |
| 154 safe-charsets t | |
| 148 mnemonic "ISO7" | 155 mnemonic "ISO7" |
| 149 documentation "ISO-2022-based 7-bit encoding using only G0" | 156 documentation "ISO-2022-based 7-bit encoding using only G0" |
| 150 )) | 157 )) |
| 151 | 158 |
| 152 ;; compatibility for old XEmacsen | 159 ;; compatibility for old XEmacsen |
| 156 'iso-2022-8 'iso2022 | 163 'iso-2022-8 'iso2022 |
| 157 "ISO-2022 8-bit" | 164 "ISO-2022 8-bit" |
| 158 '(charset-g0 ascii | 165 '(charset-g0 ascii |
| 159 charset-g1 latin-iso8859-1 | 166 charset-g1 latin-iso8859-1 |
| 160 short t | 167 short t |
| 168 safe-charsets t | |
| 161 mnemonic "ISO8" | 169 mnemonic "ISO8" |
| 162 documentation "ISO-2022 eight-bit coding system. No single-shift or locking-shift." | 170 documentation "ISO-2022 eight-bit coding system. No single-shift or locking-shift." |
| 163 )) | 171 )) |
| 164 | 172 |
| 165 (make-coding-system | 173 (make-coding-system |
| 167 "Escape-Quoted (for .ELC files)" | 175 "Escape-Quoted (for .ELC files)" |
| 168 '(charset-g0 ascii | 176 '(charset-g0 ascii |
| 169 charset-g1 latin-iso8859-1 | 177 charset-g1 latin-iso8859-1 |
| 170 eol-type lf | 178 eol-type lf |
| 171 escape-quoted t | 179 escape-quoted t |
| 180 safe-charsets t | |
| 172 mnemonic "ESC/Quot" | 181 mnemonic "ESC/Quot" |
| 173 documentation "ISO-2022 eight-bit coding system with escape quoting; used for .ELC files." | 182 documentation "ISO-2022 eight-bit coding system with escape quoting; used for .ELC files." |
| 174 )) | 183 )) |
| 175 | 184 |
| 176 (make-coding-system | 185 (make-coding-system |
| 178 "ISO-2022 w/locking-shift" | 187 "ISO-2022 w/locking-shift" |
| 179 '(charset-g0 ascii | 188 '(charset-g0 ascii |
| 180 charset-g1 t ;; unspecified but can be used later. | 189 charset-g1 t ;; unspecified but can be used later. |
| 181 seven t | 190 seven t |
| 182 lock-shift t | 191 lock-shift t |
| 192 safe-charsets t | |
| 183 mnemonic "ISO7/Lock" | 193 mnemonic "ISO7/Lock" |
| 184 documentation "ISO-2022 coding system using Locking-Shift for 96-charset." | 194 documentation "ISO-2022 coding system using Locking-Shift for 96-charset." |
| 185 )) | 195 )) |
| 186 | 196 |
| 187 | 197 |
| 236 (loop | 246 (loop |
| 237 (read-multibyte-character r0 r1) | 247 (read-multibyte-character r0 r1) |
| 238 (if (r0 == ,(charset-id 'ascii)) | 248 (if (r0 == ,(charset-id 'ascii)) |
| 239 (write r1) | 249 (write r1) |
| 240 ((if (r0 == #xABAB) | 250 ((if (r0 == #xABAB) |
| 241 ;; #xBFFE is a sentinel in the compiled | |
| 242 ;; program. | |
| 243 ;; #xBFFE is a sentinel in the compiled | 251 ;; #xBFFE is a sentinel in the compiled |
| 244 ;; program. | 252 ;; program. |
| 245 ((r0 = r1 & #x7F) | 253 ((r0 = r1 & #x7F) |
| 246 (write r0 ,(make-vector vec-len #xBFFE))) | 254 (write r0 ,(make-vector vec-len #xBFFE))) |
| 247 ((mule-to-unicode r0 r1) | 255 ((mule-to-unicode r0 r1) |
| 529 "Given DECODE-TABLE, return an appropriate coding category. | 537 "Given DECODE-TABLE, return an appropriate coding category. |
| 530 DECODE-TABLE is a 256-entry vector describing the mapping from octets on | 538 DECODE-TABLE is a 256-entry vector describing the mapping from octets on |
| 531 disk to XEmacs characters for some fixed-width 8-bit coding system. " | 539 disk to XEmacs characters for some fixed-width 8-bit coding system. " |
| 532 (check-argument-type #'vectorp decode-table) | 540 (check-argument-type #'vectorp decode-table) |
| 533 (check-argument-range (length decode-table) #x100 #x100) | 541 (check-argument-range (length decode-table) #x100 #x100) |
| 534 (block category | 542 (loop |
| 535 (loop | 543 named category |
| 536 for i from #x80 to #x9F | 544 for i from #x80 to #x9F |
| 537 do (unless (= i (aref decode-table i)) | 545 do (unless (= i (aref decode-table i)) |
| 538 (return-from category 'no-conversion))) | 546 (return-from category 'no-conversion)) |
| 539 'iso-8-1)) | 547 finally return 'iso-8-1)) |
| 548 | |
| 549 (defun 8-bit-fixed-query-coding-region (begin end coding-system | |
| 550 &optional buffer errorp highlightp) | |
| 551 "The `query-coding-region' implementation for 8-bit-fixed coding systems. | |
| 552 | |
| 553 Uses the `8-bit-fixed-query-from-unicode' and `8-bit-fixed-query-skip-chars' | |
| 554 coding system properties. The former is a hash table mapping from valid | |
| 555 Unicode code points to on-disk octets in the coding system; the latter a set | |
| 556 of characters as used by `skip-chars-forward'. Both of these properties are | |
| 557 generated automatically by `make-8-bit-coding-system'. | |
| 558 | |
| 559 See that the documentation of `query-coding-region'; see also | |
| 560 `make-8-bit-coding-system'. " | |
| 561 (check-argument-type #'coding-system-p | |
| 562 (setq coding-system (find-coding-system coding-system))) | |
| 563 (check-argument-type #'integer-or-marker-p begin) | |
| 564 (check-argument-type #'integer-or-marker-p end) | |
| 565 (let ((from-unicode | |
| 566 (or (coding-system-get coding-system '8-bit-fixed-query-from-unicode) | |
| 567 (coding-system-get (coding-system-base coding-system) | |
| 568 '8-bit-fixed-query-from-unicode))) | |
| 569 (skip-chars-arg | |
| 570 (or (coding-system-get coding-system '8-bit-fixed-query-skip-chars) | |
| 571 (coding-system-get (coding-system-base coding-system) | |
| 572 '8-bit-fixed-query-skip-chars))) | |
| 573 (ranges (make-range-table)) | |
| 574 char-after fail-range-start fail-range-end previous-fail extent | |
| 575 failed) | |
| 576 (check-type from-unicode hash-table) | |
| 577 (check-type skip-chars-arg string) | |
| 578 (save-excursion | |
| 579 (when highlightp | |
| 580 (map-extents #'(lambda (extent ignored-arg) | |
| 581 (when (eq 'query-coding-warning-face | |
| 582 (extent-face extent)) | |
| 583 (delete-extent extent))) buffer begin end)) | |
| 584 (goto-char begin buffer) | |
| 585 (skip-chars-forward skip-chars-arg end buffer) | |
| 586 (while (< (point buffer) end) | |
| 587 ; (message | |
| 588 ; "fail-range-start is %S, previous-fail %S, point is %S, end is %S" | |
| 589 ; fail-range-start previous-fail (point buffer) end) | |
| 590 (setq char-after (char-after (point buffer) buffer) | |
| 591 fail-range-start (point buffer)) | |
| 592 ; (message "arguments are %S %S" | |
| 593 ; (< (point buffer) end) | |
| 594 ; (not (gethash (encode-char char-after 'ucs) from-unicode))) | |
| 595 (while (and | |
| 596 (< (point buffer) end) | |
| 597 (not (gethash (encode-char char-after 'ucs) from-unicode))) | |
| 598 (forward-char 1 buffer) | |
| 599 (setq char-after (char-after (point buffer) buffer) | |
| 600 failed t)) | |
| 601 (if (= fail-range-start (point buffer)) | |
| 602 ;; The character can actually be encoded by the coding | |
| 603 ;; system; check the characters past it. | |
| 604 (forward-char 1 buffer) | |
| 605 ;; The character actually failed. | |
| 606 ; (message "past the move through, point now %S" (point buffer)) | |
| 607 (when errorp | |
| 608 (error 'text-conversion-error | |
| 609 (format "Cannot encode %s using coding system" | |
| 610 (buffer-substring fail-range-start (point buffer) | |
| 611 buffer)) | |
| 612 (coding-system-name coding-system))) | |
| 613 (put-range-table fail-range-start | |
| 614 ;; If char-after is non-nil, we're not at | |
| 615 ;; the end of the buffer. | |
| 616 (setq fail-range-end (if char-after | |
| 617 (point buffer) | |
| 618 (point-max buffer))) | |
| 619 t ranges) | |
| 620 (when highlightp | |
| 621 ; (message "highlighting") | |
| 622 (setq extent (make-extent fail-range-start fail-range-end buffer)) | |
| 623 (set-extent-priority extent (+ mouse-highlight-priority 2)) | |
| 624 (set-extent-face extent 'query-coding-warning-face)) | |
| 625 (skip-chars-forward skip-chars-arg end buffer))) | |
| 626 ; (message "about to give the result, ranges %S" ranges) | |
| 627 (if failed | |
| 628 (values nil ranges) | |
| 629 (values t nil))))) | |
| 540 | 630 |
| 541 ;;;###autoload | 631 ;;;###autoload |
| 542 (defun make-8-bit-coding-system (name unicode-map &optional description props) | 632 (defun make-8-bit-coding-system (name unicode-map &optional description props) |
| 543 "Make and return a fixed-width 8-bit CCL coding system named NAME. | 633 "Make and return a fixed-width 8-bit CCL coding system named NAME. |
| 544 NAME must be a symbol, and UNICODE-MAP a list. | 634 NAME must be a symbol, and UNICODE-MAP a list. |
| 616 (plist-put (plist-put props 'decode decode-program) | 706 (plist-put (plist-put props 'decode decode-program) |
| 617 'encode encode-program))) | 707 'encode encode-program))) |
| 618 (coding-system-put name '8-bit-fixed t) | 708 (coding-system-put name '8-bit-fixed t) |
| 619 (coding-system-put name 'category | 709 (coding-system-put name 'category |
| 620 (make-8-bit-choose-category decode-table)) | 710 (make-8-bit-choose-category decode-table)) |
| 711 (coding-system-put name '8-bit-fixed-query-skip-chars | |
| 712 (skip-chars-quote | |
| 713 (apply #'string (append decode-table nil)))) | |
| 714 (coding-system-put name '8-bit-fixed-query-from-unicode encode-table) | |
| 715 | |
| 716 (coding-system-put name 'query-coding-function | |
| 717 #'8-bit-fixed-query-coding-region) | |
| 718 (coding-system-put (intern (format "%s-unix" name)) | |
| 719 'query-coding-function | |
| 720 #'8-bit-fixed-query-coding-region) | |
| 721 (coding-system-put (intern (format "%s-dos" name)) | |
| 722 'query-coding-function | |
| 723 #'8-bit-fixed-query-coding-region) | |
| 724 (coding-system-put (intern (format "%s-mac" name)) | |
| 725 'query-coding-function | |
| 726 #'8-bit-fixed-query-coding-region) | |
| 621 (loop for alias in aliases | 727 (loop for alias in aliases |
| 622 do (define-coding-system-alias alias name)) | 728 do (define-coding-system-alias alias name)) |
| 623 result)) | 729 result)) |
| 624 | 730 |
| 625 (define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map | 731 (define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map |
| 626 &optional description props) | 732 &optional description props) |
| 627 | |
| 628 ;; We provide the compiler macro (= macro that is expanded only on | 733 ;; We provide the compiler macro (= macro that is expanded only on |
| 629 ;; compilation, and that can punt to a runtime version of the | 734 ;; compilation, and that can punt to a runtime version of the |
| 630 ;; associate function if necessary) not for reasons of speed, though | 735 ;; associate function if necessary) not for reasons of speed, though |
| 631 ;; it does speed up things at runtime a little, but because the | 736 ;; it does speed up things at runtime a little, but because the |
| 632 ;; Unicode mappings are available at compile time in the dumped | 737 ;; Unicode mappings are available at compile time in the dumped |
| 672 ;; it, out, though, I get: | 777 ;; it, out, though, I get: |
| 673 ;; | 778 ;; |
| 674 ;; (invalid-read-syntax "Multiply defined symbol label" 1) | 779 ;; (invalid-read-syntax "Multiply defined symbol label" 1) |
| 675 ;; | 780 ;; |
| 676 ;; when the file is byte compiled. | 781 ;; when the file is byte compiled. |
| 677 (case-fold-search t)) | 782 (case-fold-search t) |
| 678 (define-translation-hash-table encode-table-sym ,encode-table) | 783 (encode-table ,encode-table)) |
| 784 (define-translation-hash-table encode-table-sym encode-table) | |
| 679 (make-coding-system | 785 (make-coding-system |
| 680 ',name 'ccl ,description | 786 ',name 'ccl ,description |
| 681 (plist-put (plist-put ',props 'decode | 787 (plist-put (plist-put ',props 'decode |
| 682 ,(apply #'vector decode-program)) | 788 ,(apply #'vector decode-program)) |
| 683 'encode | 789 'encode |
| 686 (list (cons | 792 (list (cons |
| 687 'encode-table-sym | 793 'encode-table-sym |
| 688 (symbol-value 'encode-table-sym))) | 794 (symbol-value 'encode-table-sym))) |
| 689 ',encode-program)))) | 795 ',encode-program)))) |
| 690 (coding-system-put ',name '8-bit-fixed t) | 796 (coding-system-put ',name '8-bit-fixed t) |
| 691 (coding-system-put ',name 'category ', | 797 (coding-system-put ',name 'category |
| 692 (make-8-bit-choose-category decode-table)) | 798 ',(make-8-bit-choose-category decode-table)) |
| 799 (coding-system-put ',name '8-bit-fixed-query-skip-chars | |
| 800 ',(skip-chars-quote | |
| 801 (apply #'string (append decode-table nil)))) | |
| 802 (coding-system-put ',name '8-bit-fixed-query-from-unicode encode-table) | |
| 803 (coding-system-put ',name 'query-coding-function | |
| 804 #'8-bit-fixed-query-coding-region) | |
| 805 (coding-system-put ',(intern (format "%s-unix" name)) | |
| 806 'query-coding-function | |
| 807 #'8-bit-fixed-query-coding-region) | |
| 808 (coding-system-put ',(intern (format "%s-dos" name)) | |
| 809 'query-coding-function | |
| 810 #'8-bit-fixed-query-coding-region) | |
| 811 (coding-system-put ',(intern (format "%s-mac" name)) | |
| 812 'query-coding-function | |
| 813 #'8-bit-fixed-query-coding-region) | |
| 693 ,(macroexpand `(loop for alias in ',aliases | 814 ,(macroexpand `(loop for alias in ',aliases |
| 694 do (define-coding-system-alias alias | 815 do (define-coding-system-alias alias |
| 695 ',name))) | 816 ',name))) |
| 696 (find-coding-system ',name))))) | 817 (find-coding-system ',name))))) |
| 697 | 818 |
| 701 '() ;; No differences from Latin 1. | 822 '() ;; No differences from Latin 1. |
| 702 "ISO-8859-1 (Latin-1)" | 823 "ISO-8859-1 (Latin-1)" |
| 703 '(mnemonic "Latin 1" | 824 '(mnemonic "Latin 1" |
| 704 documentation "The most used encoding of Western Europe and the Americas." | 825 documentation "The most used encoding of Western Europe and the Americas." |
| 705 aliases (iso-latin-1 latin-1))) | 826 aliases (iso-latin-1 latin-1))) |
| 706 |
