comparison lisp/mule/mule-coding.el @ 4569:80e0588fb42f

Merge.
author Aidan Kehoe <kehoea@parhasard.net>
date Sun, 28 Dec 2008 14:55:02 +0000
parents 1d74a1d115ee
children e0a8715fdb1f
comparison
equal deleted inserted replaced
4537:7ca6d57ce12d 4569:80e0588fb42f
102 'ctext 'iso2022 102 'ctext 'iso2022
103 "Compound Text" 103 "Compound Text"
104 '(charset-g0 ascii 104 '(charset-g0 ascii
105 charset-g1 latin-iso8859-1 105 charset-g1 latin-iso8859-1
106 eol-type nil 106 eol-type nil
107 safe-charsets t ;; Reasonable
107 mnemonic "CText")) 108 mnemonic "CText"))
108 109
109 (make-coding-system 110 (make-coding-system
110 'iso-2022-8bit-ss2 'iso2022 111 'iso-2022-8bit-ss2 'iso2022
111 "ISO-2022 8-bit w/SS2" 112 "ISO-2022 8-bit w/SS2"
112 '(charset-g0 ascii 113 '(charset-g0 ascii
113 charset-g1 latin-iso8859-1 114 charset-g1 latin-iso8859-1
114 charset-g2 t ;; unspecified but can be used later. 115 charset-g2 t ;; unspecified but can be used later.
115 short t 116 short t
117 safe-charsets (ascii katakana-jisx0201 japanese-jisx0208-1978
118 japanese-jisx0208 japanese-jisx0212 japanese-jisx0213-1
119 japanese-jisx0213-2)
116 mnemonic "ISO8/SS" 120 mnemonic "ISO8/SS"
117 documentation "ISO 2022 based 8-bit encoding using SS2 for 96-charset" 121 documentation "ISO 2022 based 8-bit encoding using SS2 for 96-charset"
118 )) 122 ))
119 123
120 (make-coding-system 124 (make-coding-system
122 "ISO-2022 7-bit w/SS2" 126 "ISO-2022 7-bit w/SS2"
123 '(charset-g0 ascii 127 '(charset-g0 ascii
124 charset-g2 t ;; unspecified but can be used later. 128 charset-g2 t ;; unspecified but can be used later.
125 seven t 129 seven t
126 short t 130 short t
131 safe-charsets t
127 mnemonic "ISO7/SS" 132 mnemonic "ISO7/SS"
128 documentation "ISO 2022 based 7-bit encoding using SS2 for 96-charset" 133 documentation "ISO 2022 based 7-bit encoding using SS2 for 96-charset"
129 eol-type nil)) 134 eol-type nil))
130 135
131 ;; (copy-coding-system 'iso-2022-7bit-ss2 'iso-2022-jp-2) 136 ;; (copy-coding-system 'iso-2022-7bit-ss2 'iso-2022-jp-2)
134 "ISO-2022-JP-2" 139 "ISO-2022-JP-2"
135 '(charset-g0 ascii 140 '(charset-g0 ascii
136 charset-g2 t ;; unspecified but can be used later. 141 charset-g2 t ;; unspecified but can be used later.
137 seven t 142 seven t
138 short t 143 short t
144 safe-charsets t
139 mnemonic "ISO7/SS" 145 mnemonic "ISO7/SS"
140 eol-type nil)) 146 eol-type nil))
141 147
142 (make-coding-system 148 (make-coding-system
143 'iso-2022-7bit 'iso2022 149 'iso-2022-7bit 'iso2022
144 "ISO 2022 7-bit" 150 "ISO 2022 7-bit"
145 '(charset-g0 ascii 151 '(charset-g0 ascii
146 seven t 152 seven t
147 short t 153 short t
154 safe-charsets t
148 mnemonic "ISO7" 155 mnemonic "ISO7"
149 documentation "ISO-2022-based 7-bit encoding using only G0" 156 documentation "ISO-2022-based 7-bit encoding using only G0"
150 )) 157 ))
151 158
152 ;; compatibility for old XEmacsen 159 ;; compatibility for old XEmacsen
156 'iso-2022-8 'iso2022 163 'iso-2022-8 'iso2022
157 "ISO-2022 8-bit" 164 "ISO-2022 8-bit"
158 '(charset-g0 ascii 165 '(charset-g0 ascii
159 charset-g1 latin-iso8859-1 166 charset-g1 latin-iso8859-1
160 short t 167 short t
168 safe-charsets t
161 mnemonic "ISO8" 169 mnemonic "ISO8"
162 documentation "ISO-2022 eight-bit coding system. No single-shift or locking-shift." 170 documentation "ISO-2022 eight-bit coding system. No single-shift or locking-shift."
163 )) 171 ))
164 172
165 (make-coding-system 173 (make-coding-system
167 "Escape-Quoted (for .ELC files)" 175 "Escape-Quoted (for .ELC files)"
168 '(charset-g0 ascii 176 '(charset-g0 ascii
169 charset-g1 latin-iso8859-1 177 charset-g1 latin-iso8859-1
170 eol-type lf 178 eol-type lf
171 escape-quoted t 179 escape-quoted t
180 safe-charsets t
172 mnemonic "ESC/Quot" 181 mnemonic "ESC/Quot"
173 documentation "ISO-2022 eight-bit coding system with escape quoting; used for .ELC files." 182 documentation "ISO-2022 eight-bit coding system with escape quoting; used for .ELC files."
174 )) 183 ))
175 184
176 (make-coding-system 185 (make-coding-system
178 "ISO-2022 w/locking-shift" 187 "ISO-2022 w/locking-shift"
179 '(charset-g0 ascii 188 '(charset-g0 ascii
180 charset-g1 t ;; unspecified but can be used later. 189 charset-g1 t ;; unspecified but can be used later.
181 seven t 190 seven t
182 lock-shift t 191 lock-shift t
192 safe-charsets t
183 mnemonic "ISO7/Lock" 193 mnemonic "ISO7/Lock"
184 documentation "ISO-2022 coding system using Locking-Shift for 96-charset." 194 documentation "ISO-2022 coding system using Locking-Shift for 96-charset."
185 )) 195 ))
186 196
187 197
236 (loop 246 (loop
237 (read-multibyte-character r0 r1) 247 (read-multibyte-character r0 r1)
238 (if (r0 == ,(charset-id 'ascii)) 248 (if (r0 == ,(charset-id 'ascii))
239 (write r1) 249 (write r1)
240 ((if (r0 == #xABAB) 250 ((if (r0 == #xABAB)
241 ;; #xBFFE is a sentinel in the compiled
242 ;; program.
243 ;; #xBFFE is a sentinel in the compiled 251 ;; #xBFFE is a sentinel in the compiled
244 ;; program. 252 ;; program.
245 ((r0 = r1 & #x7F) 253 ((r0 = r1 & #x7F)
246 (write r0 ,(make-vector vec-len #xBFFE))) 254 (write r0 ,(make-vector vec-len #xBFFE)))
247 ((mule-to-unicode r0 r1) 255 ((mule-to-unicode r0 r1)
529 "Given DECODE-TABLE, return an appropriate coding category. 537 "Given DECODE-TABLE, return an appropriate coding category.
530 DECODE-TABLE is a 256-entry vector describing the mapping from octets on 538 DECODE-TABLE is a 256-entry vector describing the mapping from octets on
531 disk to XEmacs characters for some fixed-width 8-bit coding system. " 539 disk to XEmacs characters for some fixed-width 8-bit coding system. "
532 (check-argument-type #'vectorp decode-table) 540 (check-argument-type #'vectorp decode-table)
533 (check-argument-range (length decode-table) #x100 #x100) 541 (check-argument-range (length decode-table) #x100 #x100)
534 (block category 542 (loop
535 (loop 543 named category
536 for i from #x80 to #x9F 544 for i from #x80 to #x9F
537 do (unless (= i (aref decode-table i)) 545 do (unless (= i (aref decode-table i))
538 (return-from category 'no-conversion))) 546 (return-from category 'no-conversion))
539 'iso-8-1)) 547 finally return 'iso-8-1))
548
549 (defun 8-bit-fixed-query-coding-region (begin end coding-system
550 &optional buffer errorp highlightp)
551 "The `query-coding-region' implementation for 8-bit-fixed coding systems.
552
553 Uses the `8-bit-fixed-query-from-unicode' and `8-bit-fixed-query-skip-chars'
554 coding system properties. The former is a hash table mapping from valid
555 Unicode code points to on-disk octets in the coding system; the latter a set
556 of characters as used by `skip-chars-forward'. Both of these properties are
557 generated automatically by `make-8-bit-coding-system'.
558
559 See that the documentation of `query-coding-region'; see also
560 `make-8-bit-coding-system'. "
561 (check-argument-type #'coding-system-p
562 (setq coding-system (find-coding-system coding-system)))
563 (check-argument-type #'integer-or-marker-p begin)
564 (check-argument-type #'integer-or-marker-p end)
565 (let ((from-unicode
566 (or (coding-system-get coding-system '8-bit-fixed-query-from-unicode)
567 (coding-system-get (coding-system-base coding-system)
568 '8-bit-fixed-query-from-unicode)))
569 (skip-chars-arg
570 (or (coding-system-get coding-system '8-bit-fixed-query-skip-chars)
571 (coding-system-get (coding-system-base coding-system)
572 '8-bit-fixed-query-skip-chars)))
573 (ranges (make-range-table))
574 char-after fail-range-start fail-range-end previous-fail extent
575 failed)
576 (check-type from-unicode hash-table)
577 (check-type skip-chars-arg string)
578 (save-excursion
579 (when highlightp
580 (map-extents #'(lambda (extent ignored-arg)
581 (when (eq 'query-coding-warning-face
582 (extent-face extent))
583 (delete-extent extent))) buffer begin end))
584 (goto-char begin buffer)
585 (skip-chars-forward skip-chars-arg end buffer)
586 (while (< (point buffer) end)
587 ; (message
588 ; "fail-range-start is %S, previous-fail %S, point is %S, end is %S"
589 ; fail-range-start previous-fail (point buffer) end)
590 (setq char-after (char-after (point buffer) buffer)
591 fail-range-start (point buffer))
592 ; (message "arguments are %S %S"
593 ; (< (point buffer) end)
594 ; (not (gethash (encode-char char-after 'ucs) from-unicode)))
595 (while (and
596 (< (point buffer) end)
597 (not (gethash (encode-char char-after 'ucs) from-unicode)))
598 (forward-char 1 buffer)
599 (setq char-after (char-after (point buffer) buffer)
600 failed t))
601 (if (= fail-range-start (point buffer))
602 ;; The character can actually be encoded by the coding
603 ;; system; check the characters past it.
604 (forward-char 1 buffer)
605 ;; The character actually failed.
606 ; (message "past the move through, point now %S" (point buffer))
607 (when errorp
608 (error 'text-conversion-error
609 (format "Cannot encode %s using coding system"
610 (buffer-substring fail-range-start (point buffer)
611 buffer))
612 (coding-system-name coding-system)))
613 (put-range-table fail-range-start
614 ;; If char-after is non-nil, we're not at
615 ;; the end of the buffer.
616 (setq fail-range-end (if char-after
617 (point buffer)
618 (point-max buffer)))
619 t ranges)
620 (when highlightp
621 ; (message "highlighting")
622 (setq extent (make-extent fail-range-start fail-range-end buffer))
623 (set-extent-priority extent (+ mouse-highlight-priority 2))
624 (set-extent-face extent 'query-coding-warning-face))
625 (skip-chars-forward skip-chars-arg end buffer)))
626 ; (message "about to give the result, ranges %S" ranges)
627 (if failed
628 (values nil ranges)
629 (values t nil)))))
540 630
541 ;;;###autoload 631 ;;;###autoload
542 (defun make-8-bit-coding-system (name unicode-map &optional description props) 632 (defun make-8-bit-coding-system (name unicode-map &optional description props)
543 "Make and return a fixed-width 8-bit CCL coding system named NAME. 633 "Make and return a fixed-width 8-bit CCL coding system named NAME.
544 NAME must be a symbol, and UNICODE-MAP a list. 634 NAME must be a symbol, and UNICODE-MAP a list.
616 (plist-put (plist-put props 'decode decode-program) 706 (plist-put (plist-put props 'decode decode-program)
617 'encode encode-program))) 707 'encode encode-program)))
618 (coding-system-put name '8-bit-fixed t) 708 (coding-system-put name '8-bit-fixed t)
619 (coding-system-put name 'category 709 (coding-system-put name 'category
620 (make-8-bit-choose-category decode-table)) 710 (make-8-bit-choose-category decode-table))
711 (coding-system-put name '8-bit-fixed-query-skip-chars
712 (skip-chars-quote
713 (apply #'string (append decode-table nil))))
714 (coding-system-put name '8-bit-fixed-query-from-unicode encode-table)
715
716 (coding-system-put name 'query-coding-function
717 #'8-bit-fixed-query-coding-region)
718 (coding-system-put (intern (format "%s-unix" name))
719 'query-coding-function
720 #'8-bit-fixed-query-coding-region)
721 (coding-system-put (intern (format "%s-dos" name))
722 'query-coding-function
723 #'8-bit-fixed-query-coding-region)
724 (coding-system-put (intern (format "%s-mac" name))
725 'query-coding-function
726 #'8-bit-fixed-query-coding-region)
621 (loop for alias in aliases 727 (loop for alias in aliases
622 do (define-coding-system-alias alias name)) 728 do (define-coding-system-alias alias name))
623 result)) 729 result))
624 730
625 (define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map 731 (define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map
626 &optional description props) 732 &optional description props)
627
628 ;; We provide the compiler macro (= macro that is expanded only on 733 ;; We provide the compiler macro (= macro that is expanded only on
629 ;; compilation, and that can punt to a runtime version of the 734 ;; compilation, and that can punt to a runtime version of the
630 ;; associate function if necessary) not for reasons of speed, though 735 ;; associate function if necessary) not for reasons of speed, though
631 ;; it does speed up things at runtime a little, but because the 736 ;; it does speed up things at runtime a little, but because the
632 ;; Unicode mappings are available at compile time in the dumped 737 ;; Unicode mappings are available at compile time in the dumped
672 ;; it, out, though, I get: 777 ;; it, out, though, I get:
673 ;; 778 ;;
674 ;; (invalid-read-syntax "Multiply defined symbol label" 1) 779 ;; (invalid-read-syntax "Multiply defined symbol label" 1)
675 ;; 780 ;;
676 ;; when the file is byte compiled. 781 ;; when the file is byte compiled.
677 (case-fold-search t)) 782 (case-fold-search t)
678 (define-translation-hash-table encode-table-sym ,encode-table) 783 (encode-table ,encode-table))
784 (define-translation-hash-table encode-table-sym encode-table)
679 (make-coding-system 785 (make-coding-system
680 ',name 'ccl ,description 786 ',name 'ccl ,description
681 (plist-put (plist-put ',props 'decode 787 (plist-put (plist-put ',props 'decode
682 ,(apply #'vector decode-program)) 788 ,(apply #'vector decode-program))
683 'encode 789 'encode
686 (list (cons 792 (list (cons
687 'encode-table-sym 793 'encode-table-sym
688 (symbol-value 'encode-table-sym))) 794 (symbol-value 'encode-table-sym)))
689 ',encode-program)))) 795 ',encode-program))))
690 (coding-system-put ',name '8-bit-fixed t) 796 (coding-system-put ',name '8-bit-fixed t)
691 (coding-system-put ',name 'category ', 797 (coding-system-put ',name 'category
692 (make-8-bit-choose-category decode-table)) 798 ',(make-8-bit-choose-category decode-table))
799 (coding-system-put ',name '8-bit-fixed-query-skip-chars
800 ',(skip-chars-quote
801 (apply #'string (append decode-table nil))))
802 (coding-system-put ',name '8-bit-fixed-query-from-unicode encode-table)
803 (coding-system-put ',name 'query-coding-function
804 #'8-bit-fixed-query-coding-region)
805 (coding-system-put ',(intern (format "%s-unix" name))
806 'query-coding-function
807 #'8-bit-fixed-query-coding-region)
808 (coding-system-put ',(intern (format "%s-dos" name))
809 'query-coding-function
810 #'8-bit-fixed-query-coding-region)
811 (coding-system-put ',(intern (format "%s-mac" name))
812 'query-coding-function
813 #'8-bit-fixed-query-coding-region)
693 ,(macroexpand `(loop for alias in ',aliases 814 ,(macroexpand `(loop for alias in ',aliases
694 do (define-coding-system-alias alias 815 do (define-coding-system-alias alias
695 ',name))) 816 ',name)))
696 (find-coding-system ',name))))) 817 (find-coding-system ',name)))))
697 818
701 '() ;; No differences from Latin 1. 822 '() ;; No differences from Latin 1.
702 "ISO-8859-1 (Latin-1)" 823 "ISO-8859-1 (Latin-1)"
703 '(mnemonic "Latin 1" 824 '(mnemonic "Latin 1"
704 documentation "The most used encoding of Western Europe and the Americas." 825 documentation "The most used encoding of Western Europe and the Americas."
705 aliases (iso-latin-1 latin-1))) 826 aliases (iso-latin-1 latin-1)))
706