Mercurial > hg > xemacs-beta
comparison lisp/mule/mule-coding.el @ 4549:68d1ca56cffa
First part of interactive checks that coding systems encode regions.
2008-01-21 Aidan Kehoe <kehoea@parhasard.net>
* coding.el (decode-coding-string):
(encode-coding-string): Accept GNU's NOCOPY argument for
these. Todo; write compiler macros to use it.
(query-coding-warning-face): New face, to show unencodable
characters.
(default-query-coding-region-safe-charset-skip-chars-map):
New variable, a cache used by #'default-query-coding-region.
(default-query-coding-region): Default implementation of
#'query-coding-region, using the safe-charsets and safe-chars
coding systemproperties.
(query-coding-region): New function; can a given coding system
encode a given region?
(query-coding-string): New function; can a given coding system
encode a given string?
(unencodable-char-position): Function API taken from GNU; return
the first unencodable position given a string and coding system.
(encode-coding-char): Function API taken from GNU; return CHAR
encoded using CODING-SYSTEM, or nil if CODING-SYSTEM would trash
CHAR.
((unless (featurep 'mule)): Override the default
query-coding-region implementation on non-Mule.
* mule/mule-coding.el (make-8-bit-generate-helper): Eliminate a
duplicate comment.
(make-8-bit-choose-category): Simplify implementation.
(8-bit-fixed-query-coding-region): Implementation of
#'query-coding-region for coding systems created with
#'make-8-bit-coding-system.
(make-8-bit-coding-system): Initialise the #'query-coding-region
implementation for these character sets.
(make-8-bit-coding-system): Ditto for the compiler macro version
of this function.
* unicode.el (unicode-query-coding-skip-chars-arg): New variable,
used by unicode-query-coding-region, initialised in
mule/general-late.el.
(unicode-query-coding-region): New function, the
#'query-coding-region implementation for Unicode coding systems.
Initialise the query-coding-function property for the Unicode
coding systems to #'unicode-query-coding-region.
* mule/mule-charset.el (charset-skip-chars-string): New
function. Return a #'skip-chars-forward argument that skips all
characters in CHARSET.
(map-charset-chars): Function synced from GNU, modified to work
with XEmacs. Map FUNC across the int value charset ranges of
CHARSET.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Mon, 21 Jan 2008 22:51:21 +0100 |
parents | f4c3ffe60a4f |
children | 6812571bfcb9 |
comparison
equal
deleted
inserted
replaced
4402:e70cc8a90e90 | 4549:68d1ca56cffa |
---|---|
236 (loop | 236 (loop |
237 (read-multibyte-character r0 r1) | 237 (read-multibyte-character r0 r1) |
238 (if (r0 == ,(charset-id 'ascii)) | 238 (if (r0 == ,(charset-id 'ascii)) |
239 (write r1) | 239 (write r1) |
240 ((if (r0 == #xABAB) | 240 ((if (r0 == #xABAB) |
241 ;; #xBFFE is a sentinel in the compiled | |
242 ;; program. | |
243 ;; #xBFFE is a sentinel in the compiled | 241 ;; #xBFFE is a sentinel in the compiled |
244 ;; program. | 242 ;; program. |
245 ((r0 = r1 & #x7F) | 243 ((r0 = r1 & #x7F) |
246 (write r0 ,(make-vector vec-len #xBFFE))) | 244 (write r0 ,(make-vector vec-len #xBFFE))) |
247 ((mule-to-unicode r0 r1) | 245 ((mule-to-unicode r0 r1) |
529 "Given DECODE-TABLE, return an appropriate coding category. | 527 "Given DECODE-TABLE, return an appropriate coding category. |
530 DECODE-TABLE is a 256-entry vector describing the mapping from octets on | 528 DECODE-TABLE is a 256-entry vector describing the mapping from octets on |
531 disk to XEmacs characters for some fixed-width 8-bit coding system. " | 529 disk to XEmacs characters for some fixed-width 8-bit coding system. " |
532 (check-argument-type #'vectorp decode-table) | 530 (check-argument-type #'vectorp decode-table) |
533 (check-argument-range (length decode-table) #x100 #x100) | 531 (check-argument-range (length decode-table) #x100 #x100) |
534 (block category | 532 (loop |
535 (loop | 533 named category |
536 for i from #x80 to #xBF | 534 for i from #x80 to #xBF |
537 do (unless (= i (aref decode-table i)) | 535 do (unless (= i (aref decode-table i)) |
538 (return-from category 'no-conversion))) | 536 (return-from category 'no-conversion)) |
539 'iso-8-1)) | 537 finally return 'iso-8-1)) |
538 | |
539 (defun 8-bit-fixed-query-coding-region (begin end coding-system | |
540 &optional buffer errorp highlightp) | |
541 "The `query-coding-region' implementation for 8-bit-fixed coding systems. | |
542 | |
543 Uses the `8-bit-fixed-query-from-unicode' and `8-bit-fixed-query-skip-chars' | |
544 coding system properties. The former is a hash table mapping from valid | |
545 Unicode code points to on-disk octets in the coding system; the latter a set | |
546 of characters as used by `skip-chars-forward'. Both of these properties are | |
547 generated automatically by `make-8-bit-coding-system'. | |
548 | |
549 See that the documentation of `query-coding-region'; see also | |
550 `make-8-bit-coding-system'. " | |
551 (check-argument-type #'coding-system-p | |
552 (setq coding-system (find-coding-system coding-system))) | |
553 (check-argument-type #'integer-or-marker-p begin) | |
554 (check-argument-type #'integer-or-marker-p end) | |
555 (let ((from-unicode | |
556 (coding-system-get coding-system '8-bit-fixed-query-from-unicode)) | |
557 (skip-chars-arg | |
558 (coding-system-get coding-system '8-bit-fixed-query-skip-chars)) | |
559 (ranges (make-range-table)) | |
560 char-after fail-range-start fail-range-end previous-fail extent | |
561 failed) | |
562 (check-type from-unicode hash-table) | |
563 (check-type skip-chars-arg string) | |
564 (save-excursion | |
565 (goto-char begin buffer) | |
566 (skip-chars-forward skip-chars-arg end buffer) | |
567 (while (< (point buffer) end) | |
568 (message | |
569 "fail-range-start is %S, previous-fail %S, point is %S, end is %S" | |
570 fail-range-start previous-fail (point buffer) end) | |
571 (setq char-after (char-after (point buffer) buffer) | |
572 fail-range-start (point buffer)) | |
573 (message "arguments are %S %S" | |
574 (< (point buffer) end) | |
575 (not (gethash (encode-char char-after 'ucs) from-unicode))) | |
576 (while (and | |
577 (< (point buffer) end) | |
578 (not (gethash (encode-char char-after 'ucs) from-unicode))) | |
579 (forward-char 1 buffer) | |
580 (setq char-after (char-after (point buffer) buffer) | |
581 failed t)) | |
582 (if (= fail-range-start (point buffer)) | |
583 ;; The character can actually be encoded by the coding | |
584 ;; system; check the characters past it. | |
585 (forward-char 1 buffer) | |
586 ;; The character actually failed. | |
587 (message "past the move through, point now %S" (point buffer)) | |
588 (when errorp | |
589 (error 'text-conversion-error | |
590 (format "Cannot encode %s using coding system" | |
591 (buffer-substring fail-range-start (point buffeR) | |
592 buffer)) | |
593 (coding-system-name coding-system))) | |
594 (put-range-table fail-range-start | |
595 ;; If char-after is non-nil, we're not at | |
596 ;; the end of the buffer. | |
597 (setq fail-range-end (if char-after | |
598 (point buffer) | |
599 (point-max buffer))) | |
600 t ranges) | |
601 (when highlightp | |
602 (message "highlighting") | |
603 (setq extent (make-extent fail-range-start fail-range-end buffer)) | |
604 (set-extent-priority extent (+ mouse-highlight-priority 2)) | |
605 (set-extent-face extent 'query-coding-warning-face)) | |
606 (skip-chars-forward skip-chars-arg end buffer)) | |
607 (message "about to give the result, ranges %S" ranges)) | |
608 (if failed | |
609 (values nil ranges) | |
610 (values t nil))))) | |
540 | 611 |
541 ;;;###autoload | 612 ;;;###autoload |
542 (defun make-8-bit-coding-system (name unicode-map &optional description props) | 613 (defun make-8-bit-coding-system (name unicode-map &optional description props) |
543 "Make and return a fixed-width 8-bit CCL coding system named NAME. | 614 "Make and return a fixed-width 8-bit CCL coding system named NAME. |
544 NAME must be a symbol, and UNICODE-MAP a list. | 615 NAME must be a symbol, and UNICODE-MAP a list. |
616 (plist-put (plist-put props 'decode decode-program) | 687 (plist-put (plist-put props 'decode decode-program) |
617 'encode encode-program))) | 688 'encode encode-program))) |
618 (coding-system-put name '8-bit-fixed t) | 689 (coding-system-put name '8-bit-fixed t) |
619 (coding-system-put name 'category | 690 (coding-system-put name 'category |
620 (make-8-bit-choose-category decode-table)) | 691 (make-8-bit-choose-category decode-table)) |
692 (coding-system-put name '8-bit-fixed-query-skip-chars | |
693 (apply #'string (append decode-table nil))) | |
694 (coding-system-put name '8-bit-fixed-query-from-unicode encode-table) | |
695 | |
696 (coding-system-put name 'query-coding-function | |
697 #'8-bit-fixed-query-coding-region) | |
698 (coding-system-put (intern (format "%s-unix" name)) | |
699 'query-coding-function | |
700 #'8-bit-fixed-query-coding-region) | |
701 (coding-system-put (intern (format "%s-dos" name)) | |
702 'query-coding-function | |
703 #'8-bit-fixed-query-coding-region) | |
704 (coding-system-put (intern (format "%s-mac" name)) | |
705 'query-coding-function | |
706 #'8-bit-fixed-query-coding-region) | |
621 (loop for alias in aliases | 707 (loop for alias in aliases |
622 do (define-coding-system-alias alias name)) | 708 do (define-coding-system-alias alias name)) |
623 result)) | 709 result)) |
624 | 710 |
625 (define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map | 711 (define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map |
626 &optional description props) | 712 &optional description props) |
627 | |
628 ;; We provide the compiler macro (= macro that is expanded only on | 713 ;; We provide the compiler macro (= macro that is expanded only on |
629 ;; compilation, and that can punt to a runtime version of the | 714 ;; compilation, and that can punt to a runtime version of the |
630 ;; associate function if necessary) not for reasons of speed, though | 715 ;; associate function if necessary) not for reasons of speed, though |
631 ;; it does speed up things at runtime a little, but because the | 716 ;; it does speed up things at runtime a little, but because the |
632 ;; Unicode mappings are available at compile time in the dumped | 717 ;; Unicode mappings are available at compile time in the dumped |
672 ;; it, out, though, I get: | 757 ;; it, out, though, I get: |
673 ;; | 758 ;; |
674 ;; (invalid-read-syntax "Multiply defined symbol label" 1) | 759 ;; (invalid-read-syntax "Multiply defined symbol label" 1) |
675 ;; | 760 ;; |
676 ;; when the file is byte compiled. | 761 ;; when the file is byte compiled. |
677 (case-fold-search t)) | 762 (case-fold-search t) |
678 (define-translation-hash-table encode-table-sym ,encode-table) | 763 (encode-table ,encode-table)) |
764 (define-translation-hash-table encode-table-sym encode-table) | |
679 (make-coding-system | 765 (make-coding-system |
680 ',name 'ccl ,description | 766 ',name 'ccl ,description |
681 (plist-put (plist-put ',props 'decode | 767 (plist-put (plist-put ',props 'decode |
682 ,(apply #'vector decode-program)) | 768 ,(apply #'vector decode-program)) |
683 'encode | 769 'encode |
686 (list (cons | 772 (list (cons |
687 'encode-table-sym | 773 'encode-table-sym |
688 (symbol-value 'encode-table-sym))) | 774 (symbol-value 'encode-table-sym))) |
689 ',encode-program)))) | 775 ',encode-program)))) |
690 (coding-system-put ',name '8-bit-fixed t) | 776 (coding-system-put ',name '8-bit-fixed t) |
691 (coding-system-put ',name 'category ', | 777 (coding-system-put ',name 'category |
692 (make-8-bit-choose-category decode-table)) | 778 ',(make-8-bit-choose-category decode-table)) |
779 (coding-system-put ',name '8-bit-fixed-query-skip-chars | |
780 ',(apply #'string (append decode-table nil))) | |
781 (coding-system-put ',name '8-bit-fixed-query-from-unicode encode-table) | |
782 (coding-system-put ',name 'query-coding-function | |
783 #'8-bit-fixed-query-coding-region) | |
784 (coding-system-put ',(intern (format "%s-unix" name)) | |
785 'query-coding-function | |
786 #'8-bit-fixed-query-coding-region) | |
787 (coding-system-put ',(intern (format "%s-dos" name)) | |
788 'query-coding-function | |
789 #'8-bit-fixed-query-coding-region) | |
790 (coding-system-put ',(intern (format "%s-mac" name)) | |
791 'query-coding-function | |
792 #'8-bit-fixed-query-coding-region) | |
693 ,(macroexpand `(loop for alias in ',aliases | 793 ,(macroexpand `(loop for alias in ',aliases |
694 do (define-coding-system-alias alias | 794 do (define-coding-system-alias alias |
695 ',name))) | 795 ',name))) |
696 (find-coding-system ',name))))) | 796 (find-coding-system ',name))))) |
697 | 797 |
701 '() ;; No differences from Latin 1. | 801 '() ;; No differences from Latin 1. |
702 "ISO-8859-1 (Latin-1)" | 802 "ISO-8859-1 (Latin-1)" |
703 '(mnemonic "Latin 1" | 803 '(mnemonic "Latin 1" |
704 documentation "The most used encoding of Western Europe and the Americas." | 804 documentation "The most used encoding of Western Europe and the Americas." |
705 aliases (iso-latin-1 latin-1))) | 805 aliases (iso-latin-1 latin-1))) |
706 |