comparison lisp/unicode.el @ 4549:68d1ca56cffa

First part of interactive checks that coding systems encode regions. 2008-01-21 Aidan Kehoe <kehoea@parhasard.net> * coding.el (decode-coding-string): (encode-coding-string): Accept GNU's NOCOPY argument for these. Todo; write compiler macros to use it. (query-coding-warning-face): New face, to show unencodable characters. (default-query-coding-region-safe-charset-skip-chars-map): New variable, a cache used by #'default-query-coding-region. (default-query-coding-region): Default implementation of #'query-coding-region, using the safe-charsets and safe-chars coding systemproperties. (query-coding-region): New function; can a given coding system encode a given region? (query-coding-string): New function; can a given coding system encode a given string? (unencodable-char-position): Function API taken from GNU; return the first unencodable position given a string and coding system. (encode-coding-char): Function API taken from GNU; return CHAR encoded using CODING-SYSTEM, or nil if CODING-SYSTEM would trash CHAR. ((unless (featurep 'mule)): Override the default query-coding-region implementation on non-Mule. * mule/mule-coding.el (make-8-bit-generate-helper): Eliminate a duplicate comment. (make-8-bit-choose-category): Simplify implementation. (8-bit-fixed-query-coding-region): Implementation of #'query-coding-region for coding systems created with #'make-8-bit-coding-system. (make-8-bit-coding-system): Initialise the #'query-coding-region implementation for these character sets. (make-8-bit-coding-system): Ditto for the compiler macro version of this function. * unicode.el (unicode-query-coding-skip-chars-arg): New variable, used by unicode-query-coding-region, initialised in mule/general-late.el. (unicode-query-coding-region): New function, the #'query-coding-region implementation for Unicode coding systems. Initialise the query-coding-function property for the Unicode coding systems to #'unicode-query-coding-region. * mule/mule-charset.el (charset-skip-chars-string): New function. Return a #'skip-chars-forward argument that skips all characters in CHARSET. (map-charset-chars): Function synced from GNU, modified to work with XEmacs. Map FUNC across the int value charset ranges of CHARSET.
author Aidan Kehoe <kehoea@parhasard.net>
date Mon, 21 Jan 2008 22:51:21 +0100
parents 15d36164ebd7
children 6812571bfcb9
comparison
equal deleted inserted replaced
4402:e70cc8a90e90 4549:68d1ca56cffa
609 (frob-unicode-errors-region 609 (frob-unicode-errors-region
610 (lambda (start finish) 610 (lambda (start finish)
611 (translate-region start finish table)) 611 (translate-region start finish table))
612 begin end buffer)) 612 begin end buffer))
613 613
614 (defvar unicode-query-coding-skip-chars-arg nil ;; Set in general-late.el
615 "Used by `unicode-query-coding-region' to skip chars with known mappings.")
616
617 (defun unicode-query-coding-region (begin end coding-system
618 &optional buffer errorp highlightp)
619 "The `query-coding-region' implementation for Unicode coding systems."
620 (check-argument-type #'coding-system-p
621 (setq coding-system (find-coding-system coding-system)))
622 (check-argument-type #'integer-or-marker-p begin)
623 (check-argument-type #'integer-or-marker-p end)
624 (let* ((skip-chars-arg unicode-query-coding-skip-chars-arg)
625 (ranges (make-range-table))
626 (looking-at-arg (concat "[" skip-chars-arg "]"))
627 fail-range-start fail-range-end previous-fail char-after
628 failed extent)
629 (save-excursion
630 (goto-char begin buffer)
631 (skip-chars-forward skip-chars-arg end buffer)
632 (while (< (point buffer) end)
633 (message
634 "fail-range-start is %S, previous-fail %S, point is %S, end is %S"
635 fail-range-start previous-fail (point buffer) end)
636 (setq char-after (char-after (point buffer) buffer)
637 fail-range-start (point buffer))
638 (while (and
639 (< (point buffer) end)
640 (not (looking-at looking-at-arg))
641 (= -1 (char-to-unicode char-after)))
642 (forward-char 1 buffer)
643 (message "what?!?")
644 (setq char-after (char-after (point buffer) buffer)
645 failed t))
646 (if (= fail-range-start (point buffer))
647 ;; The character can actually be encoded by the coding
648 ;; system; check the characters past it.
649 (forward-char 1 buffer)
650 ;; Can't be encoded; note this.
651 (when errorp
652 (error 'text-conversion-error
653 (format "Cannot encode %s using coding system"
654 (buffer-substring fail-range-start (point buffer)
655 buffer))
656 (coding-system-name coding-system)))
657 (put-range-table fail-range-start
658 ;; If char-after is non-nil, we're not at
659 ;; the end of the buffer.
660 (setq fail-range-end (if char-after
661 (point buffer)
662 (point-max buffer)))
663 t ranges)
664 (when highlightp
665 (setq extent (make-extent fail-range-start fail-range-end buffer))
666 (set-extent-priority extent (+ mouse-highlight-priority 2))
667 (set-extent-face extent 'query-coding-warning-face)))
668 (skip-chars-forward skip-chars-arg end buffer))
669 (if failed
670 (values nil ranges)
671 (values t nil)))))
672
673 (loop
674 for coding-system in (coding-system-list)
675 do (when (eq 'unicode (coding-system-type coding-system))
676 (coding-system-put coding-system 'query-coding-function
677 #'unicode-query-coding-region)))
678
614 (unless (featurep 'mule) 679 (unless (featurep 'mule)
615 ;; We do this in such a roundabout way--instead of having the above defun 680 ;; We do this in such a roundabout way--instead of having the above defun
616 ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have 681 ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have
617 ;; make-docfile.c pick up symbol and function documentation correctly. An 682 ;; make-docfile.c pick up symbol and function documentation correctly. An
618 ;; alternative approach would be to fix make-docfile.c to be able to read 683 ;; alternative approach would be to fix make-docfile.c to be able to read