Mercurial > hg > xemacs-beta
comparison lisp/unicode.el @ 4549:68d1ca56cffa
First part of interactive checks that coding systems encode regions.
2008-01-21 Aidan Kehoe <kehoea@parhasard.net>
* coding.el (decode-coding-string):
(encode-coding-string): Accept GNU's NOCOPY argument for
these. Todo; write compiler macros to use it.
(query-coding-warning-face): New face, to show unencodable
characters.
(default-query-coding-region-safe-charset-skip-chars-map):
New variable, a cache used by #'default-query-coding-region.
(default-query-coding-region): Default implementation of
#'query-coding-region, using the safe-charsets and safe-chars
coding systemproperties.
(query-coding-region): New function; can a given coding system
encode a given region?
(query-coding-string): New function; can a given coding system
encode a given string?
(unencodable-char-position): Function API taken from GNU; return
the first unencodable position given a string and coding system.
(encode-coding-char): Function API taken from GNU; return CHAR
encoded using CODING-SYSTEM, or nil if CODING-SYSTEM would trash
CHAR.
((unless (featurep 'mule)): Override the default
query-coding-region implementation on non-Mule.
* mule/mule-coding.el (make-8-bit-generate-helper): Eliminate a
duplicate comment.
(make-8-bit-choose-category): Simplify implementation.
(8-bit-fixed-query-coding-region): Implementation of
#'query-coding-region for coding systems created with
#'make-8-bit-coding-system.
(make-8-bit-coding-system): Initialise the #'query-coding-region
implementation for these character sets.
(make-8-bit-coding-system): Ditto for the compiler macro version
of this function.
* unicode.el (unicode-query-coding-skip-chars-arg): New variable,
used by unicode-query-coding-region, initialised in
mule/general-late.el.
(unicode-query-coding-region): New function, the
#'query-coding-region implementation for Unicode coding systems.
Initialise the query-coding-function property for the Unicode
coding systems to #'unicode-query-coding-region.
* mule/mule-charset.el (charset-skip-chars-string): New
function. Return a #'skip-chars-forward argument that skips all
characters in CHARSET.
(map-charset-chars): Function synced from GNU, modified to work
with XEmacs. Map FUNC across the int value charset ranges of
CHARSET.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Mon, 21 Jan 2008 22:51:21 +0100 |
parents | 15d36164ebd7 |
children | 6812571bfcb9 |
comparison
equal
deleted
inserted
replaced
4402:e70cc8a90e90 | 4549:68d1ca56cffa |
---|---|
609 (frob-unicode-errors-region | 609 (frob-unicode-errors-region |
610 (lambda (start finish) | 610 (lambda (start finish) |
611 (translate-region start finish table)) | 611 (translate-region start finish table)) |
612 begin end buffer)) | 612 begin end buffer)) |
613 | 613 |
614 (defvar unicode-query-coding-skip-chars-arg nil ;; Set in general-late.el | |
615 "Used by `unicode-query-coding-region' to skip chars with known mappings.") | |
616 | |
617 (defun unicode-query-coding-region (begin end coding-system | |
618 &optional buffer errorp highlightp) | |
619 "The `query-coding-region' implementation for Unicode coding systems." | |
620 (check-argument-type #'coding-system-p | |
621 (setq coding-system (find-coding-system coding-system))) | |
622 (check-argument-type #'integer-or-marker-p begin) | |
623 (check-argument-type #'integer-or-marker-p end) | |
624 (let* ((skip-chars-arg unicode-query-coding-skip-chars-arg) | |
625 (ranges (make-range-table)) | |
626 (looking-at-arg (concat "[" skip-chars-arg "]")) | |
627 fail-range-start fail-range-end previous-fail char-after | |
628 failed extent) | |
629 (save-excursion | |
630 (goto-char begin buffer) | |
631 (skip-chars-forward skip-chars-arg end buffer) | |
632 (while (< (point buffer) end) | |
633 (message | |
634 "fail-range-start is %S, previous-fail %S, point is %S, end is %S" | |
635 fail-range-start previous-fail (point buffer) end) | |
636 (setq char-after (char-after (point buffer) buffer) | |
637 fail-range-start (point buffer)) | |
638 (while (and | |
639 (< (point buffer) end) | |
640 (not (looking-at looking-at-arg)) | |
641 (= -1 (char-to-unicode char-after))) | |
642 (forward-char 1 buffer) | |
643 (message "what?!?") | |
644 (setq char-after (char-after (point buffer) buffer) | |
645 failed t)) | |
646 (if (= fail-range-start (point buffer)) | |
647 ;; The character can actually be encoded by the coding | |
648 ;; system; check the characters past it. | |
649 (forward-char 1 buffer) | |
650 ;; Can't be encoded; note this. | |
651 (when errorp | |
652 (error 'text-conversion-error | |
653 (format "Cannot encode %s using coding system" | |
654 (buffer-substring fail-range-start (point buffer) | |
655 buffer)) | |
656 (coding-system-name coding-system))) | |
657 (put-range-table fail-range-start | |
658 ;; If char-after is non-nil, we're not at | |
659 ;; the end of the buffer. | |
660 (setq fail-range-end (if char-after | |
661 (point buffer) | |
662 (point-max buffer))) | |
663 t ranges) | |
664 (when highlightp | |
665 (setq extent (make-extent fail-range-start fail-range-end buffer)) | |
666 (set-extent-priority extent (+ mouse-highlight-priority 2)) | |
667 (set-extent-face extent 'query-coding-warning-face))) | |
668 (skip-chars-forward skip-chars-arg end buffer)) | |
669 (if failed | |
670 (values nil ranges) | |
671 (values t nil))))) | |
672 | |
673 (loop | |
674 for coding-system in (coding-system-list) | |
675 do (when (eq 'unicode (coding-system-type coding-system)) | |
676 (coding-system-put coding-system 'query-coding-function | |
677 #'unicode-query-coding-region))) | |
678 | |
614 (unless (featurep 'mule) | 679 (unless (featurep 'mule) |
615 ;; We do this in such a roundabout way--instead of having the above defun | 680 ;; We do this in such a roundabout way--instead of having the above defun |
616 ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have | 681 ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have |
617 ;; make-docfile.c pick up symbol and function documentation correctly. An | 682 ;; make-docfile.c pick up symbol and function documentation correctly. An |
618 ;; alternative approach would be to fix make-docfile.c to be able to read | 683 ;; alternative approach would be to fix make-docfile.c to be able to read |