Mercurial > hg > xemacs-beta
comparison lisp/coding.el @ 4570:e6a7054a9c30
Add check-coding-systems-region, test it and others, fix some bugs.
tests/ChangeLog addition:
2008-12-28 Aidan Kehoe <kehoea@parhasard.net>
* automated/query-coding-tests.el:
Add tests for #'unencodable-char-position,
#'check-coding-systems-region, #'encode-coding-char. Remove some
debugging statements.
lisp/ChangeLog addition:
2008-12-28 Aidan Kehoe <kehoea@parhasard.net>
* coding.el (query-coding-region):
(query-coding-string):
Make these defsubsts, they're short enough and they're called
explicitly rarely enough that it make some sense. The alternative
would be compiler macros that avoid the binding of the arguments.
(unencodable-char-position):
Document where the docstring and API are from.
Correct a special case for zero--check-argument-type returns nil
when it succeeds, we can't usefully chain its result in an and
here.
(check-coding-systems-region): New. API taken from GNU; docstring
and implementation are independent.
(encode-coding-char):
Add an optional third argument, as used by recent GNU. Document
the origen of the docstring.
(default-query-coding-region): Add a short docstring to the
non-Mule implementation of this function.
* unicode.el:
Don't set the query-coding-function property for unicode coding
systems if we're on non-mule. Unintern
unicode-query-coding-region, unicode-query-coding-skip-chars-arg
in the same context.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Sun, 28 Dec 2008 22:51:14 +0000 |
parents | 1d74a1d115ee |
children | 4fc32a3a086e 7191a7b120f1 |
comparison
equal
deleted
inserted
replaced
4569:80e0588fb42f | 4570:e6a7054a9c30 |
---|---|
396 (skip-chars-forward skip-chars-arg end buffer)) | 396 (skip-chars-forward skip-chars-arg end buffer)) |
397 (if failed | 397 (if failed |
398 (values nil ranges) | 398 (values nil ranges) |
399 (values t nil)))))) | 399 (values t nil)))))) |
400 | 400 |
401 (defun query-coding-region (start end coding-system &optional buffer | 401 (defsubst query-coding-region (start end coding-system &optional buffer |
402 errorp highlight) | 402 errorp highlight) |
403 "Work out whether CODING-SYSTEM can losslessly encode a region. | 403 "Work out whether CODING-SYSTEM can losslessly encode a region. |
404 | 404 |
405 START and END are the beginning and end of the region to check. | 405 START and END are the beginning and end of the region to check. |
406 CODING-SYSTEM is the coding system to try. | 406 CODING-SYSTEM is the coding system to try. |
421 `make-range-table'." | 421 `make-range-table'." |
422 (funcall (or (coding-system-get coding-system 'query-coding-function) | 422 (funcall (or (coding-system-get coding-system 'query-coding-function) |
423 #'default-query-coding-region) | 423 #'default-query-coding-region) |
424 start end coding-system buffer errorp highlight)) | 424 start end coding-system buffer errorp highlight)) |
425 | 425 |
426 (defun query-coding-string (string coding-system &optional errorp highlight) | 426 (defsubst query-coding-string (string coding-system &optional errorp highlight) |
427 "Work out whether CODING-SYSTEM can losslessly encode STRING. | 427 "Work out whether CODING-SYSTEM can losslessly encode STRING. |
428 CODING-SYSTEM is the coding system to check. | 428 CODING-SYSTEM is the coding system to check. |
429 | 429 |
430 Optional argument ERRORP says to signal a `text-conversion-error' if some | 430 Optional argument ERRORP says to signal a `text-conversion-error' if some |
431 character in the region cannot be encoded, and defaults to nil. | 431 character in the region cannot be encoded, and defaults to nil. |
444 (insert string) | 444 (insert string) |
445 (query-coding-region (point-min) (point-max) coding-system (current-buffer) | 445 (query-coding-region (point-min) (point-max) coding-system (current-buffer) |
446 ;; ### Will highlight work here? | 446 ;; ### Will highlight work here? |
447 errorp highlight))) | 447 errorp highlight))) |
448 | 448 |
449 ;; Function docstring and API are taken from GNU coding.c version 1.353, GPLv2. | |
449 (defun unencodable-char-position (start end coding-system | 450 (defun unencodable-char-position (start end coding-system |
450 &optional count string) | 451 &optional count string) |
451 "Return position of first un-encodable character in a region. | 452 "Return position of first un-encodable character in a region. |
452 START and END specify the region and CODING-SYSTEM specifies the | 453 START and END specify the region and CODING-SYSTEM specifies the |
453 encoding to check. Return nil if CODING-SYSTEM does encode the region. | 454 encoding to check. Return nil if CODING-SYSTEM does encode the region. |
484 "We should never reach this point with null COUNT.") | 485 "We should never reach this point with null COUNT.") |
485 result)))))) | 486 result)))))) |
486 (check-argument-type #'integer-or-marker-p start) | 487 (check-argument-type #'integer-or-marker-p start) |
487 (check-argument-type #'integer-or-marker-p end) | 488 (check-argument-type #'integer-or-marker-p end) |
488 (check-coding-system coding-system) | 489 (check-coding-system coding-system) |
489 (and count (check-argument-type #'natnump count) | 490 (when count (check-argument-type #'natnump count) |
490 ;; Special-case zero, sigh. | 491 ;; Special-case zero, sigh. |
491 (if (zerop count) (setq count 1))) | 492 (if (zerop count) (setq count 1))) |
492 (and string (check-argument-type #'stringp string)) | 493 (and string (check-argument-type #'stringp string)) |
493 (if string | 494 (if string |
494 (with-temp-buffer | 495 (with-temp-buffer |
495 (insert string) | 496 (insert string) |
496 (funcall thunk start end coding-system count)) | 497 (funcall thunk start end coding-system count)) |
497 (funcall thunk start end coding-system count)))) | 498 (funcall thunk start end coding-system count)))) |
498 | 499 |
499 (defun encode-coding-char (char coding-system) | 500 ;; XEmacs; this is a GPLv3 function in coding.c in GNU. This is why we have |
501 ;; both a very divergent docstring and a very divergent implementation. | |
502 (defun check-coding-systems-region (begin end coding-system-list) | |
503 "Can coding systems in CODING-SYSTEM-LIST encode text in a region? | |
504 | |
505 CODING-SYSTEM-LIST must be a list of coding systems. BEGIN and END are | |
506 normally buffer positions delimiting the region. If some coding system in | |
507 CODING-SYSTEM-LIST cannot encode the entire region, the return value of this | |
508 function is an alist mapping coding system names to lists of individual | |
509 buffer positions (not ranges) that the individual coding systems cannot | |
510 encode. | |
511 | |
512 If all coding systems in CODING-SYSTEM-LIST can encode the region, | |
513 this function returns t. This conflicts with the documented, but not | |
514 with the observed, GNU behavior. | |
515 | |
516 If BEGIN is a string, `check-coding-systems-region' ignores END, and checks | |
517 whether the coding systems can encode BEGIN. The alist that is returned | |
518 uses zero-based string indices, not one-based buffer positions. | |
519 | |
520 This function is for GNU compatibility. See also `query-coding-region'." | |
521 (let ((thunk | |
522 #'(lambda (begin end coding-system-list stringp) | |
523 (loop | |
524 for coding-system in coding-system-list | |
525 with result = nil | |
526 with intermediate = nil | |
527 with range-lambda = (if stringp | |
528 #'(lambda (begin end value) | |
529 (while (< begin end) | |
530 (push (1- begin) intermediate) | |
531 (incf begin))) | |
532 #'(lambda (begin end value) | |
533 (while (< begin end) | |
534 (push begin intermediate) | |
535 (incf begin)))) | |
536 do (setq coding-system (check-coding-system coding-system)) | |
537 (multiple-value-bind (encoded ranges) | |
538 (query-coding-region begin end coding-system) | |
539 (unless encoded | |
540 (setq intermediate (list (coding-system-name coding-system))) | |
541 (map-range-table range-lambda ranges) | |
542 (push (nreverse intermediate) result))) | |
543 finally return (or result t))))) | |
544 (if (stringp begin) | |
545 (with-temp-buffer | |
546 (insert begin) | |
547 (funcall thunk (point-min) (point-max) coding-system-list t)) | |
548 (check-argument-type #'integer-or-marker-p begin) | |
549 (check-argument-type #'integer-or-marker-p end) | |
550 (funcall thunk begin end coding-system-list nil)))) | |
551 | |
552 ;; XEmacs; docstring taken from GNU, international/mule-cmds.el, revision | |
553 ;; 1.311, GPLv2. | |
554 (defun encode-coding-char (char coding-system &optional charset) | |
500 "Encode CHAR by CODING-SYSTEM and return the resulting string. | 555 "Encode CHAR by CODING-SYSTEM and return the resulting string. |
501 If CODING-SYSTEM can't safely encode CHAR, return nil." | 556 If CODING-SYSTEM can't safely encode CHAR, return nil. |
557 The optional third argument CHARSET is, for the moment, ignored." | |
502 (check-argument-type #'characterp char) | 558 (check-argument-type #'characterp char) |
503 (multiple-value-bind (succeededp) | 559 (multiple-value-bind (succeededp) |
504 (query-coding-string char coding-system) | 560 (query-coding-string char coding-system) |
505 (when succeededp | 561 (when succeededp |
506 (encode-coding-string char coding-system)))) | 562 (encode-coding-string char coding-system)))) |
507 | 563 |
508 (unless (featurep 'mule) | 564 (unless (featurep 'mule) |
509 ;; If we're under non-Mule, every XEmacs character can be encoded | 565 ;; If we're under non-Mule, every XEmacs character can be encoded |
510 ;; with every XEmacs coding system. | 566 ;; with every XEmacs coding system. |
511 (fset #'default-query-coding-region | 567 (fset #'default-query-coding-region |
512 #'(lambda (&rest ignored) (values t nil))) | 568 #'(lambda (&rest ignored) |
569 "Stub `query-coding-region' implementation. Always succeeds." | |
570 (values t nil))) | |
513 (unintern 'default-query-coding-region-safe-charset-skip-chars-map)) | 571 (unintern 'default-query-coding-region-safe-charset-skip-chars-map)) |
514 | 572 |
515 ;;; coding.el ends here | 573 ;;; coding.el ends here |