comparison lisp/coding.el @ 4570:e6a7054a9c30

Add check-coding-systems-region, test it and others, fix some bugs. tests/ChangeLog addition: 2008-12-28 Aidan Kehoe <kehoea@parhasard.net> * automated/query-coding-tests.el: Add tests for #'unencodable-char-position, #'check-coding-systems-region, #'encode-coding-char. Remove some debugging statements. lisp/ChangeLog addition: 2008-12-28 Aidan Kehoe <kehoea@parhasard.net> * coding.el (query-coding-region): (query-coding-string): Make these defsubsts, they're short enough and they're called explicitly rarely enough that it make some sense. The alternative would be compiler macros that avoid the binding of the arguments. (unencodable-char-position): Document where the docstring and API are from. Correct a special case for zero--check-argument-type returns nil when it succeeds, we can't usefully chain its result in an and here. (check-coding-systems-region): New. API taken from GNU; docstring and implementation are independent. (encode-coding-char): Add an optional third argument, as used by recent GNU. Document the origen of the docstring. (default-query-coding-region): Add a short docstring to the non-Mule implementation of this function. * unicode.el: Don't set the query-coding-function property for unicode coding systems if we're on non-mule. Unintern unicode-query-coding-region, unicode-query-coding-skip-chars-arg in the same context.
author Aidan Kehoe <kehoea@parhasard.net>
date Sun, 28 Dec 2008 22:51:14 +0000
parents 1d74a1d115ee
children 4fc32a3a086e 7191a7b120f1
comparison
equal deleted inserted replaced
4569:80e0588fb42f 4570:e6a7054a9c30
396 (skip-chars-forward skip-chars-arg end buffer)) 396 (skip-chars-forward skip-chars-arg end buffer))
397 (if failed 397 (if failed
398 (values nil ranges) 398 (values nil ranges)
399 (values t nil)))))) 399 (values t nil))))))
400 400
401 (defun query-coding-region (start end coding-system &optional buffer 401 (defsubst query-coding-region (start end coding-system &optional buffer
402 errorp highlight) 402 errorp highlight)
403 "Work out whether CODING-SYSTEM can losslessly encode a region. 403 "Work out whether CODING-SYSTEM can losslessly encode a region.
404 404
405 START and END are the beginning and end of the region to check. 405 START and END are the beginning and end of the region to check.
406 CODING-SYSTEM is the coding system to try. 406 CODING-SYSTEM is the coding system to try.
421 `make-range-table'." 421 `make-range-table'."
422 (funcall (or (coding-system-get coding-system 'query-coding-function) 422 (funcall (or (coding-system-get coding-system 'query-coding-function)
423 #'default-query-coding-region) 423 #'default-query-coding-region)
424 start end coding-system buffer errorp highlight)) 424 start end coding-system buffer errorp highlight))
425 425
426 (defun query-coding-string (string coding-system &optional errorp highlight) 426 (defsubst query-coding-string (string coding-system &optional errorp highlight)
427 "Work out whether CODING-SYSTEM can losslessly encode STRING. 427 "Work out whether CODING-SYSTEM can losslessly encode STRING.
428 CODING-SYSTEM is the coding system to check. 428 CODING-SYSTEM is the coding system to check.
429 429
430 Optional argument ERRORP says to signal a `text-conversion-error' if some 430 Optional argument ERRORP says to signal a `text-conversion-error' if some
431 character in the region cannot be encoded, and defaults to nil. 431 character in the region cannot be encoded, and defaults to nil.
444 (insert string) 444 (insert string)
445 (query-coding-region (point-min) (point-max) coding-system (current-buffer) 445 (query-coding-region (point-min) (point-max) coding-system (current-buffer)
446 ;; ### Will highlight work here? 446 ;; ### Will highlight work here?
447 errorp highlight))) 447 errorp highlight)))
448 448
449 ;; Function docstring and API are taken from GNU coding.c version 1.353, GPLv2.
449 (defun unencodable-char-position (start end coding-system 450 (defun unencodable-char-position (start end coding-system
450 &optional count string) 451 &optional count string)
451 "Return position of first un-encodable character in a region. 452 "Return position of first un-encodable character in a region.
452 START and END specify the region and CODING-SYSTEM specifies the 453 START and END specify the region and CODING-SYSTEM specifies the
453 encoding to check. Return nil if CODING-SYSTEM does encode the region. 454 encoding to check. Return nil if CODING-SYSTEM does encode the region.
484 "We should never reach this point with null COUNT.") 485 "We should never reach this point with null COUNT.")
485 result)))))) 486 result))))))
486 (check-argument-type #'integer-or-marker-p start) 487 (check-argument-type #'integer-or-marker-p start)
487 (check-argument-type #'integer-or-marker-p end) 488 (check-argument-type #'integer-or-marker-p end)
488 (check-coding-system coding-system) 489 (check-coding-system coding-system)
489 (and count (check-argument-type #'natnump count) 490 (when count (check-argument-type #'natnump count)
490 ;; Special-case zero, sigh. 491 ;; Special-case zero, sigh.
491 (if (zerop count) (setq count 1))) 492 (if (zerop count) (setq count 1)))
492 (and string (check-argument-type #'stringp string)) 493 (and string (check-argument-type #'stringp string))
493 (if string 494 (if string
494 (with-temp-buffer 495 (with-temp-buffer
495 (insert string) 496 (insert string)
496 (funcall thunk start end coding-system count)) 497 (funcall thunk start end coding-system count))
497 (funcall thunk start end coding-system count)))) 498 (funcall thunk start end coding-system count))))
498 499
499 (defun encode-coding-char (char coding-system) 500 ;; XEmacs; this is a GPLv3 function in coding.c in GNU. This is why we have
501 ;; both a very divergent docstring and a very divergent implementation.
502 (defun check-coding-systems-region (begin end coding-system-list)
503 "Can coding systems in CODING-SYSTEM-LIST encode text in a region?
504
505 CODING-SYSTEM-LIST must be a list of coding systems. BEGIN and END are
506 normally buffer positions delimiting the region. If some coding system in
507 CODING-SYSTEM-LIST cannot encode the entire region, the return value of this
508 function is an alist mapping coding system names to lists of individual
509 buffer positions (not ranges) that the individual coding systems cannot
510 encode.
511
512 If all coding systems in CODING-SYSTEM-LIST can encode the region,
513 this function returns t. This conflicts with the documented, but not
514 with the observed, GNU behavior.
515
516 If BEGIN is a string, `check-coding-systems-region' ignores END, and checks
517 whether the coding systems can encode BEGIN. The alist that is returned
518 uses zero-based string indices, not one-based buffer positions.
519
520 This function is for GNU compatibility. See also `query-coding-region'."
521 (let ((thunk
522 #'(lambda (begin end coding-system-list stringp)
523 (loop
524 for coding-system in coding-system-list
525 with result = nil
526 with intermediate = nil
527 with range-lambda = (if stringp
528 #'(lambda (begin end value)
529 (while (< begin end)
530 (push (1- begin) intermediate)
531 (incf begin)))
532 #'(lambda (begin end value)
533 (while (< begin end)
534 (push begin intermediate)
535 (incf begin))))
536 do (setq coding-system (check-coding-system coding-system))
537 (multiple-value-bind (encoded ranges)
538 (query-coding-region begin end coding-system)
539 (unless encoded
540 (setq intermediate (list (coding-system-name coding-system)))
541 (map-range-table range-lambda ranges)
542 (push (nreverse intermediate) result)))
543 finally return (or result t)))))
544 (if (stringp begin)
545 (with-temp-buffer
546 (insert begin)
547 (funcall thunk (point-min) (point-max) coding-system-list t))
548 (check-argument-type #'integer-or-marker-p begin)
549 (check-argument-type #'integer-or-marker-p end)
550 (funcall thunk begin end coding-system-list nil))))
551
552 ;; XEmacs; docstring taken from GNU, international/mule-cmds.el, revision
553 ;; 1.311, GPLv2.
554 (defun encode-coding-char (char coding-system &optional charset)
500 "Encode CHAR by CODING-SYSTEM and return the resulting string. 555 "Encode CHAR by CODING-SYSTEM and return the resulting string.
501 If CODING-SYSTEM can't safely encode CHAR, return nil." 556 If CODING-SYSTEM can't safely encode CHAR, return nil.
557 The optional third argument CHARSET is, for the moment, ignored."
502 (check-argument-type #'characterp char) 558 (check-argument-type #'characterp char)
503 (multiple-value-bind (succeededp) 559 (multiple-value-bind (succeededp)
504 (query-coding-string char coding-system) 560 (query-coding-string char coding-system)
505 (when succeededp 561 (when succeededp
506 (encode-coding-string char coding-system)))) 562 (encode-coding-string char coding-system))))
507 563
508 (unless (featurep 'mule) 564 (unless (featurep 'mule)
509 ;; If we're under non-Mule, every XEmacs character can be encoded 565 ;; If we're under non-Mule, every XEmacs character can be encoded
510 ;; with every XEmacs coding system. 566 ;; with every XEmacs coding system.
511 (fset #'default-query-coding-region 567 (fset #'default-query-coding-region
512 #'(lambda (&rest ignored) (values t nil))) 568 #'(lambda (&rest ignored)
569 "Stub `query-coding-region' implementation. Always succeeds."
570 (values t nil)))
513 (unintern 'default-query-coding-region-safe-charset-skip-chars-map)) 571 (unintern 'default-query-coding-region-safe-charset-skip-chars-map))
514 572
515 ;;; coding.el ends here 573 ;;; coding.el ends here