comparison lisp/coding.el @ 4568:1d74a1d115ee

Add #'query-coding-region tests; do the work necessary to get them running. lisp/ChangeLog addition: 2008-12-28 Aidan Kehoe <kehoea@parhasard.net> * coding.el (default-query-coding-region): Declare using defun*, so we can #'return-from to it on encountering a safe-charsets value of t. Comment out a few debug messages. (query-coding-region): Correct the docstring, it deals with a region, not a string. (unencodable-char-position): Correct the implementation for non-nil COUNT, special-case a zero value for count, treat it as one. Don't rely on dynamic scope when calling the main lambda. * unicode.el (unicode-query-coding-region): Comment out some debug messages here. * mule/mule-coding.el (8-bit-fixed-query-coding-region): Comment out some debug messages here. * code-init.el (raw-text): Add a safe-charsets property to this coding system. * mule/korean.el (iso-2022-int-1): * mule/korean.el (euc-kr): * mule/korean.el (iso-2022-kr): Add safe-charsets properties for these coding systems. * mule/japanese.el (iso-2022-jp): * mule/japanese.el (jis7): * mule/japanese.el (jis8): * mule/japanese.el (shift-jis): * mule/japanese.el (iso-2022-jp-1978-irv): * mule/japanese.el (euc-jp): Add safe-charsets properties for all these coding systems. * mule/iso-with-esc.el: Add safe-charsets properties to all the coding systems in here. Comment on the downside of a safe-charsets value of t for iso-latin-1-with-esc. * mule/hebrew.el (ctext-hebrew): Add a safe-charsets property for this coding system. * mule/devanagari.el (in-is13194-devanagari): Add a safe-charsets property for this coding system. * mule/chinese.el (cn-gb-2312): * mule/chinese.el (hz-gb-2312): * mule/chinese.el (big5): Add safe-charsets properties for these coding systems. * mule/latin.el (iso-8859-14): Add an implementation for this, using #'make-8-bit-coding-system. * mule/mule-coding.el (ctext): * mule/mule-coding.el (iso-2022-8bit-ss2): * mule/mule-coding.el (iso-2022-7bit-ss2): * mule/mule-coding.el (iso-2022-jp-2): * mule/mule-coding.el (iso-2022-7bit): * mule/mule-coding.el (iso-2022-8): * mule/mule-coding.el (escape-quoted): * mule/mule-coding.el (iso-2022-lock): Add safe-charsets properties for all these coding systems. src/ChangeLog addition: 2008-12-28 Aidan Kehoe <kehoea@parhasard.net> * file-coding.c (Fmake_coding_system): Document our use of the safe-chars and safe-charsets properties, and the differences compared to GNU. (make_coding_system_1): Don't drop the safe-chars and safe-charsets properties. (Fcoding_system_property): Return the safe-chars and safe-charsets properties when asked for them. * file-coding.h (CODING_SYSTEM_SAFE_CHARSETS): * coding-system-slots.h: Make the safe-chars and safe-charsets slots available in these headers. tests/ChangeLog addition: 2008-12-28 Aidan Kehoe <kehoea@parhasard.net> * automated/query-coding-tests.el: New file, testing the functionality of #'query-coding-region and #'query-coding-string.
author Aidan Kehoe <kehoea@parhasard.net>
date Sun, 28 Dec 2008 14:46:24 +0000
parents 46ddeaa7c738
children e6a7054a9c30
comparison
equal deleted inserted replaced
4567:84d618b355f5 4568:1d74a1d115ee
298 (map-extents #'(lambda (extent ignored-arg) 298 (map-extents #'(lambda (extent ignored-arg)
299 (when (eq 'query-coding-warning-face 299 (when (eq 'query-coding-warning-face
300 (extent-face extent)) 300 (extent-face extent))
301 (delete-extent extent))) buffer begin end)) 301 (delete-extent extent))) buffer begin end))
302 302
303 (defun default-query-coding-region (begin end coding-system 303 (defun* default-query-coding-region (begin end coding-system
304 &optional buffer errorp highlightp) 304 &optional buffer errorp highlightp)
305 "The default `query-coding-region' implementation. 305 "The default `query-coding-region' implementation.
306 306
307 Uses the `safe-charsets' and `safe-chars' coding system properties. 307 Uses the `safe-charsets' and `safe-chars' coding system properties.
308 The former is a list of XEmacs character sets that can be safely 308 The former is a list of XEmacs character sets that can be safely
309 encoded by CODING-SYSTEM; the latter a char table describing, in 309 encoded by CODING-SYSTEM; the latter a char table describing, in
322 'safe-chars))) 322 'safe-chars)))
323 (skip-chars-arg 323 (skip-chars-arg
324 (gethash safe-charsets 324 (gethash safe-charsets
325 default-query-coding-region-safe-charset-skip-chars-map)) 325 default-query-coding-region-safe-charset-skip-chars-map))
326 (ranges (make-range-table)) 326 (ranges (make-range-table))
327 fail-range-start fail-range-end previous-fail char-after 327 fail-range-start fail-range-end char-after
328 looking-at-arg failed extent) 328 looking-at-arg failed extent)
329 ;; Coding systems with a value of t for safe-charsets support everything.
330 (when (eq t safe-charsets)
331 (return-from default-query-coding-region (values t nil)))
329 (unless skip-chars-arg 332 (unless skip-chars-arg
330 (setq skip-chars-arg 333 (setq skip-chars-arg
331 (puthash safe-charsets 334 (puthash safe-charsets
332 (mapconcat #'charset-skip-chars-string 335 (mapconcat #'charset-skip-chars-string
333 safe-charsets "") 336 safe-charsets "")
353 (concat "[" skip-chars-arg "]"))) 356 (concat "[" skip-chars-arg "]")))
354 (save-excursion 357 (save-excursion
355 (goto-char begin buffer) 358 (goto-char begin buffer)
356 (skip-chars-forward skip-chars-arg end buffer) 359 (skip-chars-forward skip-chars-arg end buffer)
357 (while (< (point buffer) end) 360 (while (< (point buffer) end)
358 (message 361 ; (message
359 "fail-range-start is %S, previous-fail %S, point is %S, end is %S" 362 ; "fail-range-start is %S, point is %S, end is %S"
360 fail-range-start previous-fail (point buffer) end) 363 ; fail-range-start (point buffer) end)
361 (setq char-after (char-after (point buffer) buffer) 364 (setq char-after (char-after (point buffer) buffer)
362 fail-range-start (point buffer)) 365 fail-range-start (point buffer))
363 (while (and 366 (while (and
364 (< (point buffer) end) 367 (< (point buffer) end)
365 (not (looking-at looking-at-arg)) 368 (not (looking-at looking-at-arg))
409 Optional argument HIGHLIGHT says to display unencodable characters in the 412 Optional argument HIGHLIGHT says to display unencodable characters in the
410 region using `query-coding-warning-face'. It defaults to nil. 413 region using `query-coding-warning-face'. It defaults to nil.
411 414
412 This function returns a list; the intention is that callers use 415 This function returns a list; the intention is that callers use
413 `multiple-value-bind' or the related CL multiple value functions to deal 416 `multiple-value-bind' or the related CL multiple value functions to deal
414 with it. The first element is `t' if the string can be encoded using 417 with it. The first element is `t' if the region can be encoded using
415 CODING-SYSTEM, or `nil' if not. The second element is `nil' if the string 418 CODING-SYSTEM, or `nil' if not. The second element is `nil' if the region
416 can be encoded using CODING-SYSTEM; otherwise, it is a range table 419 can be encoded using CODING-SYSTEM; otherwise, it is a range table
417 describing the positions of the unencodable characters. See 420 describing the positions of the unencodable characters. See
418 `make-range-table'." 421 `make-range-table'."
419 (funcall (or (coding-system-get coding-system 'query-coding-function) 422 (funcall (or (coding-system-get coding-system 'query-coding-function)
420 #'default-query-coding-region) 423 #'default-query-coding-region)
454 list of positions. 457 list of positions.
455 458
456 If optional 5th argument STRING is non-nil, it is a string to search 459 If optional 5th argument STRING is non-nil, it is a string to search
457 for un-encodable characters. In that case, START and END are indexes 460 for un-encodable characters. In that case, START and END are indexes
458 in the string." 461 in the string."
459 (flet ((thunk () 462 (let ((thunk
460 (multiple-value-bind (result ranges) 463 #'(lambda (start end coding-system &optional count)
461 (query-coding-region start end coding-system) 464 (multiple-value-bind (result ranges)
462 (if result 465 (query-coding-region start end coding-system)
463 ;; If query-coding-region thinks the entire region is 466 (if result
464 ;; encodable, result will be t, and the thunk should 467 nil
465 ;; return nil, because there are no unencodable 468 (block worked-it-all-out
466 ;; positions in the region. 469 (if count
467 nil 470 (map-range-table
468 (if count 471 #'(lambda (begin end value)
469 (block counted 472 (while (and (< begin end)
470 (map-range-table 473 (< (length result) count))
471 #'(lambda (begin end value) 474 (push begin result)
472 (while (and (<= begin end) (<= begin count)) 475 (incf begin))
473 (push begin result) 476 (when (= (length result) count)
474 (incf begin)) 477 (return-from worked-it-all-out result)))
475 (if (> begin count) (return-from counted))) 478 ranges)
476 ranges)) 479 (map-range-table
477 (map-range-table 480 #'(lambda (begin end value)
478 #'(lambda (begin end value) 481 (return-from worked-it-all-out begin))
479 (while (<= begin end) 482 ranges))
480 (push begin result) 483 (assert (not (null count)) t
481 (incf begin))) ranges)) 484 "We should never reach this point with null COUNT.")
482 result)))) 485 result))))))
486 (check-argument-type #'integer-or-marker-p start)
487 (check-argument-type #'integer-or-marker-p end)
488 (check-coding-system coding-system)
489 (and count (check-argument-type #'natnump count)
490 ;; Special-case zero, sigh.
491 (if (zerop count) (setq count 1)))
492 (and string (check-argument-type #'stringp string))
483 (if string 493 (if string
484 (with-temp-buffer (insert string) (thunk)) 494 (with-temp-buffer
485 (thunk)))) 495 (insert string)
496 (funcall thunk start end coding-system count))
497 (funcall thunk start end coding-system count))))
486 498
487 (defun encode-coding-char (char coding-system) 499 (defun encode-coding-char (char coding-system)
488 "Encode CHAR by CODING-SYSTEM and return the resulting string. 500 "Encode CHAR by CODING-SYSTEM and return the resulting string.
489 If CODING-SYSTEM can't safely encode CHAR, return nil." 501 If CODING-SYSTEM can't safely encode CHAR, return nil."
490 (check-argument-type #'characterp char) 502 (check-argument-type #'characterp char)