comparison lisp/coding.el @ 4569:80e0588fb42f

Merge.
author Aidan Kehoe <kehoea@parhasard.net>
date Sun, 28 Dec 2008 14:55:02 +0000
parents 1d74a1d115ee
children e6a7054a9c30
comparison
equal deleted inserted replaced
4537:7ca6d57ce12d 4569:80e0588fb42f
123 This function is meant to be called interactively; 123 This function is meant to be called interactively;
124 from a Lisp program, use `detect-coding-region' instead." 124 from a Lisp program, use `detect-coding-region' instead."
125 (interactive "r\nP") 125 (interactive "r\nP")
126 (princ (detect-coding-region start end))) 126 (princ (detect-coding-region start end)))
127 127
128 (defun decode-coding-string (str coding-system) 128 (defun decode-coding-string (str coding-system &optional nocopy)
129 "Decode the string STR which is encoded in CODING-SYSTEM. 129 "Decode the string STR which is encoded in CODING-SYSTEM.
130 Does not modify STR. Returns the decoded string on successful conversion." 130 Normally does not modify STR. Returns the decoded string on
131 successful conversion.
132 Optional argument NOCOPY says that modifying STR and returning it is
133 allowed."
131 (with-string-as-buffer-contents 134 (with-string-as-buffer-contents
132 str (decode-coding-region (point-min) (point-max) coding-system))) 135 str (decode-coding-region (point-min) (point-max) coding-system)))
133 136
134 (defun encode-coding-string (str coding-system) 137 (defun encode-coding-string (str coding-system &optional nocopy)
135 "Encode the string STR using CODING-SYSTEM. 138 "Encode the string STR using CODING-SYSTEM.
136 Does not modify STR. Returns the encoded string on successful conversion." 139 Does not modify STR. Returns the encoded string on successful conversion.
140 Optional argument NOCOPY says that the original string may be returned
141 if does not differ from the encoded string. "
137 (with-string-as-buffer-contents 142 (with-string-as-buffer-contents
138 str (encode-coding-region (point-min) (point-max) coding-system))) 143 str (encode-coding-region (point-min) (point-max) coding-system)))
139 144
140 145
141 ;;;; Coding system accessors 146 ;;;; Coding system accessors
272 ; (define-coding-system-alias 'iso-8859-2 'raw-text) 277 ; (define-coding-system-alias 'iso-8859-2 'raw-text)
273 (define-coding-system-alias 'ctext 'raw-text)) 278 (define-coding-system-alias 'ctext 'raw-text))
274 279
275 (make-compatible-variable 'enable-multibyte-characters "Unimplemented") 280 (make-compatible-variable 'enable-multibyte-characters "Unimplemented")
276 281
282 ;; Sure would be nice to be able to use defface here.
283 (copy-face 'highlight 'query-coding-warning-face)
284
285 (defvar default-query-coding-region-safe-charset-skip-chars-map
286 #s(hash-table test equal data ())
287 "A map from list of charsets to `skip-chars-forward' arguments for them.")
288
289 (defsubst query-coding-clear-highlights (begin end &optional buffer)
290 "Remove extent faces added by `query-coding-region' between BEGIN and END.
291
292 Optional argument BUFFER is the buffer to use, and defaults to the current
293 buffer.
294
295 The HIGHLIGHTP argument to `query-coding-region' indicates that it should
296 display unencodable characters using `query-coding-warning-face'. After
297 this function has been called, this will no longer be the case. "
298 (map-extents #'(lambda (extent ignored-arg)
299 (when (eq 'query-coding-warning-face
300 (extent-face extent))
301 (delete-extent extent))) buffer begin end))
302
303 (defun* default-query-coding-region (begin end coding-system
304 &optional buffer errorp highlightp)
305 "The default `query-coding-region' implementation.
306
307 Uses the `safe-charsets' and `safe-chars' coding system properties.
308 The former is a list of XEmacs character sets that can be safely
309 encoded by CODING-SYSTEM; the latter a char table describing, in
310 addition, characters that can be safely encoded by CODING-SYSTEM."
311 (check-argument-type #'coding-system-p
312 (setq coding-system (find-coding-system coding-system)))
313 (check-argument-type #'integer-or-marker-p begin)
314 (check-argument-type #'integer-or-marker-p end)
315 (let* ((safe-charsets
316 (or (coding-system-get coding-system 'safe-charsets)
317 (coding-system-get (coding-system-base coding-system)
318 'safe-charsets)))
319 (safe-chars
320 (or (coding-system-get coding-system 'safe-chars)
321 (coding-system-get (coding-system-base coding-system)
322 'safe-chars)))
323 (skip-chars-arg
324 (gethash safe-charsets
325 default-query-coding-region-safe-charset-skip-chars-map))
326 (ranges (make-range-table))
327 fail-range-start fail-range-end char-after
328 looking-at-arg failed extent)
329 ;; Coding systems with a value of t for safe-charsets support everything.
330 (when (eq t safe-charsets)
331 (return-from default-query-coding-region (values t nil)))
332 (unless skip-chars-arg
333 (setq skip-chars-arg
334 (puthash safe-charsets
335 (mapconcat #'charset-skip-chars-string
336 safe-charsets "")
337 default-query-coding-region-safe-charset-skip-chars-map)))
338 (when highlightp
339 (query-coding-clear-highlights begin end buffer))
340 (if (and (zerop (length skip-chars-arg)) (null safe-chars))
341 (progn
342 ;; Uh-oh, nothing known about this coding system. Fail.
343 (when errorp
344 (error 'text-conversion-error
345 "Coding system doesn't say what it can encode"
346 (coding-system-name coding-system)))
347 (put-range-table begin end t ranges)
348 (when highlightp
349 (setq extent (make-extent begin end buffer))
350 (set-extent-priority extent (+ mouse-highlight-priority 2))
351 (set-extent-face extent 'query-coding-warning-face))
352 (values nil ranges))
353 (setq looking-at-arg (if (equal "" skip-chars-arg)
354 ;; Regexp that will never match.
355 #r".\{0,0\}"
356 (concat "[" skip-chars-arg "]")))
357 (save-excursion
358 (goto-char begin buffer)
359 (skip-chars-forward skip-chars-arg end buffer)
360 (while (< (point buffer) end)
361 ; (message
362 ; "fail-range-start is %S, point is %S, end is %S"
363 ; fail-range-start (point buffer) end)
364 (setq char-after (char-after (point buffer) buffer)
365 fail-range-start (point buffer))
366 (while (and
367 (< (point buffer) end)
368 (not (looking-at looking-at-arg))
369 (or (not safe-chars)
370 (not (get-char-table char-after safe-chars))))
371 (forward-char 1 buffer)
372 (setq char-after (char-after (point buffer) buffer)
373 failed t))
374 (if (= fail-range-start (point buffer))
375 ;; The character can actually be encoded by the coding
376 ;; system; check the characters past it.
377 (forward-char 1 buffer)
378 ;; Can't be encoded; note this.
379 (when errorp
380 (error 'text-conversion-error
381 (format "Cannot encode %s using coding system"
382 (buffer-substring fail-range-start (point buffer)
383 buffer))
384 (coding-system-name coding-system)))
385 (put-range-table fail-range-start
386 ;; If char-after is non-nil, we're not at
387 ;; the end of the buffer.
388 (setq fail-range-end (if char-after
389 (point buffer)
390 (point-max buffer)))
391 t ranges)
392 (when highlightp
393 (setq extent (make-extent fail-range-start fail-range-end buffer))
394 (set-extent-priority extent (+ mouse-highlight-priority 2))
395 (set-extent-face extent 'query-coding-warning-face)))
396 (skip-chars-forward skip-chars-arg end buffer))
397 (if failed
398 (values nil ranges)
399 (values t nil))))))
400
401 (defun query-coding-region (start end coding-system &optional buffer
402 errorp highlight)
403 "Work out whether CODING-SYSTEM can losslessly encode a region.
404
405 START and END are the beginning and end of the region to check.
406 CODING-SYSTEM is the coding system to try.
407
408 Optional argument BUFFER is the buffer to check, and defaults to the current
409 buffer. Optional argument ERRORP says to signal a `text-conversion-error'
410 if some character in the region cannot be encoded, and defaults to nil.
411
412 Optional argument HIGHLIGHT says to display unencodable characters in the
413 region using `query-coding-warning-face'. It defaults to nil.
414
415 This function returns a list; the intention is that callers use
416 `multiple-value-bind' or the related CL multiple value functions to deal
417 with it. The first element is `t' if the region can be encoded using
418 CODING-SYSTEM, or `nil' if not. The second element is `nil' if the region
419 can be encoded using CODING-SYSTEM; otherwise, it is a range table
420 describing the positions of the unencodable characters. See
421 `make-range-table'."
422 (funcall (or (coding-system-get coding-system 'query-coding-function)
423 #'default-query-coding-region)
424 start end coding-system buffer errorp highlight))
425
426 (defun query-coding-string (string coding-system &optional errorp highlight)
427 "Work out whether CODING-SYSTEM can losslessly encode STRING.
428 CODING-SYSTEM is the coding system to check.
429
430 Optional argument ERRORP says to signal a `text-conversion-error' if some
431 character in the region cannot be encoded, and defaults to nil.
432
433 Optional argument HIGHLIGHT says to display unencodable characters in the
434 region using `query-coding-warning-face'. It defaults to nil.
435
436 This function returns a list; the intention is that callers use use
437 `multiple-value-bind' or the related CL multiple value functions to deal
438 with it. The first element is `t' if the string can be encoded using
439 CODING-SYSTEM, or `nil' if not. The second element is `nil' if the string
440 can be encoded using CODING-SYSTEM; otherwise, it is a range table
441 describing the positions of the unencodable characters. See
442 `make-range-table'."
443 (with-temp-buffer
444 (insert string)
445 (query-coding-region (point-min) (point-max) coding-system (current-buffer)
446 ;; ### Will highlight work here?
447 errorp highlight)))
448
449 (defun unencodable-char-position (start end coding-system
450 &optional count string)
451 "Return position of first un-encodable character in a region.
452 START and END specify the region and CODING-SYSTEM specifies the
453 encoding to check. Return nil if CODING-SYSTEM does encode the region.
454
455 If optional 4th argument COUNT is non-nil, it specifies at most how
456 many un-encodable characters to search. In this case, the value is a
457 list of positions.
458
459 If optional 5th argument STRING is non-nil, it is a string to search
460 for un-encodable characters. In that case, START and END are indexes
461 in the string."
462 (let ((thunk
463 #'(lambda (start end coding-system &optional count)
464 (multiple-value-bind (result ranges)
465 (query-coding-region start end coding-system)
466 (if result
467 nil
468 (block worked-it-all-out
469 (if count
470 (map-range-table
471 #'(lambda (begin end value)
472 (while (and (< begin end)
473 (< (length result) count))
474 (push begin result)
475 (incf begin))
476 (when (= (length result) count)
477 (return-from worked-it-all-out result)))
478 ranges)
479 (map-range-table
480 #'(lambda (begin end value)
481 (return-from worked-it-all-out begin))
482 ranges))
483 (assert (not (null count)) t
484 "We should never reach this point with null COUNT.")
485 result))))))
486 (check-argument-type #'integer-or-marker-p start)
487 (check-argument-type #'integer-or-marker-p end)
488 (check-coding-system coding-system)
489 (and count (check-argument-type #'natnump count)
490 ;; Special-case zero, sigh.
491 (if (zerop count) (setq count 1)))
492 (and string (check-argument-type #'stringp string))
493 (if string
494 (with-temp-buffer
495 (insert string)
496 (funcall thunk start end coding-system count))
497 (funcall thunk start end coding-system count))))
498
499 (defun encode-coding-char (char coding-system)
500 "Encode CHAR by CODING-SYSTEM and return the resulting string.
501 If CODING-SYSTEM can't safely encode CHAR, return nil."
502 (check-argument-type #'characterp char)
503 (multiple-value-bind (succeededp)
504 (query-coding-string char coding-system)
505 (when succeededp
506 (encode-coding-string char coding-system))))
507
508 (unless (featurep 'mule)
509 ;; If we're under non-Mule, every XEmacs character can be encoded
510 ;; with every XEmacs coding system.
511 (fset #'default-query-coding-region
512 #'(lambda (&rest ignored) (values t nil)))
513 (unintern 'default-query-coding-region-safe-charset-skip-chars-map))
514
277 ;;; coding.el ends here 515 ;;; coding.el ends here