comparison lisp/coding.el @ 4564:46ddeaa7c738

Automated merge with file:///Sources/xemacs-21.5-checked-out
author Aidan Kehoe <kehoea@parhasard.net>
date Fri, 18 Jul 2008 01:00:32 +0200
parents e34711681f30 20c32e489235
children 1d74a1d115ee
comparison
equal deleted inserted replaced
4479:4cb7c59b5201 4564:46ddeaa7c738
123 This function is meant to be called interactively; 123 This function is meant to be called interactively;
124 from a Lisp program, use `detect-coding-region' instead." 124 from a Lisp program, use `detect-coding-region' instead."
125 (interactive "r\nP") 125 (interactive "r\nP")
126 (princ (detect-coding-region start end))) 126 (princ (detect-coding-region start end)))
127 127
128 (defun decode-coding-string (str coding-system) 128 (defun decode-coding-string (str coding-system &optional nocopy)
129 "Decode the string STR which is encoded in CODING-SYSTEM. 129 "Decode the string STR which is encoded in CODING-SYSTEM.
130 Does not modify STR. Returns the decoded string on successful conversion." 130 Normally does not modify STR. Returns the decoded string on
131 successful conversion.
132 Optional argument NOCOPY says that modifying STR and returning it is
133 allowed."
131 (with-string-as-buffer-contents 134 (with-string-as-buffer-contents
132 str (decode-coding-region (point-min) (point-max) coding-system))) 135 str (decode-coding-region (point-min) (point-max) coding-system)))
133 136
134 (defun encode-coding-string (str coding-system) 137 (defun encode-coding-string (str coding-system &optional nocopy)
135 "Encode the string STR using CODING-SYSTEM. 138 "Encode the string STR using CODING-SYSTEM.
136 Does not modify STR. Returns the encoded string on successful conversion." 139 Does not modify STR. Returns the encoded string on successful conversion.
140 Optional argument NOCOPY says that the original string may be returned
141 if does not differ from the encoded string. "
137 (with-string-as-buffer-contents 142 (with-string-as-buffer-contents
138 str (encode-coding-region (point-min) (point-max) coding-system))) 143 str (encode-coding-region (point-min) (point-max) coding-system)))
139 144
140 145
141 ;;;; Coding system accessors 146 ;;;; Coding system accessors
272 ; (define-coding-system-alias 'iso-8859-2 'raw-text) 277 ; (define-coding-system-alias 'iso-8859-2 'raw-text)
273 (define-coding-system-alias 'ctext 'raw-text)) 278 (define-coding-system-alias 'ctext 'raw-text))
274 279
275 (make-compatible-variable 'enable-multibyte-characters "Unimplemented") 280 (make-compatible-variable 'enable-multibyte-characters "Unimplemented")
276 281
282 ;; Sure would be nice to be able to use defface here.
283 (copy-face 'highlight 'query-coding-warning-face)
284
285 (defvar default-query-coding-region-safe-charset-skip-chars-map
286 #s(hash-table test equal data ())
287 "A map from list of charsets to `skip-chars-forward' arguments for them.")
288
289 (defsubst query-coding-clear-highlights (begin end &optional buffer)
290 "Remove extent faces added by `query-coding-region' between BEGIN and END.
291
292 Optional argument BUFFER is the buffer to use, and defaults to the current
293 buffer.
294
295 The HIGHLIGHTP argument to `query-coding-region' indicates that it should
296 display unencodable characters using `query-coding-warning-face'. After
297 this function has been called, this will no longer be the case. "
298 (map-extents #'(lambda (extent ignored-arg)
299 (when (eq 'query-coding-warning-face
300 (extent-face extent))
301 (delete-extent extent))) buffer begin end))
302
303 (defun default-query-coding-region (begin end coding-system
304 &optional buffer errorp highlightp)
305 "The default `query-coding-region' implementation.
306
307 Uses the `safe-charsets' and `safe-chars' coding system properties.
308 The former is a list of XEmacs character sets that can be safely
309 encoded by CODING-SYSTEM; the latter a char table describing, in
310 addition, characters that can be safely encoded by CODING-SYSTEM."
311 (check-argument-type #'coding-system-p
312 (setq coding-system (find-coding-system coding-system)))
313 (check-argument-type #'integer-or-marker-p begin)
314 (check-argument-type #'integer-or-marker-p end)
315 (let* ((safe-charsets
316 (or (coding-system-get coding-system 'safe-charsets)
317 (coding-system-get (coding-system-base coding-system)
318 'safe-charsets)))
319 (safe-chars
320 (or (coding-system-get coding-system 'safe-chars)
321 (coding-system-get (coding-system-base coding-system)
322 'safe-chars)))
323 (skip-chars-arg
324 (gethash safe-charsets
325 default-query-coding-region-safe-charset-skip-chars-map))
326 (ranges (make-range-table))
327 fail-range-start fail-range-end previous-fail char-after
328 looking-at-arg failed extent)
329 (unless skip-chars-arg
330 (setq skip-chars-arg
331 (puthash safe-charsets
332 (mapconcat #'charset-skip-chars-string
333 safe-charsets "")
334 default-query-coding-region-safe-charset-skip-chars-map)))
335 (when highlightp
336 (query-coding-clear-highlights begin end buffer))
337 (if (and (zerop (length skip-chars-arg)) (null safe-chars))
338 (progn
339 ;; Uh-oh, nothing known about this coding system. Fail.
340 (when errorp
341 (error 'text-conversion-error
342 "Coding system doesn't say what it can encode"
343 (coding-system-name coding-system)))
344 (put-range-table begin end t ranges)
345 (when highlightp
346 (setq extent (make-extent begin end buffer))
347 (set-extent-priority extent (+ mouse-highlight-priority 2))
348 (set-extent-face extent 'query-coding-warning-face))
349 (values nil ranges))
350 (setq looking-at-arg (if (equal "" skip-chars-arg)
351 ;; Regexp that will never match.
352 #r".\{0,0\}"
353 (concat "[" skip-chars-arg "]")))
354 (save-excursion
355 (goto-char begin buffer)
356 (skip-chars-forward skip-chars-arg end buffer)
357 (while (< (point buffer) end)
358 (message
359 "fail-range-start is %S, previous-fail %S, point is %S, end is %S"
360 fail-range-start previous-fail (point buffer) end)
361 (setq char-after (char-after (point buffer) buffer)
362 fail-range-start (point buffer))
363 (while (and
364 (< (point buffer) end)
365 (not (looking-at looking-at-arg))
366 (or (not safe-chars)
367 (not (get-char-table char-after safe-chars))))
368 (forward-char 1 buffer)
369 (setq char-after (char-after (point buffer) buffer)
370 failed t))
371 (if (= fail-range-start (point buffer))
372 ;; The character can actually be encoded by the coding
373 ;; system; check the characters past it.
374 (forward-char 1 buffer)
375 ;; Can't be encoded; note this.
376 (when errorp
377 (error 'text-conversion-error
378 (format "Cannot encode %s using coding system"
379 (buffer-substring fail-range-start (point buffer)
380 buffer))
381 (coding-system-name coding-system)))
382 (put-range-table fail-range-start
383 ;; If char-after is non-nil, we're not at
384 ;; the end of the buffer.
385 (setq fail-range-end (if char-after
386 (point buffer)
387 (point-max buffer)))
388 t ranges)
389 (when highlightp
390 (setq extent (make-extent fail-range-start fail-range-end buffer))
391 (set-extent-priority extent (+ mouse-highlight-priority 2))
392 (set-extent-face extent 'query-coding-warning-face)))
393 (skip-chars-forward skip-chars-arg end buffer))
394 (if failed
395 (values nil ranges)
396 (values t nil))))))
397
398 (defun query-coding-region (start end coding-system &optional buffer
399 errorp highlight)
400 "Work out whether CODING-SYSTEM can losslessly encode a region.
401
402 START and END are the beginning and end of the region to check.
403 CODING-SYSTEM is the coding system to try.
404
405 Optional argument BUFFER is the buffer to check, and defaults to the current
406 buffer. Optional argument ERRORP says to signal a `text-conversion-error'
407 if some character in the region cannot be encoded, and defaults to nil.
408
409 Optional argument HIGHLIGHT says to display unencodable characters in the
410 region using `query-coding-warning-face'. It defaults to nil.
411
412 This function returns a list; the intention is that callers use
413 `multiple-value-bind' or the related CL multiple value functions to deal
414 with it. The first element is `t' if the string can be encoded using
415 CODING-SYSTEM, or `nil' if not. The second element is `nil' if the string
416 can be encoded using CODING-SYSTEM; otherwise, it is a range table
417 describing the positions of the unencodable characters. See
418 `make-range-table'."
419 (funcall (or (coding-system-get coding-system 'query-coding-function)
420 #'default-query-coding-region)
421 start end coding-system buffer errorp highlight))
422
423 (defun query-coding-string (string coding-system &optional errorp highlight)
424 "Work out whether CODING-SYSTEM can losslessly encode STRING.
425 CODING-SYSTEM is the coding system to check.
426
427 Optional argument ERRORP says to signal a `text-conversion-error' if some
428 character in the region cannot be encoded, and defaults to nil.
429
430 Optional argument HIGHLIGHT says to display unencodable characters in the
431 region using `query-coding-warning-face'. It defaults to nil.
432
433 This function returns a list; the intention is that callers use use
434 `multiple-value-bind' or the related CL multiple value functions to deal
435 with it. The first element is `t' if the string can be encoded using
436 CODING-SYSTEM, or `nil' if not. The second element is `nil' if the string
437 can be encoded using CODING-SYSTEM; otherwise, it is a range table
438 describing the positions of the unencodable characters. See
439 `make-range-table'."
440 (with-temp-buffer
441 (insert string)
442 (query-coding-region (point-min) (point-max) coding-system (current-buffer)
443 ;; ### Will highlight work here?
444 errorp highlight)))
445
446 (defun unencodable-char-position (start end coding-system
447 &optional count string)
448 "Return position of first un-encodable character in a region.
449 START and END specify the region and CODING-SYSTEM specifies the
450 encoding to check. Return nil if CODING-SYSTEM does encode the region.
451
452 If optional 4th argument COUNT is non-nil, it specifies at most how
453 many un-encodable characters to search. In this case, the value is a
454 list of positions.
455
456 If optional 5th argument STRING is non-nil, it is a string to search
457 for un-encodable characters. In that case, START and END are indexes
458 in the string."
459 (flet ((thunk ()
460 (multiple-value-bind (result ranges)
461 (query-coding-region start end coding-system)
462 (if result
463 ;; If query-coding-region thinks the entire region is
464 ;; encodable, result will be t, and the thunk should
465 ;; return nil, because there are no unencodable
466 ;; positions in the region.
467 nil
468 (if count
469 (block counted
470 (map-range-table
471 #'(lambda (begin end value)
472 (while (and (<= begin end) (<= begin count))
473 (push begin result)
474 (incf begin))
475 (if (> begin count) (return-from counted)))
476 ranges))
477 (map-range-table
478 #'(lambda (begin end value)
479 (while (<= begin end)
480 (push begin result)
481 (incf begin))) ranges))
482 result))))
483 (if string
484 (with-temp-buffer (insert string) (thunk))
485 (thunk))))
486
487 (defun encode-coding-char (char coding-system)
488 "Encode CHAR by CODING-SYSTEM and return the resulting string.
489 If CODING-SYSTEM can't safely encode CHAR, return nil."
490 (check-argument-type #'characterp char)
491 (multiple-value-bind (succeededp)
492 (query-coding-string char coding-system)
493 (when succeededp
494 (encode-coding-string char coding-system))))
495
496 (unless (featurep 'mule)
497 ;; If we're under non-Mule, every XEmacs character can be encoded
498 ;; with every XEmacs coding system.
499 (fset #'default-query-coding-region
500 #'(lambda (&rest ignored) (values t nil)))
501 (unintern 'default-query-coding-region-safe-charset-skip-chars-map))
502
277 ;;; coding.el ends here 503 ;;; coding.el ends here