Mercurial > hg > xemacs-beta
comparison lisp/coding.el @ 4569:80e0588fb42f
Merge.
| author | Aidan Kehoe <kehoea@parhasard.net> |
|---|---|
| date | Sun, 28 Dec 2008 14:55:02 +0000 |
| parents | 1d74a1d115ee |
| children | e6a7054a9c30 |
comparison
equal
deleted
inserted
replaced
| 4537:7ca6d57ce12d | 4569:80e0588fb42f |
|---|---|
| 123 This function is meant to be called interactively; | 123 This function is meant to be called interactively; |
| 124 from a Lisp program, use `detect-coding-region' instead." | 124 from a Lisp program, use `detect-coding-region' instead." |
| 125 (interactive "r\nP") | 125 (interactive "r\nP") |
| 126 (princ (detect-coding-region start end))) | 126 (princ (detect-coding-region start end))) |
| 127 | 127 |
| 128 (defun decode-coding-string (str coding-system) | 128 (defun decode-coding-string (str coding-system &optional nocopy) |
| 129 "Decode the string STR which is encoded in CODING-SYSTEM. | 129 "Decode the string STR which is encoded in CODING-SYSTEM. |
| 130 Does not modify STR. Returns the decoded string on successful conversion." | 130 Normally does not modify STR. Returns the decoded string on |
| 131 successful conversion. | |
| 132 Optional argument NOCOPY says that modifying STR and returning it is | |
| 133 allowed." | |
| 131 (with-string-as-buffer-contents | 134 (with-string-as-buffer-contents |
| 132 str (decode-coding-region (point-min) (point-max) coding-system))) | 135 str (decode-coding-region (point-min) (point-max) coding-system))) |
| 133 | 136 |
| 134 (defun encode-coding-string (str coding-system) | 137 (defun encode-coding-string (str coding-system &optional nocopy) |
| 135 "Encode the string STR using CODING-SYSTEM. | 138 "Encode the string STR using CODING-SYSTEM. |
| 136 Does not modify STR. Returns the encoded string on successful conversion." | 139 Does not modify STR. Returns the encoded string on successful conversion. |
| 140 Optional argument NOCOPY says that the original string may be returned | |
| 141 if does not differ from the encoded string. " | |
| 137 (with-string-as-buffer-contents | 142 (with-string-as-buffer-contents |
| 138 str (encode-coding-region (point-min) (point-max) coding-system))) | 143 str (encode-coding-region (point-min) (point-max) coding-system))) |
| 139 | 144 |
| 140 | 145 |
| 141 ;;;; Coding system accessors | 146 ;;;; Coding system accessors |
| 272 ; (define-coding-system-alias 'iso-8859-2 'raw-text) | 277 ; (define-coding-system-alias 'iso-8859-2 'raw-text) |
| 273 (define-coding-system-alias 'ctext 'raw-text)) | 278 (define-coding-system-alias 'ctext 'raw-text)) |
| 274 | 279 |
| 275 (make-compatible-variable 'enable-multibyte-characters "Unimplemented") | 280 (make-compatible-variable 'enable-multibyte-characters "Unimplemented") |
| 276 | 281 |
| 282 ;; Sure would be nice to be able to use defface here. | |
| 283 (copy-face 'highlight 'query-coding-warning-face) | |
| 284 | |
| 285 (defvar default-query-coding-region-safe-charset-skip-chars-map | |
| 286 #s(hash-table test equal data ()) | |
| 287 "A map from list of charsets to `skip-chars-forward' arguments for them.") | |
| 288 | |
| 289 (defsubst query-coding-clear-highlights (begin end &optional buffer) | |
| 290 "Remove extent faces added by `query-coding-region' between BEGIN and END. | |
| 291 | |
| 292 Optional argument BUFFER is the buffer to use, and defaults to the current | |
| 293 buffer. | |
| 294 | |
| 295 The HIGHLIGHTP argument to `query-coding-region' indicates that it should | |
| 296 display unencodable characters using `query-coding-warning-face'. After | |
| 297 this function has been called, this will no longer be the case. " | |
| 298 (map-extents #'(lambda (extent ignored-arg) | |
| 299 (when (eq 'query-coding-warning-face | |
| 300 (extent-face extent)) | |
| 301 (delete-extent extent))) buffer begin end)) | |
| 302 | |
| 303 (defun* default-query-coding-region (begin end coding-system | |
| 304 &optional buffer errorp highlightp) | |
| 305 "The default `query-coding-region' implementation. | |
| 306 | |
| 307 Uses the `safe-charsets' and `safe-chars' coding system properties. | |
| 308 The former is a list of XEmacs character sets that can be safely | |
| 309 encoded by CODING-SYSTEM; the latter a char table describing, in | |
| 310 addition, characters that can be safely encoded by CODING-SYSTEM." | |
| 311 (check-argument-type #'coding-system-p | |
| 312 (setq coding-system (find-coding-system coding-system))) | |
| 313 (check-argument-type #'integer-or-marker-p begin) | |
| 314 (check-argument-type #'integer-or-marker-p end) | |
| 315 (let* ((safe-charsets | |
| 316 (or (coding-system-get coding-system 'safe-charsets) | |
| 317 (coding-system-get (coding-system-base coding-system) | |
| 318 'safe-charsets))) | |
| 319 (safe-chars | |
| 320 (or (coding-system-get coding-system 'safe-chars) | |
| 321 (coding-system-get (coding-system-base coding-system) | |
| 322 'safe-chars))) | |
| 323 (skip-chars-arg | |
| 324 (gethash safe-charsets | |
| 325 default-query-coding-region-safe-charset-skip-chars-map)) | |
| 326 (ranges (make-range-table)) | |
| 327 fail-range-start fail-range-end char-after | |
| 328 looking-at-arg failed extent) | |
| 329 ;; Coding systems with a value of t for safe-charsets support everything. | |
| 330 (when (eq t safe-charsets) | |
| 331 (return-from default-query-coding-region (values t nil))) | |
| 332 (unless skip-chars-arg | |
| 333 (setq skip-chars-arg | |
| 334 (puthash safe-charsets | |
| 335 (mapconcat #'charset-skip-chars-string | |
| 336 safe-charsets "") | |
| 337 default-query-coding-region-safe-charset-skip-chars-map))) | |
| 338 (when highlightp | |
| 339 (query-coding-clear-highlights begin end buffer)) | |
| 340 (if (and (zerop (length skip-chars-arg)) (null safe-chars)) | |
| 341 (progn | |
| 342 ;; Uh-oh, nothing known about this coding system. Fail. | |
| 343 (when errorp | |
| 344 (error 'text-conversion-error | |
| 345 "Coding system doesn't say what it can encode" | |
| 346 (coding-system-name coding-system))) | |
| 347 (put-range-table begin end t ranges) | |
| 348 (when highlightp | |
| 349 (setq extent (make-extent begin end buffer)) | |
| 350 (set-extent-priority extent (+ mouse-highlight-priority 2)) | |
| 351 (set-extent-face extent 'query-coding-warning-face)) | |
| 352 (values nil ranges)) | |
| 353 (setq looking-at-arg (if (equal "" skip-chars-arg) | |
| 354 ;; Regexp that will never match. | |
| 355 #r".\{0,0\}" | |
| 356 (concat "[" skip-chars-arg "]"))) | |
| 357 (save-excursion | |
| 358 (goto-char begin buffer) | |
| 359 (skip-chars-forward skip-chars-arg end buffer) | |
| 360 (while (< (point buffer) end) | |
| 361 ; (message | |
| 362 ; "fail-range-start is %S, point is %S, end is %S" | |
| 363 ; fail-range-start (point buffer) end) | |
| 364 (setq char-after (char-after (point buffer) buffer) | |
| 365 fail-range-start (point buffer)) | |
| 366 (while (and | |
| 367 (< (point buffer) end) | |
| 368 (not (looking-at looking-at-arg)) | |
| 369 (or (not safe-chars) | |
| 370 (not (get-char-table char-after safe-chars)))) | |
| 371 (forward-char 1 buffer) | |
| 372 (setq char-after (char-after (point buffer) buffer) | |
| 373 failed t)) | |
| 374 (if (= fail-range-start (point buffer)) | |
| 375 ;; The character can actually be encoded by the coding | |
| 376 ;; system; check the characters past it. | |
| 377 (forward-char 1 buffer) | |
| 378 ;; Can't be encoded; note this. | |
| 379 (when errorp | |
| 380 (error 'text-conversion-error | |
| 381 (format "Cannot encode %s using coding system" | |
| 382 (buffer-substring fail-range-start (point buffer) | |
| 383 buffer)) | |
| 384 (coding-system-name coding-system))) | |
| 385 (put-range-table fail-range-start | |
| 386 ;; If char-after is non-nil, we're not at | |
| 387 ;; the end of the buffer. | |
| 388 (setq fail-range-end (if char-after | |
| 389 (point buffer) | |
| 390 (point-max buffer))) | |
| 391 t ranges) | |
| 392 (when highlightp | |
| 393 (setq extent (make-extent fail-range-start fail-range-end buffer)) | |
| 394 (set-extent-priority extent (+ mouse-highlight-priority 2)) | |
| 395 (set-extent-face extent 'query-coding-warning-face))) | |
| 396 (skip-chars-forward skip-chars-arg end buffer)) | |
| 397 (if failed | |
| 398 (values nil ranges) | |
| 399 (values t nil)))))) | |
| 400 | |
| 401 (defun query-coding-region (start end coding-system &optional buffer | |
| 402 errorp highlight) | |
| 403 "Work out whether CODING-SYSTEM can losslessly encode a region. | |
| 404 | |
| 405 START and END are the beginning and end of the region to check. | |
| 406 CODING-SYSTEM is the coding system to try. | |
| 407 | |
| 408 Optional argument BUFFER is the buffer to check, and defaults to the current | |
| 409 buffer. Optional argument ERRORP says to signal a `text-conversion-error' | |
| 410 if some character in the region cannot be encoded, and defaults to nil. | |
| 411 | |
| 412 Optional argument HIGHLIGHT says to display unencodable characters in the | |
| 413 region using `query-coding-warning-face'. It defaults to nil. | |
| 414 | |
| 415 This function returns a list; the intention is that callers use | |
| 416 `multiple-value-bind' or the related CL multiple value functions to deal | |
| 417 with it. The first element is `t' if the region can be encoded using | |
| 418 CODING-SYSTEM, or `nil' if not. The second element is `nil' if the region | |
| 419 can be encoded using CODING-SYSTEM; otherwise, it is a range table | |
| 420 describing the positions of the unencodable characters. See | |
| 421 `make-range-table'." | |
| 422 (funcall (or (coding-system-get coding-system 'query-coding-function) | |
| 423 #'default-query-coding-region) | |
| 424 start end coding-system buffer errorp highlight)) | |
| 425 | |
| 426 (defun query-coding-string (string coding-system &optional errorp highlight) | |
| 427 "Work out whether CODING-SYSTEM can losslessly encode STRING. | |
| 428 CODING-SYSTEM is the coding system to check. | |
| 429 | |
| 430 Optional argument ERRORP says to signal a `text-conversion-error' if some | |
| 431 character in the region cannot be encoded, and defaults to nil. | |
| 432 | |
| 433 Optional argument HIGHLIGHT says to display unencodable characters in the | |
| 434 region using `query-coding-warning-face'. It defaults to nil. | |
| 435 | |
| 436 This function returns a list; the intention is that callers use use | |
| 437 `multiple-value-bind' or the related CL multiple value functions to deal | |
| 438 with it. The first element is `t' if the string can be encoded using | |
| 439 CODING-SYSTEM, or `nil' if not. The second element is `nil' if the string | |
| 440 can be encoded using CODING-SYSTEM; otherwise, it is a range table | |
| 441 describing the positions of the unencodable characters. See | |
| 442 `make-range-table'." | |
| 443 (with-temp-buffer | |
| 444 (insert string) | |
| 445 (query-coding-region (point-min) (point-max) coding-system (current-buffer) | |
| 446 ;; ### Will highlight work here? | |
| 447 errorp highlight))) | |
| 448 | |
| 449 (defun unencodable-char-position (start end coding-system | |
| 450 &optional count string) | |
| 451 "Return position of first un-encodable character in a region. | |
| 452 START and END specify the region and CODING-SYSTEM specifies the | |
| 453 encoding to check. Return nil if CODING-SYSTEM does encode the region. | |
| 454 | |
| 455 If optional 4th argument COUNT is non-nil, it specifies at most how | |
| 456 many un-encodable characters to search. In this case, the value is a | |
| 457 list of positions. | |
| 458 | |
| 459 If optional 5th argument STRING is non-nil, it is a string to search | |
| 460 for un-encodable characters. In that case, START and END are indexes | |
| 461 in the string." | |
| 462 (let ((thunk | |
| 463 #'(lambda (start end coding-system &optional count) | |
| 464 (multiple-value-bind (result ranges) | |
| 465 (query-coding-region start end coding-system) | |
| 466 (if result | |
| 467 nil | |
| 468 (block worked-it-all-out | |
| 469 (if count | |
| 470 (map-range-table | |
| 471 #'(lambda (begin end value) | |
| 472 (while (and (< begin end) | |
| 473 (< (length result) count)) | |
| 474 (push begin result) | |
| 475 (incf begin)) | |
| 476 (when (= (length result) count) | |
| 477 (return-from worked-it-all-out result))) | |
| 478 ranges) | |
| 479 (map-range-table | |
| 480 #'(lambda (begin end value) | |
| 481 (return-from worked-it-all-out begin)) | |
| 482 ranges)) | |
| 483 (assert (not (null count)) t | |
| 484 "We should never reach this point with null COUNT.") | |
| 485 result)))))) | |
| 486 (check-argument-type #'integer-or-marker-p start) | |
| 487 (check-argument-type #'integer-or-marker-p end) | |
| 488 (check-coding-system coding-system) | |
| 489 (and count (check-argument-type #'natnump count) | |
| 490 ;; Special-case zero, sigh. | |
| 491 (if (zerop count) (setq count 1))) | |
| 492 (and string (check-argument-type #'stringp string)) | |
| 493 (if string | |
| 494 (with-temp-buffer | |
| 495 (insert string) | |
| 496 (funcall thunk start end coding-system count)) | |
| 497 (funcall thunk start end coding-system count)))) | |
| 498 | |
| 499 (defun encode-coding-char (char coding-system) | |
| 500 "Encode CHAR by CODING-SYSTEM and return the resulting string. | |
| 501 If CODING-SYSTEM can't safely encode CHAR, return nil." | |
| 502 (check-argument-type #'characterp char) | |
| 503 (multiple-value-bind (succeededp) | |
| 504 (query-coding-string char coding-system) | |
| 505 (when succeededp | |
| 506 (encode-coding-string char coding-system)))) | |
| 507 | |
| 508 (unless (featurep 'mule) | |
| 509 ;; If we're under non-Mule, every XEmacs character can be encoded | |
| 510 ;; with every XEmacs coding system. | |
| 511 (fset #'default-query-coding-region | |
| 512 #'(lambda (&rest ignored) (values t nil))) | |
| 513 (unintern 'default-query-coding-region-safe-charset-skip-chars-map)) | |
| 514 | |
| 277 ;;; coding.el ends here | 515 ;;; coding.el ends here |
