diff lisp/code-cmds.el @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents 576fb035e263
children b531bf8658e9
line wrap: on
line diff
--- a/lisp/code-cmds.el	Fri Mar 08 13:33:14 2002 +0000
+++ b/lisp/code-cmds.el	Wed Mar 13 08:54:06 2002 +0000
@@ -1,9 +1,10 @@
-;;; code-cmds.el --- Commands for manipulating coding systems..
+;;; code-cmds.el --- Commands for manipulating coding systems.
 
 ;; Copyright (C) 1995,1999 Electrotechnical Laboratory, JAPAN.
 ;; Licensed to the Free Software Foundation.
 ;; Copyright (C) 2000 Free Software Foundation
 ;; Copyright (C) 1997 MORIOKA Tomohiko
+;; Copyright (C) 2001, 2002 Ben Wing.
 
 
 ;; This file is part of XEmacs.
@@ -34,21 +35,23 @@
 
 ;;; Coding related key bindings and menus.
 
-(defvar coding-keymap (make-sparse-keymap "Mule/Conding")
-  "Keymap for Mule and Coding cystem specific commands.")
+(defvar coding-keymap (make-sparse-keymap "Coding+Mule")
+  "Keymap for coding-system-specific and (when available) Mule commands.")
 
-;; Keep "C-x C-m ..." for mule specific commands.
+;; Keep "C-x C-m ..." for coding/mule specific commands.
 (define-key ctl-x-map "\C-m" coding-keymap)
 
 (define-key coding-keymap "f" 'set-buffer-file-coding-system)
 (define-key coding-keymap "F" 'set-default-buffer-file-coding-system) ; XEmacs
 (define-key coding-keymap "t" 'set-terminal-coding-system)
 (define-key coding-keymap "p" 'set-buffer-process-coding-system)
-;(define-key coding-keymap "x" 'set-selection-coding-system)
-;(define-key coding-keymap "X" 'set-next-selection-coding-system)
 (define-key coding-keymap "c" 'universal-coding-system-argument)
 ;;(define-key coding-keymap "c" 'list-coding-system-briefly) ; XEmacs
-;;(define-key coding-keymap "C" 'describe-coding-system)	 ; XEmacs
+(when-fboundp 'describe-coding-system
+  (define-key coding-keymap "C" 'describe-coding-system))	 ; XEmacs
+(when-fboundp 'set-selection-coding-system
+  (define-key coding-keymap "x" 'set-selection-coding-system)
+  (define-key coding-keymap "X" 'set-next-selection-coding-system))
 
 
 (defun coding-system-change-eol-conversion (coding-system eol-type)
@@ -80,12 +83,24 @@
       (let ((base (coding-system-base coding-system)))
         (if (not eol-type)
             base
-          (if (= eol-type orig-eol-type)
+          (if (eq eol-type orig-eol-type)
               coding-system
             (setq orig-eol-type (coding-system-eol-type base))
             (if (null orig-eol-type)
                 (coding-system-property base eol-type))))))))
 
+;; (defun coding-system-change-text-conversion (coding-system coding)
+;;   "Return a coding system which differs from CODING-SYSTEM in text conversion.
+;; The returned coding system converts text by CODING
+;; but end-of-line as the same way as CODING-SYSTEM.
+;; If CODING is nil, the returned coding system detects
+;; how text is formatted automatically while decoding."
+;;   (if (not coding)
+;;       (coding-system-base coding-system)
+;;     (let ((eol-type (coding-system-eol-type coding-system)))
+;;       (coding-system-change-eol-conversion
+;;        coding
+;;        (if (numberp eol-type) (aref [unix dos mac] eol-type))))))
 
 (defun universal-coding-system-argument ()
   "Execute an I/O command using the specified coding system."
@@ -109,54 +124,35 @@
       (message "")
       (call-interactively cmd))))
 
-(defun set-default-coding-systems (coding-system)
-  "Set default value of various coding systems to CODING-SYSTEM.
-This sets the following coding systems:
-  o coding system of a newly created buffer
-  o default coding system for terminal output
-  o default coding system for keyboard input
-  o default coding system for subprocess I/O
-  o default coding system for converting file names."
+(defun set-default-output-coding-systems (coding-system)
+  "Set default value for coding system output to CODING-SYSTEM.
+This sets the coding system of newly created buffers (the default value of
+`buffer-file-coding-system') and the default coding system for output to a
+subprocess (the CDR of `default-process-coding-system').
+
+Other defaults are not changed; see `prefer-coding-system' for why."
   (check-coding-system coding-system)
-  ;;(setq-default buffer-file-coding-system coding-system)
   (set-default-buffer-file-coding-system coding-system)
-  ;; (if default-enable-multibyte-characters
-  ;;     (setq default-file-name-coding-system coding-system))
-  ;; If coding-system is nil, honor that on MS-DOS as well, so
-  ;; that they could reset the terminal coding system.
-  ;; (unless (and (eq window-system 'pc) coding-system)
-  ;;   (setq default-terminal-coding-system coding-system))
-  (set-terminal-coding-system coding-system)
-  ;;(setq default-keyboard-coding-system coding-system)
-  (set-keyboard-coding-system coding-system)
-  ;;(setq default-process-coding-system (cons coding-system coding-system))
-  ;; Refer to coding-system-for-read and coding-system-for-write
-  ;; so that C-x RET c works.
-  (add-hook 'comint-exec-hook
-	    `(lambda ()
-	       (let ((proc (get-buffer-process (current-buffer))))
-		 (set-process-input-coding-system
-		  proc (or coding-system-for-read ',coding-system))
-		 (set-process-output-coding-system
-		  proc (or coding-system-for-write ',coding-system))))
-	    'append)
-  (setq file-name-coding-system coding-system))
+  (setq default-process-coding-system
+	(cons (car default-process-coding-system) coding-system)))
 
 (defun prefer-coding-system (coding-system)
   "Add CODING-SYSTEM at the front of the priority list for automatic detection.
-This also sets the following coding systems:
-  o coding system of a newly created buffer
-  o default coding system for terminal output
-  o default coding system for keyboard input
-  o default coding system for converting file names.
+This also sets the coding system of newly created buffers (the default
+value of `buffer-file-coding-system') and the default coding system for
+output to a subprocess (the CDR of `default-process-coding-system').
 
 If CODING-SYSTEM specifies a certain type of EOL conversion, the coding
 systems set by this function will use that type of EOL conversion.
 
-This command does not change the default value of terminal coding system
-for MS-DOS terminal, because DOS terminals only support a single coding
-system, and Emacs automatically sets the default to that coding system at
-startup."
+This does not change the default coding system for converting file names
+because that is dependent on the current locale; it's changed when
+`set-language-environment' is called.  It does not change
+`terminal-coding-system' or `keyboard-coding-system'; they should get set
+when the terminal is opened (and are typically an inherent property of the
+terminal), and don't change afterward.  It does not change the default
+coding system for reading files or input from a subprocess; they should
+remain as `undecided' so that automatic detection is done."
   (interactive "zPrefer coding system: ")
   (if (not (and coding-system (find-coding-system coding-system)))
       (error "Invalid coding system `%s'" coding-system))
@@ -175,29 +171,290 @@
 	(message "Highest priority is set to %s (base of %s)"
 		 base coding-system))
     ;; If they asked for specific EOL conversion, honor that.
-    (if (memq eol-type '(lf crlf mac))
+    (if (memq eol-type '(lf crlf cr unix dos mac))
 	(setq coding-system
 	      (coding-system-change-eol-conversion base eol-type))
       (setq coding-system base))
-    (set-default-coding-systems coding-system)))
+    (set-default-output-coding-systems coding-system)))
+
+;; (defun find-coding-systems-region-subset-p (list1 list2)
+;;   "Return non-nil if all elements in LIST1 are included in LIST2.
+;; Comparison done with EQ."
+;;   (catch 'tag
+;;     (while list1
+;;       (or (memq (car list1) list2)
+;;           (throw 'tag nil))
+;;       (setq list1 (cdr list1)))
+;;     t))
 
-;;; Commands
+;; (defun find-coding-systems-region (from to)
+;;   "Return a list of proper coding systems to encode a text between FROM and TO.
+;; All coding systems in the list can safely encode any multibyte characters
+;; in the text.
+;; 
+;; If the text contains no multibyte characters, return a list of a single
+;; element `undecided'."
+;;   (find-coding-systems-for-charsets (find-charset-region from to)))
+
+;; (defun find-coding-systems-string (string)
+;;   "Return a list of proper coding systems to encode STRING.
+;; All coding systems in the list can safely encode any multibyte characters
+;; in STRING.
+;; 
+;; If STRING contains no multibyte characters, return a list of a single
+;; element `undecided'."
+;;   (find-coding-systems-for-charsets (find-charset-string string)))
 
-(defun set-buffer-process-coding-system (decoding encoding)
-  "Set coding systems for the process associated with the current buffer.
-DECODING is the coding system to be used to decode input from the process,
-ENCODING is the coding system to be used to encode output to the process.
+;; (defun find-coding-systems-for-charsets (charsets)
+;;   "Return a list of proper coding systems to encode characters of CHARSETS.
+;; CHARSETS is a list of character sets."
+;;   (if (or (null charsets)
+;;           (and (= (length charsets) 1)
+;;                (eq 'ascii (car charsets))))
+;;       '(undecided)
+;;     (setq charsets (delq 'composition charsets))
+;;     (let ((l (coding-system-list 'base-only))
+;;           (charset-preferred-codings
+;;            (mapcar (function
+;;                     (lambda (x)
+;;                       (if (eq x 'unknown)
+;;                           'raw-text
+;;                         (get-charset-property x 'preferred-coding-system))))
+;;                    charsets))
+;;           (priorities (mapcar (function (lambda (x) (symbol-value x)))
+;;                               coding-category-list))
+;;           codings coding safe)
+;;       (if (memq 'unknown charsets)
+;;           ;; The region contains invalid multibyte characters.
+;;           (setq l '(raw-text)))
+;;       (while l
+;;         (setq coding (car l) l (cdr l))
+;;         (if (and (setq safe (coding-system-get coding 'safe-charsets))
+;;                  (or (eq safe t)
+;;                      (find-coding-systems-region-subset-p charsets safe)))
+;;             ;; We put the higher priority to coding systems included
+;;             ;; in CHARSET-PREFERRED-CODINGS, and within them, put the
+;;             ;; higher priority to coding systems which support smaller
+;;             ;; number of charsets.
+;;             (let ((priority
+;;                    (+ (if (coding-system-get coding 'mime-charset) 4096 0)
+;;                       (lsh (length (memq coding priorities)) 7)
+;;                       (if (memq coding charset-preferred-codings) 64 0)
+;;                       (if (> (coding-system-type coding) 0) 32 0)
+;;                       (if (consp safe) (- 32 (length safe)) 0))))
+;;               (setq codings (cons (cons priority coding) codings)))))
+;;       (mapcar 'cdr
+;;               (sort codings (function (lambda (x y) (> (car x) (car y))))))
+;;       )))
+
+;; (defun find-multibyte-characters (from to &optional maxcount excludes)
+;;   "Find multibyte characters in the region specified by FROM and TO.
+;; If FROM is a string, find multibyte characters in the string.
+;; The return value is an alist of the following format:
+;;   ((CHARSET COUNT CHAR ...) ...)
+;; where
+;;   CHARSET is a character set,
+;;   COUNT is a number of characters,
+;;   CHARs are found characters of the character set.
+;; Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list.
+;; Optional 4th arg EXCLUDE is a list of character sets to be ignored.
+;; 
+;; For invalid characters, CHARs are actually strings."
+;;   (let ((chars nil)
+;;         charset char)
+;;     (if (stringp from)
+;;         (let ((idx 0))
+;;           (while (setq idx (string-match "[^\000-\177]" from idx))
+;;             (setq char (aref from idx)
+;;                   charset (char-charset char))
+;;             (if (eq charset 'unknown)
+;;                 (setq char (match-string 0)))
+;;             (if (or (eq charset 'unknown)
+;;                     (not (or (eq excludes t) (memq charset excludes))))
+;;                 (let ((slot (assq charset chars)))
+;;                   (if slot
+;;                       (if (not (memq char (nthcdr 2 slot)))
+;;                           (let ((count (nth 1 slot)))
+;;                             (setcar (cdr slot) (1+ count))
+;;                             (if (or (not maxcount) (< count maxcount))
+;;                                 (nconc slot (list char)))))
+;;                     (setq chars (cons (list charset 1 char) chars)))))
+;;             (setq idx (1+ idx))))
+;;       (save-excursion
+;;         (goto-char from)
+;;         (while (re-search-forward "[^\000-\177]" to t)
+;;           (setq char (preceding-char)
+;;                 charset (char-charset char))
+;;           (if (eq charset 'unknown)
+;;               (setq char (match-string 0)))
+;;           (if (or (eq charset 'unknown)
+;;                   (not (or (eq excludes t) (memq charset excludes))))
+;;               (let ((slot (assq charset chars)))
+;;                 (if slot
+;;                     (if (not (member char (nthcdr 2 slot)))
+;;                         (let ((count (nth 1 slot)))
+;;                           (setcar (cdr slot) (1+ count))
+;;                           (if (or (not maxcount) (< count maxcount))
+;;                               (nconc slot (list char)))))
+;;                   (setq chars (cons (list charset 1 char) chars))))))))
+;;     (nreverse chars)))
+
+;; (defvar last-coding-system-specified nil
+;;   "Most recent coding system explicitly specified by the user when asked.
+;; This variable is set whenever Emacs asks the user which coding system
+;; to use in order to write a file.  If you set it to nil explicitly,
+;; then call `write-region', then afterward this variable will be non-nil
+;; only if the user was explicitly asked and specified a coding system.")
 
-For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]."
-  (interactive
-   "zCoding-system for process input: \nzCoding-system for process output: ")
-  (let ((proc (get-buffer-process (current-buffer))))
-    (if (null proc)
-	(error "no process")
-      (check-coding-system decoding)
-      (check-coding-system encoding)
-      (set-process-coding-system proc decoding encoding)))
-  (force-mode-line-update))
+;; (defun select-safe-coding-system (from to &optional default-coding-system)
+;;   "Ask a user to select a safe coding system from candidates.
+;; The candidates of coding systems which can safely encode a text
+;; between FROM and TO are shown in a popup window.
+;; 
+;; Optional arg DEFAULT-CODING-SYSTEM specifies a coding system to be
+;; checked at first.  If omitted, buffer-file-coding-system of the
+;; current buffer is used.
+;; 
+;; If the text can be encoded safely by DEFAULT-CODING-SYSTEM, it is
+;; returned without any user interaction.
+;; 
+;; Kludgy feature: if FROM is a string, the string is the target text,
+;; and TO is ignored."
+;;   (or default-coding-system
+;;       (setq default-coding-system buffer-file-coding-system))
+;;   (let* ((charsets (if (stringp from) (find-charset-string from)
+;;                      (find-charset-region from to)))
+;;          (safe-coding-systems (find-coding-systems-for-charsets charsets)))
+;;     (if (or (not enable-multibyte-characters)
+;;             (eq (car safe-coding-systems) 'undecided)
+;;             (eq default-coding-system 'no-conversion)
+;;             (and default-coding-system
+;;                  (memq (coding-system-base default-coding-system)
+;;                        safe-coding-systems)))
+;;         default-coding-system
+;; 
+;;       ;; At first, change each coding system to the corresponding
+;;       ;; mime-charset name if it is also a coding system.
+;;       (let ((l safe-coding-systems)
+;;             mime-charset)
+;;         (while l
+;;           (setq mime-charset (coding-system-get (car l) 'mime-charset))
+;;           (if (and mime-charset (coding-system-p mime-charset))
+;;               (setcar l mime-charset))
+;;           (setq l (cdr l))))
+;; 
+;;       (let ((non-safe-chars (find-multibyte-characters
+;;                              from to 3
+;;                              (and default-coding-system
+;;                                   (coding-system-get default-coding-system
+;;                                                      'safe-charsets))))
+;;             show-position overlays)
+;;         (save-excursion
+;;           ;; Highlight characters that default-coding-system can't encode.
+;;           (when (integerp from)
+;;             (goto-char from)
+;;             (let ((found nil))
+;;               (while (and (not found)
+;;                           (re-search-forward "[^\000-\177]" to t))
+;;                 (setq found (assq (char-charset (preceding-char))
+;;                                   non-safe-chars))))
+;;             (forward-line -1)
+;;             (setq show-position (point))
+;;             (save-excursion
+;;               (while (and (< (length overlays) 256)
+;;                           (re-search-forward "[^\000-\177]" to t))
+;;                 (let* ((char (preceding-char))
+;;                        (charset (char-charset char)))
+;;                   (when (assq charset non-safe-chars)
+;;                     (setq overlays (cons (make-overlay (1- (point)) (point))
+;;                                          overlays))
+;;                     (overlay-put (car overlays) 'face 'highlight))))))
+;; 
+;;           ;; At last, ask a user to select a proper coding system.  
+;;           (unwind-protect
+;;               (save-window-excursion
+;;                 (when show-position
+;;                   ;; At first, be sure to show the current buffer.
+;;                   (set-window-buffer (selected-window) (current-buffer))
+;;                   (set-window-start (selected-window) show-position))
+;;                 ;; Then, show a helpful message.
+;;                 (with-output-to-temp-buffer "*Warning*"
+;;                   (save-excursion
+;;                     (set-buffer standard-output)
+;;                     (insert "The target text contains the following non ASCII character(s):\n")
+;;                     (let ((len (length non-safe-chars))
+;;                           (shown 0))
+;;                       (while (and non-safe-chars (< shown 3))
+;;                         (when (> (length (car non-safe-chars)) 2)
+;;                           (setq shown (1+ shown))
+;;                           (insert (format "%25s: " (car (car non-safe-chars))))
+;;                           (let ((l (nthcdr 2 (car non-safe-chars))))
+;;                             (while l
+;;                               (if (or (stringp (car l)) (char-valid-p (car l)))
+;;                                   (insert (car l)))
+;;                               (setq l (cdr l))))
+;;                           (if (> (nth 1 (car non-safe-chars)) 3)
+;;                               (insert "..."))
+;;                           (insert "\n"))
+;;                         (setq non-safe-chars (cdr non-safe-chars)))
+;;                       (if (< shown len)
+;;                           (insert (format "%27s\n" "..."))))
+;;                     (insert (format "\
+;; These can't be encoded safely by the coding system %s.
+;; 
+;; Please select one from the following safe coding systems:\n"
+;;                                     default-coding-system))
+;;                     (let ((pos (point))
+;;                           (fill-prefix "  "))
+;;                       (mapcar (function (lambda (x) (princ "  ") (princ x)))
+;;                               safe-coding-systems)
+;;                       (fill-region-as-paragraph pos (point)))))
+;; 
+;;                 ;; Read a coding system.
+;;                 (let* ((safe-names (mapcar (lambda (x) (list (symbol-name x)))
+;;                                            safe-coding-systems))
+;;                        (name (completing-read
+;;                               (format "Select coding system (default %s): "
+;;                                       (car safe-coding-systems))
+;;                               safe-names nil t nil nil
+;;                               (car (car safe-names)))))
+;;                   (setq last-coding-system-specified (intern name))
+;;                   (if (integerp (coding-system-eol-type default-coding-system))
+;;                       (setq last-coding-system-specified
+;;                             (coding-system-change-eol-conversion
+;;                              last-coding-system-specified
+;;                              (coding-system-eol-type default-coding-system))))
+;;                   last-coding-system-specified))
+;;             (kill-buffer "*Warning*")
+;;             (while overlays
+;;               (delete-overlay (car overlays))
+;;               (setq overlays (cdr overlays)))))))))
+
+;; (setq select-safe-coding-system-function 'select-safe-coding-system)
+
+;; (defun select-message-coding-system ()
+;;   "Return a coding system to encode the outgoing message of the current buffer.
+;; It at first tries the first coding system found in these variables
+;; in this order:
+;;   (1) local value of `buffer-file-coding-system'
+;;   (2) value of `sendmail-coding-system'
+;;   (3) value of `default-buffer-file-coding-system'
+;;   (4) value of `default-sendmail-coding-system'
+;; If the found coding system can't encode the current buffer,
+;; or none of them are bound to a coding system,
+;; it asks the user to select a proper coding system."
+;;   (let ((coding (or (and (local-variable-p 'buffer-file-coding-system)
+;;                          buffer-file-coding-system)
+;;                     sendmail-coding-system
+;;                     default-buffer-file-coding-system
+;;                     default-sendmail-coding-system)))
+;;     (if (eq coding 'no-conversion)
+;;         ;; We should never use no-conversion for outgoing mails.
+;;         (setq coding nil))
+;;     (if (fboundp select-safe-coding-system-function)
+;;         (funcall select-safe-coding-system-function
+;;                  (point-min) (point-max) coding)
+;;       coding)))
 
 (provide 'code-cmds)