changeset 2297:13a418960a88

[xemacs-hg @ 2004-09-22 02:05:42 by stephent] various doc patches <87isa7awrh.fsf@tleepslib.sk.tsukuba.ac.jp>
author stephent
date Wed, 22 Sep 2004 02:06:52 +0000
parents a58ea4d0d0cd
children 7d67f0ab192c
files lisp/ChangeLog lisp/about.el lisp/cl-macs.el lisp/code-init.el lisp/find-paths.el lisp/font-menu.el lisp/gtk-font-menu.el lisp/itimer.el lisp/mule/mule-charset.el lisp/specifier.el lisp/unicode.el lisp/x-font-menu.el man/ChangeLog man/lispref/control.texi man/lispref/glyphs.texi src/ChangeLog src/dired.c src/eval.c src/file-coding.c src/file-coding.h src/glyphs-widget.c src/glyphs.c src/process.c src/procimpl.h src/syntax.c src/undo.c
diffstat 26 files changed, 563 insertions(+), 120 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Wed Sep 22 01:10:57 2004 +0000
+++ b/lisp/ChangeLog	Wed Sep 22 02:06:52 2004 +0000
@@ -1,3 +1,64 @@
+2004-09-19  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* code-init.el: Add to header comment.
+	* gtk-font-menu.el: Improve documentation.
+	* unicode.el: Various comments added.
+	* process.c (process-buffer):
+	(set-process-buffer):
+	(process-stderr-buffer):
+	(set-process-stderr-buffer):
+	Document interaction with filters.
+
+2004-09-14  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* x-font-menu.el: (x-fonts-menu-junk-families): Improve docstring.
+	(hack-font-truename): Improve docstring.
+
+	* font-menu.el (font-menu-ignore-scaled-fonts): Improve docstring.
+	(font-menu-this-frame-only-p): Improve docstring.
+	(font-menu-preferred-resolution): Improve docstring.
+	(font-menu-size-scaling): Improve docstring.
+	(device-fonts-cache): Move comment into docstring and improve it.
+	(reset-device-font-menus): Improve docstring.
+	(reset-device-font-menus): Move message into if; only announce
+	we're getting font list if we're actually going to do it.
+
+2003-12-14  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* cl-macs.el (loop): Fixed typo in docstring.
+
+2003-11-02  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* itimer.el (check-itimer-coerce-string): Fix sense of docstring;
+	markup signal name as code.
+	(check-nonnegative-number): Markup signal name as code.
+
+2004-07-12  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* specifier.el (set-specifier): Sentences in docstrings should be
+	separated with two spaces, not one.
+
+	* find-paths.el (paths-lisp-filename-regexp): Improve docstring.
+
+2004-07-12  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* about.el (xemacs-hackers): Update turnbull, add viteno.
+	(about-current-release-maintainers): Add martin, vin, and viteno.
+	(about-other-current-hackers):
+	(about-once-and-future-hackers):
+	Move inactive hackers from other-current to once-and-future.
+	(about-url-alist):  Add turnbull, xemacs-cvs, and xemacs-lists.
+	* about.el (about-xemacs):
+	Add information about package maintainers.
+	Mention Vin, Stephen, and Andy w.r.t. 21.4.
+	Mention Ben and Martin w.r.t. 21.2.
+	(about-advantages): Mention GNU Emacs 21.
+	(about-personal-info): Update turnbull.
+
+2004-05-15  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* specifier.el (set-specifier): Fix typo in comment.
+
 2004-07-12  Stephen J. Turnbull  <stephen@xemacs.org>
 
 	* about.el (about-xemacs):
@@ -197,7 +258,6 @@
 
 	Added support for dialog button mnemonics.
 
-
 2004-04-30  Stephen J. Turnbull  <stephen@xemacs.org>
 
 	* cl.el (gensym, gentemp): Improve docstrings.
--- a/lisp/about.el	Wed Sep 22 01:10:57 2004 +0000
+++ b/lisp/about.el	Wed Sep 22 02:06:52 2004 +0000
@@ -147,6 +147,7 @@
     (pez      "Peter Pezaris"     "pez@xemacs.org")
     (piper    "Andy Piper"        "andy@xemacs.org")
     (pittman  "Daniel Pittman"    "pittman@xemacs.org")
+    (purvis   "Malcolm Purvis"    "mpurvis@xemacs.org")
     (rickc    "Rick Campbell"     "rickc@xemacs.org")
     (rose     "John Rose"         "rose@xemacs.org")
     (rossini  "Anthony Rossini"   "rossini@xemacs.org")
@@ -158,8 +159,9 @@
     (thiessel "Marcus Thiessel"   "marcus@xemacs.org")
     (tomonori "Tomonori Ikeyama"  "tomonori@xemacs.org")
     (tuck     "Matt Tucker"       "tuck@xemacs.org")
-    (turnbull "Stephen Turnbull"  "turnbull@xemacs.org")
+    (turnbull "Stephen Turnbull"  "stephen@xemacs.org")
     (vin      "Vin Shelton"       "acs@xemacs.org")
+    (viteno   "Norbert Koch"      "viteno@xemacs.org")
     (vladimir "Vladimir Ivanovic" "vladimir@xemacs.org")
     (wmperry  "William Perry"     "wmperry@xemacs.org")
     (yoshiki  "Yoshiki Hayashi"   "yoshiki@xemacs.org")
@@ -168,19 +170,19 @@
 
 (defvar about-current-release-maintainers
   ;; this list should not necessarily be in sorted order.
-  '(adrian ben hniksic james piper scop sperber turnbull))
+  '(adrian james piper purvis sperber turnbull vin viteno))
 
 (defvar about-other-current-hackers
   ;; to sort this list or the one below, use:
   ;; M-x sort-regexp-fields RET [a-z]+ RET \(.*\) RET
-  '(aj alastair cgw craig daiki dan dv fabrice golubev gunnar hisashi
-       jan jareth jason jmiller jonathan kazz kirill larsi martin morioka mta ograf
-       olivier oscar pittman tomonori tuck vin wmperry yoshiki))
+  '(ben daiki darrylo dv fabrice golubev hniksic jan jason jmiller jonathan
+    kazz kirill larsi martin morioka mta ograf olivier oscar rossini pittman
+    scop tomonori tuck wmperry yoshiki))
 
 (defvar about-once-and-future-hackers
-  '(ajc baw bw chr cthomp darrylo devin dkindred dmoore eb hbs hmuller
-	hobley jens juhp jwz kyle marcpa mcook mly ograf pelegri pez
-	rickc rose rossini slb stig stigb thiessel vladimir))
+  '(aj ajc alastair baw bw cgw chr craig cthomp dan devin dkindred dmoore eb
+    gunnar hbs hisashi hmuller hobley jareth jens juhp jwz kyle marcpa mcook
+    mly ograf pelegri pez rickc rose slb stig stigb thiessel vladimir))
 
 ;; The CAR of alist elements is a valid argument to `about-url-link'.
 ;; It is preferred to a simple string, because it makes maintenance
@@ -215,10 +217,13 @@
     (piper      . "http://www.andypiper.com/")
     (rossini    . "http://faculty.washington.edu/rossini/")
     (stigb      . "http://www.tihlde.hist.no/~stigb/")
+    (turnbull   . "http://turnbull.sk.tsukuba.ac.jp/yaseppochi-gumi.html")
     (vin        . "http://www.upa.org/")
     (vladimir   . "http://www.leonora.org/~vladimir/")
     (wget       . "http://sunsite.dk/wget/")
-    (xemacs     . "http://www.xemacs.org/"))
+    (xemacs     . "http://www.xemacs.org/")
+    (xemacs-cvs . "http://cvs.xemacs.org/")
+    (xemacs-lists . "http://www.xemacs.org/Lists/"))
   "Some of the more important URLs.")
 
 (defvar about-left-margin 3)
@@ -461,6 +466,26 @@
 		     :button-suffix ""
 		     "The full list of contributors...")
       (widget-insert "\n
+The current package release engineer is Norbert Koch.
+Andreas Jaeger was the first package release engineer following the split
+of the XEmacs code base into core implementation and packaged Lisp.
+He was succeeded by Steve Youngs, then Ville Skyttä.\n\n")
+      (setup-person 'viteno)
+      (setup-person 'aj)
+      (setup-person 'scop)
+      (widget-insert "
+Vin Shelton is the maintainer of the stable branch, 21.4.
+Stephen Turnbull was the project manager for the release.  Andy Piper
+maintained the Windows branch until the release was declared stable.\n\n")
+      (setup-person 'vin)
+      (setup-person 'turnbull)
+      (setup-person 'piper)
+      (widget-insert "
+Ben Wing and Martin Buchholz were heavy code contributors and maintainers
+for 21.2 (the development branch leading to 21.4).\n\n")
+      (setup-person 'martin)
+      (setup-person 'ben)
+      (widget-insert "
 Steve Baur was the primary maintainer for 19.15 through 21.0.\n\n")
       (setup-person 'slb)
       (widget-insert "
@@ -667,6 +692,10 @@
   -- support for arbitrary pixmaps and widgets in a buffer
   -- face support on TTY's, including color
 
+  Many of these are now available in GNU Emacs 21, but the XEmacs
+  implementations are generally more efficient, and the XEmacs APIs are
+  generally more in line with modern programming practices.
+
 * An installable package system, with a huge number of packages available
   that have been tested and are known to work with the latest version
   of XEmacs.
@@ -1292,6 +1321,10 @@
      (widget-insert
       "\
 Sorry, no personal information available about me yet.\n"))
+    (purvis
+     (widget-insert
+      "\
+Sorry, no personal information available about me yet.\n"))
     (rickc
      (widget-insert "\
 The hacker formerly known as Rick Busdiecker is a developer and
@@ -1376,7 +1409,15 @@
     (turnbull
      (widget-insert "\
 Stephen lives with his Japanese wife and children in Tsukuba, Japan,
-where he is a professor of economics at the University of Tsukuba.\n"))
+where he is a professor of economics at the University of Tsukuba,
+and occasionally regretting not going to MIT for college, where he
+surely would have gotten addicted to computers early enough to have
+learned to actually code.
+
+Well, they also serve who rail 'n' rant.
+                  ")
+     (about-url-link 'turnbull nil "Visit Steve's personal page")
+     (widget-insert "\n"))
     (vin
      (widget-insert "\
 I own and operate my own consulting firm, EtherSoft.  Shhh, don't
@@ -1730,6 +1771,10 @@
      (widget-insert
       "\
 Sorry, no information about my XEmacs contributions yet.\n"))
+    (purvis
+     (widget-insert
+      "\
+Currently filing off some of the splinters in the GTK port.\n"))
     (rickc
      (widget-insert "\
 Maintainer of ILISP.\n"))
@@ -1780,7 +1825,18 @@
     (turnbull
      (widget-insert
       "\
-Former XEmacs Beta Release Manager.\n"))
+Mostly a source of random noise and occasionally useful advice on
+I18N up until people started hinting that (2 years after the release
+of 21.1) it was time for various projects to get pushed into the public
+eye.  Steve was the prime mover behind the release of 21.4.
+
+Since Vin took over the maintainership of 21.4, Steve has featured as
+janitor and waterboy, handling (more or less) all those administrative
+tasks that need to get done somehow by somebody---wishing he were coding
+the whole time.
+
+Steve is maintainer of the edict, mule-ucs, and latin-unity packages,
+and has contributed quite a bit of documentation, especially for Mule.\n"))
     (vin
      (widget-insert "\
 Vin helps maintain the older, more mature (read: moldy) versions of
--- a/lisp/cl-macs.el	Wed Sep 22 01:10:57 2004 +0000
+++ b/lisp/cl-macs.el	Wed Sep 22 02:06:52 2004 +0000
@@ -767,7 +767,7 @@
 The loop macro consists of a series of clauses, which do things like
 iterate variables, set conditions for exiting the loop, accumulating values
 to be returned as the return value of the loop, and executing arbitrary
-blocks of code.  Each clause is proceed in turn, and the loop executes its
+blocks of code.  Each clause is processed in turn, and the loop executes its
 body repeatedly until an exit condition is hit.
 
 It's important to understand that loop clauses such as `for' and `while',
--- a/lisp/code-init.el	Wed Sep 22 01:10:57 2004 +0000
+++ b/lisp/code-init.el	Wed Sep 22 02:06:52 2004 +0000
@@ -25,6 +25,12 @@
 ;; coding systems have been created, because we'll be using them at
 ;; load time.
 
+;; #### Issues (this discussion probably belongs elsewhere)
+;; 1.  "Big" characters are unrepresentable.  Should give error, warning,
+;;     not just substitute "~".
+;; 2.  21.4 compatibility?
+;; 3.  make-char: non-mule barfs on non-iso8859-1.
+
 ;;; Code:
 
 (defcustom eol-detection-enabled-p (or (featurep 'mule)
@@ -78,6 +84,7 @@
   '((buffer-file-coding-system-for-read
      binary raw-text undecided raw-text undecided)
     (default-buffer-file-coding-system
+      ;; #### iso-2022-8 with no eol specified?  can that be OK?
       binary binary iso-2022-8 raw-text-dos mswindows-multibyte-dos)
     (native
      binary binary binary raw-text-dos mswindows-multibyte-system-default-dos)
--- a/lisp/find-paths.el	Wed Sep 22 01:10:57 2004 +0000
+++ b/lisp/find-paths.el	Wed Sep 22 02:06:52 2004 +0000
@@ -44,7 +44,7 @@
 
 (defvar paths-lisp-filename-regexp
   "^\\(.*\\.elc?\\)$"
-  "File bases that contain Lisp file.")
+  "File bases that name Emacs Lisp files.")
 
 (defvar paths-no-lisp-directory-regexp
   (concat "\\(" paths-version-control-filename-regexp "\\)"
--- a/lisp/font-menu.el	Wed Sep 22 01:10:57 2004 +0000
+++ b/lisp/font-menu.el	Wed Sep 22 02:06:52 2004 +0000
@@ -121,40 +121,58 @@
 
 ;;;###autoload
 (defcustom font-menu-ignore-scaled-fonts nil
-  "*If non-nil, then the font menu will try to show only bitmap fonts."
+  "*If non-nil, the font menu shows only bitmap fonts.
+
+Bitmap fonts at their design size are generally noticeably higher quality than
+scaled fonts, unless the device is capable of interpreting antialiasing hints.
+In general, setting this option non-`nil' is useful mostly on older X servers.
+
+Not all devices make the distinction between bitmap and scaled fonts."
   :type 'boolean
   :group 'font-menu)
 
 ;;;###autoload
 (defcustom font-menu-this-frame-only-p nil
-  "*If non-nil, then changing the default font from the font menu will only
-affect one frame instead of all frames."
+  "*If non-nil, the menu affects the default font only on the selected frame."
   :type 'boolean
   :group 'font-menu)
 
 (defcustom font-menu-max-number nil
-  "The maximum number of fonts retrieved from the server"
+  "The maximum number of fonts retrieved from the display."
   :type 'integer
   :group 'font-menu)
 
 (defvaralias 'font-menu-max-items 'menu-max-items)
 (defvaralias 'font-menu-submenu-name-format 'menu-submenu-name-format)
 
+;; #### Need to update for fontconfig/Xft?  Document form for MS Windows.
 (defvar font-menu-preferred-resolution
   (make-specifier-and-init 'generic '((global ((mswindows) . ":")
 					      ((gtk) . "*-*")
 					      ((x) . "*-*"))) t)
-  "Preferred horizontal and vertical font menu resolution (e.g. \"75:75\").")
+  "Generic specifier containing preferred resolution as a string.
+Do not `setq' this variable; use `set-specifier'.
+
+For X11 and GTK devices, the instance value will be interpolated into an
+XLFD, and looks like \"75-75\".")
 
 (defvar font-menu-size-scaling
   (make-specifier-and-init 'integer '((global ((mswindows) . 1)
 					      ((gtk) . 10)
 					      ((x) . 10))) t)
-  "Scale factor used in defining font sizes.")
+  "Generic specifier containing scale factor for font sizes.  Don't touch.
+
+This is really a device type constant.  Some devices specify size in points
+\(MS Windows), others in decipoints (X11).")
 
-;; only call XListFonts (and parse) once per device.
-;; ( (device . [parsed-list-fonts family-menu size-menu weight-menu]) ...)
-(defvar device-fonts-cache nil)
+(defvar device-fonts-cache nil
+  "Alist mapping devices to font lists and font menus.  Don't use this.
+
+Instead, use the function `device-fonts-cache' which lazily updates this
+variable, and returns the value for the selected device.
+
+Each element has the form (DEVICE . [FONT-LIST FAMILY SIZE WEIGHT]) where
+FAMILY, SIZE, and WEIGHT denote menus.")
 
 (defsubst device-fonts-cache ()
   (or (cdr (assq (selected-device) device-fonts-cache))
@@ -169,14 +187,15 @@
 (defun reset-device-font-menus (&optional device debug)
   "Generates the `Font', `Size', and `Weight' submenus for the Options menu.
 This is run the first time that a font-menu is needed for each device.
+
 If you don't like the lazy invocation of this function, you can add it to
 `create-device-hook' and that will make the font menus respond more quickly
 when they are selected for the first time.  If you add fonts to your system,
 or if you change your font path, you can call this to re-initialize the menus."
-  (message "Getting list of fonts from server... ")
   (if (or noninteractive
 	  (not (or device (setq device (selected-device)))))
       nil
+    (message "Getting list of fonts from server... ")
     (call-device-method 'reset-device-font-menus device device debug)
     (message "Getting list of fonts from server... done.")))
 
@@ -349,9 +368,12 @@
       (message "Font %s" (face-font-name 'default)))))
 
 
+;; #### This should be called `font-menu-maybe-change-face'
+;; I wonder if a better API wouldn't be (face attribute from to)
 (defun font-menu-change-face (face
 			      from-family from-weight from-size
 			      to-family   to-weight   to-size)
+  "Maybe update the font of FACE per TO-FAMILY, TO-WEIGHT, and TO-SIZE."
   (check-type face symbol)
   (let* ((dcache (device-fonts-cache))
 	 (font-data (font-menu-font-data face dcache))
@@ -360,8 +382,8 @@
 	 (face-weight (aref font-data 3))
 	 (face-slant  (aref font-data 4)))
 
-    (or face-family
-	(signal 'error (list "couldn't parse font name for face" face)))
+     (or face-family
+ 	(signal 'error (list "couldn't parse font name for face" face)))
 
     ;; If this face matches the old default face in the attribute we
     ;; are changing, then change it to the new attribute along that
--- a/lisp/gtk-font-menu.el	Wed Sep 22 01:10:57 2004 +0000
+++ b/lisp/gtk-font-menu.el	Wed Sep 22 02:06:52 2004 +0000
@@ -28,6 +28,11 @@
 ;; Boston, MA 02111-1307, USA.
 ;;; Code:
 
+;; #### - The comment that this file was GTK-ized by Wm Perry is a lie;
+;; nothing was done except to rename everything that was x- to gtk-.
+;; This is harmless, but we should reintegrate so that GTK can take
+;; advantage of fontconfig, too, I think.
+
 ;; #### - implement these...
 ;;
 ;;; (defvar font-menu-ignore-proportional-fonts nil
@@ -37,7 +42,8 @@
 
 (globally-declare-boundp
  '(gtk-font-regexp
-   gtk-font-regexp-foundry-and-family gtk-font-regexp-spacing))
+   gtk-font-regexp-foundry-and-family
+   gtk-font-regexp-spacing))
 
 (defvar gtk-font-menu-registry-encoding nil
   "Registry and encoding to use with font menu fonts.")
@@ -56,7 +62,8 @@
   "A regexp matching font families which are uninteresting (e.g. cursor fonts).")
 
 (defun hack-font-truename (fn)
-  "Filter the output of `font-instance-truename' to deal with Japanese fontsets."
+  ;; #### This is duplicated from x-font-menu.el.
+  "Filter the output of `font-instance-truename' to deal with font sets."
   (if (string-match "," (font-instance-truename fn))
       (let ((fpnt (nth 8 (split-string (font-instance-name fn) "-")))
 	    (flist (split-string (font-instance-truename fn) ","))
--- a/lisp/itimer.el	Wed Sep 22 01:10:57 2004 +0000
+++ b/lisp/itimer.el	Wed Sep 22 02:06:52 2004 +0000
@@ -130,9 +130,9 @@
 		    (list 'list ''itimerp var)))))
 
 (defmacro check-itimer-coerce-string (var)
-  "If VAR is not bound to a string, look up the itimer that it names and
+  "If VAR is bound to a string, look up the itimer that it names and
 bind VAR to it.  Otherwise, if VAR is not bound to an itimer, signal
-wrong-type-argument.  This is a macro."
+`wrong-type-argument'.  This is a macro."
   (list 'setq var
 	(list 'cond
 	      (list (list 'itimerp var) var)
@@ -142,7 +142,7 @@
 
 (defmacro check-nonnegative-number (var)
   "If VAR is not bound to a number, signal `wrong-type-argument'.
-If VAR is not bound to a positive number, signal args-out-of-range.
+If VAR is not bound to a positive number, signal `args-out-of-range'.
 This is a macro."
   (list 'setq var
 	(list 'if (list 'not (list 'numberp var))
--- a/lisp/mule/mule-charset.el	Wed Sep 22 01:10:57 2004 +0000
+++ b/lisp/mule/mule-charset.el	Wed Sep 22 02:06:52 2004 +0000
@@ -62,6 +62,67 @@
 	  (forward-char))))
     list))
 
+(defun fixed-charsets-in-region (start end &optional buffer)
+  "Return a list of the charsets in the region between START and END.
+BUFFER defaults to the current buffer if omitted."
+  (let (list)
+    (save-excursion
+      (if buffer
+	  (set-buffer buffer))
+      (save-restriction
+	(narrow-to-region start end)
+	(goto-char (point-min))
+	(let ((prev-charset nil))
+	  (while (not (eobp))
+	    (let* ((charset (char-charset (char-after (point)))))
+	      (if (not (eq prev-charset charset))
+		  (progn
+		    (setq prev-charset charset)
+		    (or (memq charset list)
+			(setq list (cons charset list))))))
+	    (forward-char)))))
+    list))
+
+(defun list-charsets-in-region (start end &optional buffer)
+  "Return a list of the charsets in the region between START and END.
+BUFFER defaults to the current buffer if omitted."
+  (let (list)
+    (save-excursion
+      (if buffer
+	  (set-buffer buffer))
+      (save-restriction
+	(narrow-to-region start end)
+	(goto-char (point-min))
+	;; this could be optimized by maintaining prev-charset and checking
+	;; for equality, but memq is not that slow for a short list.
+	(while (not (eobp))
+	  (let* ((charset (char-charset (char-after (point)))))
+	    (or (memq charset list)
+		(setq list (cons charset list))))
+	  (forward-char))))
+    list))
+
+(defun hash-charsets-in-region (start end &optional buffer)
+  "Return a list of the charsets in the region between START and END.
+BUFFER defaults to the current buffer if omitted."
+  (let ((ht (make-hash-table :size 10)))
+    (save-excursion
+      (if buffer
+	  (set-buffer buffer))
+      (save-restriction
+	(narrow-to-region start end)
+	(goto-char (point-min))
+	(while (not (eobp))
+	  (puthash (char-charset (char-after (point))) t ht)
+	  (forward-char))))
+    (hash-table-key-list ht)))
+
+(defun c-charsets-in-region (start end &optional buffer)
+  "Return a list of the charsets in the region between START and END.
+BUFFER defaults to the current buffer if omitted."
+  (setq buffer (or buffer (current-buffer)))
+  (charsets-in-region-internal buffer start end))
+
 (defun charsets-in-string (string)
   "Return a list of the charsets in STRING."
   (let (list)
@@ -73,7 +134,15 @@
 	  string)
     list))
 
+(defun c-charsets-in-string (string)
+  "Return a list of the charsets in STRING."
+  (charsets-in-string-internal string nil nil))
+
+(or (fboundp 'charsets-in-string)
+    (defalias 'charsets-in-string 'c-charsets-in-string))
 (defalias 'find-charset-string 'charsets-in-string)
+(or (fboundp 'charsets-in-region)
+    (defalias 'charsets-in-region 'c-charsets-in-region))
 (defalias 'find-charset-region 'charsets-in-region)
 
 
--- a/lisp/specifier.el	Wed Sep 22 01:10:57 2004 +0000
+++ b/lisp/specifier.el	Wed Sep 22 02:06:52 2004 +0000
@@ -347,7 +347,7 @@
 with the VALUE.  Tags are symbols (usually naming device types, such
 as `x' and `tty', or device classes, such as `color', `mono', and
 `grayscale'); specifying a TAG-SET restricts the scope of VALUE to
-devices that match all specified tags. (You can also create your
+devices that match all specified tags.  (You can also create your
 own tags using `define-specifier-tag', and use them to identify
 specifications added by you, so you can remove them later.)
 
@@ -367,7 +367,7 @@
 nil (meaning no specs) and `set-specifier' will interpret the `nil'
 as meaning \"I'm adding a global instantiator and its value is `nil'\"),
 or in strange cases where there is an ambiguity between a spec-list
-and an inst-list, etc. (The built-in specifier types are designed
+and an inst-list, etc.  (The built-in specifier types are designed
 in such a way as to avoid any such ambiguities.)"
 
   ;; backward compatibility: the old function had HOW-TO-ADD as the
--- a/lisp/unicode.el	Wed Sep 22 01:10:57 2004 +0000
+++ b/lisp/unicode.el	Wed Sep 22 02:06:52 2004 +0000
@@ -31,6 +31,7 @@
 
 ; ;; Subsets of Unicode.
 
+; #### what is this bogosity ... "chars 96, final ?2" !!?!
 ; (make-charset 'mule-unicode-2500-33ff 
 ; 	      "Unicode characters of the range U+2500..U+33FF."
 ; 	      '(dimension
@@ -124,7 +125,8 @@
 	    ;; "CP950.TXT" 
 	    ;; "GB12345.TXT" 
 	    ("GB2312.TXT" chinese-gb2312)
-	    ;; "HANGUL.TXT" 
+	    ;; "HANGUL.TXT"
+	    ;; #### shouldn't JIS X 0201's upper limit be 7f?
 	    ("JIS0201.TXT" latin-jisx0201 #x21 #x80)
 	    ("JIS0201.TXT" katakana-jisx0201 #xA0 #xFF #x-80)
 	    ("JIS0208.TXT" japanese-jisx0208 nil nil nil ignore-first-column)
@@ -139,6 +141,7 @@
 	    ;; "SHIFTJIS.TXT"
 	    )
 	   ("unicode/mule-ucs"
+	    ;; #### we don't support surrogates?!??
 	    ;; use these instead of the above ones once we support surrogates
 	    ;;("chinese-cns11643-1.txt" chinese-cns11643-1)
 	    ;;("chinese-cns11643-2.txt" chinese-cns11643-2)
@@ -249,6 +252,8 @@
  "UCS-4 Little Endian"
  '(mnemonic "UCS4-LE"
    documentation
+   ;; #### I don't think this is permitted by ISO 10646, only Unicode.
+   ;; Call it UTF-32 instead?
    "Little-endian version of UCS-4 Unicode encoding.  See `ucs-4' coding system."
    type ucs-4
    little-endian t))
@@ -259,7 +264,7 @@
  '(mnemonic "UTF8"
    documentation
    "UTF-8 Unicode encoding -- ASCII-compatible 8-bit variable-width encoding
-with the same principles as the Mule-internal encoding:
+sharing the following principles with the Mule-internal encoding:
 
   -- All ASCII characters (codepoints 0 through 127) are represented
      by themselves (i.e. using one byte, with the same value as the
--- a/lisp/x-font-menu.el	Wed Sep 22 01:10:57 2004 +0000
+++ b/lisp/x-font-menu.el	Wed Sep 22 02:06:52 2004 +0000
@@ -1,4 +1,4 @@
-;; x-font-menu.el --- Managing menus of X fonts.
+;;; x-font-menu.el --- Managing menus of X fonts.
 
 ;; Copyright (C) 1994 Free Software Foundation, Inc.
 ;; Copyright (C) 1995 Tinker Systems and INS Engineering Corp.
@@ -36,7 +36,8 @@
 
 (globally-declare-boundp
  '(x-font-regexp
-   x-font-regexp-foundry-and-family x-font-regexp-spacing))
+   x-font-regexp-foundry-and-family
+   x-font-regexp-spacing))
 
 (globally-declare-fboundp
  '(charset-registry))
@@ -55,10 +56,14 @@
 				;  "Axcob" -> "Applix Courier Bold", etc.
      )
    "\\|")
-  "A regexp matching font families which are uninteresting (e.g. cursor fonts).")
+  "Regexp matching font families which should not be menu-selectable.
+E.g. cursor fonts.")
 
 (defun hack-font-truename (fn)
-  "Filter the output of `font-instance-truename' to deal with Japanese fontsets."
+  ;; #### Are "font sets" XFontSets?
+  ;; #### Is this useful if not configure'd --with-xfs?
+  ;; #### This is duplicated in gtk-font-menu.el.
+  "Filter the output of `font-instance-truename' to deal with font sets."
   (if (string-match "," (font-instance-truename fn))
       (let ((fpnt (nth 8 (split-string (font-instance-name fn) "-")))
 	    (flist (split-string (font-instance-truename fn) ","))
--- a/man/ChangeLog	Wed Sep 22 01:10:57 2004 +0000
+++ b/man/ChangeLog	Wed Sep 22 02:06:52 2004 +0000
@@ -1,3 +1,13 @@
+2003-11-02  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* lispref/control.texi (Examples of Catch): Mention use of a cons
+	as a catch tag.
+
+2004-07-20  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* lispref/glyphs.texi (Image Instantiator Formats): Add a few
+	words about the tab control widget.
+
 2004-05-14  Darryl Okahata  <darrylo@xemacs.org>
 
 	* lispref/windows.texi.  Added documentation for the functions,
@@ -17,7 +27,8 @@
 
 	* lispref/searching.texi (Syntax of Regexps): Add example of use
 	of shy groups in variable subexpression, correct rumor that there
-	may be substantial performance gain.
+	may be substantial performance gain.  Document double-digit back-
+	references.
 
 2004-08-13  Stephen J. Turnbull  <stephen@xemacs.org>
 
--- a/man/lispref/control.texi	Wed Sep 22 01:10:57 2004 +0000
+++ b/man/lispref/control.texi	Wed Sep 22 02:06:52 2004 +0000
@@ -621,6 +621,14 @@
 @code{yes}.  The function @code{print} is never called, and the
 body-form @code{'no} is never evaluated.
 
+In most cases the formal tag for a catch is a quoted symbol or a
+variable whose value is a symbol.  Both styles are demonstrated above.
+In definitions of derived control structures, an anonymous tag may be
+desired.  A gensym could be used, but since catch tags are compared
+using @code{eq}, any Lisp object can be used.  An occasionally
+encountered idiom is to bind a local variable to @code{(cons nil nil)},
+and use the variable as the formal tag.
+
 @node Errors
 @subsection Errors
 @cindex errors
--- a/man/lispref/glyphs.texi	Wed Sep 22 01:10:57 2004 +0000
+++ b/man/lispref/glyphs.texi	Wed Sep 22 02:06:52 2004 +0000
@@ -640,6 +640,19 @@
 A tab widget; a series of user selectable tabs.  Can only be instanced
 as @code{widget}.
 
+The required keyword is @code{:items}.  Its value should be a list of
+vectors, whose first element is a string, the second element is a
+callback (a Lisp expression to be eval'ed), and the remaining elements
+are key-value pairs.  The most important keyword is @code{:selected} (a
+Boolean); exactly one of the elements should have a value of @code{t}
+for the @code{:selected} property.  Other keywords accepted include
+@code{:descriptor} (a string), @code{:face} (a symbol naming a face),
+@code{:orientation} (a symbol, one of @code{top}, @code{center},
+@code{bottom}, @code{left}, or @code{right}), and @code{:pixel-width}
+and @code{:pixel-height} (positive integers).
+
+(The above is incomplete and may be inaccurate.)
+
 @item tree-view
 A folding widget.  Can only be instanced as @code{widget}.
 
--- a/src/ChangeLog	Wed Sep 22 01:10:57 2004 +0000
+++ b/src/ChangeLog	Wed Sep 22 02:06:52 2004 +0000
@@ -1,3 +1,37 @@
+2004-09-19  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* file-coding.c: Improve explanatory comments, add diatribes.
+	Fix some typos in comments.
+
+2003-11-21  Stephen J. Turnbull  <stephen@xemacs.org>
+ 
+	* dired.c (Fdirectory_files): MATCH is only tried against basename.
+
+2003-11-02  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* eval.c (Fcatch, Fthrow): Document that tags are compared with `eq'.
+
+2003-10-31  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* procimpl.h (struct Lisp_Process):
+	* undo.c (Fprimitive_undo): 
+	Remove obsolete references to ENERGIZE in comments.
+
+2004-07-18  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* glyphs.c (Fimage_instance_type): Add 'widget to docstring.
+
+2004-07-12  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* glyphs-widget.c (tab_control_query_geometry):
+	(logical_unit_height):
+	Typos in comments.
+
+2004-06-29  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* syntax.c (Sextword): Doesn't seem to exist.  Move comment to
+	Internals manual.
+
 2004-09-21  Jerry James  <james@xemacs.org>
 
 	* sound.c (Fwait_for_sounds): Factor out test for unused parameter.
@@ -430,6 +464,8 @@
 
 	* syntax.c (setup_syntax_cache): Fix behavior for buffers
 	containing multibyte chars.
+	Fixes "Fatal error: assertion failed, file src/buffer.h, line 617,
+	(x) >= ((Charbpos) 1) && x <= ((buf)->text->bufz + 0)"
 
 2004-06-29  Jerry James  <james@xemacs.org>
 
--- a/src/dired.c	Wed Sep 22 01:10:57 2004 +0000
+++ b/src/dired.c	Wed Sep 22 02:06:52 2004 +0000
@@ -60,7 +60,8 @@
 Return a list of names of files in DIRECTORY.
 There are four optional arguments:
 If FULL is non-nil, absolute pathnames of the files are returned.
-If MATCH is non-nil, only pathnames containing that regexp are returned.
+If MATCH is non-nil, only pathnames whose basename contains that regexp are
+ returned.
 If NOSORT is non-nil, the list is not sorted--its order is unpredictable.
  NOSORT is useful if you plan to sort the result yourself.
 If FILES-ONLY is the symbol t, then only the "files" in the directory
--- a/src/eval.c	Wed Sep 22 01:10:57 2004 +0000
+++ b/src/eval.c	Wed Sep 22 02:06:52 2004 +0000
@@ -1472,7 +1472,7 @@
 DEFUN ("catch", Fcatch, 1, UNEVALLED, 0, /*
 \(catch TAG BODY...): eval BODY allowing nonlocal exits using `throw'.
 TAG is evalled to get the tag to use.  Then the BODY is executed.
-Within BODY, (throw TAG) with same tag exits BODY and exits this `catch'.
+Within BODY, (throw TAG) with same (`eq') tag exits BODY and this `catch'.
 If no throw happens, `catch' returns the value of the last BODY form.
 If a throw happens, it specifies the value to return from `catch'.
 */
@@ -1536,8 +1536,8 @@
 /* Unwind the specbind, catch, and handler stacks back to CATCH, and
    jump to that CATCH, returning VALUE as the value of that catch.
 
-   This is the guts Fthrow and Fsignal; they differ only in the way
-   they choose the catch tag to throw to.  A catch tag for a
+   This is the guts of Fthrow and Fsignal; they differ only in the
+   way they choose the catch tag to throw to.  A catch tag for a
    condition-case form has a TAG of Qnil.
 
    Before each catch is discarded, unbind all special bindings and
@@ -1709,7 +1709,7 @@
 
 DEFUN_NORETURN ("throw", Fthrow, 2, 2, 0, /*
 Throw to the catch for TAG and return VALUE from it.
-Both TAG and VALUE are evalled.
+Both TAG and VALUE are evalled.  Tags are the same iff they are `eq'.
 */
        (tag, value))
 {
--- a/src/file-coding.c	Wed Sep 22 01:10:57 2004 +0000
+++ b/src/file-coding.c	Wed Sep 22 02:06:52 2004 +0000
@@ -69,6 +69,27 @@
    Removed the conditionals.
    */
 
+/* sjt sez:
+
+There should be no elementary coding systems in the Lisp API, only chains.
+Chains should be declared, not computed, as a sequence of coding formats.
+(Probably the internal representation can be a vector for efficiency but
+programmers would probably rather work with lists.)  A stream has a token
+type.  Most streams are octet streams.  Text is a stream of characters (in
+_internal_ format; a file on disk is not text!)  An octet-stream has no
+implicit semantics, so its format must always be specified.  The only type
+currently having semantics is characters.  This means that the chain [euc-jp
+-> internal -> shift_jis] may be specified (euc-jp, shift_jis), and if no
+euc-jp -> shift_jis converter is available, then the chain is automatically
+constructed.  (N.B.  If we have fixed width buffers in the future, then we
+could have ASCII -> 8-bit char -> 16-bit char -> ISO-2022-JP (with escape
+sequences).)
+
+EOL handling is a char <-> char coding.  It should not be part of another
+coding system except as a convenience for users.  For text coding,
+automatically insert EOL handlers between char <-> octet boundaries.
+*/
+
 /* Comments about future work
 
 ------------------------------------------------------------------
@@ -157,7 +178,7 @@
       results until the text file is reached, whereas the base64, gzip or
       euc-jp decoders will return higher.  Once the text file is reached,
       the EOL detector will return 0 or higher for the CRLF encoding, and
-      all other decoders will return 0 or lower; thus, we will successfully
+      all other detectors will return 0 or lower; thus, we will successfully
       proceed through CRLF decoding, or at worst prompt the user. (The only
       external-vs-internal distinction that might make sense here is to
       favor coding systems of the correct source type over those that
@@ -171,6 +192,18 @@
       before or after decoding of euc-jp, base64, iso2022, or similar,
       without any difference in the final results.)
 
+      #### What are we trying to say?  In base64, CRLF decoding before
+      base64 decoding is irrelevant: the line breaks will be thrown out,
+      since whitespace is not significant in base64.
+
+      [sjt considers all of this to be rather bogus.  Ideas like "greater
+      certainty" and "distinctive" can and should be quantified.  The issue
+      of proper table organization should be a question of optimization.]
+
+      [sjt wonders if it might not be a good idea to use Unicode's newline
+      character as the internal representation so that (for non-Unicode
+      coding systems) we can catch EOL bugs on Unix too.]
+
    -- There need to be two priority lists and two
       category->coding-system lists.  Once is general, the other
       category->langenv-specific.  The user sets the former, the langenv
@@ -221,10 +254,32 @@
 
    -- Clearly some of these are more important than others.  at the
    very least, the "better means of presentation" should be
-   implementation as soon as possibl, along with a very simple means
+   implemented as soon as possible, along with a very simple means
    of fail-safe whenever the data is readibly available, e.g. it's
    coming from a file, which is the most common scenario.
 
+--ben [at least that's what sjt thinks]
+
+*****
+
+While this is clearly something of an improvement over earlier designs,
+it doesn't deal with the most important issue: to do better than categories
+(which in the medium term is mostly going to mean "which flavor of Unicode
+is this?"), we need to look at statistical behavior rather than ruling out
+categories via presence of specific sequences.  This means the stream
+processor should
+
+    (1) keep octet distributions (octet, 2-, 3-, 4- octet sequences)
+    (2) in some kind of compressed form
+    (3) look for "skip features" (eg, characteristic behavior of leading
+        bytes for UTF-7, UTF-8, UTF-16, Mule code)
+    (4) pick up certain "simple" regexps
+    (5) provide "triggers" to determine when statistical detectors should be
+        invoked, such as octet count
+    (6) and "magic" like Unicode signatures or file(1) magic.
+
+--sjt
+
 
 ------------------------------------------------------------------
                             ABOUT FORMATS
@@ -309,20 +364,62 @@
          checked at all when doing safe-checking?).  safe-checking
          should work something like this: compile a list of all
          charsets used in the buffer, along with a count of chars
-         used.  that way, "slightly unsafe" charsets can perhaps be
-         presented at the end, which will lose only a few characters
+         used.  that way, "slightly unsafe" coding systems can perhaps
+         be presented at the end, which will lose only a few characters
          and are perhaps what the users were looking for.
 
+	 [sjt sez this whole step is a crock.  If a universal coding system
+	 is unacceptable, the user had better know what he/she is doing,
+	 and explicitly specify a lossy encoding.
+	 In principle, we can simply check for characters being writable as
+	 we go along.  Eg, via an "unrepresentable character handler."  We
+         still have the buffer contents.  If we can't successfully save,
+         then ask the user what to do.  (Do we ever simply destroy previous
+         file version before completing a write?)]
+
       2. when actually writing out, we need error checking in case an
          individual char in a charset can't be written even though the
          charsets are safe.  again, the user gets the choice of other
          reasonable coding systems.
 
+         [sjt -- something is very confused, here; safe charsets should be
+         defined as those charsets all of whose characters can be encoded.]
+
       3. same thing (error checking, list of alternatives, etc.) needs
          to happen when reading!  all of this will be a lot of work!
 
    
    --ben
+
+   I don't much like Ben's scheme.  First, this isn't an issue of I/O,
+   it's a coding issue.  It can happen in many places, not just on stream
+   I/O.  Error checking should take place on all translations.  Second,
+   the two-pass algorithm should be avoided if possible.  In some cases
+   (eg, output to a tty) we won't be able to go back and change the
+   previously output data.  Third, the whole idea of having a buffer full
+   of arbitrary characters which we're going to somehow shoehorn into a
+   file based on some twit user's less than informed idea of a coding system
+   is kind of laughable from the start.  If we're going to say that a buffer
+   has a coding system, shouldn't we enforce restrictions on what you can
+   put into it?  Fourth, what's the point of having safe charsets if some
+   of the characters in them are unsafe?  Fifth, what makes you think we're
+   going to have a list of charsets?  It seems to me that there might be
+   reasons to have user-defined charsets (eg, "German" vs "French" subsets
+   of ISO 8859/15).  Sixth, the idea of having language environment determine
+   precedence doesn't seem very useful to me.  Users who are working with a
+   language that corresponds to the language environment are not going to
+   run into safe charsets problems.  It's users who are outside of their
+   usual language environment who run into trouble.  Also, the reason for
+   specifying anything other than a universal coding system is normally
+   restrictions imposed by other users or applications.  Seventh, the
+   statistical feedback isn't terribly useful.  Users rarely "want" a
+   coding system, they want their file saved in a useful way.  We could
+   add a FORCE argument to conversions for those who really want a specific
+   coding system.  But mostly, a user might want to edit out a few unsafe
+   characters.  So (up to some maximum) we should keep a list of unsafe
+   text positions, and provide a convenient function for traversing them.
+
+   --sjt
 */
 
 #include <config.h>
@@ -497,8 +594,8 @@
 Lisp_Object Qgzip;
 #endif
 
-/* Maps coding system names to either coding system objects or (for
-   aliases) other names. */
+/* Maps symbols (coding system names) to either coding system objects or
+   (for aliases) other names. */
 static Lisp_Object Vcoding_system_hash_table;
 
 int enable_multibyte_characters;
@@ -910,6 +1007,7 @@
   return 0;
 }
 
+/* #### should we specify a convention for "all coding systems"? */
 DEFUN ("coding-system-list", Fcoding_system_list, 0, 1, 0, /*
 Return a list of the names of all defined coding systems.
 If INTERNAL is nil, only the normal (non-internal) coding systems are
@@ -1558,6 +1656,8 @@
      `ucs-4' is the four-byte encoding; `utf-8' is an ASCII-compatible
      variable-width 8-bit encoding; `utf-7' is a 7-bit encoding using
      only characters that will safely pass through all mail gateways.
+     [[ This should be \"transformation format\".  There should also be
+     `ucs-2' (or `bmp' -- no surrogates) and `utf-32' (range checked). ]]
 
 'little-endian
      If non-nil, `utf-16' and `ucs-4' will write out the groups of two
@@ -1569,7 +1669,8 @@
      written out at the beginning of the data.  This serves both to
      identify the endianness of the following data and to mark the
      data as Unicode (at least, this is how Windows uses it).
-
+     [[ The correct term is \"signature\", since this technique may also
+     be used with UTF-8.  That is the term used in the standard. ]]
 
 
 The following additional properties are recognized if TYPE is
@@ -1596,6 +1697,7 @@
 
 
 The following additional properties are recognized if TYPE is 'undecided:
+[[ Doesn't GNU use \"detect-*\" for the following two? ]]
 
 'do-eol
      Do EOL detection.
@@ -1670,6 +1772,8 @@
     ? Qt : Qnil;
 }
 
+/* #### Shouldn't this really be a find/get pair? */
+
 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /*
 Return t if OBJECT is a coding system alias.
 All coding system aliases are created by `define-coding-system-alias'.
@@ -1794,7 +1898,8 @@
   Fputhash (alias, aliasee, Vcoding_system_hash_table);
 
   /* Set up aliases for subsidiaries.
-     #### There must be a better way to handle subsidiary coding systems. */
+     #### There must be a better way to handle subsidiary coding systems.
+     Inquiring Minds Want To Know: shouldn't they always be chains? */
   {
     static const char *suffixes[] = { "-unix", "-dos", "-mac" };
     int i;
@@ -1869,8 +1974,8 @@
        1, 1, 0, /*
 Return the coding system actually used for I/O.
 In some cases (e.g. when a particular EOL type is specified) this won't be
-the coding system itself.  This can be useful when trying to track down
-more closely how exactly data is decoded.
+the coding system itself.  This can be useful when trying to determine
+precisely how data was decoded.
 */
        (coding_system))
 {
@@ -2002,6 +2107,8 @@
    definition of decoding as converting from external- to
    internal-formatted data.
 
+   [[ REWRITE ME! ]]
+
    #### We really need to abstract out the concept of "data formats" and
    define "converters" that convert from and to specified formats,
    eliminating the idea of decoding and encoding.  When specifying a
@@ -2052,7 +2159,7 @@
 	/* #### 1024 is arbitrary; we really need to separate 0 from EOF,
            and when we get 0, keep taking more data until we don't get 0 --
            we don't know how much data the conversion routine might need
-           before it can generate any data of its own */
+           before it can generate any data of its own (eg, bzip2). */
 	Bytecount readmore =
 	  str->one_byte_at_a_time ? (Bytecount) 1 :
 	    max (size, (Bytecount) 1024);
@@ -2462,7 +2569,7 @@
 
   /* The chain of streams looks like this:
 
-     [BUFFER] <----- send through
+     [BUFFER] <----- (( read from/send to loop ))
                      ------> [CHAR->BYTE i.e. ENCODE AS BINARY if source is
                               in bytes]
 		             ------> [ENCODE/DECODE AS SPECIFIED]
@@ -2474,6 +2581,49 @@
 						      for this]
 			                              ------> [BUFFER]
    */
+  /* Of course, this is just horrible.  BYTE<->CHAR should only be available
+     to I/O routines.  It should not be visible to Mule proper.
+
+     A comment on the implementation.  Hrvoje and Kyle worry about the
+     inefficiency of repeated copying among buffers that chained coding
+     systems entail.  But this may not be as time inefficient as it appears
+     in the Mule ("house rules") context.  The issue is: how do you chain
+     coding systems without copying?  In theory you could have
+
+     IChar external_to_raw (ExtChar *cp, State *s);
+     IChar decode_utf16 (IChar c, State *s);
+     IChar decode_crlf (ExtChar *cp, State *s);
+
+     typedef Ichar (*Converter[]) (Ichar, State*);
+
+     Converter utf16[2] = { &decode_utf16, &decode_crlf };
+
+     void convert (ExtChar *inbuf, IChar *outbuf, Converter cvtr)
+     {
+       int i;
+       ExtChar c;
+       State s;
+
+       while (c = external_to_raw (*inbuf++, &s))
+	 {
+	   for (i = 0; i < sizeof(cvtr)/sizeof(Converter); ++i)
+	     if (s.ready)
+	       c = (*cvtr[i]) (c, &s);
+	 }
+       if (s.ready)
+         *outbuf++ = c;
+     }
+
+     But this is a lot of function calls; what Ben is doing is basically
+     reducing this to one call per buffer-full.  The only way to avoid this
+     is to hardcode all the "interesting" coding systems, maybe using
+     inline or macros to give structure.  But this is still a huge amount
+     of work, and code.
+
+     One advantage to the call-per-char approach is that we might be able
+     to do something about the marker/extent destruction that coding
+     normally entails.
+   */
   while (1)
     {
       char tempbuf[1024]; /* some random amount */
@@ -2797,20 +2947,19 @@
 {  
   if (data->lstreams)
     {
-      /* Order of deletion is important here!  Delete from the head of the
-         chain and work your way towards the tail.  In general, when you
-         delete an object, there should be *NO* pointers to it anywhere.
-         Deleting back-to-front would be a problem because there are
-         pointers going forward.  If there were pointers in both
-         directions, you'd have to disconnect the pointers to a particular
-         object before deleting it. */
+      /* During GC, these objects are unmarked, and are about to be freed.
+	 We do NOT want them on the free list, and that will cause lots of
+	 nastiness including crashes.  Just let them be freed normally. */
       if (!gc_in_progress)
 	{
 	  int i;
-	  /* During GC, these objects are unmarked, and are about to be
-	     freed.  We do NOT want them on the free list, and that will
-	     cause lots of nastiness including crashes.  Just let them be
-	     freed normally. */
+	  /* Order of deletion is important here!  Delete from the head of
+	     the chain and work your way towards the tail.  In general,
+	     when you delete an object, there should be *NO* pointers to it
+	     anywhere.  Deleting back-to-front would be a problem because
+	     there are pointers going forward.  If there were pointers in
+	     both directions, you'd have to disconnect the pointers to a
+	     particular object before deleting it. */
 	  for (i = 0; i < data->lstream_count; i++)
 	    Lstream_delete (XLSTREAM ((data->lstreams)[i]));
 	}
@@ -2927,7 +3076,10 @@
 /* "No conversion"; used for binary files.  We use quotes because there
    really is some conversion being applied (it does byte<->char
    conversion), but it appears to the user as if the text is read in
-   without conversion. */
+   without conversion.
+
+   #### Shouldn't we _call_ it that, then?  And while we're at it,
+   separate it into "to_internal" and "to_external"? */
 DEFINE_CODING_SYSTEM_TYPE (no_conversion);
 
 /* This is used when reading in "binary" files -- i.e. files that may
@@ -2973,6 +3125,7 @@
 		  c == LEADING_BYTE_CONTROL_1)
 		ch = c;
 	      else
+		/* #### This is just plain unacceptable. */
 		Dynarr_add (dst, '~'); /* untranslatable character */
 	    }
 	  else
@@ -3024,7 +3177,8 @@
 different EOL types itself if it does line-oriented type processing.
 This is unavoidable because we don't know whether the output of the
 main encoding routine is ASCII compatible (Unicode is definitely not,
-for example).
+for example).  [[ sjt sez this is bogus.  There should be _no_ EOL
+processing (or processing of any kind) after conversion to external. ]]
 
 There is one parameter: `subtype', either `cr', `lf', `crlf', or nil.
 */
@@ -4810,7 +4964,8 @@
   staticpro (&QScoding_system_cookie);
 
 #ifdef HAVE_DEFAULT_EOL_DETECTION
-  /* WARNING: The existing categories are intimately tied to the function
+  /* #### Find a more appropriate place for this comment.
+     WARNING: The existing categories are intimately tied to the function
      `coding-system-category' in coding.el.  If you change a category, or
      change the layout of any coding system associated with a category, you
      need to check that function and make sure it's written properly. */
@@ -4874,6 +5029,8 @@
 #endif
 }
 
+/* #### reformat this for consistent appearance? */
+
 void
 complex_vars_of_file_coding (void)
 {
--- a/src/file-coding.h	Wed Sep 22 01:10:57 2004 +0000
+++ b/src/file-coding.h	Wed Sep 22 02:06:52 2004 +0000
@@ -41,14 +41,16 @@
 /* Capsule description of the different structures, what their purpose is,
    how they fit together, and where various bits of data are stored.
 
-   A "coding system" is an algorithm for converting data in one format into
-   data in another format.  Currently most of the coding systems we have
-   created concern internationalized text, and convert between the XEmacs
-   internal format for multilingual text, and various external
+   A "coding system" is an algorithm for converting stream data in one format
+   into stream data in another format.  Currently most of the coding systems
+   we have created concern internationalized text, and convert between the
+   XEmacs internal format for multilingual text, and various external
    representations of such text.  However, any such conversion is possible,
    for example, compressing or uncompressing text using the gzip algorithm.
    All coding systems provide both encode and decode routines, so that the
-   conversion can go both ways.
+   conversion can go both ways.  Unfortunately encoding and decoding may not
+   be exact inverses, even for a specific instance of a coding system.  Care
+   must be taken when this is not the case.
 
    The way we handle this is by dividing the various potential coding
    systems into types, analogous to classes in C++.  Each coding system
@@ -121,7 +123,9 @@
    the Lisp primitives `set-process-input-coding-system' and
    `set-console-tty-input-coding-system', as well as getting set when a
    conversion operation was started with coding system `undecided' and the
-   correct coding system was then detected.)
+   correct coding system was then detected.)  #### This suggests implementing
+   compound text extended segments by saving the state of the ctext stream,
+   and installing an appropriate coding system for the duration of the segment.
 
    IMPORTANT NOTE: There are at least two ancillary data structures
    associated with a coding system type. (There may also be detection data;
@@ -868,7 +872,7 @@
      because the write method is forced to take only what it's given but
      the read method can read more data from the other end if necessary.
      On the other hand, the write method is free to generate all the data
-     it wants (and just write it to the other end), but the the read method
+     it wants (and just write it to the other end), but the read method
      can return only as much as was asked for, so we need to implement our
      own buffering. */
 
--- a/src/glyphs-widget.c	Wed Sep 22 01:10:57 2004 +0000
+++ b/src/glyphs-widget.c	Wed Sep 22 02:06:52 2004 +0000
@@ -274,12 +274,12 @@
   return widget_border_width (IMAGE_INSTANCE_DOMAIN (ii));
 }
 
-/* #### Its not clear to me what the value of logical_unit_height should
-   be, or whether it should even depend on the current
-   image_instance. It really should probably only depend on the
-   default widget face and the domain, however you can envisage users
-   wanting different logical units for nested layouts - so using the
-   properties of the current lahyout is probably not so dumb. */
+/* #### It's not clear to me what the value of logical_unit_height should
+   be, or whether it should even depend on the current image_instance.  It
+   really should probably only depend on the default widget face and the
+   domain, however you can envisage users wanting different logical units
+   for nested layouts - so using the properties of the current layout is
+   probably not so dumb. */
 static int
 logical_unit_height (Lisp_Object text, Lisp_Object face, Lisp_Object domain)
 {
@@ -287,7 +287,7 @@
   widget_query_string_geometry (text, face, 
 				0, &charheight, domain);
   /* For the returned value to be useful it needs to be big enough to
-     accomodate the largest single-height widget. This is currently
+     accommodate the largest single-height widget.  This is currently
      the edit-field. */
   return charheight + 2 * widget_spacing (domain)
     + 4 * widget_border_width (domain);
@@ -961,8 +961,8 @@
     }
 }
 
-/* Get the geometry of a tab control. This is based on the number of
-   items and text therin in the tab control. */
+/* Get the geometry of a tab control.  This is based on the number of
+   items and text therein in the tab control. */
 static void
 tab_control_query_geometry (Lisp_Object image_instance,
 			    int* width, int* height,
@@ -978,6 +978,11 @@
     {
       int h, w;
 
+      /* #### Maybe we should allow items to be a list of strings?
+	 Ie, autoconvert "label" -> ["label" "label" :selected maybe-t].
+	 Maybe there's a better place (or several places) to do this?
+	 If so, change image_instantiator_tab_control back to use
+	 check_valid_item_list for checking Q_items. -- sjt */
       widget_query_string_geometry (XGUI_ITEM (XCAR (rest))->name,
 			     IMAGE_INSTANCE_WIDGET_FACE (ii),
 			     &w, &h, domain);
--- a/src/glyphs.c	Wed Sep 22 01:10:57 2004 +0000
+++ b/src/glyphs.c	Wed Sep 22 02:06:52 2004 +0000
@@ -1647,7 +1647,7 @@
 DEFUN ("image-instance-type", Fimage_instance_type, 1, 1, 0, /*
 Return the type of the given image instance.
 The return value will be one of 'nothing, 'text, 'mono-pixmap,
-'color-pixmap, 'pointer, or 'subwindow.
+'color-pixmap, 'pointer, 'subwindow, or 'widget.
 */
        (image_instance))
 {
--- a/src/process.c	Wed Sep 22 01:10:57 2004 +0000
+++ b/src/process.c	Wed Sep 22 02:06:52 2004 +0000
@@ -1197,6 +1197,7 @@
 
 DEFUN ("set-process-buffer", Fset_process_buffer, 2, 2, 0, /*
 Set buffer associated with PROCESS to BUFFER (a buffer, or nil).
+Output from PROCESS is inserted in this buffer unless PROCESS has a filter.
 */
        (process, buffer))
 {
@@ -1209,8 +1210,8 @@
 
 DEFUN ("process-buffer", Fprocess_buffer, 1, 1, 0, /*
 Return the buffer PROCESS is associated with.
-Output from PROCESS is inserted in this buffer
-unless PROCESS has a filter.
+Output from PROCESS is inserted in this buffer unless PROCESS has a filter.
+Set the buffer with `set-process-buffer'.
 */
        (process))
 {
@@ -1219,6 +1220,8 @@
 }
 
 DEFUN ("set-process-stderr-buffer", Fset_process_stderr_buffer, 2, 2, 0, /*
 Set stderr buffer associated with PROCESS to BUFFER (a buffer, or nil).
+Output from the stderr of PROCESS is inserted in this buffer unless
+PROCESS has a stderr filter.
 */
        (process, buffer))
@@ -1234,8 +1237,8 @@
 
 DEFUN ("process-stderr-buffer", Fprocess_stderr_buffer, 1, 1, 0, /*
 Return the stderr buffer PROCESS is associated with.
-Output from the stderr of PROCESS is inserted in this buffer
-unless PROCESS has a stderr filter.
+Output from the stderr of PROCESS is inserted in this buffer unless PROCESS
+has a stderr filter.  Set the buffer with `set-process-stderr-buffer'.
 */
        (process))
 {
--- a/src/procimpl.h	Wed Sep 22 01:10:57 2004 +0000
+++ b/src/procimpl.h	Wed Sep 22 02:06:52 2004 +0000
@@ -103,7 +103,7 @@
   /* Non-false if process has exited and "dumped core" on its way down */
   char core_dumped;
 
-  /* This next field is only actually used #ifdef ENERGIZE */
+  /* #### Is this field unused? */
   /* if this flag is not NIL, then filter will do the read on the
      channel, rather than having a call to make_string.
      This only works if the filter is a subr. */
--- a/src/syntax.c	Wed Sep 22 01:10:57 2004 +0000
+++ b/src/syntax.c	Wed Sep 22 02:06:52 2004 +0000
@@ -31,32 +31,6 @@
 #include "syntax.h"
 #include "extents.h"
 
-/* Here is a comment from Ken'ichi HANDA <handa@etl.go.jp>
-   explaining the purpose of the Sextword syntax category:
-
-Japanese words are not separated by spaces, which makes finding word
-boundaries very difficult.  Theoretically it's impossible without
-using natural language processing techniques.  But, by defining
-pseudo-words as below (much simplified for letting you understand it
-easily) for Japanese, we can have a convenient forward-word function
-for Japanese.
-
-	A Japanese word is a sequence of characters that consists of
-	zero or more Kanji characters followed by zero or more
-	Hiragana characters.
-
-Then, the problem is that now we can't say that a sequence of
-word-constituents makes up a WORD.  For instance, both Hiragana "A"
-and Kanji "KAN" are word-constituents but the sequence of these two
-letters can't be a single word.
-
-So, we introduced Sextword for Japanese letters.  A character of
-Sextword is a word-constituent but a word boundary may exist between
-two such characters.  */
-
-/* Mule 2.4 doesn't seem to have Sextword - I'm removing it -- mrb */
-/* Recovered by tomo */
-
 #define ST_COMMENT_STYLE 0x101
 #define ST_STRING_STYLE  0x102
 
--- a/src/undo.c	Wed Sep 22 01:10:57 2004 +0000
+++ b/src/undo.c	Wed Sep 22 02:06:52 2004 +0000
@@ -423,7 +423,7 @@
 #ifdef CLASH_DETECTION
 		  Funlock_buffer ();
 #endif /* CLASH_DETECTION */
-		  /* may GC under ENERGIZE: */
+		  /* #### need to check if this can GC */
 		  Fset_buffer_modified_p (Qnil, Qnil);
 		}
 	      else if (EXTENTP (car))