# HG changeset patch
# User stephent
# Date 1033551100 0
# Node ID ccaf90c5a53a7ea59a1aee12d3beeeff62cccf96
# Parent  ce9bdd48654f81712ee6f08f08664260f8fdd69f
[xemacs-hg @ 2002-10-02 09:29:37 by stephent]
21.4 -> R21.5 stuff
 manual improvements <87k7l1p6su.fsf@tleepslib.sk.tsukuba.ac.jp>
regexp tests <87fzvpp6mf.fsf@tleepslib.sk.tsukuba.ac.jp>
add-to-list doc Ville Skyttä <87bs6dp6io.fsf@tleepslib.sk.tsukuba.ac.jp>
Move filename associations Ville Skyttä <877kh1p6ee.fsf@tleepslib.sk.tsukuba.ac.jp>
lookup-syntax-properties <87admil2e0.fsf_-_@tleepslib.sk.tsukuba.ac.jp>
fix stale submatches <873crpp50v.fsf_-_@tleepslib.sk.tsukuba.ac.jp>
info for developers <87y99hnqc4.fsf@tleepslib.sk.tsukuba.ac.jp>

diff -r ce9bdd48654f -r ccaf90c5a53a ChangeLog
--- a/ChangeLog	Tue Oct 01 21:55:21 2002 +0000
+++ b/ChangeLog	Wed Oct 02 09:31:40 2002 +0000
@@ -1,3 +1,7 @@
+2002-07-12  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* etc/BETA (Large contributions): New section.  Reorganize file.
+
 2002-09-20  Steve Youngs  <youngs@xemacs.org>
 
 	* Makefile.in.in (TAGS tags): Descend into the modules directory
diff -r ce9bdd48654f -r ccaf90c5a53a etc/BETA
--- a/etc/BETA	Tue Oct 01 21:55:21 2002 +0000
+++ b/etc/BETA	Wed Oct 02 09:31:40 2002 +0000
@@ -151,7 +151,7 @@
 is rebuilt.
 
 ** Building XEmacs from a full distribution
-==============================================
+===========================================
 
 Locate a convenient place where you have at least 100MB of free space
 and issue the command
@@ -266,7 +266,97 @@
    of the developers.
 
 
-* Patching XEmacs
+* Packages
+==========
+
+[Note: these instructions have been partly updated, but not carefully
+reviewed in some time.  Caveat tester.]
+
+Starting with XEmacs 21.1, much of the functionality of XEmacs has
+been unbundled into "the packages."  For more information about the
+package system, see the Info nodes on Packages (in the XEmacs User
+Manual) and on Packaging (in the Lisp Reference).
+
+When bootstrapping XEmacs, you may need to manually install some
+packages (at least xemacs-base and efs).  These packages are available
+by FTP at ftp://ftp.xemacs.org/pub/xemacs/packages/.
+
+** Binary package installation
+==============================
+
+Prerequisite:  XEmacs 21.0-b1.
+
+Binary packages are complete entities that can be untarred at the top
+level of an XEmacs package hierarchy and work at runtime.  To install files
+in this directory, run the command `M-x package-admin-add-binary-package'
+and fill in appropriate values to the prompts.
+
+** Manual procedures for package management
+===========================================
+
+Prerequisite: XEmacs 21.0
+
+When adding and deleting files from a lisp directory the
+auto-autoloads.el (global symbols) and custom-load.el (Customization
+groups) must be kept in synch.  Assuming one is manipulating a
+directory called `lisp-utils', the command to rebuild the
+auto-autoloads.el file is:
+
+xemacs -vanilla -batch -l autoload -f batch-update-directory lisp-utils
+
+The command to rebuild the custom-load.el file is:
+
+xemacs -vanilla -batch -l cus-dep -f Custom-make-dependencies lisp-utils
+
+To bytecompile both of these files the command is:
+
+xemacs -vanilla -batch -f batch-byte-compile \
+	lisp-utils/auto-autoloads.el lisp-utils/custom-load.el
+
+** Building XEmacs and XEmacs packages from scratch
+===================================================
+
+To build everything completely from scratch (not a high priority as a
+design goal), the following procedure should work.  (I don't recommend
+building this way).
+
+*** Phase 1 -- Get a minimal XEmacs binary with mule to build the package
+    lisp with.
+
+**** Grab a mule-base tarball and install it into a newly created package
+     directory.
+
+**** Configure XEmacs with mule and a package-path including the
+     directory created above.
+
+**** Do a `make dist' to build an XEmacs binary.
+
+*** Phase 2 -- Build and install the package lisp.
+
+**** Modify XEmacs.rules for local paths and the XEmacs binary created in 
+     Phase 1.
+
+**** Do a make from the top level package lisp source directory.[1]
+
+**** Do `make bindist's on all the packages you wish to install and
+     remove the byproduct .tar.gz's.
+
+*** Phase 3 -- If necessary, redump XEmacs
+    with the packages that require dump-time support and install it.
+
+**** Reconfigure without Mule if you don't wish a Mule-ish XEmacs, and
+     rebuild XEmacs.
+
+- or -
+
+**** rm lib-src/DOC src/xemacs; make
+
+**** Install or run in-place.
+
+Note that this is in essence what `make all-elc' has always done.
+
+
+* Improving XEmacs
 =================
 
 ** Creating patches for submission
@@ -386,91 +476,93 @@
 CVS.  Members of the Review Board will also post short notices of
 administrative action (APPROVE, VETO, QUERY, etc) to xemacs-patches.
 
-* Packages
-====================================
+** Large contributions
+======================
 
-[Note: these instructions have been partly updated, but not carefully
-reviewed in some time.  Caveat tester.]
+Perhaps you have a whole new mode, or a major synchronization with
+upstream for a neglected package, or a synchronization with GNU Emacs
+you would like to contribute.  We welcome such contributions, but they
+are likely to be relatively controversial, generate more comments and
+requests for revision, and take longer to integrate.  Please be
+patient with the process.
 
-Starting with XEmacs 21.1, much of the functionality of XEmacs has
-been unbundled into "the packages."  For more information about the
-package system, see the Info nodes on Packages (in the XEmacs User
-Manual) and on Packaging (in the Lisp Reference).
+*** Updates to existing packages
+--------------------------------
 
-When bootstrapping XEmacs, you may need to manually install some
-packages (at least xemacs-base and efs).  These packages are available
-by FTP at ftp://ftp.xemacs.org/pub/xemacs/packages/.
-
-** Binary package installation
-================================================
-
-Prerequisite:  XEmacs 21.0-b1.
+If a package has gotten a bit out of date, or even started to bitrot,
+we welcome patches to synchronize it with upstream/GNU Emacs versions.
+Most packages end up varying somewhat from their GNU origins.  See
+"Syncing with GNU Emacs" for hints.  Note that if you do a reasonably
+large amount of syncing with GNU Emacs, you should log this in the
+file itself as well as in the ChangeLog.
 
-Binary packages are complete entities that can be untarred at the top
-level of an XEmacs package hierarchy and work at runtime.  To install files
-in this directory, run the command `M-x package-admin-add-binary-package'
-and fill in appropriate values to the prompts.
+If the package is important to you, please consider becoming the
+maintainer.  (See "New packages", below.)
 
-** Manual procedures for package management
-===========================================
-
-Prerequisite: XEmacs 21.0
+*** New packages
+----------------
 
-When adding and deleting files from a lisp directory the
-auto-autoloads.el (global symbols) and custom-load.el (Customization
-groups) must be kept in synch.  Assuming one is manipulating a
-directory called `lisp-utils', the command to rebuild the
-auto-autoloads.el file is:
+If you have a new mode or other large addition that does not require
+changes to the core, please consider submitting it as a package, and
+becoming the maintainer.  You get direct commit privileges to the
+repository for your package, "approval" privileges for your own
+patches as well as third party patches to your package, and some
+degree of veto power over patches you don't like.  In return, you are
+expected to maintain friendly liaison with the upstream developer (if
+you aren't the upstream developer), keep watch on the XEmacs Patches
+list for relevant patches, and be available by email to other
+developers for discussion of changes that impact your package.  It's
+also a pretty standard route to the "core" development group, where we
+have plenty of extra work waiting for volunteers.
 
-xemacs -vanilla -batch -l autoload -f batch-update-directory lisp-utils
-
-The command to rebuild the custom-load.el file is:
-
-xemacs -vanilla -batch -l cus-dep -f Custom-make-dependencies lisp-utils
-
-To bytecompile both of these files the command is:
+You don't have to become the maintainer, but it virtually ensures
+rapid acceptance of the package.
 
-xemacs -vanilla -batch -f batch-byte-compile \
-	lisp-utils/auto-autoloads.el lisp-utils/custom-load.el
+For help in creating new packages, see the (rather sparse) discussions
+in the XEmacs User's Guide and the Lisp Reference Manual.  The XEmacs
+Package Release Engineer (Ville Skyttä <ville.skytta@xemacs.org> is
+currently serving with Peter Brown <rendhalver@users.sourceforge.net>
+assisting; Steve Youngs <youngs@xemacs.org> and Stephen Turnbull
+<stephen@xemacs.org> also can help) is the most likely source of advice.
 
-** Building XEmacs and XEmacs packages from scratch
-===================================================
-
-To build everything completely from scratch (not a high priority as a
-design goal), the following procedure should work.  (I don't recommend
-building this way).
+*** Syncing with GNU Emacs
+--------------------------
 
-*** Phase 1 -- Get a minimal XEmacs binary with mule to build the package
-    lisp with.
+Syncing with GNU Emacs is an important activity.  Although each
+version has its advantages and areas of concentration, it is very
+desirable that common functionality share specifications and APIs.
+When porting GNU code to XEmacs, the following points should be given
+special attention:
 
-**** Grab a mule-base tarball and install it into a newly created package
-     directory.
+  o Recent GNU Emacsen cannot be built without Mule, but XEmacs can.
+    Make sure your changes do not assume the presence of Mule.
 
-**** Configure XEmacs with mule and a package-path including the
-     directory created above.
-
-**** Do a `make dist' to build an XEmacs binary.
+  o GNU Emacs nomenclature often differs from that of XEmacs.
+    Sometimes syncing the names is desirable, other times not.
 
-*** Phase 2 -- Build and install the package lisp.
-
-**** Modify XEmacs.rules for local paths and the XEmacs binary created in 
-     Phase 1.
+  o GNU Emacs functionality often differs from that of XEmacs.
+    Syncing functionality is often controversial.
 
-**** Do a make from the top level package lisp source directory.[1]
+It is important that you let other developers know that
+synchronization has taken place, to what degree, and when.  For this
+purpose, we use comments of the form
 
-**** Do `make bindist's on all the packages you wish to install and
-     remove the byproduct .tar.gz's.
+/* Synched up with: FSF 21.3 by Stephen Turnbull */
 
-*** Phase 3 -- If necessary, redump XEmacs
-    with the packages that require dump-time support and install it.
+in the source file itself, as the last element of the prefatory
+material (copyright notice and commentary).  Obviously the comment
+marker needs to be changed to leading semicolons for Lisp, but
+otherwise the format is the same.
 
-**** Reconfigure without Mule if you don't wish a Mule-ish XEmacs, and
-     rebuild XEmacs.
+Of course you should note syncing as the purpose in the ChangeLog,
+too.  But entries get buried deep in the ChangeLog file, and may even
+get moved to a separate ChangeLog.OLD file for rarely synched files.
 
-- or -
-
-**** rm lib-src/DOC src/xemacs; make
+Rather than dates we use the version of GNU Emacs to sync to.  If the
+synchronization is partial, add a new comment describing what has
+actually been synched, leaving the description of the last full sync
+in place.  At each full sync, remove all previous synchronization
+comments.
 
-**** Install or run in-place.
-
-Note that this is in essence what `make all-elc' has always done.
+This applies to Lisp that we have broken out into packages, but
+remains in the GNU Emacs core, as well as to core Lisp in XEmacs.
diff -r ce9bdd48654f -r ccaf90c5a53a lisp/ChangeLog
--- a/lisp/ChangeLog	Tue Oct 01 21:55:21 2002 +0000
+++ b/lisp/ChangeLog	Wed Oct 02 09:31:40 2002 +0000
@@ -1,3 +1,14 @@
+2002-09-22  Ville Skyttä  <ville.skytta@xemacs.org>
+
+	* files.el (auto-mode-alist): Move entries for modes in packages
+	to the corresponding modes.
+	(interpreter-mode-alist): Ditto.
+
+2002-09-16  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* obsolete.el (parse-sexp-lookup-properties): new compatible alias
+	for `lookup-syntax-properties'.  RFE from Alan Mackenzie.
+
 2002-08-08  Jerry James  <james@xemacs.org>
 
 	* autoload.el (make-c-autoload): New function for reading autoload
diff -r ce9bdd48654f -r ccaf90c5a53a lisp/files.el
--- a/lisp/files.el	Tue Oct 01 21:55:21 2002 +0000
+++ b/lisp/files.el	Wed Oct 02 09:31:40 2002 +0000
@@ -1222,9 +1222,11 @@
 	 :error-form nil
 	 (hack-local-variables (not find-file)))))
 
-;; #### This variable sucks in the package model.  There should be a
-;; way for new packages to add their entries to auto-mode-alist in a
-;; clean way.  Per Abrahamsen suggested splitting auto-mode-alist to
+;; `auto-mode-alist' used to contain entries for modes in core and in packages.
+;; The applicable entries are now located in the corresponding modes in
+;; packages; the ones here are for core modes.  Ditto for
+;; `interpreter-mode-alist' below.
+;; Per Abrahamsen suggested splitting auto-mode-alist to
 ;; several distinct variables such as, in order of precedence,
 ;; `user-auto-mode-alist' for users, `package-auto-mode-alist' for
 ;; packages and `auto-mode-alist' (which might also be called
@@ -1233,82 +1235,20 @@
 
 (defvar auto-mode-alist
   '(("\\.te?xt\\'" . text-mode)
-    ("\\.[chi]\\'" . c-mode)
     ("\\.el\\'" . emacs-lisp-mode)
-    ("\\.\\(?:[CH]\\|cc\\|hh\\)\\'" . c++-mode)
-    ("\\.[ch]\\(pp\\|xx\\|\\+\\+\\)\\'" . c++-mode)
-    ("\\.java\\'" . java-mode)
-    ("\\.idl\\'" . idl-mode)
-    ("\\.f\\(?:or\\)?\\'" . fortran-mode)
-    ("\\.F\\(?:OR\\)?\\'" . fortran-mode)
-    ("\\.[fF]90\\'" . f90-mode)
-;;; Less common extensions come here
-;;; so more common ones above are found faster.
-    ("\\.\\([pP][Llm]\\|al\\)\\'" . perl-mode)
-    ("\\.py\\'" . python-mode)
-    ("\\.texi\\(?:nfo\\)?\\'" . texinfo-mode)
-    ("\\.ad[abs]\\'" . ada-mode)
     ("\\.c?l\\(?:i?sp\\)?\\'" . lisp-mode)
-    ("\\.p\\(?:as\\)?\\'" . pascal-mode)
-    ("\\.ltx\\'" . latex-mode)
-    ("\\.[sS]\\'" . asm-mode)
-    ("[Cc]hange.?[Ll]og?\\(?:.[0-9]+\\)?\\'" . change-log-mode)
-    ("\\$CHANGE_LOG\\$\\.TXT" . change-log-mode)
-    ("\\.scm?\\(?:\\.[0-9]*\\)?\\'" . scheme-mode)
-    ("\\.e\\'" . eiffel-mode)
-    ("\\.mss\\'" . scribe-mode)
-    ("\\.m\\(?:[mes]\\|an\\)\\'" . nroff-mode)
-    ("\\.icn\\'" . icon-mode)
-    ("\\.\\(?:[ckz]?sh\\|shar\\)\\'" . sh-mode)
-    ("\\.[Pp][Rr][Oo]\\'" . idlwave-mode)
-    ("\\.si\\(v\\|eve\\)\\'" . sieve-mode)
-    ;; #### Unix-specific!
-    ("/\\.\\(?:bash_\\|z\\)?\\(profile\\|login\\|logout\\)\\'" . sh-mode)
-    ("/\\.\\(?:[ckz]sh\\|bash\\|tcsh\\|es\\|xinit\\|startx\\)rc\\'" . sh-mode)
-    ("/\\.\\(?:[kz]shenv\\|xsession\\)\\'" . sh-mode)
-    ("\\.m?spec$" .sh-mode)
-    ;; The following come after the ChangeLog pattern for the sake of
-    ;; ChangeLog.1, etc. and after the .scm.[0-9] pattern too.
-    ("\\.[123456789]\\'" . nroff-mode)
-    ("\\.[tT]e[xX]\\'" . tex-mode)
-    ("\\.\\(?:sty\\|cls\\|bbl\\)\\'" . latex-mode)
-    ("\\.bib\\'" . bibtex-mode)
     ("\\.article\\'" . text-mode)
     ("\\.letter\\'" . text-mode)
-    ("\\.\\(?:tcl\\|exp\\)\\'" . tcl-mode)
-    ("\\.wrl\\'" . vrml-mode)
-    ("\\.awk\\'" . awk-mode)
-    ("\\.prolog\\'" . prolog-mode)
-    ("\\.\\(?:arc\\|zip\\|lzh\\|zoo\\)\\'" . archive-mode)
     ;; Mailer puts message to be edited in /tmp/Re.... or Message
     ;; #### Unix-specific!
     ("\\`/tmp/Re" . text-mode)
     ("/Message[0-9]*\\'" . text-mode)
-    ("/drafts/[0-9]+\\'" . mh-letter-mode)
     ;; some news reader is reported to use this
     ("^/tmp/fol/" . text-mode)
-    ("\\.y\\'" . c-mode)
-    ("\\.lex\\'" . c-mode)
-    ("\\.m\\'" . objc-mode)
-    ("\\.oak\\'" . scheme-mode)
-    ("\\.[sj]?html?\\'" . html-mode)
-    ("\\.jsp\\'" . html-mode)
-    ("\\.xml\\'" . xml-mode)
-    ("\\.\\(?:sgml?\\|dtd\\)\\'" . sgml-mode)
-    ("\\.c?ps\\'" . postscript-mode)
     ;; .emacs following a directory delimiter in either Unix or
     ;; Windows syntax.
     ("[/\\][._].*emacs\\'" . emacs-lisp-mode)
-    ("\\.m4\\'" . autoconf-mode)
-    ("configure\\.\\(in\\|ac\\)\\'" . autoconf-mode)
     ("\\.ml\\'" . lisp-mode)
-    ("\\.ma?ke?\\'" . makefile-mode)
-    ("\\(GNU\\)?[Mm]akefile\\(\\.\\|\\'\\)" . makefile-mode)
-    ("[./\\]X\\(defaults\\|environment\\|resources\\|modmap\\)\\'" . xrdb-mode)
-    ;; #### The following three are Unix-specific (but do we care?)
-    ("/app-defaults/" . xrdb-mode)
-    ("\\.[^/]*wm2?\\(?:rc\\)?\\'" . winmgr-mode)
-    ("\\.\\(?:jpe?g\\|JPE?G\\|png\\|PNG\\|gif\\|GIF\\|tiff?\\|TIFF?\\)\\'" . image-mode)
     )
 "Alist of filename patterns vs. corresponding major mode functions.
 Each element looks like (REGEXP . FUNCTION) or (REGEXP FUNCTION NON-NIL).
@@ -1321,17 +1261,7 @@
 REGEXP and search the list again for another match.")
 
 (defvar interpreter-mode-alist
-  '(("^#!.*csh"	  . sh-mode)
-    ("^#!.*\\b\\(scope\\|wish\\|tcl\\|tclsh\\|expect\\)" . tcl-mode)
-    ("^#!.*sh\\b" . sh-mode)
-    ("perl"   . perl-mode)
-    ("python" . python-mode)
-    ("awk\\b" . awk-mode)
-    ("rexx"   . rexx-mode)
-    ("scm\\|guile" . scheme-mode)
-    ("emacs" . emacs-lisp-mode)
-    ("make" . makefile-mode)
-    ("^:"     . sh-mode))
+  '(("emacs" . emacs-lisp-mode))
   "Alist mapping interpreter names to major modes.
 This alist is used to guess the major mode of a file based on the
 contents of the first line.  This line often contains something like:
diff -r ce9bdd48654f -r ccaf90c5a53a lisp/obsolete.el
--- a/lisp/obsolete.el	Tue Oct 01 21:55:21 2002 +0000
+++ b/lisp/obsolete.el	Wed Oct 02 09:31:40 2002 +0000
@@ -107,6 +107,8 @@
 (make-obsolete 'set-window-dot 'set-window-point)
 
 (define-obsolete-function-alias 'extent-buffer 'extent-object)
+(define-compatible-variable-alias 'parse-sexp-lookup-properties
+  'lookup-syntax-properties)
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;; frames
 (defun frame-first-window (frame)
diff -r ce9bdd48654f -r ccaf90c5a53a man/ChangeLog
--- a/man/ChangeLog	Tue Oct 01 21:55:21 2002 +0000
+++ b/man/ChangeLog	Wed Oct 02 09:31:40 2002 +0000
@@ -1,3 +1,14 @@
+2002-09-20  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* internals/internals.texi (Techniques for XEmacs Developers):
+	More performance optimization hints.
+	(Modules for Other Aspects of the Lisp Interpreter and Object System):
+	Describe syntax code internals.
+
+	* lispref/syntax.texi (Syntax Basics): XEmacs "20" -> "20 and later".
+	(Syntax Class Table): Deprecate SPC as whitespace designator.
+	(Syntax Flags): Rewrite for `8-bit' comment syntax flags.
+
 2002-08-30  Steve Youngs  <youngs@xemacs.org>
 
 	* XEmacs 21.5.9 "brussels sprouts" is released.
diff -r ce9bdd48654f -r ccaf90c5a53a man/internals/internals.texi
--- a/man/internals/internals.texi	Tue Oct 01 21:55:21 2002 +0000
+++ b/man/internals/internals.texi	Wed Oct 02 09:31:40 2002 +0000
@@ -3243,7 +3243,14 @@
 
 If you want to make XEmacs faster, target your favorite slow benchmark,
 run a profiler like Quantify, @code{gprof}, or @code{tcov}, and figure
-out where the cycles are going.  Specific projects:
+out where the cycles are going.  In many cases you can localize the
+problem (because a particular new feature or even a single patch
+elicited it).  Don't hesitate to use brute force techniques like a
+global counter incremented at strategic places, especially in
+combination with other performance indications (@emph{e.g.}, degree of
+buffer fragmentation into extents).
+
+Specific projects:
 
 @itemize @bullet
 @item
@@ -3256,8 +3263,16 @@
 @item
 Speed up redisplay.
 @item
-Speed up syntax highlighting.  Maybe moving some of the syntax
-highlighting capabilities into C would make a difference.
+Speed up syntax highlighting.  It was suggested that ``maybe moving some
+of the syntax highlighting capabilities into C would make a
+difference.''  Wrong idea, I think.  When processing one large file a
+particular low-level routine was being called 40 @emph{million} times
+simply for @emph{one} call to @code{newline-and-indent}.  Syntax
+highlighting needs to be rewritten to use a reliable, fast parser, then
+to trust the pre-parsed structure, and only do re-highlighting locally
+to a text change.  Modern machines are fast enough to implement such
+parsers in Lisp; but no machine will ever be fast enough to deal with
+quadratic (or worse) algorithms!
 @item
 Implement tail recursion in Emacs Lisp (hard!).
 @end itemize
@@ -4772,6 +4787,27 @@
 @code{forward-sexp}, and by @file{font-lock.c} to locate quoted strings,
 comments, etc.
 
+@c #### Break this out into a separate node somewhere!
+Syntax codes are implemented as bitfields in an int.  Bits 0-6 contain
+the syntax code itself, bit 7 is a special prefix flag used for Lisp,
+and bits 16-23 contain comment syntax flags.  From the Lisp programmer's
+point of view, there are 11 flags: 2 styles X 2 characters X @{start,
+end@} flags for two-character comment delimiters, 2 style flags for
+one-character comment delimiters, and the prefix flag.
+
+Internally, however, the characters used in multi-character delimiters
+will have non-comment-character syntax classes (@emph{e.g.}, the
+@samp{/} in C's @samp{/*} comment-start delimiter has ``punctuation''
+(here meaning ``operator-like'') class in C modes).  Thus a mixed
+comment style, such as C++'s @samp{//} to end of line, is represented by
+giving @samp{/} the ``punctuation'' class and the ``style b first
+character of start sequence'' and ``style b second character of start
+sequence'' flags.  The fact that the class is @emph{not} a comment class allows
+the syntax scanner to recognize that this is a multi-character
+delimiter.  The @samp{newline} character is given (single-character)
+``comment-end'' @emph{class} and the ``style b first character of end
+sequence'' @emph{flag}.  The ``comment-end'' class allows the scanner to
+determine that no second character is needed to terminate the comment.
 
 
 @example
diff -r ce9bdd48654f -r ccaf90c5a53a man/lispref/syntax.texi
--- a/man/lispref/syntax.texi	Tue Oct 01 21:55:21 2002 +0000
+++ b/man/lispref/syntax.texi	Wed Oct 02 09:31:40 2002 +0000
@@ -42,7 +42,7 @@
 this chapter.
 @end ifinfo
 
-  Under XEmacs 20, a syntax table is a particular subtype of the
+  Under XEmacs 20 and later, a syntax table is a particular subtype of the
 primitive char table type (@pxref{Char Tables}), and each element of the
 char table is an integer that encodes the syntax of the character in
 question, or a cons of such an integer and a matching character (for
@@ -133,11 +133,13 @@
 their meanings, and examples of their use.
 
 @deffn {Syntax class} @w{whitespace character}
-@dfn{Whitespace characters} (designated with @w{@samp{@ }} or @samp{-})
+@dfn{Whitespace characters} (designated with @samp{-})
 separate symbols and words from each other.  Typically, whitespace
 characters have no other syntactic significance, and multiple whitespace
 characters are syntactically equivalent to a single one.  Space, tab,
-newline and formfeed are almost always classified as whitespace.
+newline and formfeed are almost always classified as whitespace.  (The
+designator @w{@samp{@ }} is accepted for backwards compatibility with
+older versions of XEmacs, but is deprecated.  It is invalid in GNU Emacs.)
 @end deffn
 
 @deffn {Syntax class} @w{word constituent}
@@ -268,42 +270,23 @@
 @subsection Syntax Flags
 @cindex syntax flags
 
+@c This is a bit inaccurate, the ``a'' and ``b'' flags actually don't
+@c exist in the internal implementation.  AFAICT it doesn't affect the
+@c semantics as perceived by the LISP programmer.
   In addition to the classes, entries for characters in a syntax table
-can include flags.  There are six possible flags, represented by the
-characters @samp{1}, @samp{2}, @samp{3}, @samp{4}, @samp{b} and
-@samp{p}.
+can include flags.  There are eleven possible flags, represented by the
+digits @samp{1}--@samp{8}, and the lowercase letters @samp{a}, @samp{b},
+and @samp{p}.
 
-  All the flags except @samp{p} are used to describe multi-character
-comment delimiters.  The digit flags indicate that a character can
-@emph{also} be part of a comment sequence, in addition to the syntactic
-properties associated with its character class.  The flags are
+  All the flags except @samp{p} are used to describe comment delimiters.
+The digit flags indicate that a character can @emph{also} be part of a
+multi-character comment sequence, in addition to the syntactic
+properties associated with its character class.  The flags must be
 independent of the class and each other for the sake of characters such
 as @samp{*} in C mode, which is a punctuation character, @emph{and} the
 second character of a start-of-comment sequence (@samp{/*}), @emph{and}
 the first character of an end-of-comment sequence (@samp{*/}).
 
-The flags for a character @var{c} are:
-
-@itemize @bullet
-@item
-@samp{1} means @var{c} is the start of a two-character comment-start
-sequence.
-
-@item
-@samp{2} means @var{c} is the second character of such a sequence.
-
-@item
-@samp{3} means @var{c} is the start of a two-character comment-end
-sequence.
-
-@item
-@samp{4} means @var{c} is the second character of such a sequence.
-
-@item
-@c Emacs 19 feature
-@samp{b} means that @var{c} as a comment delimiter belongs to the
-alternative ``b'' comment style.
-
 Emacs supports two comment styles simultaneously in any one syntax
 table.  This is for the sake of C++.  Each style of comment syntax has
 its own comment-start sequence and its own comment-end sequence.  Each
@@ -311,44 +294,59 @@
 the comment-start sequence of style ``b'', it must also end with the
 comment-end sequence of style ``b''.
 
-The two comment-start sequences must begin with the same character; only
-the second character may differ.  Mark the second character of the
-``b''-style comment-start sequence with the @samp{b} flag.
-
-A comment-end sequence (one or two characters) applies to the ``b''
-style if its first character has the @samp{b} flag set; otherwise, it
-applies to the ``a'' style.
+@c #### Compatibility note; index here.
+As an extension to GNU Emacs 19 and 20, XEmacs supports two arbitrary
+comment-start sequences and two arbitrary comment-end sequences.  (Thus
+the need for 8 flags.)  GNU Emacs restricts the comment-start sequences
+to start with the same character, XEmacs does not.  This means that for
+two-character sequences, where GNU Emacs uses the @samp{b} flag, XEmacs
+uses the digit flags @samp{5}--@samp{8}.
 
-The appropriate comment syntax settings for C++ are as follows:
+A one-character comment-end sequence applies to the ``b'' style if its
+first character has the @samp{b} flag set; otherwise, it applies to the
+``a'' style.  The @samp{a} flag is optional.  These flags have no effect
+on non-comment characters; two-character styles are determined by the
+digit flags.
+
+The flags for a character @var{c} are:
 
-@table @asis
-@item @samp{/}
-@samp{124b}
-@item @samp{*}
-@samp{23}
-@item newline
-@samp{>b}
-@end table
+@itemize @bullet
+@item
+@samp{1} means @var{c} is the start of a two-character comment-start
+sequence of style ``a''.
+
+@item
+@samp{2} means @var{c} is the second character of such a sequence.
 
-This defines four comment-delimiting sequences:
+@item
+@samp{3} means @var{c} is the start of a two-character comment-end
+sequence of style ``a''.
+
+@item
+@samp{4} means @var{c} is the second character of such a sequence.
 
-@table @asis
-@item @samp{/*}
-This is a comment-start sequence for ``a'' style because the
-second character, @samp{*}, does not have the @samp{b} flag.
+@item
+@samp{5} means @var{c} is the start of a two-character comment-start
+sequence of style ``b''.
+
+@item
+@samp{6} means @var{c} is the second character of such a sequence.
 
-@item @samp{//}
-This is a comment-start sequence for ``b'' style because the second
-character, @samp{/}, does have the @samp{b} flag.
+@item
+@samp{7} means @var{c} is the start of a two-character comment-end
+sequence of style ``b''.
+
+@item
+@samp{8} means @var{c} is the second character of such a sequence.
 
-@item @samp{*/}
-This is a comment-end sequence for ``a'' style because the first
-character, @samp{*}, does not have the @samp{b} flag
+@item
+@samp{a} means that @var{c} as a comment delimiter belongs to the
+default ``a'' comment style.  (This flag is optional.)
 
-@item newline
-This is a comment-end sequence for ``b'' style, because the newline
-character has the @samp{b} flag.
-@end table
+@item
+@c Emacs 19 feature
+@samp{b} means that @var{c} as a comment delimiter belongs to the
+alternate ``b'' comment style.
 
 @item
 @c Emacs 19 feature
@@ -362,6 +360,62 @@
 prefix (@samp{'}).  @xref{Motion and Syntax}.
 @end itemize
 
+Lisp (as you would expect) has a simple comment syntax.
+
+@table @asis
+@item @samp{;}
+@samp{<}
+@item newline
+@samp{>}
+@end table
+
+Note that no flags are used.
+This defines two comment-delimiting sequences:
+
+@table @asis
+@item @samp{;}
+This is a single-character comment-start sequence because the syntax
+class is @samp{<}.
+
+@item newline
+This is a single-character comment-end sequence because the syntax class
+is @samp{>} and the @samp{b} flag is not set.
+@end table
+
+C++ (again, as you would expect) has a baroque, overrich, and
+excessively complex comment syntax.
+
+@table @asis
+@item @samp{/}
+@samp{1456}
+@item @samp{*}
+@samp{23}
+@item newline
+@samp{>b}
+@end table
+
+Note that the ``b'' style mixes one-character and two-character
+sequences.  The table above defines four comment-delimiting sequences:
+
+@table @asis
+@item @samp{/*}
+This is a comment-start sequence for ``a'' style because the @samp{1}
+flag is set on @samp{/} and the @samp{2} flag is set on @samp{*}.
+
+@item @samp{//}
+This is a comment-start sequence for ``b'' style because both the @samp{5}
+and the @samp{6} flags are set on @samp{/}.
+
+@item @samp{*/}
+This is a comment-end sequence for ``a'' style because the @samp{3}
+flag is set on @samp{*} and the @samp{4} flag is set on @samp{/}.
+
+@item newline
+This is a comment-end sequence for ``b'' style, because the newline
+character has the @samp{b} flag.
+@end table
+
+
 @node Syntax Table Functions
 @section Syntax Table Functions
 
diff -r ce9bdd48654f -r ccaf90c5a53a man/lispref/variables.texi
--- a/man/lispref/variables.texi	Tue Oct 01 21:55:21 2002 +0000
+++ b/man/lispref/variables.texi	Wed Oct 02 09:31:40 2002 +0000
@@ -725,12 +725,15 @@
   One other function for setting a variable is designed to add
 an element to a list if it is not already present in the list.
 
-@defun add-to-list symbol element
+@defun add-to-list symbol element &optional append
 This function sets the variable @var{symbol} by consing @var{element}
 onto the old value, if @var{element} is not already a member of that
 value.  It returns the resulting list, whether updated or not.  The
 value of @var{symbol} had better be a list already before the call.
 
+If the optional argument @var{append} is non-@code{nil}, @var{element}
+is added at the end of the list.
+
 The argument @var{symbol} is not implicitly quoted; @code{add-to-list}
 is an ordinary function, like @code{set} and unlike @code{setq}.  Quote
 the argument yourself if that is what you want.
diff -r ce9bdd48654f -r ccaf90c5a53a src/ChangeLog
--- a/src/ChangeLog	Tue Oct 01 21:55:21 2002 +0000
+++ b/src/ChangeLog	Wed Oct 02 09:31:40 2002 +0000
@@ -1,3 +1,19 @@
+2002-09-09  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* search.c (clear_unused_search_regs): New static function.
+	(search_buffer): 
+	(simple_search): 
+	(boyer_moore):
+	Use it.  Fixes "stale match data" bug reported by Martin Stjernholm.
+	Minor clarifications in comments.
+
+	* regex.c (re_match_2_internal): Ensure no stale submatches.
+
+2002-09-26  Golubev I. N.  <gin@mo.msk.ru>
+
+	* frame-x.c (x_delete_frame): do not call XtDisplay on a destroyed
+	widget.
+
 2002-09-22  Mike Sperber <mike@xemacs.org>
 
 	* specifier.c (specifier_add_spec): Don't do anything if NILP
diff -r ce9bdd48654f -r ccaf90c5a53a src/frame-x.c
--- a/src/frame-x.c	Tue Oct 01 21:55:21 2002 +0000
+++ b/src/frame-x.c	Wed Oct 02 09:31:40 2002 +0000
@@ -2644,14 +2644,14 @@
   dpy = XtDisplay (FRAME_X_SHELL_WIDGET (f));
 
 #ifdef EXTERNAL_WIDGET
-  expect_x_error (XtDisplay (FRAME_X_SHELL_WIDGET (f)));
+  expect_x_error (dpy);
   /* for obscure reasons having (I think) to do with the internal
      window-to-widget hierarchy maintained by Xt, we have to call
      XtUnrealizeWidget() here.  Xt can really suck. */
   if (f->being_deleted)
     XtUnrealizeWidget (FRAME_X_SHELL_WIDGET (f));
   XtDestroyWidget (FRAME_X_SHELL_WIDGET (f));
-  x_error_occurred_p (XtDisplay (FRAME_X_SHELL_WIDGET (f)));
+  x_error_occurred_p (dpy);
 #else
   XtDestroyWidget (FRAME_X_SHELL_WIDGET (f));
   /* make sure the windows are really gone! */
diff -r ce9bdd48654f -r ccaf90c5a53a src/regex.c
--- a/src/regex.c	Tue Oct 01 21:55:21 2002 +0000
+++ b/src/regex.c	Wed Oct 02 09:31:40 2002 +0000
@@ -4916,16 +4916,24 @@
 			(regoff_t) POINTER_TO_OFFSET (regend[internal_reg]);
                     }
 		}
-
-              /* If the regs structure we return has more elements than
-                 were in the pattern, set the extra elements to -1.  If
-                 we (re)allocated the registers, this is the case,
-                 because we always allocate enough to have at least one
-                 -1 at the end.  */
-              for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++)
-                regs->start[mcnt] = regs->end[mcnt] = -1;
 	    } /* regs && !bufp->no_sub */
 
+	  /* If we have regs and the regs structure has more elements than
+             were in the pattern, set the extra elements to -1.  If we
+	     (re)allocated the registers, this is the case, because we
+	     always allocate enough to have at least one -1 at the end.
+
+	     We do this even when no_sub is set because some applications
+             (XEmacs) reuse register structures which may contain stale
+	     information, and permit attempts to access those registers.
+
+	     It would be possible to require the caller to do this, but we'd
+	     have to change the API for this function to reflect that, and
+	     audit all callers. */
+	  if (regs && regs->num_regs > 0)
+	    for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++)
+	      regs->start[mcnt] = regs->end[mcnt] = -1;
+
           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
                         nfailure_points_pushed, nfailure_points_popped,
                         nfailure_points_pushed - nfailure_points_popped);
diff -r ce9bdd48654f -r ccaf90c5a53a src/search.c
--- a/src/search.c	Tue Oct 01 21:55:21 2002 +0000
+++ b/src/search.c	Wed Oct 02 09:31:40 2002 +0000
@@ -111,6 +111,7 @@
 Lisp_Object Vskip_chars_range_table;
 
 static void set_search_regs (struct buffer *buf, Charbpos beg, Charcount len);
+static void clear_unused_search_regs (struct re_registers *regp, int no_sub);
 static Charbpos simple_search (struct buffer *buf, Ibyte *base_pat,
 			       Bytecount len, Bytebpos pos, Bytebpos lim,
 			       EMACS_INT n, Lisp_Object trt);
@@ -1222,10 +1223,11 @@
   if (len == 0)
     {
       set_search_regs (buf, charbpos, 0);
+      clear_unused_search_regs (&search_regs, 0);
       return charbpos;
     }
 
-  /* Searching 0 times means don't move.  */
+  /* Searching 0 times means noop---don't move, don't touch registers.  */
   if (n == 0)
     return charbpos;
 
@@ -1481,6 +1483,7 @@
 	  end = bytebpos_to_charbpos (buf, pos + buf_len);
 	}
       set_search_regs (buf, beg, end - beg);
+      clear_unused_search_regs (&search_regs, 0);
 
       return retval;
     }
@@ -1844,6 +1847,7 @@
 		    Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len);
 
 		    set_search_regs (buf, bufstart, bufend - bufstart);
+		    clear_unused_search_regs (&search_regs, 0);
 		  }
 
 		  if ((n -= direction) != 0)
@@ -1934,6 +1938,7 @@
 		    Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len);
 
 		    set_search_regs (buf, bufstart, bufend - bufstart);
+		    clear_unused_search_regs (&search_regs, 0);
 		  }
 
 		  if ((n -= direction) != 0)
@@ -1953,8 +1958,8 @@
   return bytebpos_to_charbpos (buf, pos);
 }
 
-/* Record beginning BEG and end BEG + LEN
-   for a match just found in the current buffer.  */
+/* Record the whole-match data (beginning BEG and end BEG + LEN) and the
+   buffer for a match just found.  */
 
 static void
 set_search_regs (struct buffer *buf, Charbpos beg, Charcount len)
@@ -1973,6 +1978,24 @@
   last_thing_searched = wrap_buffer (buf);
 }
 
+/* Clear unused search registers so match data will be null.
+   REGP is a pointer to the register structure to clear, usually the global
+   search_regs.  Registers NO_SUB + 1 through num_regs - 1 are set to -1.
+   NO_SUB is the number of subexpressions to allow for.  (Does not count
+   the whole match, i.e., for a string search NO_SUB == 0.)
+   It is an error (asserted) unless 0 <= NO_SUB <= REGP->num_regs - 1. */
+
+static void
+clear_unused_search_regs (struct re_registers *regp, int no_sub)
+{
+  /* This function has been Mule-ized. */
+  int i;
+
+  assert (no_sub >= 0 && no_sub < regp->num_regs);
+  for (i = no_sub + 1; i < regp->num_regs; i++)
+    regp->start[i] = regp->end[i] = -1;
+}
+
 
 /* Given a string of words separated by word delimiters,
    compute a regexp that matches those exact words
diff -r ce9bdd48654f -r ccaf90c5a53a tests/ChangeLog
--- a/tests/ChangeLog	Tue Oct 01 21:55:21 2002 +0000
+++ b/tests/ChangeLog	Wed Oct 02 09:31:40 2002 +0000
@@ -1,3 +1,15 @@
+2002-09-09  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* automated/regexp-tests.el: Add test for stale subexpr match-data.
+	Thanks to Martin Stjernholm for the report.
+
+	* automated/syntax-tests.el: Conditionalize syntax-table property
+	tests on feature.  Enable feature if present.
+
+2002-09-12  Stephen J. Turnbull  <stephen@xemacs.org>
+
+	* automated/regexp-tests.el: Add word-boundary regexp tests.
+
 2002-08-30  Steve Youngs  <youngs@xemacs.org>
 
 	* XEmacs 21.5.9 "brussels sprouts" is released.
diff -r ce9bdd48654f -r ccaf90c5a53a tests/automated/regexp-tests.el
--- a/tests/automated/regexp-tests.el	Tue Oct 01 21:55:21 2002 +0000
+++ b/tests/automated/regexp-tests.el	Wed Oct 02 09:31:40 2002 +0000
@@ -213,3 +213,41 @@
   (looking-at "Unmatchable text")
   (replace-match "")
   (Assert (looking-at "^buffer.$")))
+
+;; Test that trivial regexps reset unused registers
+;; Thanks to Martin Stjernholm for the report.
+;; xemacs-beta <5blm6h2ki5.fsf@lister.roxen.com>
+(with-temp-buffer
+  (insert "ab")
+  (goto-char (point-min))
+  (re-search-forward "\\(a\\)")
+  ;; test the whole-match data, too -- one try scotched that, too!
+  (Assert (string= (match-string 0) "a"))
+  (Assert (string= (match-string 1) "a"))
+  (re-search-forward "b")
+  (Assert (string= (match-string 0) "b"))
+  (Assert (null (match-string 1))))
+
+;; Test word boundaries
+(Assert (= (string-match " \\<a" " a") 0))
+(Assert (= (string-match "a\\> " "a ") 0))
+(Assert (= (string-match " \\ba" " a") 0))
+(Assert (= (string-match "a\\b " "a ") 0))
+(Assert (= (string-match "\\ba" " a") 1))
+(Assert (= (string-match "a\\b" "a ") 0))
+;; should work at target boundaries
+(Assert (= (string-match "\\<a" "a") 0))
+(Assert (= (string-match "a\\>" "a") 0))
+(Assert (= (string-match "\\ba" "a") 0))
+(Assert (= (string-match "a\\b" "a") 0))
+;; but not if the "word" would be on the null side of the boundary!
+(Assert (not (string-match "\\<" "")))
+(Assert (not (string-match "\\>" "")))
+(Assert (not (string-match " \\<" " ")))
+(Assert (not (string-match "\\> " " ")))
+(Assert (not (string-match "a\\<" "a")))
+(Assert (not (string-match "\\>a" "a")))
+;; Expect these to fail :-(
+(Assert (not (string-match "\\b" "")))
+(Assert (not (string-match " \\b" " ")))
+(Assert (not (string-match "\\b " " ")))
diff -r ce9bdd48654f -r ccaf90c5a53a tests/automated/syntax-tests.el
--- a/tests/automated/syntax-tests.el	Tue Oct 01 21:55:21 2002 +0000
+++ b/tests/automated/syntax-tests.el	Wed Oct 02 09:31:40 2002 +0000
@@ -107,16 +107,20 @@
 ;; <apply-pos> can be in the form (start . end), or can be a
 ;; character position.
 (defun test-syntax-table (string apply-pos apply-syntax stop)
-  (goto-char (point-max))
-  (unless (consp apply-pos)
-	(setq apply-pos `(,apply-pos . ,(+ 1 apply-pos))))
-  (let ((point (point)))
-	(insert string)
-	(put-text-property (+ point (car apply-pos)) (+ point (cdr apply-pos))
-					   'syntax-table apply-syntax)
-	(goto-char point)
-	(forward-word 1)
-	(Assert (eq (point) (+ point stop)))))
+  ;; We don't necessarily have syntax-table properties ...
+  (when (boundp 'lookup-syntax-properties) ; backwards compatible kludge
+    ;; ... and they may not be enabled by default if we do.
+    (setq lookup-syntax-properties t)
+    (goto-char (point-max))
+    (unless (consp apply-pos)
+      (setq apply-pos `(,apply-pos . ,(+ 1 apply-pos))))
+    (let ((point (point)))
+      (insert string)
+      (put-text-property (+ point (car apply-pos)) (+ point (cdr apply-pos))
+			 'syntax-table apply-syntax)
+      (goto-char point)
+      (forward-word 1)
+      (Assert (eq (point) (+ point stop))))))
 
 ;; test syntax-table extents
 (with-temp-buffer
@@ -126,6 +130,8 @@
   (test-syntax-table "W." 1 `(,(syntax-string-to-code "w")) 2))
 
 ;; Test forward-comment at buffer boundaries
+;; #### The second Assert fails (once interpreted, once compiled) on 21.4.9
+;; with sjt's version of Andy's syntax-text-property-killer patch.
 (with-temp-buffer
   (if (not (fboundp 'c-mode))
       ;; #### This whole thing should go inside a macro Skip-Test