# HG changeset patch # User stephent # Date 1033551100 0 # Node ID ccaf90c5a53a7ea59a1aee12d3beeeff62cccf96 # Parent ce9bdd48654f81712ee6f08f08664260f8fdd69f [xemacs-hg @ 2002-10-02 09:29:37 by stephent] 21.4 -> R21.5 stuff manual improvements <87k7l1p6su.fsf@tleepslib.sk.tsukuba.ac.jp> regexp tests <87fzvpp6mf.fsf@tleepslib.sk.tsukuba.ac.jp> add-to-list doc Ville Skyttä <87bs6dp6io.fsf@tleepslib.sk.tsukuba.ac.jp> Move filename associations Ville Skyttä <877kh1p6ee.fsf@tleepslib.sk.tsukuba.ac.jp> lookup-syntax-properties <87admil2e0.fsf_-_@tleepslib.sk.tsukuba.ac.jp> fix stale submatches <873crpp50v.fsf_-_@tleepslib.sk.tsukuba.ac.jp> info for developers <87y99hnqc4.fsf@tleepslib.sk.tsukuba.ac.jp> diff -r ce9bdd48654f -r ccaf90c5a53a ChangeLog --- a/ChangeLog Tue Oct 01 21:55:21 2002 +0000 +++ b/ChangeLog Wed Oct 02 09:31:40 2002 +0000 @@ -1,3 +1,7 @@ +2002-07-12 Stephen J. Turnbull + + * etc/BETA (Large contributions): New section. Reorganize file. + 2002-09-20 Steve Youngs * Makefile.in.in (TAGS tags): Descend into the modules directory diff -r ce9bdd48654f -r ccaf90c5a53a etc/BETA --- a/etc/BETA Tue Oct 01 21:55:21 2002 +0000 +++ b/etc/BETA Wed Oct 02 09:31:40 2002 +0000 @@ -151,7 +151,7 @@ is rebuilt. ** Building XEmacs from a full distribution -============================================== +=========================================== Locate a convenient place where you have at least 100MB of free space and issue the command @@ -266,7 +266,97 @@ of the developers. -* Patching XEmacs +* Packages +========== + +[Note: these instructions have been partly updated, but not carefully +reviewed in some time. Caveat tester.] + +Starting with XEmacs 21.1, much of the functionality of XEmacs has +been unbundled into "the packages." For more information about the +package system, see the Info nodes on Packages (in the XEmacs User +Manual) and on Packaging (in the Lisp Reference). 
+ +When bootstrapping XEmacs, you may need to manually install some +packages (at least xemacs-base and efs). These packages are available +by FTP at ftp://ftp.xemacs.org/pub/xemacs/packages/. + +** Binary package installation +============================== + +Prerequisite: XEmacs 21.0-b1. + +Binary packages are complete entities that can be untarred at the top +level of an XEmacs package hierarchy and work at runtime. To install files +in this directory, run the command `M-x package-admin-add-binary-package' +and fill in appropriate values to the prompts. + +** Manual procedures for package management +=========================================== + +Prerequisite: XEmacs 21.0 + +When adding and deleting files from a lisp directory the +auto-autoloads.el (global symbols) and custom-load.el (Customization +groups) must be kept in synch. Assuming one is manipulating a +directory called `lisp-utils', the command to rebuild the +auto-autoloads.el file is: + +xemacs -vanilla -batch -l autoload -f batch-update-directory lisp-utils + +The command to rebuild the custom-load.el file is: + +xemacs -vanilla -batch -l cus-dep -f Custom-make-dependencies lisp-utils + +To bytecompile both of these files the command is: + +xemacs -vanilla -batch -f batch-byte-compile \ + lisp-utils/auto-autoloads.el lisp-utils/custom-load.el + +** Building XEmacs and XEmacs packages from scratch +=================================================== + +To build everything completely from scratch (not a high priority as a +design goal), the following procedure should work. (I don't recommend +building this way). + +*** Phase 1 -- Get a minimal XEmacs binary with mule to build the package + lisp with. + +**** Grab a mule-base tarball and install it into a newly created package + directory. + +**** Configure XEmacs with mule and a package-path including the + directory created above. + +**** Do a `make dist' to build an XEmacs binary. + +*** Phase 2 -- Build and install the package lisp. 
+ +**** Modify XEmacs.rules for local paths and the XEmacs binary created in + Phase 1. + +**** Do a make from the top level package lisp source directory.[1] + +**** Do `make bindist's on all the packages you wish to install and + remove the byproduct .tar.gz's. + +*** Phase 3 -- If necessary, redump XEmacs + with the packages that require dump-time support and install it. + +**** Reconfigure without Mule if you don't wish a Mule-ish XEmacs, and + rebuild XEmacs. + +- or - + +**** rm lib-src/DOC src/xemacs; make + +**** Install or run in-place. + +Note that this is in essence what `make all-elc' has always done. + + +* Improving XEmacs ================= ** Creating patches for submission @@ -386,91 +476,93 @@ CVS. Members of the Review Board will also post short notices of administrative action (APPROVE, VETO, QUERY, etc) to xemacs-patches. -* Packages -==================================== +** Large contributions +====================== -[Note: these instructions have been partly updated, but not carefully -reviewed in some time. Caveat tester.] +Perhaps you have a whole new mode, or a major synchronization with +upstream for a neglected package, or a synchronization with GNU Emacs +you would like to contribute. We welcome such contributions, but they +are likely to be relatively controversial, generate more comments and +requests for revision, and take longer to integrate. Please be +patient with the process. -Starting with XEmacs 21.1, much of the functionality of XEmacs has -been unbundled into "the packages." For more information about the -package system, see the Info nodes on Packages (in the XEmacs User -Manual) and on Packaging (in the Lisp Reference). +*** Updates to existing packages +-------------------------------- -When bootstrapping XEmacs, you may need to manually install some -packages (at least xemacs-base and efs). These packages are available -by FTP at ftp://ftp.xemacs.org/pub/xemacs/packages/. 
- -** Binary package installation -================================================ - -Prerequisite: XEmacs 21.0-b1. +If a package has gotten a bit out of date, or even started to bitrot, +we welcome patches to synchronize it with upstream/GNU Emacs versions. +Most packages end up varying somewhat from their GNU origins. See +"Syncing with GNU Emacs" for hints. Note that if you do a reasonably +large amount of syncing with GNU Emacs, you should log this in the +file itself as well as in the ChangeLog. -Binary packages are complete entities that can be untarred at the top -level of an XEmacs package hierarchy and work at runtime. To install files -in this directory, run the command `M-x package-admin-add-binary-package' -and fill in appropriate values to the prompts. +If the package is important to you, please consider becoming the +maintainer. (See "New packages", below.) -** Manual procedures for package management -=========================================== - -Prerequisite: XEmacs 21.0 +*** New packages +---------------- -When adding and deleting files from a lisp directory the -auto-autoloads.el (global symbols) and custom-load.el (Customization -groups) must be kept in synch. Assuming one is manipulating a -directory called `lisp-utils', the command to rebuild the -auto-autoloads.el file is: +If you have a new mode or other large addition that does not require +changes to the core, please consider submitting it as a package, and +becoming the maintainer. You get direct commit privileges to the +repository for your package, "approval" privileges for your own +patches as well as third party patches to your package, and some +degree of veto power over patches you don't like. In return, you are +expected to maintain friendly liaison with the upstream developer (if +you aren't the upstream developer), keep watch on the XEmacs Patches +list for relevant patches, and be available by email to other +developers for discussion of changes that impact your package. 
It's +also a pretty standard route to the "core" development group, where we +have plenty of extra work waiting for volunteers. -xemacs -vanilla -batch -l autoload -f batch-update-directory lisp-utils - -The command to rebuild the custom-load.el file is: - -xemacs -vanilla -batch -l cus-dep -f Custom-make-dependencies lisp-utils - -To bytecompile both of these files the command is: +You don't have to become the maintainer, but it virtually ensures +rapid acceptance of the package. -xemacs -vanilla -batch -f batch-byte-compile \ - lisp-utils/auto-autoloads.el lisp-utils/custom-load.el +For help in creating new packages, see the (rather sparse) discussions +in the XEmacs User's Guide and the Lisp Reference Manual. The XEmacs +Package Release Engineer (Ville Skyttä is +currently serving with Peter Brown +assisting; Steve Youngs and Stephen Turnbull + also can help) is the most likely source of advice. -** Building XEmacs and XEmacs packages from scratch -=================================================== - -To build everything completely from scratch (not a high priority as a -design goal), the following procedure should work. (I don't recommend -building this way). +*** Syncing with GNU Emacs +-------------------------- -*** Phase 1 -- Get a minimal XEmacs binary with mule to build the package - lisp with. +Syncing with GNU Emacs is an important activity. Although each +version has its advantages and areas of concentration, it is very +desirable that common functionality share specifications and APIs. +When porting GNU code to XEmacs, the following points should be given +special attention: -**** Grab a mule-base tarball and install it into a newly created package - directory. + o Recent GNU Emacsen cannot be built without Mule, but XEmacs can. + Make sure your changes do not assume the presence of Mule. -**** Configure XEmacs with mule and a package-path including the - directory created above. - -**** Do a `make dist' to build an XEmacs binary. 
+ o GNU Emacs nomenclature often differs from that of XEmacs. + Sometimes syncing the names is desirable, other times not. -*** Phase 2 -- Build and install the package lisp. - -**** Modify XEmacs.rules for local paths and the XEmacs binary created in - Phase 1. + o GNU Emacs functionality often differs from that of XEmacs. + Syncing functionality is often controversial. -**** Do a make from the top level package lisp source directory.[1] +It is important that you let other developers know that +synchronization has taken place, to what degree, and when. For this +purpose, we use comments of the form -**** Do `make bindist's on all the packages you wish to install and - remove the byproduct .tar.gz's. +/* Synched up with: FSF 21.3 by Stephen Turnbull */ -*** Phase 3 -- If necessary, redump XEmacs - with the packages that require dump-time support and install it. +in the source file itself, as the last element of the prefatory +material (copyright notice and commentary). Obviously the comment +marker needs to be changed to leading semicolons for Lisp, but +otherwise the format is the same. -**** Reconfigure without Mule if you don't wish a Mule-ish XEmacs, and - rebuild XEmacs. +Of course you should note syncing as the purpose in the ChangeLog, +too. But entries get buried deep in the ChangeLog file, and may even +get moved to a separate ChangeLog.OLD file for rarely synched files. -- or - - -**** rm lib-src/DOC src/xemacs; make +Rather than dates we use the version of GNU Emacs to sync to. If the +synchronization is partial, add a new comment describing what has +actually been synched, leaving the description of the last full sync +in place. At each full sync, remove all previous synchronization +comments. -**** Install or run in-place. - -Note that this is in essence what `make all-elc' has always done. +This applies to Lisp that we have broken out into packages, but +remains in the GNU Emacs core, as well as to core Lisp in XEmacs. 
diff -r ce9bdd48654f -r ccaf90c5a53a lisp/ChangeLog --- a/lisp/ChangeLog Tue Oct 01 21:55:21 2002 +0000 +++ b/lisp/ChangeLog Wed Oct 02 09:31:40 2002 +0000 @@ -1,3 +1,14 @@ +2002-09-22 Ville Skyttä + + * files.el (auto-mode-alist): Move entries for modes in packages + to the corresponding modes. + (interpreter-mode-alist): Ditto. + +2002-09-16 Stephen J. Turnbull + + * obsolete.el (parse-sexp-lookup-properties): new compatible alias + for `lookup-syntax-properties'. RFE from Alan Mackenzie. + 2002-08-08 Jerry James * autoload.el (make-c-autoload): New function for reading autoload diff -r ce9bdd48654f -r ccaf90c5a53a lisp/files.el --- a/lisp/files.el Tue Oct 01 21:55:21 2002 +0000 +++ b/lisp/files.el Wed Oct 02 09:31:40 2002 +0000 @@ -1222,9 +1222,11 @@ :error-form nil (hack-local-variables (not find-file))))) -;; #### This variable sucks in the package model. There should be a -;; way for new packages to add their entries to auto-mode-alist in a -;; clean way. Per Abrahamsen suggested splitting auto-mode-alist to +;; `auto-mode-alist' used to contain entries for modes in core and in packages. +;; The applicable entries are now located in the corresponding modes in +;; packages, the ones here are for core modes. Ditto for +;; `interpreter-mode-alist' below. +;; Per Abrahamsen suggested splitting auto-mode-alist to ;; several distinct variables such as, in order of precedence, ;; `user-auto-mode-alist' for users, `package-auto-mode-alist' for ;; packages and `auto-mode-alist' (which might also be called @@ -1233,82 +1235,20 @@ (defvar auto-mode-alist '(("\\.te?xt\\'" . text-mode) - ("\\.[chi]\\'" . c-mode) ("\\.el\\'" . emacs-lisp-mode) - ("\\.\\(?:[CH]\\|cc\\|hh\\)\\'" . c++-mode) - ("\\.[ch]\\(pp\\|xx\\|\\+\\+\\)\\'" . c++-mode) - ("\\.java\\'" . java-mode) - ("\\.idl\\'" . idl-mode) - ("\\.f\\(?:or\\)?\\'" . fortran-mode) - ("\\.F\\(?:OR\\)?\\'" . fortran-mode) - ("\\.[fF]90\\'" . 
f90-mode) -;;; Less common extensions come here -;;; so more common ones above are found faster. - ("\\.\\([pP][Llm]\\|al\\)\\'" . perl-mode) - ("\\.py\\'" . python-mode) - ("\\.texi\\(?:nfo\\)?\\'" . texinfo-mode) - ("\\.ad[abs]\\'" . ada-mode) ("\\.c?l\\(?:i?sp\\)?\\'" . lisp-mode) - ("\\.p\\(?:as\\)?\\'" . pascal-mode) - ("\\.ltx\\'" . latex-mode) - ("\\.[sS]\\'" . asm-mode) - ("[Cc]hange.?[Ll]og?\\(?:.[0-9]+\\)?\\'" . change-log-mode) - ("\\$CHANGE_LOG\\$\\.TXT" . change-log-mode) - ("\\.scm?\\(?:\\.[0-9]*\\)?\\'" . scheme-mode) - ("\\.e\\'" . eiffel-mode) - ("\\.mss\\'" . scribe-mode) - ("\\.m\\(?:[mes]\\|an\\)\\'" . nroff-mode) - ("\\.icn\\'" . icon-mode) - ("\\.\\(?:[ckz]?sh\\|shar\\)\\'" . sh-mode) - ("\\.[Pp][Rr][Oo]\\'" . idlwave-mode) - ("\\.si\\(v\\|eve\\)\\'" . sieve-mode) - ;; #### Unix-specific! - ("/\\.\\(?:bash_\\|z\\)?\\(profile\\|login\\|logout\\)\\'" . sh-mode) - ("/\\.\\(?:[ckz]sh\\|bash\\|tcsh\\|es\\|xinit\\|startx\\)rc\\'" . sh-mode) - ("/\\.\\(?:[kz]shenv\\|xsession\\)\\'" . sh-mode) - ("\\.m?spec$" .sh-mode) - ;; The following come after the ChangeLog pattern for the sake of - ;; ChangeLog.1, etc. and after the .scm.[0-9] pattern too. - ("\\.[123456789]\\'" . nroff-mode) - ("\\.[tT]e[xX]\\'" . tex-mode) - ("\\.\\(?:sty\\|cls\\|bbl\\)\\'" . latex-mode) - ("\\.bib\\'" . bibtex-mode) ("\\.article\\'" . text-mode) ("\\.letter\\'" . text-mode) - ("\\.\\(?:tcl\\|exp\\)\\'" . tcl-mode) - ("\\.wrl\\'" . vrml-mode) - ("\\.awk\\'" . awk-mode) - ("\\.prolog\\'" . prolog-mode) - ("\\.\\(?:arc\\|zip\\|lzh\\|zoo\\)\\'" . archive-mode) ;; Mailer puts message to be edited in /tmp/Re.... or Message ;; #### Unix-specific! ("\\`/tmp/Re" . text-mode) ("/Message[0-9]*\\'" . text-mode) - ("/drafts/[0-9]+\\'" . mh-letter-mode) ;; some news reader is reported to use this ("^/tmp/fol/" . text-mode) - ("\\.y\\'" . c-mode) - ("\\.lex\\'" . c-mode) - ("\\.m\\'" . objc-mode) - ("\\.oak\\'" . scheme-mode) - ("\\.[sj]?html?\\'" . html-mode) - ("\\.jsp\\'" . 
html-mode) - ("\\.xml\\'" . xml-mode) - ("\\.\\(?:sgml?\\|dtd\\)\\'" . sgml-mode) - ("\\.c?ps\\'" . postscript-mode) ;; .emacs following a directory delimiter in either Unix or ;; Windows syntax. ("[/\\][._].*emacs\\'" . emacs-lisp-mode) - ("\\.m4\\'" . autoconf-mode) - ("configure\\.\\(in\\|ac\\)\\'" . autoconf-mode) ("\\.ml\\'" . lisp-mode) - ("\\.ma?ke?\\'" . makefile-mode) - ("\\(GNU\\)?[Mm]akefile\\(\\.\\|\\'\\)" . makefile-mode) - ("[./\\]X\\(defaults\\|environment\\|resources\\|modmap\\)\\'" . xrdb-mode) - ;; #### The following three are Unix-specific (but do we care?) - ("/app-defaults/" . xrdb-mode) - ("\\.[^/]*wm2?\\(?:rc\\)?\\'" . winmgr-mode) - ("\\.\\(?:jpe?g\\|JPE?G\\|png\\|PNG\\|gif\\|GIF\\|tiff?\\|TIFF?\\)\\'" . image-mode) ) "Alist of filename patterns vs. corresponding major mode functions. Each element looks like (REGEXP . FUNCTION) or (REGEXP FUNCTION NON-NIL). @@ -1321,17 +1261,7 @@ REGEXP and search the list again for another match.") (defvar interpreter-mode-alist - '(("^#!.*csh" . sh-mode) - ("^#!.*\\b\\(scope\\|wish\\|tcl\\|tclsh\\|expect\\)" . tcl-mode) - ("^#!.*sh\\b" . sh-mode) - ("perl" . perl-mode) - ("python" . python-mode) - ("awk\\b" . awk-mode) - ("rexx" . rexx-mode) - ("scm\\|guile" . scheme-mode) - ("emacs" . emacs-lisp-mode) - ("make" . makefile-mode) - ("^:" . sh-mode)) + '(("emacs" . emacs-lisp-mode)) "Alist mapping interpreter names to major modes. This alist is used to guess the major mode of a file based on the contents of the first line. 
This line often contains something like: diff -r ce9bdd48654f -r ccaf90c5a53a lisp/obsolete.el --- a/lisp/obsolete.el Tue Oct 01 21:55:21 2002 +0000 +++ b/lisp/obsolete.el Wed Oct 02 09:31:40 2002 +0000 @@ -107,6 +107,8 @@ (make-obsolete 'set-window-dot 'set-window-point) (define-obsolete-function-alias 'extent-buffer 'extent-object) +(define-compatible-variable-alias 'parse-sexp-lookup-properties + 'lookup-syntax-properties) ;;;;;;;;;;;;;;;;;;;;;;;;;;;; frames (defun frame-first-window (frame) diff -r ce9bdd48654f -r ccaf90c5a53a man/ChangeLog --- a/man/ChangeLog Tue Oct 01 21:55:21 2002 +0000 +++ b/man/ChangeLog Wed Oct 02 09:31:40 2002 +0000 @@ -1,3 +1,14 @@ +2002-09-20 Stephen J. Turnbull + + * internals/internals.texi (Techniques for XEmacs Developers): + More performance optimization hints. + (Modules for Other Aspects of the Lisp Interpreter and Object System): + Describe syntax code internals. + + * lispref/syntax.texi (Syntax Basics): XEmacs "20" -> "20 and later". + (Syntax Class Table): Deprecate SPC as whitespace designator. + (Syntax Flags): Rewrite for `8-bit' comment syntax flags. + 2002-08-30 Steve Youngs * XEmacs 21.5.9 "brussels sprouts" is released. diff -r ce9bdd48654f -r ccaf90c5a53a man/internals/internals.texi --- a/man/internals/internals.texi Tue Oct 01 21:55:21 2002 +0000 +++ b/man/internals/internals.texi Wed Oct 02 09:31:40 2002 +0000 @@ -3243,7 +3243,14 @@ If you want to make XEmacs faster, target your favorite slow benchmark, run a profiler like Quantify, @code{gprof}, or @code{tcov}, and figure -out where the cycles are going. Specific projects: +out where the cycles are going. In many cases you can localize the +problem (because a particular new feature or even a single patch +elicited it). Don't hesitate to use brute force techniques like a +global counter incremented at strategic places, especially in +combination with other performance indications (@emph{e.g.}, degree of +buffer fragmentation into extents). 
+ +Specific projects: @itemize @bullet @item @@ -3256,8 +3263,16 @@ @item Speed up redisplay. @item -Speed up syntax highlighting. Maybe moving some of the syntax -highlighting capabilities into C would make a difference. +Speed up syntax highlighting. It was suggested that ``maybe moving some +of the syntax highlighting capabilities into C would make a +difference.'' Wrong idea, I think. When processing one large file a +particular low-level routine was being called 40 @emph{million} times +simply for @emph{one} call to @code{newline-and-indent}. Syntax +highlighting needs to be rewritten to use a reliable, fast parser, then +to trust the pre-parsed structure, and only do re-highlighting locally +to a text change. Modern machines are fast enough to implement such +parsers in Lisp; but no machine will ever be fast enough to deal with +quadratic (or worse) algorithms! @item Implement tail recursion in Emacs Lisp (hard!). @end itemize @@ -4772,6 +4787,27 @@ @code{forward-sexp}, and by @file{font-lock.c} to locate quoted strings, comments, etc. +@c #### Break this out into a separate node somewhere! +Syntax codes are implemented as bitfields in an int. Bits 0-6 contain +the syntax code itself, bit 7 is a special prefix flag used for Lisp, +and bits 16-23 contain comment syntax flags. From the Lisp programmer's +point of view, there are 11 flags: 2 styles X 2 characters X @{start, +end@} flags for two-character comment delimiters, 2 style flags for +one-character comment delimiters, and the prefix flag. + +Internally, however, the characters used in multi-character delimiters +will have non-comment-character syntax classes (@emph{e.g.}, the +@samp{/} in C's @samp{/*} comment-start delimiter has ``punctuation'' +(here meaning ``operator-like'') class in C modes). 
Thus a mixed +comment style, such as C++'s @samp{//} to end of line, is represented by +giving @samp{/} the ``punctuation'' class and the ``style b first +character of start sequence'' and ``style b second character of start +sequence'' flags. The fact that the class is @emph{not} ``comment-start'' allows +the syntax scanner to recognize that this is a multi-character +delimiter. The @samp{newline} character is given (single-character) +``comment-end'' @emph{class} and the ``style b first character of end +sequence'' @emph{flag}. The ``comment-end'' class allows the scanner to +determine that no second character is needed to terminate the comment. @example diff -r ce9bdd48654f -r ccaf90c5a53a man/lispref/syntax.texi --- a/man/lispref/syntax.texi Tue Oct 01 21:55:21 2002 +0000 +++ b/man/lispref/syntax.texi Wed Oct 02 09:31:40 2002 +0000 @@ -42,7 +42,7 @@ this chapter. @end ifinfo - Under XEmacs 20, a syntax table is a particular subtype of the + Under XEmacs 20 and later, a syntax table is a particular subtype of the primitive char table type (@pxref{Char Tables}), and each element of the char table is an integer that encodes the syntax of the character in question, or a cons of such an integer and a matching character (for @@ -133,11 +133,13 @@ their meanings, and examples of their use. @deffn {Syntax class} @w{whitespace character} -@dfn{Whitespace characters} (designated with @w{@samp{@ }} or @samp{-}) +@dfn{Whitespace characters} (designated with @samp{-}) separate symbols and words from each other. Typically, whitespace characters have no other syntactic significance, and multiple whitespace characters are syntactically equivalent to a single one. Space, tab, -newline and formfeed are almost always classified as whitespace. +newline and formfeed are almost always classified as whitespace. (The +designator @w{@samp{@ }} is accepted for backwards compatibility with +older versions of XEmacs, but is deprecated. It is invalid in GNU Emacs.) 
@end deffn @deffn {Syntax class} @w{word constituent} @@ -268,42 +270,23 @@ @subsection Syntax Flags @cindex syntax flags +@c This is a bit inaccurate, the ``a'' and ``b'' flags actually don't +@c exist in the internal implementation. AFAICT it doesn't affect the +@c semantics as perceived by the LISP programmer. In addition to the classes, entries for characters in a syntax table -can include flags. There are six possible flags, represented by the -characters @samp{1}, @samp{2}, @samp{3}, @samp{4}, @samp{b} and -@samp{p}. +can include flags. There are eleven possible flags, represented by the +digits @samp{1}--@samp{8}, and the lowercase letters @samp{a}, @samp{b}, +and @samp{p}. - All the flags except @samp{p} are used to describe multi-character -comment delimiters. The digit flags indicate that a character can -@emph{also} be part of a comment sequence, in addition to the syntactic -properties associated with its character class. The flags are + All the flags except @samp{p} are used to describe comment delimiters. +The digit flags indicate that a character can @emph{also} be part of a +multi-character comment sequence, in addition to the syntactic +properties associated with its character class. The flags must be independent of the class and each other for the sake of characters such as @samp{*} in C mode, which is a punctuation character, @emph{and} the second character of a start-of-comment sequence (@samp{/*}), @emph{and} the first character of an end-of-comment sequence (@samp{*/}). -The flags for a character @var{c} are: - -@itemize @bullet -@item -@samp{1} means @var{c} is the start of a two-character comment-start -sequence. - -@item -@samp{2} means @var{c} is the second character of such a sequence. - -@item -@samp{3} means @var{c} is the start of a two-character comment-end -sequence. - -@item -@samp{4} means @var{c} is the second character of such a sequence. 
- -@item -@c Emacs 19 feature -@samp{b} means that @var{c} as a comment delimiter belongs to the -alternative ``b'' comment style. - Emacs supports two comment styles simultaneously in any one syntax table. This is for the sake of C++. Each style of comment syntax has its own comment-start sequence and its own comment-end sequence. Each @@ -311,44 +294,59 @@ the comment-start sequence of style ``b'', it must also end with the comment-end sequence of style ``b''. -The two comment-start sequences must begin with the same character; only -the second character may differ. Mark the second character of the -``b''-style comment-start sequence with the @samp{b} flag. - -A comment-end sequence (one or two characters) applies to the ``b'' -style if its first character has the @samp{b} flag set; otherwise, it -applies to the ``a'' style. +@c #### Compatibility note; index here. +As an extension to GNU Emacs 19 and 20, XEmacs supports two arbitrary +comment-start sequences and two arbitrary comment-end sequences. (Thus +the need for 8 flags.) GNU Emacs restricts the comment-start sequences +to start with the same character, XEmacs does not. This means that for +two-character sequences, where GNU Emacs uses the @samp{b} flag, XEmacs +uses the digit flags @samp{5}--@samp{8}. -The appropriate comment syntax settings for C++ are as follows: +A one character comment-end sequence applies to the ``b'' style if its +first character has the @samp{b} flag set; otherwise, it applies to the +``a'' style. The @samp{a} flag is optional. These flags have no effect +on non-comment characters; two-character styles are determined by the +digit flags. + +The flags for a character @var{c} are: -@table @asis -@item @samp{/} -@samp{124b} -@item @samp{*} -@samp{23} -@item newline -@samp{>b} -@end table +@itemize @bullet +@item +@samp{1} means @var{c} is the start of a two-character comment-start +sequence of style ``a''. + +@item +@samp{2} means @var{c} is the second character of such a sequence. 
-This defines four comment-delimiting sequences: +@item +@samp{3} means @var{c} is the start of a two-character comment-end +sequence of style ``a''. + +@item +@samp{4} means @var{c} is the second character of such a sequence. -@table @asis -@item @samp{/*} -This is a comment-start sequence for ``a'' style because the -second character, @samp{*}, does not have the @samp{b} flag. +@item +@samp{5} means @var{c} is the start of a two-character comment-start +sequence of style ``b''. + +@item +@samp{6} means @var{c} is the second character of such a sequence. -@item @samp{//} -This is a comment-start sequence for ``b'' style because the second -character, @samp{/}, does have the @samp{b} flag. +@item +@samp{7} means @var{c} is the start of a two-character comment-end +sequence of style ``b''. + +@item +@samp{8} means @var{c} is the second character of such a sequence. -@item @samp{*/} -This is a comment-end sequence for ``a'' style because the first -character, @samp{*}, does not have the @samp{b} flag +@item +@samp{a} means that @var{c} as a comment delimiter belongs to the +default ``a'' comment style. (This flag is optional.) -@item newline -This is a comment-end sequence for ``b'' style, because the newline -character has the @samp{b} flag. -@end table +@item +@c Emacs 19 feature +@samp{b} means that @var{c} as a comment delimiter belongs to the +alternate ``b'' comment style. @item @c Emacs 19 feature @@ -362,6 +360,62 @@ prefix (@samp{'}). @xref{Motion and Syntax}. @end itemize +Lisp (as you would expect) has a simple comment syntax. + +@table @asis +@item @samp{;} +@samp{<} +@item newline +@samp{>} +@end table + +Note that no flags are used. +This defines two comment-delimiting sequences: + +@table @asis +@item @samp{;} +This is a single-character comment-start sequence because the syntax +class is @samp{<}. + +@item newline +This is a single character comment-end sequence because the syntax class +is @samp{>} and the @samp{b} flag is not set. 
+@end table + +C++ (again, as you would expect) has a baroque, overrich, and +excessively complex comment syntax. + +@table @asis +@item @samp{/} +@samp{1456} +@item @samp{*} +@samp{23} +@item newline +@samp{>b} +@end table + +Note that the ``b'' style mixes one-character and two-character +sequences. The table above defines four comment-delimiting sequences: + +@table @asis +@item @samp{/*} +This is a comment-start sequence for ``a'' style because the @samp{1} +flag is set on @samp{/} and the @samp{2} flag is set on @samp{*}. + +@item @samp{//} +This is a comment-start sequence for ``b'' style because both the @samp{5} +and the @samp{6} flags are set on @samp{/}. + +@item @samp{*/} +This is a comment-end sequence for ``a'' style because the @samp{3} +flag is set on @samp{*} and the @samp{4} flag is set on @samp{/}. + +@item newline +This is a comment-end sequence for ``b'' style, because the newline +character has the @samp{b} flag. +@end table + + @node Syntax Table Functions @section Syntax Table Functions diff -r ce9bdd48654f -r ccaf90c5a53a man/lispref/variables.texi --- a/man/lispref/variables.texi Tue Oct 01 21:55:21 2002 +0000 +++ b/man/lispref/variables.texi Wed Oct 02 09:31:40 2002 +0000 @@ -725,12 +725,15 @@ One other function for setting a variable is designed to add an element to a list if it is not already present in the list. -@defun add-to-list symbol element +@defun add-to-list symbol element &optional append This function sets the variable @var{symbol} by consing @var{element} onto the old value, if @var{element} is not already a member of that value. It returns the resulting list, whether updated or not. The value of @var{symbol} had better be a list already before the call. +If the optional argument @var{append} is non-@code{nil}, @var{element} +is added at the end of the list. + The argument @var{symbol} is not implicitly quoted; @code{add-to-list} is an ordinary function, like @code{set} and unlike @code{setq}. 
Quote the argument yourself if that is what you want. diff -r ce9bdd48654f -r ccaf90c5a53a src/ChangeLog --- a/src/ChangeLog Tue Oct 01 21:55:21 2002 +0000 +++ b/src/ChangeLog Wed Oct 02 09:31:40 2002 +0000 @@ -1,3 +1,19 @@ +2002-09-09 Stephen J. Turnbull + + * search.c (clear_unused_search_regs): New static function. + (search_buffer): + (simple_search): + (boyer_moore): + Use it. Fixes "stale match data" bug reported by Martin Stjernholm. + Minor clarifications in comments. + + * regex.c (re_match_2_internal): Ensure no stale submatches. + +2002-09-26 Golubev I. N. + + * frame-x.c (x_delete_frame): do not call XtDisplay on a destroyed + widget. + 2002-09-22 Mike Sperber * specifier.c (specifier_add_spec): Don't do anything if NILP diff -r ce9bdd48654f -r ccaf90c5a53a src/frame-x.c --- a/src/frame-x.c Tue Oct 01 21:55:21 2002 +0000 +++ b/src/frame-x.c Wed Oct 02 09:31:40 2002 +0000 @@ -2644,14 +2644,14 @@ dpy = XtDisplay (FRAME_X_SHELL_WIDGET (f)); #ifdef EXTERNAL_WIDGET - expect_x_error (XtDisplay (FRAME_X_SHELL_WIDGET (f))); + expect_x_error (dpy); /* for obscure reasons having (I think) to do with the internal window-to-widget hierarchy maintained by Xt, we have to call XtUnrealizeWidget() here. Xt can really suck. */ if (f->being_deleted) XtUnrealizeWidget (FRAME_X_SHELL_WIDGET (f)); XtDestroyWidget (FRAME_X_SHELL_WIDGET (f)); - x_error_occurred_p (XtDisplay (FRAME_X_SHELL_WIDGET (f))); + x_error_occurred_p (dpy); #else XtDestroyWidget (FRAME_X_SHELL_WIDGET (f)); /* make sure the windows are really gone! */ diff -r ce9bdd48654f -r ccaf90c5a53a src/regex.c --- a/src/regex.c Tue Oct 01 21:55:21 2002 +0000 +++ b/src/regex.c Wed Oct 02 09:31:40 2002 +0000 @@ -4916,16 +4916,24 @@ (regoff_t) POINTER_TO_OFFSET (regend[internal_reg]); } } - - /* If the regs structure we return has more elements than - were in the pattern, set the extra elements to -1. 
If - we (re)allocated the registers, this is the case, - because we always allocate enough to have at least one - -1 at the end. */ - for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) - regs->start[mcnt] = regs->end[mcnt] = -1; } /* regs && !bufp->no_sub */ + /* If we have regs and the regs structure has more elements than + were in the pattern, set the extra elements to -1. If we + (re)allocated the registers, this is the case, because we + always allocate enough to have at least one -1 at the end. + + We do this even when no_sub is set because some applications + (XEmacs) reuse register structures which may contain stale + information, and permit attempts to access those registers. + + It would be possible to require the caller to do this, but we'd + have to change the API for this function to reflect that, and + audit all callers. */ + if (regs && regs->num_regs > 0) + for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) + regs->start[mcnt] = regs->end[mcnt] = -1; + DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", nfailure_points_pushed, nfailure_points_popped, nfailure_points_pushed - nfailure_points_popped); diff -r ce9bdd48654f -r ccaf90c5a53a src/search.c --- a/src/search.c Tue Oct 01 21:55:21 2002 +0000 +++ b/src/search.c Wed Oct 02 09:31:40 2002 +0000 @@ -111,6 +111,7 @@ Lisp_Object Vskip_chars_range_table; static void set_search_regs (struct buffer *buf, Charbpos beg, Charcount len); +static void clear_unused_search_regs (struct re_registers *regp, int no_sub); static Charbpos simple_search (struct buffer *buf, Ibyte *base_pat, Bytecount len, Bytebpos pos, Bytebpos lim, EMACS_INT n, Lisp_Object trt); @@ -1222,10 +1223,11 @@ if (len == 0) { set_search_regs (buf, charbpos, 0); + clear_unused_search_regs (&search_regs, 0); return charbpos; } - /* Searching 0 times means don't move. */ + /* Searching 0 times means noop---don't move, don't touch registers. 
*/ if (n == 0) return charbpos; @@ -1481,6 +1483,7 @@ end = bytebpos_to_charbpos (buf, pos + buf_len); } set_search_regs (buf, beg, end - beg); + clear_unused_search_regs (&search_regs, 0); return retval; } @@ -1844,6 +1847,7 @@ Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len); set_search_regs (buf, bufstart, bufend - bufstart); + clear_unused_search_regs (&search_regs, 0); } if ((n -= direction) != 0) @@ -1934,6 +1938,7 @@ Charbpos bufend = bytebpos_to_charbpos (buf, bytstart + len); set_search_regs (buf, bufstart, bufend - bufstart); + clear_unused_search_regs (&search_regs, 0); } if ((n -= direction) != 0) @@ -1953,8 +1958,8 @@ return bytebpos_to_charbpos (buf, pos); } -/* Record beginning BEG and end BEG + LEN - for a match just found in the current buffer. */ +/* Record the whole-match data (beginning BEG and end BEG + LEN) and the + buffer for a match just found. */ static void set_search_regs (struct buffer *buf, Charbpos beg, Charcount len) @@ -1973,6 +1978,24 @@ last_thing_searched = wrap_buffer (buf); } +/* Clear unused search registers so match data will be null. + REGP is a pointer to the register structure to clear, usually the global + search_regs. + NO_SUB is the number of subexpressions to allow for. (Does not count + the whole match, ie, for a string search NO_SUB == 0.) + It is an error if NO_SUB > REGP.num_regs - 1. */ + +static void +clear_unused_search_regs (struct re_registers *regp, int no_sub) +{ + /* This function has been Mule-ized. */ + int i; + + assert (no_sub >= 0 && no_sub < regp->num_regs); + for (i = no_sub + 1; i < regp->num_regs; i++) + regp->start[i] = regp->end[i] = -1; +} + /* Given a string of words separated by word delimiters, compute a regexp that matches those exact words diff -r ce9bdd48654f -r ccaf90c5a53a tests/ChangeLog --- a/tests/ChangeLog Tue Oct 01 21:55:21 2002 +0000 +++ b/tests/ChangeLog Wed Oct 02 09:31:40 2002 +0000 @@ -1,3 +1,15 @@ +2002-09-09 Stephen J. 
Turnbull + + * automated/regexp-tests.el: Add test for stale subexpr match-data. + Thanks to Martin Stjernholm for the report. + + * automated/syntax-tests.el: Conditionalize syntax-table property + tests on feature. Enable feature if present. + +2002-09-12 Stephen J. Turnbull + + * automated/regexp-tests.el: Add word-boundary regexp tests. + 2002-08-30 Steve Youngs * XEmacs 21.5.9 "brussels sprouts" is released. diff -r ce9bdd48654f -r ccaf90c5a53a tests/automated/regexp-tests.el --- a/tests/automated/regexp-tests.el Tue Oct 01 21:55:21 2002 +0000 +++ b/tests/automated/regexp-tests.el Wed Oct 02 09:31:40 2002 +0000 @@ -213,3 +213,41 @@ (looking-at "Unmatchable text") (replace-match "") (Assert (looking-at "^buffer.$"))) + +;; Test that trivial regexps reset unused registers +;; Thanks to Martin Stjernholm for the report. +;; xemacs-beta <5blm6h2ki5.fsf@lister.roxen.com> +(with-temp-buffer + (insert "ab") + (goto-char (point-min)) + (re-search-forward "\\(a\\)") + ;; test the whole-match data, too -- one try scotched that, too! + (Assert (string= (match-string 0) "a")) + (Assert (string= (match-string 1) "a")) + (re-search-forward "b") + (Assert (string= (match-string 0) "b")) + (Assert (string= (match-string 1) nil))) + +;; Test word boundaries +(Assert (= (string-match " \\<a" " a") 0)) +(Assert (= (string-match "a\\> " "a ") 0)) +(Assert (= (string-match " \\ba" " a") 0)) +(Assert (= (string-match "a\\b " "a ") 0)) +(Assert (= (string-match "\\ba" " a") 1)) +(Assert (= (string-match "a\\b" "a ") 0)) +;; should work at target boundaries +(Assert (= (string-match "\\<a" "a") 0)) +(Assert (= (string-match "a\\>" "a") 0)) +(Assert (= (string-match "\\ba" "a") 0)) +(Assert (= (string-match "a\\b" "a") 0)) +;; but not if the "word" would be on the null side of the boundary!
+(Assert (not (string-match "\\<" ""))) +(Assert (not (string-match "\\>" ""))) +(Assert (not (string-match " \\<" " "))) +(Assert (not (string-match "\\> " " "))) +(Assert (not (string-match "a\\<" "a"))) +(Assert (not (string-match "\\>a" "a"))) +;; Expect these to fail :-( +(Assert (not (string-match "\\b" ""))) +(Assert (not (string-match " \\b" " "))) +(Assert (not (string-match "\\b " " "))) diff -r ce9bdd48654f -r ccaf90c5a53a tests/automated/syntax-tests.el --- a/tests/automated/syntax-tests.el Tue Oct 01 21:55:21 2002 +0000 +++ b/tests/automated/syntax-tests.el Wed Oct 02 09:31:40 2002 +0000 @@ -107,16 +107,20 @@ ;; can be in the form (start . end), or can be a ;; character position. (defun test-syntax-table (string apply-pos apply-syntax stop) - (goto-char (point-max)) - (unless (consp apply-pos) - (setq apply-pos `(,apply-pos . ,(+ 1 apply-pos)))) - (let ((point (point))) - (insert string) - (put-text-property (+ point (car apply-pos)) (+ point (cdr apply-pos)) - 'syntax-table apply-syntax) - (goto-char point) - (forward-word 1) - (Assert (eq (point) (+ point stop))))) + ;; We don't necessarily have syntax-table properties ... + (when (fboundp 'lookup-syntax-properties) ; backwards compatible kludge + ;; ... and they may not be enabled by default if we do. + (setq lookup-syntax-properties t) + (goto-char (point-max)) + (unless (consp apply-pos) + (setq apply-pos `(,apply-pos . ,(+ 1 apply-pos)))) + (let ((point (point))) + (insert string) + (put-text-property (+ point (car apply-pos)) (+ point (cdr apply-pos)) + 'syntax-table apply-syntax) + (goto-char point) + (forward-word 1) + (Assert (eq (point) (+ point stop)))))) ;; test syntax-table extents (with-temp-buffer @@ -126,6 +130,8 @@ (test-syntax-table "W." 1 `(,(syntax-string-to-code "w")) 2)) ;; Test forward-comment at buffer boundaries +;; #### The second Assert fails (once interpreted, once compiled) on 21.4.9 +;; with sjt's version of Andy's syntax-text-property-killer patch. 
(with-temp-buffer (if (not (fboundp 'c-mode)) ;; #### This whole thing should go inside a macro Skip-Test