view src/doc.c @ 665:fdefd0186b75

[xemacs-hg @ 2001-09-20 06:28:42 by ben] The great integral types renaming. The purpose of this is to rationalize the names used for various integral types, so that they match their intended uses and follow consistent conventions, and eliminate types that were not semantically different from each other. The conventions are: -- All integral types that measure quantities of anything are signed. Some people disagree vociferously with this, but their arguments are mostly theoretical, and are vastly outweighed by the practical headaches of mixing signed and unsigned values, and more importantly by the far increased likelihood of inadvertent bugs: Because of the broken "viral" nature of unsigned quantities in C (operations involving mixed signed/unsigned are done unsigned, when exactly the opposite is nearly always wanted), even a single error in declaring a quantity unsigned that should be signed, or the even more subtle error of comparing signed and unsigned values and forgetting the necessary cast, can be catastrophic, as comparisons will yield wrong results. -Wsign-compare is turned on specifically to catch this, but this tends to result in a great number of warnings when mixing signed and unsigned, and the casts are annoying. More has been written on this elsewhere. -- All such quantity types just mentioned boil down to EMACS_INT, which is 32 bits on 32-bit machines and 64 bits on 64-bit machines. This is guaranteed to be the same size as Lisp objects of type `int', and (as far as I can tell) of size_t (unsigned!) and ssize_t. The only type below that is not an EMACS_INT is Hashcode, which is an unsigned value of the same size as EMACS_INT. -- Type names should be relatively short (no more than 10 characters or so), with the first letter capitalized and no underscores if they can at all be avoided. -- "count" == a zero-based measurement of some quantity. Includes sizes, offsets, and indexes. -- "bpos" == a one-based measurement of a position in a buffer. 
"Charbpos" and "Bytebpos" count text in the buffer, rather than bytes in memory; thus Bytebpos does not directly correspond to the memory representation. Use "Membpos" for this. -- "Char" refers to internal-format characters, not to the C type "char", which is really a byte. -- For the actual name changes, see the script below. I ran the following script to do the conversion. (NOTE: This script is idempotent. You can safely run it multiple times and it will not screw up previous results -- in fact, it will do nothing if nothing has changed. Thus, it can be run repeatedly as necessary to handle patches coming in from old workspaces, or old branches.) There are two tags, just before and just after the change: `pre-integral-type-rename' and `post-integral-type-rename'. When merging code from the main trunk into a branch, the best thing to do is first merge up to `pre-integral-type-rename', then apply the script and associated changes, then merge from `post-integral-type-rename' to the present. (Alternatively, just do the merging in one operation; but you may then have a lot of conflicts needing to be resolved by hand.) 
Script `fixtypes.sh' follows: ----------------------------------- cut ------------------------------------ files="*.[ch] s/*.h m/*.h config.h.in ../configure.in Makefile.in.in ../lib-src/*.[ch] ../lwlib/*.[ch]" gr Memory_Count Bytecount $files gr Lstream_Data_Count Bytecount $files gr Element_Count Elemcount $files gr Hash_Code Hashcode $files gr extcount bytecount $files gr bufpos charbpos $files gr bytind bytebpos $files gr memind membpos $files gr bufbyte intbyte $files gr Extcount Bytecount $files gr Bufpos Charbpos $files gr Bytind Bytebpos $files gr Memind Membpos $files gr Bufbyte Intbyte $files gr EXTCOUNT BYTECOUNT $files gr BUFPOS CHARBPOS $files gr BYTIND BYTEBPOS $files gr MEMIND MEMBPOS $files gr BUFBYTE INTBYTE $files gr MEMORY_COUNT BYTECOUNT $files gr LSTREAM_DATA_COUNT BYTECOUNT $files gr ELEMENT_COUNT ELEMCOUNT $files gr HASH_CODE HASHCODE $files ----------------------------------- cut ------------------------------------ `fixtypes.sh' is a Bourne-shell script; it uses 'gr': ----------------------------------- cut ------------------------------------ #!/bin/sh # Usage is like this: # gr FROM TO FILES ... # globally replace FROM with TO in FILES. FROM and TO are regular expressions. # backup files are stored in the `backup' directory. from="$1" to="$2" shift 2 echo ${1+"$@"} | xargs global-replace "s/$from/$to/g" ----------------------------------- cut ------------------------------------ `gr' in turn uses a Perl script to do its real work, `global-replace', which follows: ----------------------------------- cut ------------------------------------ : #-*- Perl -*- ### global-modify --- modify the contents of a file by a Perl expression ## Copyright (C) 1999 Martin Buchholz. ## Copyright (C) 2001 Ben Wing. 
## Authors: Martin Buchholz <martin@xemacs.org>, Ben Wing <ben@xemacs.org> ## Maintainer: Ben Wing <ben@xemacs.org> ## Current Version: 1.0, May 5, 2001 # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with XEmacs; see the file COPYING. If not, write to the Free # Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA # 02111-1307, USA. eval 'exec perl -w -S $0 ${1+"$@"}' if 0; use strict; use FileHandle; use Carp; use Getopt::Long; use File::Basename; (my $myName = $0) =~ s@.*/@@; my $usage=" Usage: $myName [--help] [--backup-dir=DIR] [--line-mode] [--hunk-mode] PERLEXPR FILE ... Globally modify a file, either line by line or in one big hunk. Typical usage is like this: [with GNU print, GNU xargs: guaranteed to handle spaces, quotes, etc. in file names] find . -name '*.[ch]' -print0 | xargs -0 $0 's/\bCONST\b/const/g'\n [with non-GNU print, xargs] find . -name '*.[ch]' -print | xargs $0 's/\bCONST\b/const/g'\n The file is read in, either line by line (with --line-mode specified) or in one big hunk (with --hunk-mode specified; it's the default), and the Perl expression is then evalled with \$_ set to the line or hunk of text, including the terminating newline if there is one. It should destructively modify the value there, storing the changed result in \$_. Files in which any modifications are made are backed up to the directory specified using --backup-dir, or to `backup' by default. To disable this, use --backup-dir= with no argument. 
Hunk mode is the default because it is MUCH MUCH faster than line-by-line. Use line-by-line only when it matters, e.g. you want to do a replacement only once per line (the default without the `g' argument). Conversely, when using hunk mode, *ALWAYS* use `g'; otherwise, you will only make one replacement in the entire file! "; my %options = (); $Getopt::Long::ignorecase = 0; &GetOptions ( \%options, 'help', 'backup-dir=s', 'line-mode', 'hunk-mode', ); die $usage if $options{"help"} or @ARGV <= 1; my $code = shift; die $usage if grep (-d || ! -w, @ARGV); sub SafeOpen { open ((my $fh = new FileHandle), $_[0]); confess "Can't open $_[0]: $!" if ! defined $fh; return $fh; } sub SafeClose { close $_[0] or confess "Can't close $_[0]: $!"; } sub FileContents { my $fh = SafeOpen ("< $_[0]"); my $olddollarslash = $/; local $/ = undef; my $contents = <$fh>; $/ = $olddollarslash; return $contents; } sub WriteStringToFile { my $fh = SafeOpen ("> $_[0]"); binmode $fh; print $fh $_[1] or confess "$_[0]: $!\n"; SafeClose $fh; } foreach my $file (@ARGV) { my $changed_p = 0; my $new_contents = ""; if ($options{"line-mode"}) { my $fh = SafeOpen $file; while (<$fh>) { my $save_line = $_; eval $code; $changed_p = 1 if $save_line ne $_; $new_contents .= $_; } } else { my $orig_contents = $_ = FileContents $file; eval $code; if ($_ ne $orig_contents) { $changed_p = 1; $new_contents = $_; } } if ($changed_p) { my $backdir = $options{"backup-dir"}; $backdir = "backup" if !defined ($backdir); if ($backdir) { my ($name, $path, $suffix) = fileparse ($file, ""); my $backfulldir = $path . 
$backdir; my $backfile = "$backfulldir/$name"; mkdir $backfulldir, 0755 unless -d $backfulldir; print "modifying $file (original saved in $backfile)\n"; rename $file, $backfile; } WriteStringToFile ($file, $new_contents); } } ----------------------------------- cut ------------------------------------ In addition to those programs, I needed to fix up a few other things, particularly relating to the duplicate definitions of types, now that some types merged with others. Specifically: 1. in lisp.h, removed duplicate declarations of Bytecount. The changed code should now look like this: (In each code snippet below, the first and last lines are the same as the original, as are all lines outside of those lines. That allows you to locate the section to be replaced, and replace the stuff in that section, verifying that there isn't anything new added that would need to be kept.) --------------------------------- snip ------------------------------------- /* Counts of bytes or chars */ typedef EMACS_INT Bytecount; typedef EMACS_INT Charcount; /* Counts of elements */ typedef EMACS_INT Elemcount; /* Hash codes */ typedef unsigned long Hashcode; /* ------------------------ dynamic arrays ------------------- */ --------------------------------- snip ------------------------------------- 2. in lstream.h, removed duplicate declaration of Bytecount. Rewrote the comment about this type. The changed code should now look like this: --------------------------------- snip ------------------------------------- #endif /* The have been some arguments over the what the type should be that specifies a count of bytes in a data block to be written out or read in, using Lstream_read(), Lstream_write(), and related functions. Originally it was long, which worked fine; Martin "corrected" these to size_t and ssize_t on the grounds that this is theoretically cleaner and is in keeping with the C standards. 
Unfortunately, this practice is horribly error-prone due to design flaws in the way that mixed signed/unsigned arithmetic happens. In fact, by doing this change, Martin introduced a subtle but fatal error that caused the operation of sending large mail messages to the SMTP server under Windows to fail. By putting all values back to be signed, avoiding any signed/unsigned mixing, the bug immediately went away. The type then in use was Lstream_Data_Count, so that it be reverted cleanly if a vote came to that. Now it is Bytecount. Some earlier comments about why the type must be signed: This MUST BE SIGNED, since it also is used in functions that return the number of bytes actually read to or written from in an operation, and these functions can return -1 to signal error. Note that the standard Unix read() and write() functions define the count going in as a size_t, which is UNSIGNED, and the count going out as an ssize_t, which is SIGNED. This is a horrible design flaw. Not only is it highly likely to lead to logic errors when a -1 gets interpreted as a large positive number, but operations are bound to fail in all sorts of horrible ways when a number in the upper-half of the size_t range is passed in -- this number is unrepresentable as an ssize_t, so code that checks to see how many bytes are actually written (which is mandatory if you are dealing with certain types of devices) will get completely screwed up. --ben */ typedef enum lstream_buffering --------------------------------- snip ------------------------------------- 3. in dumper.c, there are four places, all inside of switch() statements, where XD_BYTECOUNT appears twice as a case tag. In each case, the two case blocks contain identical code, and you should *REMOVE THE SECOND* and leave the first.
author ben
date Thu, 20 Sep 2001 06:31:11 +0000
parents b39c14581166
children 943eaba38521
line wrap: on
line source

/* Record indices of function doc strings stored in a file.
   Copyright (C) 1985, 1986, 1992, 1993, 1994, 1995
   Free Software Foundation, Inc.

This file is part of XEmacs.

XEmacs is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

XEmacs is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with XEmacs; see the file COPYING.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* Synched up with: FSF 19.30. */

/* This file has been Mule-ized except as noted. */

#include <config.h>
#include "lisp.h"

#include "buffer.h"
#include "bytecode.h"
#include "insdel.h"
#include "keymap.h"
#include "sysfile.h"

/* Name of the doc file handed to `Snarf-documentation' (set there);
   get_doc_string reads from this file when a doc reference is a bare
   integer rather than a (FILE . INTEGER) pair. */
Lisp_Object Vinternal_doc_file_name;

/* NOTE(review): not referenced in the portion of this file visible
   here; its initialization and use should be confirmed elsewhere. */
Lisp_Object QSsubstitute;

/* Read and return doc string from open file descriptor FD
   at position POSITION.  Does not close the file.  Returns
   string; or if error, returns a cons holding the error
   data to pass to Fsignal.  NAME_NONRELOC and NAME_RELOC
   are only used for the error messages.

   The text is read up to (not including) the first ^_ (char code 037)
   or up to end of file, whichever comes first; ^A quoting is then
   removed in place before the Lisp string is built. */

Lisp_Object
unparesseuxify_doc_string (int fd, EMACS_INT position,
                           char *name_nonreloc, Lisp_Object name_reloc)
{
  char buf[512 * 32 + 1];
  char *buffer = buf;
  int buffer_size = sizeof (buf);
  char *from, *to;
  REGISTER char *p = buffer;
  Lisp_Object return_me;

  /* Whence 0: POSITION is taken as an absolute file offset. */
  if (0 > lseek (fd, position, 0))
    {
      if (name_nonreloc)
        name_reloc = build_string (name_nonreloc);
      return_me = list3 (build_string
                         ("Position out of range in doc string file"),
                         name_reloc, make_int (position));
      goto done;
    }

  /* Read the doc string into a buffer.
     Use the fixed buffer BUF if it is big enough; otherwise allocate one.
     We store the buffer in use in BUFFER and its size in BUFFER_SIZE.  */

  while (1)
    {
      int used = p - buffer;
      /* One byte is always held back for the null stored after each
         read; without that reserve a read that exactly filled the
         buffer made the terminator land one past its end.  */
      int space_left = buffer_size - used - 1;
      int nread;

      /* Switch to a bigger buffer if we need one.  */
      if (space_left <= 0)
        {
          buffer_size *= 2;
          if (buffer == buf)
            {
              buffer = (char *) xmalloc (buffer_size);
              memcpy (buffer, buf, used);
            }
          else
            buffer = (char *) xrealloc (buffer, buffer_size);
          /* Rebase P from the saved offset rather than by subtracting
             pointers into two different (possibly freed) blocks.  */
          p = buffer + used;
          space_left = buffer_size - used - 1;
        }

      /* Don't read too much at one go.  */
      if (space_left > 1024 * 8)
        space_left = 1024 * 8;
      nread = read (fd, p, space_left);
      if (nread < 0)
        {
          return_me = list1 (build_string
                             ("Read error on documentation file"));
          goto done;
        }
      /* Keep the text null-terminated: the unquoting loop below may
         look one byte past a trailing ^A.  */
      p[nread] = 0;
      if (!nread)
        break;
      {
        /* End of doc string marker.  Search exactly the bytes just
           read, so a stray null byte cannot hide the marker.  */
        char *p1 = (char *) memchr (p, '\037', nread);
        if (p1)
          {
            *p1 = 0;
            p = p1;
            break;
          }
      }
      p += nread;
    }

  /* Scan the text and remove quoting with ^A (char code 1).
     ^A^A becomes ^A, ^A0 becomes a null char, and ^A_ becomes a ^_.  */
  from = to = buffer;
  while (from < p)
    {
      if (*from != 1 /*^A*/)
        *to++ = *from++;
      else
        {
          int c = *(++from);

          from++;
          switch (c)
            {
            case 1:   *to++ =  c;     break;
            case '0': *to++ = '\0';   break;
            case '_': *to++ = '\037'; break;
            default:
              return_me = list2 (build_string
        ("Invalid data in documentation file -- ^A followed by weird code"),
                                 make_int (c));
              goto done;
            }
        }
    }

  /* #### mrb: following STILL completely broken */
  return_me = make_ext_string (buffer, to - buffer, Qbinary);

 done:
  if (buffer != buf) /* We must have allocated buffer above */
    xfree (buffer);
  return return_me;
}

/* Store in DEST the bytes of string S1 followed by the bytes of string
   S2 and a terminating null.  DEST must have room for
   XSTRING_LENGTH (S1) + XSTRING_LENGTH (S2) + 1 bytes.  S1 and S2 are
   evaluated more than once.  The body is wrapped in do ... while (0)
   so that the expansion is a single statement and stays correct
   under an unbraced `if'/`else'. */
#define string_join(dest, s1, s2) do { \
  memcpy ((void *) (dest), (void *) XSTRING_DATA (s1), \
          XSTRING_LENGTH (s1)); \
  memcpy ((void *) ((Intbyte *) (dest) + XSTRING_LENGTH (s1)), \
          (void *) XSTRING_DATA (s2), XSTRING_LENGTH (s2)); \
  (dest)[XSTRING_LENGTH (s1) + XSTRING_LENGTH (s2)] = '\0'; \
} while (0)

/* Fetch a doc string (or byte code instructions/constants stored the
   same way) from a file, as directed by FILEPOS:

   - an integer: that position in the standard DOC file
     (Vinternal_doc_file_name);
   - (FILE . INTEGER): that position in FILE; a negative INTEGER is
     negated first.  (Negative values mark user variables, so they can
     be told apart without fetching the doc string.)

   Returns Qnil when FILEPOS has neither shape, when the file name is
   not a string, or when a relative name cannot be resolved because
   Vdoc_directory is not a string.  Signals an error when the file
   cannot be opened or its contents cannot be decoded.  */

static Lisp_Object
get_doc_string (Lisp_Object filepos)
{
  /* !!#### This function has not been Mule-ized */
  Lisp_Object docfile;
  Lisp_Object result;
  Lisp_Object reloc_name = Qnil;
  EMACS_INT offset;
  REGISTER char *c_name = 0;
  REGISTER int desc;

  if (INTP (filepos))
    {
      docfile = Vinternal_doc_file_name;
      offset = XINT (filepos);
    }
  else
    {
      if (!CONSP (filepos) || !INTP (XCDR (filepos)))
        return Qnil;

      docfile = XCAR (filepos);
      offset = XINT (XCDR (filepos));
      if (offset < 0)
        offset = - offset;
    }

  if (!STRINGP (docfile))
    return Qnil;

  /* An absolute name is used as is (through the Lisp string); a
     relative one is joined onto Vdoc_directory in a C buffer.  */
  result = Ffile_name_absolute_p (docfile);
  if (!NILP (result))
    reloc_name = docfile;
  else
    {
      Bytecount dir_room;

      /* XEmacs: Move this check here.  OK if called during loadup to
         load byte code instructions. */
      if (!STRINGP (Vdoc_directory))
        return Qnil;

      /* Leave room for at least sizeof ("../lib-src/") == 12 bytes of
         directory part.  */
      dir_room = XSTRING_LENGTH (Vdoc_directory);
      if (dir_room < 12)
        dir_room = 12;

      c_name = (char *) alloca (dir_room + XSTRING_LENGTH (docfile) + 8);
      string_join (c_name, Vdoc_directory, docfile);
    }

  desc = open (c_name ? c_name : (char *) XSTRING_DATA (reloc_name),
               O_RDONLY | OPEN_BINARY, 0);
  if (desc < 0)
    {
#ifndef CANNOT_DUMP
      if (purify_flag)
        {
          /* Preparing to dump; DOC file is probably not installed.
             So check in ../lib-src.
             sizeof ("../lib-src/") == 12 */
          c_name = (char *) alloca (12 + XSTRING_LENGTH (docfile) + 8);
          strcpy (c_name, "../lib-src/");
          strcat (c_name, (char *) XSTRING_DATA (docfile));

          desc = open (c_name, O_RDONLY | OPEN_BINARY, 0);
        }
#endif /* CANNOT_DUMP */

      if (desc < 0)
        signal_error (Qfile_error, "Cannot open doc string file",
                      c_name ? build_string (c_name) : reloc_name);
    }

  result = unparesseuxify_doc_string (desc, offset, c_name, reloc_name);
  close (desc);

  /* Anything but a string is error data describing what went wrong.  */
  if (STRINGP (result))
    return result;

  signal_error_1 (Qinvalid_byte_code, result);
  return result;
}

/* Fetch the text stored at FILEPOS (see get_doc_string) and run it
   through the Lisp reader, returning what Fread produces.  Used to
   load the bytecode string and constants vector of a compiled
   function from the .elc file.  */

Lisp_Object
read_doc_string (Lisp_Object filepos)
{
  Lisp_Object text = get_doc_string (filepos);

  if (STRINGP (text))
    return Fread (text);

  invalid_state ("loading bytecode failed to return string", text);
  /* Mirrors the original fall-through should invalid_state return.  */
  return Fread (text);
}

DEFUN ("documentation", Fdocumentation, 1, 2, 0, /*
Return the documentation string of FUNCTION.
Unless a non-nil second argument RAW is given, the
string is passed through `substitute-command-keys'.
*/
       (function, raw))
{
  /* This function can GC */
  Lisp_Object doc;
  Lisp_Object definition = Findirect_function (function);

  if (SUBRP (definition))
    {
      /* The doc slot is empty, a C string, or (when negative as an
         integer) the negated position of the text in the DOC file.  */
      if (!XSUBR (definition)->doc)
        return Qnil;

      if ((EMACS_INT) XSUBR (definition)->doc < 0)
        doc = get_doc_string
          (make_int (- (EMACS_INT) XSUBR (definition)->doc));
      else
        doc = build_string (XSUBR (definition)->doc);
    }
  else if (COMPILED_FUNCTIONP (definition))
    {
      Lisp_Compiled_Function *cf = XCOMPILED_FUNCTION (definition);
      Lisp_Object slot;

      if (! (cf->flags.documentationp))
        return Qnil;

      slot = compiled_function_documentation (cf);
      if (STRINGP (slot))
        doc = slot;
      else
        {
          /* Only a file position or (FILE . POSITION) can be fetched.  */
          if (!(NATNUMP (slot) || CONSP (slot)))
            return Qnil;
          doc = get_doc_string (slot);
        }
    }
  else if (KEYMAPP (definition))
    return build_translated_string ("Prefix command (definition is a keymap of subcommands).");
  else if (STRINGP (definition) || VECTORP (definition))
    return build_translated_string ("Keyboard macro.");
  else if (CONSP (definition))
    {
      Lisp_Object head = Fcar (definition);

      if (!SYMBOLP (head))
        return Fsignal (Qinvalid_function, list1 (definition));

      if (EQ (head, Qlambda) || EQ (head, Qautoload))
        {
          /* The doc, if any, is the third element of the form.  */
          Lisp_Object rest = Fcdr (Fcdr (definition));
          Lisp_Object candidate = Fcar (rest);

          if (STRINGP (candidate))
            doc = candidate;
          /* Handle a doc reference--but these never come last
             in the function body, so reject them if they are last.  */
          else if ((NATNUMP (candidate) || CONSP (candidate))
                   && ! NILP (XCDR (rest)))
            doc = get_doc_string (candidate);
          else
            return Qnil;
        }
      else if (EQ (head, Qmacro))
        return Fdocumentation (Fcdr (definition), raw);
      else
        return Fsignal (Qinvalid_function, list1 (definition));
    }
  else
    return Fsignal (Qinvalid_function, list1 (definition));

  /* With RAW non-nil the text is returned untouched.  */
  if (!NILP (raw))
    return doc;

  {
    struct gcpro gcpro1;
#ifdef I18N3
    Lisp_Object domain = Qnil;
    if (COMPILED_FUNCTIONP (definition))
      domain = compiled_function_domain (XCOMPILED_FUNCTION (definition));
    if (NILP (domain))
      doc = Fgettext (doc);
    else
      doc = Fdgettext (domain, doc);
#endif

    GCPRO1 (doc);
    doc = Fsubstitute_command_keys (doc);
    UNGCPRO;
  }
  return doc;
}

DEFUN ("documentation-property", Fdocumentation_property, 2, 3, 0, /*
Return the documentation string that is SYMBOL's PROP property.
This is like `get', but it can refer to strings stored in the
`doc-directory/DOC' file; and if the value is a string, it is passed
through `substitute-command-keys'.  A non-nil third argument avoids this
translation.
*/
       (symbol, prop, raw))
{
  /* This function can GC */
  REGISTER Lisp_Object doc = Qnil;
#ifdef I18N3
  REGISTER Lisp_Object domain;
#endif
  struct gcpro gcpro1;

  GCPRO1 (doc);

  doc = Fget (symbol, prop, Qnil);

  /* An integer or cons property value is a reference into a doc
     file; non-positive integers are negated before the lookup.  */
  if (INTP (doc))
    {
      if (XINT (doc) <= 0)
        doc = make_int (- XINT (doc));
      doc = get_doc_string (doc);
    }
  else if (CONSP (doc))
    doc = get_doc_string (doc);

#ifdef I18N3
  if (!NILP (doc))
    {
      domain = Fget (symbol, Qvariable_domain, Qnil);
      if (NILP (domain))
        doc = Fgettext (doc);
      else
        doc = Fdgettext (domain, doc);
    }
#endif

  /* Key substitution applies only to strings, and only when RAW
     is nil.  */
  if (NILP (raw))
    {
      if (STRINGP (doc))
        doc = Fsubstitute_command_keys (doc);
    }

  UNGCPRO;
  return doc;
}

/* Print a note about an odd doc entry found for SYM.  WEIRDNESS says
   what is odd, TYPE what kind of object was involved, and POS is the
   position reported in the message.  Entries whose WEIRDNESS is the
   (translated) word "duplicate" are not reported.  */
static void
weird_doc (Lisp_Object sym, const char *weirdness, const char *type, int pos)
{
  int is_duplicate = (strcmp (weirdness, GETTEXT ("duplicate")) == 0);

  if (!is_duplicate)
    message ("Note: Strange doc (%s) for %s %s @ %d",
             weirdness, type, string_data (XSYMBOL (sym)->name), pos);
}


DEFUN ("Snarf-documentation", Fsnarf_documentation, 1, 1, 0, /*
Used during Emacs initialization, before dumping runnable Emacs,
to find pointers to doc strings stored in `.../lib-src/DOC' and
record them in function definitions.
One arg, FILENAME, a string which does not include a directory.
The file is written to `../lib-src', and later found in `exec-directory'
when doc strings are referred to in the dumped Emacs.
*/
       (filename))
{
  /* !!#### This function has not been Mule-ized */
  /* The file is scanned through a sliding window BUF.  FILLED is the
     number of valid bytes in BUF and POS the file position of buf[0].
     Each entry starts with ^_ followed by `F' or `V', the symbol name
     and a newline; the position just after that newline is what gets
     recorded for the symbol.  */
  int fd;
  char buf[1024 + 1];
  REGISTER int filled;
  REGISTER int pos;
  REGISTER char *p, *end;
  Lisp_Object sym, fun, tem;
  char *name;

#ifndef CANNOT_DUMP
  if (!purify_flag)
    invalid_operation ("Snarf-documentation can only be called in an undumped Emacs", Qunbound);
#endif

  CHECK_STRING (filename);

#ifdef CANNOT_DUMP
  if (!NILP(Vdoc_directory))
    {
      CHECK_STRING (Vdoc_directory);
      name = (char *) alloca (XSTRING_LENGTH (filename)
                              + XSTRING_LENGTH (Vdoc_directory)
                              + 1);
      strcpy (name, (char *) XSTRING_DATA (Vdoc_directory));
    }
  else
#endif /* CANNOT_DUMP */
    {
      name = (char *) alloca (XSTRING_LENGTH (filename) + 14);
      strcpy (name, "../lib-src/");
    }

  strcat (name, (char *) XSTRING_DATA (filename));

  fd = open (name, O_RDONLY | OPEN_BINARY, 0);
  if (fd < 0)
    report_file_error ("Opening doc string file", build_string (name));
  Vinternal_doc_file_name = filename;
  filled = 0;
  pos = 0;
  while (1)
    {
      if (filled < 512)
        {
          /* Check the result before adding it in: a failed read used
             to make FILLED negative and the store below write before
             the start of BUF.  */
          int nread = read (fd, &buf[filled], sizeof (buf) - 1 - filled);
          if (nread < 0)
            report_file_error ("Reading doc string file",
                               build_string (name));
          filled += nread;
        }
      if (!filled)
        break;

      buf[filled] = 0;
      p = buf;
      /* Unless the window is nearly empty, stop the search 128 bytes
         short of the end so the entry header after a ^_ is in BUF.  */
      end = buf + (filled < 512 ? filled : filled - 128);
      while (p != end && *p != '\037') p++;
      /* p points to ^_Ffunctionname\n or ^_Vvarname\n.  */
      if (p != end)
        {
          end = strchr (p, '\n');
          /* A header with no newline in the window cannot be parsed;
             report it instead of dereferencing a null pointer.  */
          if (!end)
            signal_error (Qfile_error, "DOC file invalid at position",
                          make_int (pos));
          sym = oblookup (Vobarray, (Intbyte *) p + 2, end - p - 2);
          if (SYMBOLP (sym))
            {
              /* File position of the first byte after the newline.
                 Computed as integer + pointer difference so that no
                 out-of-range pointer is formed.  */
              Lisp_Object offset = make_int (pos + (end + 1 - buf));
              /* Attach a docstring to a variable */
              if (p[1] == 'V')
                {
                  /* Install file-position as variable-documentation property
                     and make it negative for a user-variable
                     (doc starts with a `*').  */
                  Lisp_Object old = Fget (sym, Qvariable_documentation, Qzero);
                  if (!ZEROP (old))
                    {
                      weird_doc (sym, GETTEXT ("duplicate"),
                                 GETTEXT ("variable"), pos);
                      /* In the case of duplicate doc file entries, always
                         take the later one.  But if the doc is not an int
                         (a string, say) leave it alone. */
                      if (!INTP (old))
                        goto weird;
                    }
                  Fput (sym, Qvariable_documentation,
                        ((end[1] == '*')
                         ? make_int (- XINT (offset))
                         : offset));
                }
              /* Attach a docstring to a function.
                 The type determines where the docstring is stored.  */
              else if (p[1] == 'F')
                {
                  fun = indirect_function (sym,0);

                  if (CONSP (fun) && EQ (XCAR (fun), Qmacro))
                    fun = XCDR (fun);

                  if (UNBOUNDP (fun))
                    {
                      /* May have been #if'ed out or something */
                      weird_doc (sym, GETTEXT ("not fboundp"),
                                 GETTEXT ("function"), pos);
                      goto weird;
                    }
                  else if (SUBRP (fun))
                    {
                      /* Lisp_Subrs have a slot for it.  */
                      if (XSUBR (fun)->doc)
                        {
                          weird_doc (sym, GETTEXT ("duplicate"),
                                     GETTEXT ("subr"), pos);
                          goto weird;
                        }
                      /* Stored negated, to tell it from a real C
                         string pointer.  */
                      XSUBR (fun)->doc = (char *) (- XINT (offset));
                    }
                  else if (CONSP (fun))
                    {
                      /* If it's a lisp form, stick it in the form.  */
                      tem = XCAR (fun);
                      if (EQ (tem, Qlambda) || EQ (tem, Qautoload))
                        {
                          tem = Fcdr (Fcdr (fun));
                          if (CONSP (tem) &&
                              INTP (XCAR (tem)))
                            {
                              Lisp_Object old = XCAR (tem);
                              if (!ZEROP (old))
                                {
                                  /* TEM now holds the body tail, so the
                                     kind of form must be read from the
                                     head of FUN.  */
                                  weird_doc (sym, GETTEXT ("duplicate"),
                                             (EQ (XCAR (fun), Qlambda)
                                              ? GETTEXT ("lambda")
                                              : GETTEXT ("autoload")),
                                             pos);
                                  /* In the case of duplicate doc file entries,
                                     always take the later one.  But if the doc
                                     is not an int (a string, say) leave it
                                     alone. */
                                  if (!INTP (old))
                                    goto weird;
                                }
                              XCAR (tem) = offset;
                            }
                          else if (!CONSP (tem))
                            {
                              weird_doc (sym, GETTEXT ("!CONSP(tem)"),
                                         GETTEXT ("function"), pos);
                              goto cont;
                            }
                          else
                            {
                              /* DOC string is a string not integer 0 */
#if 0
                              weird_doc (sym, GETTEXT ("!INTP(XCAR(tem))"),
                                         GETTEXT ("function"), pos);
#endif
                              goto cont;
                            }
                        }
                      else
                        {
                          weird_doc (sym, GETTEXT ("not lambda or autoload"),
                                     GETTEXT ("function"), pos);
                          goto cont;
                        }
                    }
                  else if (COMPILED_FUNCTIONP (fun))
                    {
                      /* Compiled-Function objects sometimes have
                         slots for it.  */
                      Lisp_Compiled_Function *f = XCOMPILED_FUNCTION (fun);

                      /* This compiled-function object must have a
                         slot for the docstring, since we've found a
                         docstring for it.  Unless there were multiple
                         definitions of it, and the latter one didn't
                         have any doc, which is a legal if slightly
                         bogus situation, so don't blow up. */

                      if (! (f->flags.documentationp))
                        {
                          weird_doc (sym, GETTEXT ("no doc slot"),
                                     GETTEXT ("bytecode"), pos);
                          goto weird;
                        }
                      else
                        {
                          Lisp_Object old =
                            compiled_function_documentation (f);
                          if (!ZEROP (old))
                            {
                              weird_doc (sym, GETTEXT ("duplicate"),
                                         GETTEXT ("bytecode"), pos);
                              /* In the case of duplicate doc file entries,
                                 always take the later one.  But if the doc is
                                 not an int (a string, say) leave it alone. */
                              if (!INTP (old))
                                goto weird;
                            }
                          set_compiled_function_documentation (f, offset);
                        }
                    }
                  else
                    {
                      /* Otherwise the function is undefined or
                         otherwise weird.   Ignore it. */
                      weird_doc (sym, GETTEXT ("weird function"),
                                 GETTEXT ("function"), pos);
                      goto weird;
                    }
                }
              else
                {
                /* lose: */
                  signal_error (Qfile_error, "DOC file invalid at position", make_int (pos));
                weird:
                  /* goto lose */;
                }
            }
        }
    cont:
      /* Drop everything before END from the window and slide the
         remaining bytes down to the start of BUF.  */
      pos += end - buf;
      filled -= end - buf;
      memmove (buf, end, filled);
    }
  close (fd);
  return Qnil;
}


#if 1	/* Don't warn about functions whose doc was lost because they were
	   wrapped by advice-freeze.el... */
/* Return nonzero if the name of SYM starts with "ad-Orig-" and is long
   enough to pass the length test below; such symbols are exempt from
   the "doc lost" warning.
   NOTE(review): the length test compares against sizeof of the prefix,
   which counts the terminating null, so the name must be at least two
   bytes longer than the prefix -- confirm whether that is intended.  */
static int
kludgily_ignore_lost_doc_p (Lisp_Object sym)
{
# define kludge_prefix "ad-Orig-"
  Lisp_String *symname = XSYMBOL (sym)->name;

  if (string_length (symname) <= (Bytecount) (sizeof (kludge_prefix)))
    return 0;

  return !strncmp ((char *) string_data (symname), kludge_prefix,
                   sizeof (kludge_prefix) - 1);
# undef kludge_prefix
}
#else
# define kludgily_ignore_lost_doc_p(sym) 0
#endif


/* Per-symbol callback handed to map_obarray by Fverify_documentation.

   SYM is the symbol being visited.  ARG points to a Lisp_Object holding a
   cons; the cdr of that cons is set to Qt whenever a warning is issued,
   so the caller can tell afterwards whether anything was reported.

   For a symbol with a function definition, a documentation value is
   derived from the definition (looking through a `macro' wrapper):
     - subr: its doc pointer, converted to an integer;
     - symbol or keymap: -1;
     - cons whose car is `lambda' or `autoload': the car of its cddr if
       that is an integer, otherwise -1;
     - compiled function: -1 if it has no documentation slot, the
       integer value of the slot if that is an integer;
     - anything else (including a compiled function whose documentation
       is not an integer): left at 0.
   A value of 0 produces a "doc lost for function" warning, unless
   kludgily_ignore_lost_doc_p says the symbol should be ignored.

   For a symbol with a variable binding, a warning is produced when its
   `variable-documentation' property is the integer zero.

   Always returns 0 ("never stop"). */
static int
verify_doc_mapper (Lisp_Object sym, void *arg)
{
  Lisp_Object closure = *(Lisp_Object *)arg;

  if (!NILP (Ffboundp (sym)))
    {
      /* EMACS_INT, not int: the value may come from a pointer cast or
	 from XINT, and narrowing it could turn a non-zero value into 0,
	 which is the "doc lost" marker tested below. */
      EMACS_INT doc = 0;
      Lisp_Object fun = XSYMBOL (sym)->function;

      /* For a macro, examine the underlying function. */
      if (CONSP (fun) &&
	  EQ (XCAR (fun), Qmacro))
	fun = XCDR (fun);

      if (SUBRP (fun))
	doc = (EMACS_INT) XSUBR (fun)->doc;
      else if (SYMBOLP (fun) || KEYMAPP (fun))
	doc = -1;
      else if (CONSP (fun))
	{
	  Lisp_Object tem = XCAR (fun);
	  if (EQ (tem, Qlambda) || EQ (tem, Qautoload))
	    {
	      doc = -1;
	      tem = Fcdr (Fcdr (fun));
	      if (CONSP (tem) &&
		  INTP (XCAR (tem)))
		doc = XINT (XCAR (tem));
	    }
	}
      else if (COMPILED_FUNCTIONP (fun))
	{
	  Lisp_Compiled_Function *f = XCOMPILED_FUNCTION (fun);
	  if (! (f->flags.documentationp))
	    doc = -1;
	  else
	    {
	      Lisp_Object tem = compiled_function_documentation (f);
	      if (INTP (tem))
		doc = XINT (tem);
	    }
	}

      if (doc == 0 && !kludgily_ignore_lost_doc_p (sym))
	{
	  message ("Warning: doc lost for function %s.",
		   string_data (XSYMBOL (sym)->name));
	  XCDR (closure) = Qt;
	}
    }
  if (!NILP (Fboundp (sym)))
    {
      Lisp_Object doc = Fget (sym, Qvariable_documentation, Qnil);
      if (ZEROP (doc))
	{
	  message ("Warning: doc lost for variable %s.",
		   string_data (XSYMBOL (sym)->name));
	  XCDR (closure) = Qt;
	}
    }
  return 0; /* Never stop */
}

DEFUN ("Verify-documentation", Fverify_documentation, 0, 0, 0, /*
Used to make sure everything went well with Snarf-documentation.
Writes to stderr if not.
*/
       ())
{
  /* CLOSURE is a cons passed (by address) to verify_doc_mapper for every
     symbol in Vobarray; the mapper sets its cdr to t as soon as it has
     issued a warning. */
  Lisp_Object closure = Fcons (Qnil, Qnil);
  Lisp_Object all_ok;
  struct gcpro gcpro1;

  GCPRO1 (closure);
  map_obarray (Vobarray, verify_doc_mapper, &closure);

  if (NILP (Fcdr (closure)))
    all_ok = Qt;
  else
    {
      /* At least one warning was printed: add the general explanation. */
      message ("\n"
"This is usually because some files were preloaded by loaddefs.el or\n"
"site-load.el, but were not passed to make-docfile by Makefile.\n");
      all_ok = Qnil;
    }

  UNGCPRO;
  return all_ok;
}


DEFUN ("substitute-command-keys", Fsubstitute_command_keys, 1, 1, 0, /*
Substitute key descriptions for command names in STRING.
Return a new string which is STRING with substrings of the form \\=\\[COMMAND]
replaced by either:  a keystroke sequence that will invoke COMMAND,
or "M-x COMMAND" if COMMAND is not on any keys.
Substrings of the form \\=\\{MAPVAR} are replaced by summaries
\(made by `describe-bindings') of the value of MAPVAR, taken as a keymap.
Substrings of the form \\=\\<MAPVAR> specify to use the value of MAPVAR
as the keymap for future \\=\\[COMMAND] substrings.
\\=\\= quotes the following character and is discarded;
thus, \\=\\=\\=\\= puts \\=\\= into the output, and \\=\\=\\=\\[ puts \\=\\[ into the output.
*/
       (string))
{
  /* This function can GC */

  /* Implementation overview: the bytes of STRING are scanned once, using
     the byte index IDX.  Output is accumulated in the malloc'ed buffer
     BUF (BSIZE bytes allocated, BUFP is the write position), which is
     grown with xrealloc whenever a substitution is inserted.  If no
     escape sequence was processed (CHANGED stays 0), STRING itself is
     returned; otherwise a new string is built from BUF.

     Because GC may relocate the data of STRING, raw pointers into that
     data (STRDATA, STRP, START) must never be used after a call that can
     GC without being recomputed from XSTRING_DATA (string) and a byte
     offset.

     NOTE(review): BUF is only released by the xfree at the end; if one of
     the Lisp-level calls below exits non-locally, it looks like BUF would
     not be freed -- confirm. */
  Intbyte *buf;
  int changed = 0;
  REGISTER Intbyte *strdata;
  REGISTER Intbyte *bufp;
  Bytecount strlength;
  Bytecount idx;
  Bytecount bsize;
  Intbyte *new;
  Lisp_Object tem = Qnil;
  Lisp_Object keymap = Qnil;
  Lisp_Object name = Qnil;
  Intbyte *start;
  Bytecount start_idx;	/* byte offset of START within STRING's data */
  Bytecount length;
  struct gcpro gcpro1, gcpro2, gcpro3, gcpro4;

  if (NILP (string))
    return Qnil;

  CHECK_STRING (string);
  GCPRO4 (string, tem, keymap, name);

  /* There is the possibility that the string is not destined for a
     translating stream, and it could be argued that we should do the
     same thing here as in Fformat(), but there are very few times
     when this will be the case and many calls to this function
     would have to have `gettext' calls added. (I18N3) */
  string = LISP_GETTEXT (string);

  /* KEYMAP is either nil (which means search all the active keymaps)
     or a specified local map (which means search just that and the
     global map).  If non-nil, it might come from Voverriding_local_map,
     or from a \\<mapname> construct in STRING itself..  */
#if 0 /* FSFmacs */
  /* This is really weird and garbagey.  If keymap is nil and there's
     an overriding-local-map, `where-is-internal' will correctly note
     this, so there's no reason to do it here.  Maybe FSFmacs
     `where-is-internal' is broken. */
  /*
  keymap = current_kboard->Voverriding_terminal_local_map;
  if (NILP (keymap))
    keymap = Voverriding_local_map;
  */
#endif

  strlength = XSTRING_LENGTH (string);
  bsize = 1 + strlength;
  buf = (Intbyte *) xmalloc (bsize);
  bufp = buf;

  /* Have to reset strdata every time GC might be called */
  strdata = XSTRING_DATA (string);
  for (idx = 0; idx < strlength; )
    {
      Intbyte *strp = strdata + idx;

      if (strp[0] != '\\')
	{
	  /* just copy other chars */
	  /* As it happens, this will work with Mule even if the
	     character quoted is multi-byte; the remaining multi-byte
	     characters will just be copied by this loop. */
	  *bufp++ = *strp;
	  idx++;
	}
      else switch (strp[1])
	{
	default:
	  {
	    /* just copy unknown escape sequences */
	    *bufp++ = *strp;
	    idx++;
	    break;
	  }
	case '=':
	  {
	    /* \= quotes the next character;
	       thus, to put in \[ without its special meaning, use \=\[.  */
	    /* As it happens, this will work with Mule even if the
	       character quoted is multi-byte; the remaining multi-byte
	       characters will just be copied by this loop. */
	    changed = 1;
	    if (idx + 2 < strlength)
	      {
		*bufp++ = strp[2];
		idx += 3;
	      }
	    else
	      {
		/* "\=" is the last thing in the string: there is no
		   character left to quote, so just discard the "\="
		   instead of reading beyond the string's contents. */
		idx += 2;
	      }
	    break;
	  }
	case '[':
	  {
	    changed = 1;
	    idx += 2;		/* skip \[ */
	    strp += 2;
	    start = strp;
	    start_idx = idx;

	    while ((idx < strlength)
		   && *strp != ']')
	      {
		strp++;
		idx++;
	      }
	    length = strp - start;
	    idx++;		/* skip ] */

	    tem = Fintern (make_string (start, length), Qnil);
	    tem = Fwhere_is_internal (tem, keymap, Qt, Qnil, Qnil);

#if 0 /* FSFmacs */
	    /* Disregard menu bar bindings; it is positively annoying to
	       mention them when there's no menu bar, and it isn't terribly
	       useful even when there is a menu bar.  */
	    if (!NILP (tem))
	      {
		firstkey = Faref (tem, Qzero);
		if (EQ (firstkey, Qmenu_bar))
		  tem = Qnil;
	      }
#endif

	    if (NILP (tem))	/* but not on any keys */
	      {
		new = (Intbyte *) xrealloc (buf, bsize += 4);
		bufp += new - buf;
		buf = new;
		memcpy (bufp, "M-x ", 4);
		bufp += 4;
		/* The command name is copied straight out of STRING at
		   `subst'.  The calls above may have GC'ed and relocated
		   STRING's data, so recompute START from its offset
		   rather than using the pointer taken before them. */
		start = XSTRING_DATA (string) + start_idx;
		goto subst;
	      }
	    else
	      {			/* function is on a key */
		tem = Fkey_description (tem);
		goto subst_string;
	      }
	  }
	case '{':
	case '<':
	  {
	    /* Note which of the two constructs this is right away, while
	       STRP is still known to be valid. */
	    int sets_keymap_only = (strp[1] == '<');
	    Lisp_Object buffer = Fget_buffer_create (QSsubstitute);
	    struct buffer *buf_ = XBUFFER (buffer);

	    Fbuffer_disable_undo (buffer);
	    Ferase_buffer (buffer);

	    /* The buffer operations above may have GC'ed and relocated
	       STRING's data; refresh the raw pointers before parsing. */
	    strdata = XSTRING_DATA (string);
	    strp = strdata + idx;

	    /* \{foo} is replaced with a summary of keymap (symbol-value foo).
	       \<foo> just sets the keymap used for \[cmd].  */
	    changed = 1;
	    idx += 2;		/* skip \{ or \< */
	    strp += 2;
	    start = strp;

	    while ((idx < strlength)
		   && *strp != '}' && *strp != '>')
	      {
		strp++;
		idx++;
	      }
	    length = strp - start;
	    idx++;		/* skip } or > */

	    /* Get the value of the keymap in TEM, or nil if undefined.
	       Do this while still in the user's current buffer
	       in case it is a local variable.  */
	    name = Fintern (make_string (start, length), Qnil);
	    tem = Fboundp (name);
	    if (! NILP (tem))
	      {
		tem = Fsymbol_value (name);
		if (! NILP (tem))
		  tem = get_keymap (tem, 0, 1);
	      }

	    /* From here on START may be stale, so the saved flag is used
	       rather than looking back at the character before START. */
	    if (NILP (tem))
	      {
		buffer_insert_c_string (buf_, "(uses keymap \"");
		buffer_insert_lisp_string (buf_, Fsymbol_name (name));
		buffer_insert_c_string (buf_, "\", which is not currently defined) ");

		if (sets_keymap_only) keymap = Qnil;
	      }
	    else if (sets_keymap_only)
	      keymap = tem;
	    else
	      describe_map_tree (tem, 1, Qnil, Qnil, 0, buffer);

	    /* Whatever was inserted into the scratch buffer (possibly
	       nothing) becomes the replacement text. */
	    tem = make_string_from_buffer (buf_, BUF_BEG (buf_),
					   BUF_Z (buf_) - BUF_BEG (buf_));
	    Ferase_buffer (buffer);
	  }
	  goto subst_string;

	subst_string:
	  /* Replacement text is the contents of the Lisp string TEM. */
	  start = XSTRING_DATA (tem);
	  length = XSTRING_LENGTH (tem);
	subst:
	  /* Append LENGTH bytes at START to the output, growing BUF. */
	  bsize += length;
	  new = (Intbyte *) xrealloc (buf, bsize);
	  bufp += new - buf;
	  buf = new;
	  memcpy (bufp, start, length);
	  bufp += length;

	  /* Reset STRDATA in case gc relocated it.  */
	  strdata = XSTRING_DATA (string);

	  break;
	}
    }

  if (changed)			/* don't bother if nothing substituted */
    tem = make_string (buf, bufp - buf);
  else
    tem = string;
  xfree (buf);
  UNGCPRO;
  return tem;
}


/************************************************************************/
/*                            initialization                            */
/************************************************************************/

/* Apply DEFSUBR to each of this file's Lisp primitives.  The DEFSUBR
   macro is defined elsewhere; presumably it is what makes a function
   written with DEFUN callable from Lisp -- confirm against its
   definition. */
void
syms_of_doc (void)
{
  DEFSUBR (Fdocumentation);
  DEFSUBR (Fdocumentation_property);
  DEFSUBR (Fsnarf_documentation);
  DEFSUBR (Fverify_documentation);
  DEFSUBR (Fsubstitute_command_keys);
}

/* Set up the variables owned by this file: the Lisp variable
   `internal-doc-file-name' and the string constant QSsubstitute. */
void
vars_of_doc (void)
{
  DEFVAR_LISP ("internal-doc-file-name", &Vinternal_doc_file_name /*
Name of file containing documentation strings of built-in symbols.
*/ );
  /* The variable starts out as nil. */
  Vinternal_doc_file_name = Qnil;

  /* Name of the buffer that Fsubstitute_command_keys obtains with
     Fget_buffer_create and uses as scratch space.  The staticpro call
     presumably keeps the string from being garbage collected -- confirm
     against staticpro's definition. */
  QSsubstitute = build_string (" *substitute*");
  staticpro (&QSsubstitute);
}