view src/elhash.c @ 665:fdefd0186b75

[xemacs-hg @ 2001-09-20 06:28:42 by ben]

The great integral types renaming.

The purpose of this is to rationalize the names used for various integral types, so that they match their intended uses and follow consistent conventions, and to eliminate types that were not semantically different from each other.

The conventions are:

-- All integral types that measure quantities of anything are signed.  Some people disagree vociferously with this, but their arguments are mostly theoretical, and are vastly outweighed by the practical headaches of mixing signed and unsigned values, and more importantly by the far increased likelihood of inadvertent bugs: Because of the broken "viral" nature of unsigned quantities in C (operations involving mixed signed/unsigned are done unsigned, when exactly the opposite is nearly always wanted), even a single error in declaring a quantity unsigned that should be signed, or the even more subtle error of comparing signed and unsigned values and forgetting the necessary cast, can be catastrophic, as comparisons will yield wrong results.  -Wsign-compare is turned on specifically to catch this, but this tends to result in a great number of warnings when mixing signed and unsigned, and the casts are annoying.  More has been written on this elsewhere.

-- All such quantity types just mentioned boil down to EMACS_INT, which is 32 bits on 32-bit machines and 64 bits on 64-bit machines.  This is guaranteed to be the same size as Lisp objects of type `int', and (as far as I can tell) of size_t (unsigned!) and ssize_t.  The only type below that is not an EMACS_INT is Hashcode, which is an unsigned value of the same size as EMACS_INT.

-- Type names should be relatively short (no more than 10 characters or so), with the first letter capitalized and no underscores if they can at all be avoided.

-- "count" == a zero-based measurement of some quantity.  Includes sizes, offsets, and indexes.

-- "bpos" == a one-based measurement of a position in a buffer.  "Charbpos" and "Bytebpos" count text in the buffer, rather than bytes in memory; thus Bytebpos does not directly correspond to the memory representation.  Use "Membpos" for this.

-- "Char" refers to internal-format characters, not to the C type "char", which is really a byte.

-- For the actual name changes, see the script below.

I ran the following script to do the conversion.  (NOTE: This script is idempotent.  You can safely run it multiple times and it will not screw up previous results -- in fact, it will do nothing if nothing has changed.  Thus, it can be run repeatedly as necessary to handle patches coming in from old workspaces, or old branches.)  There are two tags, just before and just after the change: `pre-integral-type-rename' and `post-integral-type-rename'.  When merging code from the main trunk into a branch, the best thing to do is first merge up to `pre-integral-type-rename', then apply the script and associated changes, then merge from `post-integral-type-change' to the present.  (Alternatively, just do the merging in one operation; but you may then have a lot of conflicts needing to be resolved by hand.)

Script `fixtypes.sh' follows:

----------------------------------- cut ------------------------------------
files="*.[ch] s/*.h m/*.h config.h.in ../configure.in Makefile.in.in ../lib-src/*.[ch] ../lwlib/*.[ch]"
gr Memory_Count Bytecount $files
gr Lstream_Data_Count Bytecount $files
gr Element_Count Elemcount $files
gr Hash_Code Hashcode $files
gr extcount bytecount $files
gr bufpos charbpos $files
gr bytind bytebpos $files
gr memind membpos $files
gr bufbyte intbyte $files
gr Extcount Bytecount $files
gr Bufpos Charbpos $files
gr Bytind Bytebpos $files
gr Memind Membpos $files
gr Bufbyte Intbyte $files
gr EXTCOUNT BYTECOUNT $files
gr BUFPOS CHARBPOS $files
gr BYTIND BYTEBPOS $files
gr MEMIND MEMBPOS $files
gr BUFBYTE INTBYTE $files
gr MEMORY_COUNT BYTECOUNT $files
gr LSTREAM_DATA_COUNT BYTECOUNT $files
gr ELEMENT_COUNT ELEMCOUNT $files
gr HASH_CODE HASHCODE $files
----------------------------------- cut ------------------------------------

`fixtypes.sh' is a Bourne-shell script; it uses `gr':

----------------------------------- cut ------------------------------------
#!/bin/sh

# Usage is like this:
#
# gr FROM TO FILES ...
#
# globally replace FROM with TO in FILES.  FROM and TO are regular expressions.
# backup files are stored in the `backup' directory.
from="$1"
to="$2"
shift 2
echo ${1+"$@"} | xargs global-replace "s/$from/$to/g"
----------------------------------- cut ------------------------------------

`gr' in turn uses a Perl script to do its real work, `global-replace', which follows:

----------------------------------- cut ------------------------------------
: #-*- Perl -*-

### global-modify --- modify the contents of a file by a Perl expression

## Copyright (C) 1999 Martin Buchholz.
## Copyright (C) 2001 Ben Wing.

## Authors: Martin Buchholz <martin@xemacs.org>, Ben Wing <ben@xemacs.org>
## Maintainer: Ben Wing <ben@xemacs.org>
## Current Version: 1.0, May 5, 2001

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with XEmacs; see the file COPYING.  If not, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.

eval 'exec perl -w -S $0 ${1+"$@"}'
    if 0;

use strict;
use FileHandle;
use Carp;
use Getopt::Long;
use File::Basename;

(my $myName = $0) =~ s@.*/@@;
my $usage="
Usage: $myName [--help] [--backup-dir=DIR] [--line-mode] [--hunk-mode]
       PERLEXPR FILE ...

Globally modify a file, either line by line or in one big hunk.

Typical usage is like this:

[with GNU print, GNU xargs: guaranteed to handle spaces, quotes, etc.
in file names]

find . -name '*.[ch]' -print0 | xargs -0 $0 's/\bCONST\b/const/g'\n

[with non-GNU print, xargs]

find . -name '*.[ch]' -print | xargs $0 's/\bCONST\b/const/g'\n

The file is read in, either line by line (with --line-mode specified)
or in one big hunk (with --hunk-mode specified; it's the default),
and the Perl expression is then evalled with \$_ set to the line
or hunk of text, including the terminating newline if there is one.
It should destructively modify the value there, storing the changed
result in \$_.

Files in which any modifications are made are backed up to the
directory specified using --backup-dir, or to `backup' by default.
To disable this, use --backup-dir= with no argument.

Hunk mode is the default because it is MUCH MUCH faster than
line-by-line.  Use line-by-line only when it matters, e.g. you want to
do a replacement only once per line (the default without the `g'
argument).  Conversely, when using hunk mode, *ALWAYS* use `g';
otherwise, you will only make one replacement in the entire file!
";

my %options = ();
$Getopt::Long::ignorecase = 0;
&GetOptions (
	     \%options,
	     'help', 'backup-dir=s', 'line-mode', 'hunk-mode',
	     );

die $usage if $options{"help"} or @ARGV <= 1;
my $code = shift;

die $usage if grep (-d || ! -w, @ARGV);

sub SafeOpen {
  open ((my $fh = new FileHandle), $_[0]);
  confess "Can't open $_[0]: $!" if ! defined $fh;
  return $fh;
}

sub SafeClose {
  close $_[0] or confess "Can't close $_[0]: $!";
}

sub FileContents {
  my $fh = SafeOpen ("< $_[0]");
  my $olddollarslash = $/;
  local $/ = undef;
  my $contents = <$fh>;
  $/ = $olddollarslash;
  return $contents;
}

sub WriteStringToFile {
  my $fh = SafeOpen ("> $_[0]");
  binmode $fh;
  print $fh $_[1] or confess "$_[0]: $!\n";
  SafeClose $fh;
}

foreach my $file (@ARGV) {
  my $changed_p = 0;
  my $new_contents = "";
  if ($options{"line-mode"}) {
    my $fh = SafeOpen $file;
    while (<$fh>) {
      my $save_line = $_;
      eval $code;
      $changed_p = 1 if $save_line ne $_;
      $new_contents .= $_;
    }
  } else {
    my $orig_contents = $_ = FileContents $file;
    eval $code;
    if ($_ ne $orig_contents) {
      $changed_p = 1;
      $new_contents = $_;
    }
  }
  if ($changed_p) {
    my $backdir = $options{"backup-dir"};
    $backdir = "backup" if !defined ($backdir);
    if ($backdir) {
      my ($name, $path, $suffix) = fileparse ($file, "");
      my $backfulldir = $path . $backdir;
      my $backfile = "$backfulldir/$name";
      mkdir $backfulldir, 0755 unless -d $backfulldir;
      print "modifying $file (original saved in $backfile)\n";
      rename $file, $backfile;
    }
    WriteStringToFile ($file, $new_contents);
  }
}
----------------------------------- cut ------------------------------------

In addition to those programs, I needed to fix up a few other things, particularly relating to the duplicate definitions of types, now that some types have been merged with others.  Specifically:

1. In lisp.h, removed duplicate declarations of Bytecount.  The changed code should now look like this: (In each code snippet below, the first and last lines are the same as the original, as are all lines outside of those lines.  That allows you to locate the section to be replaced, and replace the stuff in that section, verifying that there isn't anything new added that would need to be kept.)

--------------------------------- snip -------------------------------------
/* Counts of bytes or chars */
typedef EMACS_INT Bytecount;
typedef EMACS_INT Charcount;

/* Counts of elements */
typedef EMACS_INT Elemcount;

/* Hash codes */
typedef unsigned long Hashcode;

/* ------------------------ dynamic arrays ------------------- */
--------------------------------- snip -------------------------------------

2. In lstream.h, removed duplicate declaration of Bytecount.  Rewrote the comment about this type.  The changed code should now look like this:

--------------------------------- snip -------------------------------------
#endif

/* There have been some arguments over what the type should be that
   specifies a count of bytes in a data block to be written out or read
   in, using Lstream_read(), Lstream_write(), and related functions.
   Originally it was long, which worked fine; Martin "corrected" these to
   size_t and ssize_t on the grounds that this is theoretically cleaner
   and is in keeping with the C standards.  Unfortunately, this practice
   is horribly error-prone due to design flaws in the way that mixed
   signed/unsigned arithmetic happens.  In fact, by doing this change,
   Martin introduced a subtle but fatal error that caused the operation
   of sending large mail messages to the SMTP server under Windows to
   fail.  By putting all values back to be signed, avoiding any
   signed/unsigned mixing, the bug immediately went away.  The type then
   in use was Lstream_Data_Count, so that it could be reverted cleanly
   if a vote came to that.  Now it is Bytecount.

   Some earlier comments about why the type must be signed: This MUST BE
   SIGNED, since it also is used in functions that return the number of
   bytes actually read to or written from in an operation, and these
   functions can return -1 to signal error.

   Note that the standard Unix read() and write() functions define the
   count going in as a size_t, which is UNSIGNED, and the count going
   out as an ssize_t, which is SIGNED.  This is a horrible design flaw.
   Not only is it highly likely to lead to logic errors when a -1 gets
   interpreted as a large positive number, but operations are bound to
   fail in all sorts of horrible ways when a number in the upper-half
   of the size_t range is passed in -- this number is unrepresentable
   as an ssize_t, so code that checks to see how many bytes are actually
   written (which is mandatory if you are dealing with certain types of
   devices) will get completely screwed up.

   --ben
*/

typedef enum lstream_buffering
--------------------------------- snip -------------------------------------

3. In dumper.c, there are four places, all inside of switch() statements, where XD_BYTECOUNT appears twice as a case tag.  In each case, the two case blocks contain identical code, and you should *REMOVE THE SECOND* and leave the first.
author ben
date Thu, 20 Sep 2001 06:31:11 +0000
parents b39c14581166
children 943eaba38521

/* Implementation of the hash table lisp object type.
   Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
   Copyright (C) 1995, 1996 Ben Wing.
   Copyright (C) 1997 Free Software Foundation, Inc.

This file is part of XEmacs.

XEmacs is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

XEmacs is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with XEmacs; see the file COPYING.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* Synched up with: Not in FSF. */

/* This file implements the hash table lisp object type.

   This implementation was mostly written by Martin Buchholz in 1997.

   The Lisp-level API (derived from Common Lisp) is almost completely
   compatible with GNU Emacs 21, even though the implementations are
   totally independent.

   The hash table technique used is "linear probing".  Collisions are
   resolved by putting the item in the next empty place in the array
   following the collision.  Finding a hash entry performs a linear
   search in the cluster starting at the hash value.

   On deletions from the hash table, the entries immediately following
   the deleted entry are re-entered in the hash table.  We do not have
   a special way to mark deleted entries (known as "tombstones").

   At the end of the hash entries ("hentries"), we leave room for an
   entry that is always empty (the "sentinel").

   The traditional literature on hash table implementation
   (e.g. Knuth) suggests that too much "primary clustering" occurs
   with linear probing.  However, this literature was written when
   locality of reference was not a factor.  The discrepancy between
   CPU speeds and memory speeds is increasing, and the speed of access
   to memory is highly dependent on memory caches which work best when
   there is high locality of data reference.  Random access to memory
   is up to 20 times as expensive as access to the nearest address
   (and getting worse).  So linear probing makes sense.

   But the representation doesn't actually matter that much with the
   current elisp engine.  Funcall is sufficiently slow that the choice
   of hash table implementation is noise.  */
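
/* The following is a deliberately simplified sketch of the probing
   scheme described above; it is illustrative only and kept out of the
   build (the real implementation below works on hentry structures, a
   sentinel, and the LINEAR_PROBING_LOOP macro).  In the toy version the
   key serves as its own hash.  Lookup starts at the hashed index and
   walks forward, wrapping around at the end of the array, until it hits
   either the key or an empty slot; an empty slot ends the cluster and
   means the key is absent.  (The real table never becomes completely
   full, so the walk always terminates.)  */
#if 0
struct toy_entry
{
  unsigned long key;
  unsigned long value;
  int occupied;
};

static struct toy_entry *
toy_lookup (struct toy_entry *table, unsigned long size, unsigned long key)
{
  unsigned long i = key % size;

  while (table[i].occupied)
    {
      if (table[i].key == key)
	return &table[i];	/* found KEY */
      i = (i + 1) % size;	/* collision: try the next slot */
    }
  return &table[i];		/* first empty slot in KEY's cluster */
}
#endif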

#include <config.h>
#include "lisp.h"
#include "bytecode.h"
#include "elhash.h"
#include "opaque.h"

Lisp_Object Qhash_tablep;
static Lisp_Object Qhashtable, Qhash_table;
static Lisp_Object Qweakness, Qvalue, Qkey_or_value, Qkey_and_value;
static Lisp_Object Vall_weak_hash_tables;
static Lisp_Object Qrehash_size, Qrehash_threshold;
static Lisp_Object Q_size, Q_test, Q_weakness, Q_rehash_size, Q_rehash_threshold;

/* obsolete as of 19990901 in xemacs-21.2 */
static Lisp_Object Qweak, Qkey_weak, Qvalue_weak, Qkey_or_value_weak;
static Lisp_Object Qnon_weak, Q_type;

typedef struct hentry
{
  Lisp_Object key;
  Lisp_Object value;
} hentry;

struct Lisp_Hash_Table
{
  struct lcrecord_header header;
  Elemcount size;
  Elemcount count;
  Elemcount rehash_count;
  double rehash_size;
  double rehash_threshold;
  Elemcount golden_ratio;
  hash_table_hash_function_t hash_function;
  hash_table_test_function_t test_function;
  hentry *hentries;
  enum hash_table_weakness weakness;
  Lisp_Object next_weak;     /* Used to chain together all of the weak
			        hash tables.  Don't mark through this. */
};

#define HENTRY_CLEAR_P(hentry) ((*(EMACS_UINT*)(&((hentry)->key))) == 0)
#define CLEAR_HENTRY(hentry)   \
  ((*(EMACS_UINT*)(&((hentry)->key)))   = 0, \
   (*(EMACS_UINT*)(&((hentry)->value))) = 0)

#define HASH_TABLE_DEFAULT_SIZE 16
#define HASH_TABLE_DEFAULT_REHASH_SIZE 1.3
#define HASH_TABLE_MIN_SIZE 10

#define HASHCODE(key, ht)						\
  ((((ht)->hash_function ? (ht)->hash_function (key) : LISP_HASH (key))	\
    * (ht)->golden_ratio)						\
   % (ht)->size)

#define KEYS_EQUAL_P(key1, key2, testfun) \
  (EQ (key1, key2) || ((testfun) && (testfun) (key1, key2)))

#define LINEAR_PROBING_LOOP(probe, entries, size)		\
  for (;							\
       !HENTRY_CLEAR_P (probe) ||				\
	 (probe == entries + size ?				\
	  (probe = entries, !HENTRY_CLEAR_P (probe)) : 0);	\
       probe++)

#ifndef ERROR_CHECK_HASH_TABLE
# ifdef ERROR_CHECK_TYPECHECK
#  define ERROR_CHECK_HASH_TABLE 1
# else
#  define ERROR_CHECK_HASH_TABLE 0
# endif
#endif

#if ERROR_CHECK_HASH_TABLE
static void
check_hash_table_invariants (Lisp_Hash_Table *ht)
{
  assert (ht->count < ht->size);
  assert (ht->count <= ht->rehash_count);
  assert (ht->rehash_count < ht->size);
  assert ((double) ht->count * ht->rehash_threshold - 1 <= (double) ht->rehash_count);
  assert (HENTRY_CLEAR_P (ht->hentries + ht->size));
}
#else
#define check_hash_table_invariants(ht)
#endif

/* Return a suitable size for a hash table, with at least SIZE slots. */
static Elemcount
hash_table_size (Elemcount requested_size)
{
  /* Return some prime near, but greater than or equal to, SIZE.
     Decades from the time of writing, someone will have a system large
     enough that the list below will be too short... */
  static const Elemcount primes [] =
  {
    19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
    1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
    19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
    204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
    1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
    10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
    50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
    243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
    1174703521, 1527114613, 1985248999 /* , 2580823717UL, 3355070839UL */
  };
  /* We've heard of binary search. */
  int low, high;
  for (low = 0, high = countof (primes) - 1; high - low > 1;)
    {
      /* Loop Invariant: size < primes [high] */
      int mid = (low + high) / 2;
      if (primes [mid] < requested_size)
	low = mid;
      else
	high = mid;
    }
  return primes [high];
}
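
/* Illustrative only (not compiled; the function name is made up for the
   example): hash_table_size() rounds a requested slot count up to one
   of the primes above, e.g. a request for 100 slots yields 107. */
#if 0
static void
example_hash_table_size (void)
{
  Elemcount rounded = hash_table_size (100);	/* rounded == 107 */
}
#endif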


#if 0 /* I don't think these are needed any more.
	 If using the general lisp_object_equal_*() functions
	 causes efficiency problems, these can be resurrected. --ben */
/* equality and hash functions for Lisp strings */
int
lisp_string_equal (Lisp_Object str1, Lisp_Object str2)
{
  /* This is wrong anyway.  You can't use strcmp() on Lisp strings,
     because they can contain zero characters.  */
  return !strcmp ((char *) XSTRING_DATA (str1), (char *) XSTRING_DATA (str2));
}

static Hashcode
lisp_string_hash (Lisp_Object obj)
{
  return hash_string (XSTRING_DATA (obj), XSTRING_LENGTH (obj));
}

#endif /* 0 */

static int
lisp_object_eql_equal (Lisp_Object obj1, Lisp_Object obj2)
{
  return EQ (obj1, obj2) || (FLOATP (obj1) && internal_equal (obj1, obj2, 0));
}

static Hashcode
lisp_object_eql_hash (Lisp_Object obj)
{
  return FLOATP (obj) ? internal_hash (obj, 0) : LISP_HASH (obj);
}

static int
lisp_object_equal_equal (Lisp_Object obj1, Lisp_Object obj2)
{
  return internal_equal (obj1, obj2, 0);
}

static Hashcode
lisp_object_equal_hash (Lisp_Object obj)
{
  return internal_hash (obj, 0);
}


static Lisp_Object
mark_hash_table (Lisp_Object obj)
{
  Lisp_Hash_Table *ht = XHASH_TABLE (obj);

  /* If the hash table is weak, we don't want to mark the keys and
     values (we scan over them after everything else has been marked,
     and mark or remove them as necessary).  */
  if (ht->weakness == HASH_TABLE_NON_WEAK)
    {
      hentry *e, *sentinel;

      for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++)
	if (!HENTRY_CLEAR_P (e))
	  {
	    mark_object (e->key);
	    mark_object (e->value);
	  }
    }
  return Qnil;
}

/* Equality of hash tables.  Two hash tables are equal when they are of
   the same weakness and test function, they have the same number of
   elements, and for each key in the hash table, the values are `equal'.

   This is similar to Common Lisp `equalp' of hash tables, with the
   difference that CL requires the keys to be compared with the test
   function, which we don't do.  Doing that would require consing, and
   consing is a bad idea in `equal'.  Anyway, our method should provide
   the same result -- if the keys are not equal according to the test
   function, then Fgethash() in hash_table_equal_mapper() will fail.  */
static int
hash_table_equal (Lisp_Object hash_table1, Lisp_Object hash_table2, int depth)
{
  Lisp_Hash_Table *ht1 = XHASH_TABLE (hash_table1);
  Lisp_Hash_Table *ht2 = XHASH_TABLE (hash_table2);
  hentry *e, *sentinel;

  if ((ht1->test_function != ht2->test_function) ||
      (ht1->weakness      != ht2->weakness)      ||
      (ht1->count         != ht2->count))
    return 0;

  depth++;

  for (e = ht1->hentries, sentinel = e + ht1->size; e < sentinel; e++)
    if (!HENTRY_CLEAR_P (e))
      /* Look up the key in the other hash table, and compare the values. */
      {
	Lisp_Object value_in_other = Fgethash (e->key, hash_table2, Qunbound);
	if (UNBOUNDP (value_in_other) ||
	    !internal_equal (e->value, value_in_other, depth))
	  return 0;		/* Give up */
      }

  return 1;
}

/* This is not a great hash function, but it _is_ correct and fast.
   Examining all entries is too expensive, and examining a random
   subset does not yield a correct hash function. */
static Hashcode
hash_table_hash (Lisp_Object hash_table, int depth)
{
  return XHASH_TABLE (hash_table)->count;
}


/* Printing hash tables.

   This is non-trivial, because we use a readable structure-style
   syntax for hash tables.  This means that a typical hash table will be
   readably printed in the form of:

   #s(hash-table size 2 data (key1 value1 key2 value2))

   The supported hash table structure keywords and their values are:
   `test'             (eql (or nil), eq or equal)
   `size'             (a natnum or nil)
   `rehash-size'      (a float)
   `rehash-threshold' (a float)
   `weakness'         (nil, key, value, key-and-value, or key-or-value)
   `data'             (a list)

   If `print-readably' is nil, then a simpler syntax is used, for example

   #<hash-table size 2/13 data (key1 value1 key2 value2) 0x874d>

   The data is truncated to four pairs, and the rest is shown with
   `...'.  This printer does not cons.  */


/* Print the data of the hash table.  This maps through a Lisp
   hash table and prints key/value pairs using PRINTCHARFUN.  */
static void
print_hash_table_data (Lisp_Hash_Table *ht, Lisp_Object printcharfun)
{
  int count = 0;
  hentry *e, *sentinel;

  write_c_string (" data (", printcharfun);

  for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++)
    if (!HENTRY_CLEAR_P (e))
      {
	if (count > 0)
	  write_c_string (" ", printcharfun);
	if (!print_readably && count > 3)
	  {
	    write_c_string ("...", printcharfun);
	    break;
	  }
	print_internal (e->key, printcharfun, 1);
	write_c_string (" ", printcharfun);
	print_internal (e->value, printcharfun, 1);
	count++;
      }

  write_c_string (")", printcharfun);
}

static void
print_hash_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
{
  Lisp_Hash_Table *ht = XHASH_TABLE (obj);
  char buf[128];

  write_c_string (print_readably ? "#s(hash-table" : "#<hash-table",
		  printcharfun);

  /* These checks have a kludgy look to them, but they are safe.
     Due to the nature of hashing, you cannot use arbitrary
     test functions anyway.  */
  if (!ht->test_function)
    write_c_string (" test eq", printcharfun);
  else if (ht->test_function == lisp_object_equal_equal)
    write_c_string (" test equal", printcharfun);
  else if (ht->test_function == lisp_object_eql_equal)
    DO_NOTHING;
  else
    abort ();

  if (ht->count || !print_readably)
    {
      if (print_readably)
	sprintf (buf, " size %ld", (long) ht->count);
      else
	sprintf (buf, " size %ld/%ld", (long) ht->count, (long) ht->size);
      write_c_string (buf, printcharfun);
    }

  if (ht->weakness != HASH_TABLE_NON_WEAK)
    {
      sprintf (buf, " weakness %s",
	       (ht->weakness == HASH_TABLE_WEAK		  ? "key-and-value" :
		ht->weakness == HASH_TABLE_KEY_WEAK	  ? "key" :
		ht->weakness == HASH_TABLE_VALUE_WEAK	  ? "value" :
		ht->weakness == HASH_TABLE_KEY_VALUE_WEAK ? "key-or-value" :
		"you-d-better-not-see-this"));
      write_c_string (buf, printcharfun);
    }

  if (ht->count)
    print_hash_table_data (ht, printcharfun);

  if (print_readably)
    write_c_string (")", printcharfun);
  else
    {
      sprintf (buf, " 0x%x>", ht->header.uid);
      write_c_string (buf, printcharfun);
    }
}

static void
free_hentries (hentry *hentries, size_t size)
{
#if ERROR_CHECK_HASH_TABLE
  /* Ensure a crash if other code uses the discarded entries afterwards. */
  hentry *e, *sentinel;

  for (e = hentries, sentinel = e + size; e < sentinel; e++)
    * (unsigned long *) e = 0xdeadbeef;
#endif

  if (!DUMPEDP (hentries))
    xfree (hentries);
}

static void
finalize_hash_table (void *header, int for_disksave)
{
  if (!for_disksave)
    {
      Lisp_Hash_Table *ht = (Lisp_Hash_Table *) header;
      free_hentries (ht->hentries, ht->size);
      ht->hentries = 0;
    }
}

static const struct lrecord_description hentry_description_1[] = {
  { XD_LISP_OBJECT, offsetof (hentry, key) },
  { XD_LISP_OBJECT, offsetof (hentry, value) },
  { XD_END }
};

static const struct struct_description hentry_description = {
  sizeof (hentry),
  hentry_description_1
};

const struct lrecord_description hash_table_description[] = {
  { XD_ELEMCOUNT,     offsetof (Lisp_Hash_Table, size) },
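  /* The length of the hentries block is the value of description line 0
     (the `size' field) plus 1 for the trailing sentinel -- hence
     XD_INDIRECT (0, 1) below. */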
  { XD_STRUCT_PTR, offsetof (Lisp_Hash_Table, hentries), XD_INDIRECT(0, 1), &hentry_description },
  { XD_LO_LINK,    offsetof (Lisp_Hash_Table, next_weak) },
  { XD_END }
};

DEFINE_LRECORD_IMPLEMENTATION ("hash-table", hash_table,
                               mark_hash_table, print_hash_table,
			       finalize_hash_table,
			       hash_table_equal, hash_table_hash,
			       hash_table_description,
			       Lisp_Hash_Table);

static Lisp_Hash_Table *
xhash_table (Lisp_Object hash_table)
{
  if (!gc_in_progress)
    CHECK_HASH_TABLE (hash_table);
  check_hash_table_invariants (XHASH_TABLE (hash_table));
  return XHASH_TABLE (hash_table);
}


/************************************************************************/
/*			 Creation of Hash Tables			*/
/************************************************************************/

/* Creation of hash tables, without error-checking. */
static void
compute_hash_table_derived_values (Lisp_Hash_Table *ht)
{
  ht->rehash_count = (Elemcount)
    ((double) ht->size * ht->rehash_threshold);
  ht->golden_ratio = (Elemcount)
    ((double) ht->size * (.6180339887 / (double) sizeof (Lisp_Object)));
}
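
/* Worked example (illustrative; assumes a 32-bit build where
   sizeof (Lisp_Object) == 4): for size == 19 and rehash_threshold == 0.6,
   rehash_count == (Elemcount) (19 * 0.6) == 11, so Fputhash() enlarges
   the table as the 11th entry goes in, and golden_ratio ==
   (Elemcount) (19 * .6180339887 / 4) == 2, making HASHCODE (key, ht)
   come out to (raw hash * 2) % 19. */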

Lisp_Object
make_standard_lisp_hash_table (enum hash_table_test test,
			       Elemcount size,
			       double rehash_size,
			       double rehash_threshold,
			       enum hash_table_weakness weakness)
{
  hash_table_hash_function_t hash_function =  0;
  hash_table_test_function_t test_function = 0;

  switch (test)
    {
    case HASH_TABLE_EQ:
      test_function = 0;
      hash_function = 0;
      break;

    case HASH_TABLE_EQL:
      test_function = lisp_object_eql_equal;
      hash_function = lisp_object_eql_hash;
      break;

    case HASH_TABLE_EQUAL:
      test_function = lisp_object_equal_equal;
      hash_function = lisp_object_equal_hash;
      break;

    default:
      abort ();
    }

  return make_general_lisp_hash_table (hash_function, test_function,
				       size, rehash_size, rehash_threshold,
				       weakness);
}

Lisp_Object
make_general_lisp_hash_table (hash_table_hash_function_t hash_function,
			      hash_table_test_function_t test_function,
			      Elemcount size,
			      double rehash_size,
			      double rehash_threshold,
			      enum hash_table_weakness weakness)
{
  Lisp_Object hash_table;
  Lisp_Hash_Table *ht = alloc_lcrecord_type (Lisp_Hash_Table, &lrecord_hash_table);

  ht->test_function = test_function;
  ht->hash_function = hash_function;
  ht->weakness = weakness;

  ht->rehash_size =
    rehash_size > 1.0 ? rehash_size : HASH_TABLE_DEFAULT_REHASH_SIZE;

  ht->rehash_threshold =
    rehash_threshold > 0.0 ? rehash_threshold :
    size > 4096 && !ht->test_function ? 0.7 : 0.6;

  if (size < HASH_TABLE_MIN_SIZE)
    size = HASH_TABLE_MIN_SIZE;
  ht->size = hash_table_size ((Elemcount) (((double) size / ht->rehash_threshold)
					+ 1.0));
  ht->count = 0;

  compute_hash_table_derived_values (ht);

  /* We leave room for one never-occupied sentinel hentry at the end.  */
  ht->hentries = xnew_array_and_zero (hentry, ht->size + 1);

  XSETHASH_TABLE (hash_table, ht);

  if (weakness == HASH_TABLE_NON_WEAK)
    ht->next_weak = Qunbound;
  else
    ht->next_weak = Vall_weak_hash_tables, Vall_weak_hash_tables = hash_table;

  return hash_table;
}

Lisp_Object
make_lisp_hash_table (Elemcount size,
		      enum hash_table_weakness weakness,
		      enum hash_table_test test)
{
  return make_standard_lisp_hash_table (test, size, -1.0, -1.0, weakness);
}
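
/* A sketch of how C code elsewhere in XEmacs might drive this module
   (illustrative only and kept out of the build; the function name below
   is made up for the example).  Fputhash, Fgethash and Fremhash are the
   Lisp primitives defined later in this file. */
#if 0
static void
example_hash_table_usage (void)
{
  /* A non-weak `eq' table sized for roughly 50 keys. */
  Lisp_Object table =
    make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);

  Fputhash (Qhash_table, Qt, table);	/* KEY, VALUE, TABLE */
  Fgethash (Qhash_table, table, Qnil);	/* => Qt; Qnil is the default */
  Fremhash (Qhash_table, table);	/* => Qt, the entry was removed */
}
#endif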

/* Pretty reading of hash tables.

   Here we use the existing structures mechanism (which is,
   unfortunately, pretty cumbersome) for validating and instantiating
   the hash tables.  The idea is that the side-effect of reading a
   #s(hash-table PLIST) object is creation of a hash table with desired
   properties, and that the hash table is returned.  */

/* Validation functions: each keyword provides its own validation
   function.  The errors should maybe be continuable, but it is
   unclear how this would cope with ERRB.  */
static int
hash_table_size_validate (Lisp_Object keyword, Lisp_Object value,
			 Error_Behavior errb)
{
  if (NATNUMP (value))
    return 1;

  maybe_signal_error_1 (Qwrong_type_argument, list2 (Qnatnump, value),
		      Qhash_table, errb);
  return 0;
}

static Elemcount
decode_hash_table_size (Lisp_Object obj)
{
  return NILP (obj) ? HASH_TABLE_DEFAULT_SIZE : XINT (obj);
}

static int
hash_table_weakness_validate (Lisp_Object keyword, Lisp_Object value,
			      Error_Behavior errb)
{
  if (EQ (value, Qnil))			return 1;
  if (EQ (value, Qt))			return 1;
  if (EQ (value, Qkey))			return 1;
  if (EQ (value, Qkey_and_value))	return 1;
  if (EQ (value, Qkey_or_value))	return 1;
  if (EQ (value, Qvalue))		return 1;

  /* Following values are obsolete as of 19990901 in xemacs-21.2 */
  if (EQ (value, Qnon_weak))		return 1;
  if (EQ (value, Qweak))		return 1;
  if (EQ (value, Qkey_weak))		return 1;
  if (EQ (value, Qkey_or_value_weak))	return 1;
  if (EQ (value, Qvalue_weak))		return 1;

  maybe_invalid_constant ("Invalid hash table weakness",
			     value, Qhash_table, errb);
  return 0;
}

static enum hash_table_weakness
decode_hash_table_weakness (Lisp_Object obj)
{
  if (EQ (obj, Qnil))			return HASH_TABLE_NON_WEAK;
  if (EQ (obj, Qt))			return HASH_TABLE_WEAK;
  if (EQ (obj, Qkey_and_value))		return HASH_TABLE_WEAK;
  if (EQ (obj, Qkey))			return HASH_TABLE_KEY_WEAK;
  if (EQ (obj, Qkey_or_value))		return HASH_TABLE_KEY_VALUE_WEAK;
  if (EQ (obj, Qvalue))			return HASH_TABLE_VALUE_WEAK;

  /* Following values are obsolete as of 19990901 in xemacs-21.2 */
  if (EQ (obj, Qnon_weak))		return HASH_TABLE_NON_WEAK;
  if (EQ (obj, Qweak))			return HASH_TABLE_WEAK;
  if (EQ (obj, Qkey_weak))		return HASH_TABLE_KEY_WEAK;
  if (EQ (obj, Qkey_or_value_weak))	return HASH_TABLE_KEY_VALUE_WEAK;
  if (EQ (obj, Qvalue_weak))		return HASH_TABLE_VALUE_WEAK;

  invalid_constant ("Invalid hash table weakness", obj);
  return HASH_TABLE_NON_WEAK; /* not reached */
}

static int
hash_table_test_validate (Lisp_Object keyword, Lisp_Object value,
			 Error_Behavior errb)
{
  if (EQ (value, Qnil))	  return 1;
  if (EQ (value, Qeq))	  return 1;
  if (EQ (value, Qequal)) return 1;
  if (EQ (value, Qeql))	  return 1;

  maybe_invalid_constant ("Invalid hash table test",
			     value, Qhash_table, errb);
  return 0;
}

static enum hash_table_test
decode_hash_table_test (Lisp_Object obj)
{
  if (EQ (obj, Qnil))	return HASH_TABLE_EQL;
  if (EQ (obj, Qeq))	return HASH_TABLE_EQ;
  if (EQ (obj, Qequal)) return HASH_TABLE_EQUAL;
  if (EQ (obj, Qeql))	return HASH_TABLE_EQL;

  invalid_constant ("Invalid hash table test", obj);
  return HASH_TABLE_EQ; /* not reached */
}

static int
hash_table_rehash_size_validate (Lisp_Object keyword, Lisp_Object value,
				 Error_Behavior errb)
{
  if (!FLOATP (value))
    {
      maybe_signal_error_1 (Qwrong_type_argument, list2 (Qfloatp, value),
			  Qhash_table, errb);
      return 0;
    }

  {
    double rehash_size = XFLOAT_DATA (value);
    if (rehash_size <= 1.0)
      {
	maybe_invalid_argument
	  ("Hash table rehash size must be greater than 1.0",
	   value, Qhash_table, errb);
	return 0;
      }
  }

  return 1;
}

static double
decode_hash_table_rehash_size (Lisp_Object rehash_size)
{
  return NILP (rehash_size) ? -1.0 : XFLOAT_DATA (rehash_size);
}

static int
hash_table_rehash_threshold_validate (Lisp_Object keyword, Lisp_Object value,
				     Error_Behavior errb)
{
  if (!FLOATP (value))
    {
      maybe_signal_error_1 (Qwrong_type_argument, list2 (Qfloatp, value),
			  Qhash_table, errb);
      return 0;
    }

  {
    double rehash_threshold = XFLOAT_DATA (value);
    if (rehash_threshold <= 0.0 || rehash_threshold >= 1.0)
      {
	maybe_invalid_argument
	  ("Hash table rehash threshold must be between 0.0 and 1.0",
	   value, Qhash_table, errb);
	return 0;
      }
  }

  return 1;
}

static double
decode_hash_table_rehash_threshold (Lisp_Object rehash_threshold)
{
  return NILP (rehash_threshold) ? -1.0 : XFLOAT_DATA (rehash_threshold);
}

static int
hash_table_data_validate (Lisp_Object keyword, Lisp_Object value,
			 Error_Behavior errb)
{
  int len;

  GET_EXTERNAL_LIST_LENGTH (value, len);

  if (len & 1)
    {
      maybe_sferror
	("Hash table data must have alternating key/value pairs",
	 value, Qhash_table, errb);
      return 0;
    }
  return 1;
}

/* The actual instantiation of a hash table.  This does practically no
   error checking, because it relies on the fact that the paranoid
   functions above have error-checked everything to the last details.
   If this assumption is wrong, we will get a crash immediately (with
   error-checking compiled in), and we'll know if there is a bug in
   the structure mechanism.  So there.  */
static Lisp_Object
hash_table_instantiate (Lisp_Object plist)
{
  Lisp_Object hash_table;
  Lisp_Object test	       = Qnil;
  Lisp_Object size	       = Qnil;
  Lisp_Object rehash_size      = Qnil;
  Lisp_Object rehash_threshold = Qnil;
  Lisp_Object weakness	       = Qnil;
  Lisp_Object data	       = Qnil;

  while (!NILP (plist))
    {
      Lisp_Object key, value;
      key   = XCAR (plist); plist = XCDR (plist);
      value = XCAR (plist); plist = XCDR (plist);

      if      (EQ (key, Qtest))		    test	     = value;
      else if (EQ (key, Qsize))		    size	     = value;
      else if (EQ (key, Qrehash_size))	    rehash_size	     = value;
      else if (EQ (key, Qrehash_threshold)) rehash_threshold = value;
      else if (EQ (key, Qweakness))	    weakness	     = value;
      else if (EQ (key, Qdata))		    data	     = value;
      else if (EQ (key, Qtype))/*obsolete*/ weakness	     = value;
      else
	abort ();
    }

  /* Create the hash table.  */
  hash_table = make_standard_lisp_hash_table
    (decode_hash_table_test (test),
     decode_hash_table_size (size),
     decode_hash_table_rehash_size (rehash_size),
     decode_hash_table_rehash_threshold (rehash_threshold),
     decode_hash_table_weakness (weakness));

  /* I'm not sure whether this can GC, but better safe than sorry.  */
  {
    struct gcpro gcpro1;
    GCPRO1 (hash_table);

    /* And fill it with data.  */
    while (!NILP (data))
      {
	Lisp_Object key, value;
	key   = XCAR (data); data = XCDR (data);
	value = XCAR (data); data = XCDR (data);
	Fputhash (key, value, hash_table);
      }
    UNGCPRO;
  }

  return hash_table;
}

static void
structure_type_create_hash_table_structure_name (Lisp_Object structure_name)
{
  struct structure_type *st;

  st = define_structure_type (structure_name, 0, hash_table_instantiate);
  define_structure_type_keyword (st, Qtest, hash_table_test_validate);
  define_structure_type_keyword (st, Qsize, hash_table_size_validate);
  define_structure_type_keyword (st, Qrehash_size, hash_table_rehash_size_validate);
  define_structure_type_keyword (st, Qrehash_threshold, hash_table_rehash_threshold_validate);
  define_structure_type_keyword (st, Qweakness, hash_table_weakness_validate);
  define_structure_type_keyword (st, Qdata, hash_table_data_validate);

  /* obsolete as of 19990901 in xemacs-21.2 */
  define_structure_type_keyword (st, Qtype, hash_table_weakness_validate);
}

/* Create a built-in Lisp structure type named `hash-table'.
   We make #s(hashtable ...) equivalent to #s(hash-table ...),
   for backward compatibility.
   This is called from emacs.c.  */
void
structure_type_create_hash_table (void)
{
  structure_type_create_hash_table_structure_name (Qhash_table);
  structure_type_create_hash_table_structure_name (Qhashtable); /* compat */
}


/************************************************************************/
/*		Definition of Lisp-visible methods			*/
/************************************************************************/

DEFUN ("hash-table-p", Fhash_table_p, 1, 1, 0, /*
Return t if OBJECT is a hash table, else nil.
*/
       (object))
{
  return HASH_TABLEP (object) ? Qt : Qnil;
}

DEFUN ("make-hash-table", Fmake_hash_table, 0, MANY, 0, /*
Return a new empty hash table object.
Use Common Lisp style keywords to specify hash table properties.
 (make-hash-table &key test size rehash-size rehash-threshold weakness)

Keyword :test can be `eq', `eql' (default) or `equal'.
Comparison between keys is done using this function.
If speed is important, consider using `eq'.
When storing strings in the hash table, you will likely need to use `equal'.

Keyword :size specifies the number of keys likely to be inserted.
This number of entries can be inserted without enlarging the hash table.

Keyword :rehash-size must be a float greater than 1.0, and specifies
the factor by which to increase the size of the hash table when enlarging.

Keyword :rehash-threshold must be a float between 0.0 and 1.0,
and specifies the load factor of the hash table which triggers enlarging.

Non-standard keyword :weakness can be `nil' (default), `t', `key-and-value',
`key', `value' or `key-or-value'. `t' is an alias for `key-and-value'.

A key-and-value-weak hash table, also known as a fully-weak or simply
as a weak hash table, is one whose pointers do not count as GC
referents: for any key-value pair in the hash table, if the only
remaining pointer to either the key or the value is in a weak hash
table, then the pair will be removed from the hash table, and the key
and value collected.  A non-weak hash table (or any other pointer)
would prevent the object from being collected.

A key-weak hash table is similar to a fully-weak hash table except that
a key-value pair will be removed only if the key remains unmarked
outside of weak hash tables.  The pair will remain in the hash table if
the key is pointed to by something other than a weak hash table, even
if the value is not.

A value-weak hash table is similar to a fully-weak hash table except
that a key-value pair will be removed only if the value remains
unmarked outside of weak hash tables.  The pair will remain in the
hash table if the value is pointed to by something other than a weak
hash table, even if the key is not.

A key-or-value-weak hash table is similar to a fully-weak hash table except
that a key-value pair will be removed only if the value and the key remain
unmarked outside of weak hash tables.  The pair will remain in the
hash table if the value or key are pointed to by something other than a weak
hash table, even if the other is not.
*/
       (int nargs, Lisp_Object *args))
{
  int i = 0;
  Lisp_Object test	       = Qnil;
  Lisp_Object size	       = Qnil;
  Lisp_Object rehash_size      = Qnil;
  Lisp_Object rehash_threshold = Qnil;
  Lisp_Object weakness	       = Qnil;

  while (i + 1 < nargs)
    {
      Lisp_Object keyword = args[i++];
      Lisp_Object value   = args[i++];

      if      (EQ (keyword, Q_test))		 test		  = value;
      else if (EQ (keyword, Q_size))		 size		  = value;
      else if (EQ (keyword, Q_rehash_size))	 rehash_size	  = value;
      else if (EQ (keyword, Q_rehash_threshold)) rehash_threshold = value;
      else if (EQ (keyword, Q_weakness))	 weakness	  = value;
      else if (EQ (keyword, Q_type))/*obsolete*/ weakness	  = value;
      else invalid_constant ("Invalid hash table property keyword", keyword);
    }

  if (i < nargs)
    sferror ("Hash table property requires a value", args[i]);

#define VALIDATE_VAR(var) \
if (!NILP (var)) hash_table_##var##_validate (Q##var, var, ERROR_ME);

  VALIDATE_VAR (test);
  VALIDATE_VAR (size);
  VALIDATE_VAR (rehash_size);
  VALIDATE_VAR (rehash_threshold);
  VALIDATE_VAR (weakness);

  return make_standard_lisp_hash_table
    (decode_hash_table_test (test),
     decode_hash_table_size (size),
     decode_hash_table_rehash_size (rehash_size),
     decode_hash_table_rehash_threshold (rehash_threshold),
     decode_hash_table_weakness (weakness));
}

DEFUN ("copy-hash-table", Fcopy_hash_table, 1, 1, 0, /*
Return a new hash table containing the same keys and values as HASH-TABLE.
The keys and values will not themselves be copied.
*/
       (hash_table))
{
  const Lisp_Hash_Table *ht_old = xhash_table (hash_table);
  Lisp_Hash_Table *ht = alloc_lcrecord_type (Lisp_Hash_Table, &lrecord_hash_table);

  copy_lcrecord (ht, ht_old);

  ht->hentries = xnew_array (hentry, ht_old->size + 1);
  memcpy (ht->hentries, ht_old->hentries, (ht_old->size + 1) * sizeof (hentry));

  XSETHASH_TABLE (hash_table, ht);

  if (! EQ (ht->next_weak, Qunbound))
    {
      ht->next_weak = Vall_weak_hash_tables;
      Vall_weak_hash_tables = hash_table;
    }

  return hash_table;
}

static void
resize_hash_table (Lisp_Hash_Table *ht, Elemcount new_size)
{
  hentry *old_entries, *new_entries, *sentinel, *e;
  Elemcount old_size;

  old_size = ht->size;
  ht->size = new_size;

  old_entries = ht->hentries;

  ht->hentries = xnew_array_and_zero (hentry, new_size + 1);
  new_entries = ht->hentries;

  compute_hash_table_derived_values (ht);

  for (e = old_entries, sentinel = e + old_size; e < sentinel; e++)
    if (!HENTRY_CLEAR_P (e))
      {
	hentry *probe = new_entries + HASHCODE (e->key, ht);
	LINEAR_PROBING_LOOP (probe, new_entries, new_size)
	  ;
	*probe = *e;
      }

  free_hentries (old_entries, old_size);
}

/* After a hash table has been saved to disk and later restored by the
   portable dumper, it contains the same objects, but their addresses
   and thus their HASHCODEs have changed. */
void
pdump_reorganize_hash_table (Lisp_Object hash_table)
{
  const Lisp_Hash_Table *ht = xhash_table (hash_table);
  hentry *new_entries = xnew_array_and_zero (hentry, ht->size + 1);
  hentry *e, *sentinel;

  for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++)
    if (!HENTRY_CLEAR_P (e))
      {
	hentry *probe = new_entries + HASHCODE (e->key, ht);
	LINEAR_PROBING_LOOP (probe, new_entries, ht->size)
	  ;
	*probe = *e;
      }

  memcpy (ht->hentries, new_entries, ht->size * sizeof (hentry));

  xfree (new_entries);
}

static void
enlarge_hash_table (Lisp_Hash_Table *ht)
{
  Elemcount new_size =
    hash_table_size ((Elemcount) ((double) ht->size * ht->rehash_size));
  resize_hash_table (ht, new_size);
}

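/* Return the hentry in HT that contains KEY, or else the empty hentry
   at which KEY should be inserted. */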
static hentry *
find_hentry (Lisp_Object key, const Lisp_Hash_Table *ht)
{
  hash_table_test_function_t test_function = ht->test_function;
  hentry *entries = ht->hentries;
  hentry *probe = entries + HASHCODE (key, ht);

  LINEAR_PROBING_LOOP (probe, entries, ht->size)
    if (KEYS_EQUAL_P (probe->key, key, test_function))
      break;

  return probe;
}

DEFUN ("gethash", Fgethash, 2, 3, 0, /*
Find hash value for KEY in HASH-TABLE.
If there is no corresponding value, return DEFAULT (which defaults to nil).
*/
       (key, hash_table, default_))
{
  const Lisp_Hash_Table *ht = xhash_table (hash_table);
  hentry *e = find_hentry (key, ht);

  return HENTRY_CLEAR_P (e) ? default_ : e->value;
}

DEFUN ("puthash", Fputhash, 3, 3, 0, /*
Hash KEY to VALUE in HASH-TABLE.
*/
       (key, value, hash_table))
{
  Lisp_Hash_Table *ht = xhash_table (hash_table);
  hentry *e = find_hentry (key, ht);

  if (!HENTRY_CLEAR_P (e))
    return e->value = value;

  e->key   = key;
  e->value = value;

  if (++ht->count >= ht->rehash_count)
    enlarge_hash_table (ht);

  return value;
}

/* Remove hentry pointed at by PROBE.
   Subsequent entries are removed and reinserted.
   We don't use tombstones - too wasteful.  */
static void
remhash_1 (Lisp_Hash_Table *ht, hentry *entries, hentry *probe)
{
  Elemcount size = ht->size;
  CLEAR_HENTRY (probe);
  probe++;
  ht->count--;

  LINEAR_PROBING_LOOP (probe, entries, size)
    {
      Lisp_Object key = probe->key;
      hentry *probe2 = entries + HASHCODE (key, ht);
      LINEAR_PROBING_LOOP (probe2, entries, size)
	if (EQ (probe2->key, key))
	  /* hentry at probe doesn't need to move. */
	  goto continue_outer_loop;
      /* Move hentry from probe to new home at probe2. */
      *probe2 = *probe;
      CLEAR_HENTRY (probe);
    continue_outer_loop: continue;
    }
}
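
/* Worked example of why the re-insertion above is needed: suppose keys A
   and B both hash to slot I, so A sits at slot I and B at slot I+1.  If
   removing A merely cleared slot I, a later lookup of B would start at
   slot I, see an empty slot, and wrongly conclude that B is absent.
   Re-entering the entries that follow the hole moves B back to slot I,
   keeping every remaining entry reachable from its hash position. */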

DEFUN ("remhash", Fremhash, 2, 2, 0, /*
Remove the entry for KEY from HASH-TABLE.
Do nothing if there is no entry for KEY in HASH-TABLE.
Return non-nil if an entry was removed.
*/
       (key, hash_table))
{
  Lisp_Hash_Table *ht = xhash_table (hash_table);
  hentry *e = find_hentry (key, ht);

  if (HENTRY_CLEAR_P (e))
    return Qnil;

  remhash_1 (ht, ht->hentries, e);
  return Qt;
}

DEFUN ("clrhash", Fclrhash, 1, 1, 0, /*
Remove all entries from HASH-TABLE, leaving it empty.
*/
       (hash_table))
{
  Lisp_Hash_Table *ht = xhash_table (hash_table);
  hentry *e, *sentinel;

  for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++)
    CLEAR_HENTRY (e);
  ht->count = 0;

  return hash_table;
}

/************************************************************************/
/*			    Accessor Functions				*/
/************************************************************************/

DEFUN ("hash-table-count", Fhash_table_count, 1, 1, 0, /*
Return the number of entries in HASH-TABLE.
*/
       (hash_table))
{
  return make_int (xhash_table (hash_table)->count);
}

DEFUN ("hash-table-test", Fhash_table_test, 1, 1, 0, /*
Return the test function of HASH-TABLE.
This can be one of `eq', `eql' or `equal'.
*/
       (hash_table))
{
  hash_table_test_function_t fun = xhash_table (hash_table)->test_function;

  return (fun == lisp_object_eql_equal   ? Qeql   :
	  fun == lisp_object_equal_equal ? Qequal :
	  Qeq);
}

DEFUN ("hash-table-size", Fhash_table_size, 1, 1, 0, /*
Return the size of HASH-TABLE.
This is the current number of slots in HASH-TABLE, whether occupied or not.
*/
       (hash_table))
{
  return make_int (xhash_table (hash_table)->size);
}

DEFUN ("hash-table-rehash-size", Fhash_table_rehash_size, 1, 1, 0, /*
Return the current rehash size of HASH-TABLE.
This is a float greater than 1.0; the factor by which HASH-TABLE
is enlarged when the rehash threshold is exceeded.
*/
       (hash_table))
{
  return make_float (xhash_table (hash_table)->rehash_size);
}

DEFUN ("hash-table-rehash-threshold", Fhash_table_rehash_threshold, 1, 1, 0, /*
Return the current rehash threshold of HASH-TABLE.
This is a float between 0.0 and 1.0; the maximum `load factor' of HASH-TABLE,
beyond which the HASH-TABLE is enlarged by rehashing.
*/
       (hash_table))
{
  return make_float (xhash_table (hash_table)->rehash_threshold);
}

DEFUN ("hash-table-weakness", Fhash_table_weakness, 1, 1, 0, /*
Return the weakness of HASH-TABLE.
This can be one of `nil', `key-and-value', `key-or-value', `key' or `value'.
*/
       (hash_table))
{
  switch (xhash_table (hash_table)->weakness)
    {
    case HASH_TABLE_WEAK:		return Qkey_and_value;
    case HASH_TABLE_KEY_WEAK:		return Qkey;
    case HASH_TABLE_KEY_VALUE_WEAK:	return Qkey_or_value;
    case HASH_TABLE_VALUE_WEAK:		return Qvalue;
    default:				return Qnil;
    }
}

/* obsolete as of 19990901 in xemacs-21.2 */
DEFUN ("hash-table-type", Fhash_table_type, 1, 1, 0, /*
Return the type of HASH-TABLE.
This can be one of `non-weak', `weak', `key-weak' or `value-weak'.
*/
       (hash_table))
{
  switch (xhash_table (hash_table)->weakness)
    {
    case HASH_TABLE_WEAK:		return Qweak;
    case HASH_TABLE_KEY_WEAK:		return Qkey_weak;
    case HASH_TABLE_KEY_VALUE_WEAK:	return Qkey_or_value_weak;
    case HASH_TABLE_VALUE_WEAK:		return Qvalue_weak;
    default:				return Qnon_weak;
    }
}

/************************************************************************/
/*			    Mapping Functions				*/
/************************************************************************/

/* We need to be careful when mapping over hash tables because the
   hash table might be modified during the mapping operation:
   - by the mapping function
   - by gc (if the hash table is weak)

   So we make a copy of the hentries at the beginning of the mapping
   operation, and iterate over the copy.  Naturally, this is
   expensive, but not as expensive as you might think, because no
   actual memory has to be collected by our notoriously inefficient
   GC; we use an unwind-protect instead to free the memory directly.

   We could avoid the copying by having the hash table modifiers
   puthash and remhash check for currently active mapping functions.
   Disadvantages: it's hard to get right, and IMO hash mapping
   functions are basically rare, and no extra space in the hash table
   object and no extra cpu in puthash or remhash should be wasted to
   make maphash 3% faster.  From a design point of view, the basic
   functions gethash, puthash and remhash should be implementable
   without having to think about maphash.

   Note: We don't (yet) have Common Lisp's with-hash-table-iterator.
   If you implement this naively, you cannot have more than one
   concurrently active iterator over the same hash table.  The `each'
   function in perl has this limitation.

   Note: We GCPRO memory on the heap, not on the stack.  There is no
   obvious reason why this is bad, but as of this writing this is the
   only known occurrence of this technique in the code.

   -- Martin
*/

/* Ben disagrees with the "copying hentries" design, and says:

   Another solution is the same as I've already proposed -- when
   mapping, mark the table as "change-unsafe", and in this case, use a
   secondary table to maintain changes.  this could be basically a
   standard hash table, but with entries only for added or deleted
   entries in the primary table, and a marker like Qunbound to
   indicate a deleted entry.  puthash, gethash and remhash need a
   single extra check for this secondary table -- totally
   insignificant speedwise.  if you really cared about making
   recursive maphashes completely correct, you'd have to do a bit of
   extra work here -- when maphashing, if the secondary table exists,
   make a copy of it, and use the copy in conjunction with the primary
   table when mapping.  the advantages of this are

   [a] easy to demonstrate correct, even with weak hashtables.

   [b] no extra overhead in the general maphash case -- only when you
       modify the table while maphashing, and even then the overhead is
       very small.
*/

static Lisp_Object
maphash_unwind (Lisp_Object unwind_obj)
{
  void *ptr = (void *) get_opaque_ptr (unwind_obj);
  xfree (ptr);
  free_opaque_ptr (unwind_obj);
  return Qnil;
}

/* Return a malloced array of alternating key/value pairs from HT. */
static Lisp_Object *
copy_compress_hentries (const Lisp_Hash_Table *ht)
{
  Lisp_Object * const objs =
    /* If the hash table is empty, ht->count could be 0. */
    xnew_array (Lisp_Object, 2 * (ht->count > 0 ? ht->count : 1));
  const hentry *e, *sentinel;
  Lisp_Object *pobj;

  for (e = ht->hentries, sentinel = e + ht->size, pobj = objs; e < sentinel; e++)
    if (!HENTRY_CLEAR_P (e))
      {
	*(pobj++) = e->key;
	*(pobj++) = e->value;
      }

  type_checking_assert (pobj == objs + 2 * ht->count);

  return objs;
}

DEFUN ("maphash", Fmaphash, 2, 2, 0, /*
Map FUNCTION over entries in HASH-TABLE, calling it with two args,
each key and value in HASH-TABLE.

FUNCTION must not modify HASH-TABLE, with the one exception that FUNCTION
may remhash or puthash the entry currently being processed by FUNCTION.
*/
       (function, hash_table))
{
  const Lisp_Hash_Table * const ht = xhash_table (hash_table);
  Lisp_Object * const objs = copy_compress_hentries (ht);
  Lisp_Object args[3];
  const Lisp_Object *pobj, *end;
  int speccount = specpdl_depth ();
  struct gcpro gcpro1;

  record_unwind_protect (maphash_unwind, make_opaque_ptr ((void *)objs));
  GCPRO1 (objs[0]);
  gcpro1.nvars = 2 * ht->count;

  args[0] = function;

  for (pobj = objs, end = pobj + 2 * ht->count; pobj < end; pobj += 2)
    {
      args[1] = pobj[0];
      args[2] = pobj[1];
      Ffuncall (countof (args), args);
    }

  unbind_to (speccount, Qnil);
  UNGCPRO;

  return Qnil;
}

/* Map *C* function FUNCTION over the elements of a non-weak lisp hash table.
   FUNCTION must not modify HASH-TABLE, with the one exception that FUNCTION
   may puthash the entry currently being processed by FUNCTION.
   Mapping terminates if FUNCTION returns something other than 0. */
void
elisp_maphash_unsafe (maphash_function_t function,
	       Lisp_Object hash_table, void *extra_arg)
{
  const Lisp_Hash_Table *ht = XHASH_TABLE (hash_table);
  const hentry *e, *sentinel;

  for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++)
    if (!HENTRY_CLEAR_P (e))
      if (function (e->key, e->value, extra_arg))
	return;
}

/* Map *C* function FUNCTION over the elements of a lisp hash table.
   It is safe for FUNCTION to modify HASH-TABLE.
   Mapping terminates if FUNCTION returns something other than 0. */
void
elisp_maphash (maphash_function_t function,
	       Lisp_Object hash_table, void *extra_arg)
{
  const Lisp_Hash_Table * const ht = xhash_table (hash_table);
  Lisp_Object * const objs = copy_compress_hentries (ht);
  const Lisp_Object *pobj, *end;
  int speccount = specpdl_depth ();
  struct gcpro gcpro1;

  record_unwind_protect (maphash_unwind, make_opaque_ptr ((void *)objs));
  GCPRO1 (objs[0]);
  gcpro1.nvars = 2 * ht->count;

  for (pobj = objs, end = pobj + 2 * ht->count; pobj < end; pobj += 2)
    if (function (pobj[0], pobj[1], extra_arg))
      break;

  unbind_to (speccount, Qnil);
  UNGCPRO;
}

/* Remove all elements of a lisp hash table satisfying *C* predicate PREDICATE.
   PREDICATE must not modify HASH-TABLE. */
void
elisp_map_remhash (maphash_function_t predicate,
		   Lisp_Object hash_table, void *extra_arg)
{
  const Lisp_Hash_Table * const ht = xhash_table (hash_table);
  Lisp_Object * const objs = copy_compress_hentries (ht);
  const Lisp_Object *pobj, *end;
  int speccount = specpdl_depth ();
  struct gcpro gcpro1;

  record_unwind_protect (maphash_unwind, make_opaque_ptr ((void *)objs));
  GCPRO1 (objs[0]);
  gcpro1.nvars = 2 * ht->count;

  for (pobj = objs, end = pobj + 2 * ht->count; pobj < end; pobj += 2)
    if (predicate (pobj[0], pobj[1], extra_arg))
      Fremhash (pobj[0], hash_table);

  unbind_to (speccount, Qnil);
  UNGCPRO;
}


/************************************************************************/
/*		   garbage collecting weak hash tables			*/
/************************************************************************/
#define MARK_OBJ(obj) do {		\
  Lisp_Object mo_obj = (obj);		\
  if (!marked_p (mo_obj))		\
    {					\
      mark_object (mo_obj);		\
      did_mark = 1;			\
    }					\
} while (0)


/* Complete the marking for semi-weak hash tables. */
int
finish_marking_weak_hash_tables (void)
{
  Lisp_Object hash_table;
  int did_mark = 0;

  for (hash_table = Vall_weak_hash_tables;
       !NILP (hash_table);
       hash_table = XHASH_TABLE (hash_table)->next_weak)
    {
      const Lisp_Hash_Table *ht = XHASH_TABLE (hash_table);
      const hentry *e = ht->hentries;
      const hentry *sentinel = e + ht->size;

      if (! marked_p (hash_table))
	/* The hash table is probably garbage.  Ignore it. */
	continue;

      /* Now, scan over all the pairs.  For all pairs that are
	 half-marked, we may need to mark the other half if we're
	 keeping this pair. */
      switch (ht->weakness)
	{
	case HASH_TABLE_KEY_WEAK:
	  for (; e < sentinel; e++)
	    if (!HENTRY_CLEAR_P (e))
	      if (marked_p (e->key))
		MARK_OBJ (e->value);
	  break;

	case HASH_TABLE_VALUE_WEAK:
	  for (; e < sentinel; e++)
	    if (!HENTRY_CLEAR_P (e))
	      if (marked_p (e->value))
		MARK_OBJ (e->key);
	  break;

	case HASH_TABLE_KEY_VALUE_WEAK:
	  for (; e < sentinel; e++)
	    if (!HENTRY_CLEAR_P (e))
	      {
		if (marked_p (e->value))
		  MARK_OBJ (e->key);
		else if (marked_p (e->key))
		  MARK_OBJ (e->value);
	      }
	  break;

	case HASH_TABLE_KEY_CAR_WEAK:
	  for (; e < sentinel; e++)
	    if (!HENTRY_CLEAR_P (e))
	      if (!CONSP (e->key) || marked_p (XCAR (e->key)))
		{
		  MARK_OBJ (e->key);
		  MARK_OBJ (e->value);
		}
	  break;

	  /* We seem to be sprouting new weakness types at an alarming
	     rate. At least this is not externally visible - and in
	     fact all of these KEY_CAR_* types are only used by the
	     glyph code. */
	case HASH_TABLE_KEY_CAR_VALUE_WEAK:
	  for (; e < sentinel; e++)
	    if (!HENTRY_CLEAR_P (e))
	      {
		if (!CONSP (e->key) || marked_p (XCAR (e->key)))
		  {
		    MARK_OBJ (e->key);
		    MARK_OBJ (e->value);
		  }
		else if (marked_p (e->value))
		  MARK_OBJ (e->key);
	      }
	  break;

	case HASH_TABLE_VALUE_CAR_WEAK:
	  for (; e < sentinel; e++)
	    if (!HENTRY_CLEAR_P (e))
	      if (!CONSP (e->value) || marked_p (XCAR (e->value)))
		{
		  MARK_OBJ (e->key);
		  MARK_OBJ (e->value);
		}
	  break;

	default:
	  break;
	}
    }

  return did_mark;
}

void
prune_weak_hash_tables (void)
{
  Lisp_Object hash_table, prev = Qnil;
  for (hash_table = Vall_weak_hash_tables;
       !NILP (hash_table);
       hash_table = XHASH_TABLE (hash_table)->next_weak)
    {
      if (! marked_p (hash_table))
	{
	  /* This hash table itself is garbage.  Remove it from the list. */
	  if (NILP (prev))
	    Vall_weak_hash_tables = XHASH_TABLE (hash_table)->next_weak;
	  else
	    XHASH_TABLE (prev)->next_weak = XHASH_TABLE (hash_table)->next_weak;
	}
      else
	{
	  /* Now, scan over all the pairs.  Remove all of the pairs
	     in which the key or value, or both, is unmarked
	     (depending on the weakness of the hash table). */
	  Lisp_Hash_Table *ht = XHASH_TABLE (hash_table);
	  hentry *entries = ht->hentries;
	  hentry *sentinel = entries + ht->size;
	  hentry *e;

	  for (e = entries; e < sentinel; e++)
	    if (!HENTRY_CLEAR_P (e))
	      {
	      again:
		if (!marked_p (e->key) || !marked_p (e->value))
		  {
		    remhash_1 (ht, entries, e);
		    if (!HENTRY_CLEAR_P (e))
		      goto again;
		  }
	      }

	  prev = hash_table;
	}
    }
}

/* Return a hash value for an array of Lisp_Objects of size SIZE. */

Hashcode
internal_array_hash (Lisp_Object *arr, int size, int depth)
{
  int i;
  Hashcode hash = 0;
  depth++;

  if (size <= 5)
    {
      for (i = 0; i < size; i++)
	hash = HASH2 (hash, internal_hash (arr[i], depth));
      return hash;
    }

  /* just pick five elements scattered throughout the array.
     A slightly better approach would be to offset by some
     noise factor from the points chosen below. */
  for (i = 0; i < 5; i++)
    hash = HASH2 (hash, internal_hash (arr[i*size/5], depth));

  return hash;
}

/* Return a hash value for a Lisp_Object.  This is for use when hashing
   objects with the comparison being `equal' (for `eq', you can just
   use the Lisp_Object itself as the hash value).  You need to make a
   tradeoff between the speed of the hash function and how good the
   hashing is.  In particular, the hash function needs to be FAST,
   so you can't just traipse down the whole tree hashing everything
   together.  Most of the time, objects will differ in the first
   few elements you hash.  Thus, we only go to a short depth (5)
   and only hash at most 5 elements out of a vector.  Theoretically
   we could still take 5^5 time (a big big number) to compute a
   hash, but practically this won't ever happen. */

Hashcode
internal_hash (Lisp_Object obj, int depth)
{
  if (depth > 5)
    return 0;
  if (CONSP (obj))
    {
      /* no point in worrying about tail recursion, since we're not
	 going very deep */
      return HASH2 (internal_hash (XCAR (obj), depth + 1),
		    internal_hash (XCDR (obj), depth + 1));
    }
  if (STRINGP (obj))
    {
      return hash_string (XSTRING_DATA (obj), XSTRING_LENGTH (obj));
    }
  if (LRECORDP (obj))
    {
      const struct lrecord_implementation
	*imp = XRECORD_LHEADER_IMPLEMENTATION (obj);
      if (imp->hash)
	return imp->hash (obj, depth);
    }

  return LISP_HASH (obj);
}

DEFUN ("sxhash", Fsxhash, 1, 1, 0, /*
Return a hash value for OBJECT.
\(equal obj1 obj2) implies (= (sxhash obj1) (sxhash obj2)).
*/
       (object))
{
  return make_int (internal_hash (object, 0));
}

#if 0
xxDEFUN ("internal-hash-value", Finternal_hash_value, 1, 1, 0, /*
Hash value of OBJECT.  For debugging.
The value is returned as (HIGH . LOW).
*/
       (object))
{
  /* This function is pretty 32bit-centric. */
  Hashcode hash = internal_hash (object, 0);
  return Fcons (make_int (hash >> 16), make_int (hash & 0xffff));
}
#endif


/************************************************************************/
/*                            initialization                            */
/************************************************************************/

void
syms_of_elhash (void)
{
  INIT_LRECORD_IMPLEMENTATION (hash_table);

  DEFSUBR (Fhash_table_p);
  DEFSUBR (Fmake_hash_table);
  DEFSUBR (Fcopy_hash_table);
  DEFSUBR (Fgethash);
  DEFSUBR (Fremhash);
  DEFSUBR (Fputhash);
  DEFSUBR (Fclrhash);
  DEFSUBR (Fmaphash);
  DEFSUBR (Fhash_table_count);
  DEFSUBR (Fhash_table_test);
  DEFSUBR (Fhash_table_size);
  DEFSUBR (Fhash_table_rehash_size);
  DEFSUBR (Fhash_table_rehash_threshold);
  DEFSUBR (Fhash_table_weakness);
  DEFSUBR (Fhash_table_type); /* obsolete */
  DEFSUBR (Fsxhash);
#if 0
  DEFSUBR (Finternal_hash_value);
#endif

  DEFSYMBOL_MULTIWORD_PREDICATE (Qhash_tablep);
  DEFSYMBOL (Qhash_table);
  DEFSYMBOL (Qhashtable);
  DEFSYMBOL (Qweakness);
  DEFSYMBOL (Qvalue);
  DEFSYMBOL (Qkey_or_value);
  DEFSYMBOL (Qkey_and_value);
  DEFSYMBOL (Qrehash_size);
  DEFSYMBOL (Qrehash_threshold);

  DEFSYMBOL (Qweak);             /* obsolete */
  DEFSYMBOL (Qkey_weak);     /* obsolete */
  DEFSYMBOL (Qkey_or_value_weak);    /* obsolete */
  DEFSYMBOL (Qvalue_weak); /* obsolete */
  DEFSYMBOL (Qnon_weak);     /* obsolete */

  DEFKEYWORD (Q_test);
  DEFKEYWORD (Q_size);
  DEFKEYWORD (Q_rehash_size);
  DEFKEYWORD (Q_rehash_threshold);
  DEFKEYWORD (Q_weakness);
  DEFKEYWORD (Q_type); /* obsolete */
}

void
vars_of_elhash (void)
{
  /* This must NOT be staticpro'd */
  Vall_weak_hash_tables = Qnil;
  dump_add_weak_object_chain (&Vall_weak_hash_tables);
}