changeset 490:38fb9ae12edd

[xemacs-hg @ 2001-04-30 09:12:03 by martinb] a better string hash function
author martinb
date Mon, 30 Apr 2001 09:12:04 +0000
parents 4a8bb4aa9740
children b3bbdc4058d7
files src/ChangeLog src/depend src/lisp.h src/symbols.c
diffstat 4 files changed, 27 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Mon Apr 30 08:49:26 2001 +0000
+++ b/src/ChangeLog	Mon Apr 30 09:12:04 2001 +0000
@@ -1,3 +1,13 @@
+2001-04-30  Martin Buchholz  <martin@xemacs.org>
+
+	Make string hashing more efficient.
+	Makes intern much faster.
+	Makes hash tables containing strings faster.
+	* symbols.c (hash_string): A much better hash function.
+	Change prototype to return unsigned value.
+	(init_symbols_once_early): Use unsigned type for hash value.
+	(oblookup): Use unsigned type for hash value.
+
 2001-04-04  Martin Buchholz  <martin@xemacs.org>
 
 	* keymap.c (Fmap_keymap): Revert to previous implementation, since
--- a/src/depend	Mon Apr 30 08:49:26 2001 +0000
+++ b/src/depend	Mon Apr 30 09:12:04 2001 +0000
@@ -114,7 +114,7 @@
 ecrt0.o: config.h
 editfns.o: $(LISP_H) buffer.h bufslots.h casetab.h chartab.h commands.h conslots.h console.h device.h events.h extents.h frame.h frameslots.h glyphs.h gui.h insdel.h line-number.h mule-charset.h redisplay.h scrollbar.h specifier.h sysdep.h sysfile.h syspwd.h systime.h toolbar.h window.h winslots.h
 eldap.o: $(LISP_H) buffer.h bufslots.h casetab.h chartab.h eldap.h mule-charset.h opaque.h sysdep.h
-elhash.o: $(LISP_H) bytecode.h elhash.h
+elhash.o: $(LISP_H) bytecode.h elhash.h opaque.h
 emacs-marshals.o: hash.h
 emacs-widget-accessors.o: 
 emacs.o: $(LISP_H) backtrace.h buffer.h bufslots.h casetab.h chartab.h commands.h conslots.h console.h device.h dumper.h frame.h frameslots.h glyphs.h gui.h mule-charset.h nt.h paths.h process.h redisplay.h scrollbar.h specifier.h sysdep.h sysdll.h sysfile.h syssignal.h systime.h systty.h syswindows.h toolbar.h window.h winslots.h
@@ -215,7 +215,7 @@
 sysdll.o: config.h sysdll.h
 termcap.o: $(LISP_H) conslots.h console.h device.h
 terminfo.o: config.h
-tests.o: $(LISP_H) buffer.h bufslots.h casetab.h chartab.h lstream.h mule-charset.h opaque.h
+tests.o: $(LISP_H) buffer.h bufslots.h casetab.h chartab.h elhash.h lstream.h mule-charset.h opaque.h
 toolbar.o: $(LISP_H) buffer.h bufslots.h casetab.h chartab.h conslots.h console.h device.h frame.h frameslots.h glyphs.h gui.h mule-charset.h redisplay.h scrollbar.h specifier.h toolbar.h window.h winslots.h
 tooltalk.o: $(LISP_H) buffer.h bufslots.h casetab.h chartab.h elhash.h mule-charset.h process.h syssignal.h tooltalk.h
 tparam.o: config.h
--- a/src/lisp.h	Mon Apr 30 08:49:26 2001 +0000
+++ b/src/lisp.h	Mon Apr 30 09:12:04 2001 +0000
@@ -2876,7 +2876,7 @@
 					Error_behavior, int, Lisp_Object);
 
 /* Defined in symbols.c */
-int hash_string (const Bufbyte *, Bytecount);
+unsigned int hash_string (const Bufbyte *, Bytecount);
 Lisp_Object intern (const char *);
 Lisp_Object oblookup (Lisp_Object, const Bufbyte *, Bytecount);
 void map_obarray (Lisp_Object, int (*) (Lisp_Object, void *), void *);
--- a/src/symbols.c	Mon Apr 30 08:49:26 2001 +0000
+++ b/src/symbols.c	Mon Apr 30 09:12:04 2001 +0000
@@ -341,7 +341,7 @@
 Lisp_Object
 oblookup (Lisp_Object obarray, const Bufbyte *ptr, Bytecount size)
 {
-  int hash, obsize;
+  unsigned int hash, obsize;
   Lisp_Symbol *tail;
   Lisp_Object bucket;
 
@@ -374,40 +374,20 @@
   return make_int (hash);
 }
 
-#if 0 /* Emacs 19.34 */
-int
+/* An excellent string hashing function.
+   Adapted from glib's g_str_hash().
+   Investigation by Karl Nelson <kenelson@ece.ucdavis.edu>.
+   Do a web search for "g_str_hash X31_HASH" if you want to know more. */
+unsigned int
 hash_string (const Bufbyte *ptr, Bytecount len)
 {
-  const Bufbyte *p = ptr;
-  const Bufbyte *end = p + len;
-  Bufbyte c;
-  int hash = 0;
-
-  while (p != end)
-    {
-      c = *p++;
-      if (c >= 0140) c -= 40;
-      hash = ((hash<<3) + (hash>>28) + c);
-    }
-  return hash & 07777777777;
-}
-#endif
-
-/* derived from hashpjw, Dragon Book P436. */
-int
-hash_string (const Bufbyte *ptr, Bytecount len)
-{
-  int hash = 0;
-
-  while (len-- > 0)
-    {
-      int g;
-      hash = (hash << 4) + *ptr++;
-      g = hash & 0xf0000000;
-      if (g)
-	hash = (hash ^ (g >> 24)) ^ g;
-    }
-  return hash & 07777777777;
+  unsigned int hash;
+
+  for (hash = 0; len; len--, ptr++)
+    /* (31 * hash) will probably be optimized to ((hash << 5) - hash). */
+    hash = 31 * hash + *ptr;
+
+  return hash;
 }
 
 /* Map FN over OBARRAY.  The mapping is stopped when FN returns a
@@ -3250,7 +3230,7 @@
   staticpro (&initial_obarray);
   /* Intern nil in the obarray */
   {
-    int hash = hash_string (string_data (XSYMBOL (Qnil)->name), 3);
+    unsigned int hash = hash_string (string_data (XSYMBOL (Qnil)->name), 3);
     XVECTOR_DATA (Vobarray)[hash % OBARRAY_SIZE] = Qnil;
   }