Mercurial > hg > xemacs-beta
diff src/elhash.c @ 428:3ecd8885ac67 r21-2-22
Import from CVS: tag r21-2-22
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:28:15 +0200 |
parents | |
children | a5df635868b2 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/elhash.c Mon Aug 13 11:28:15 2007 +0200 @@ -0,0 +1,1468 @@ +/* Implementation of the hash table lisp object type. + Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. + Copyright (C) 1995, 1996 Ben Wing. + Copyright (C) 1997 Free Software Foundation, Inc. + +This file is part of XEmacs. + +XEmacs is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +XEmacs is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCNTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with XEmacs; see the file COPYING. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +/* Synched up with: Not in FSF. */ + +#include <config.h> +#include "lisp.h" +#include "bytecode.h" +#include "elhash.h" + +Lisp_Object Qhash_tablep; +static Lisp_Object Qhashtable, Qhash_table; +static Lisp_Object Qweakness, Qvalue; +static Lisp_Object Vall_weak_hash_tables; +static Lisp_Object Qrehash_size, Qrehash_threshold; +static Lisp_Object Q_size, Q_test, Q_weakness, Q_rehash_size, Q_rehash_threshold; + +/* obsolete as of 19990901 in xemacs-21.2 */ +static Lisp_Object Qweak, Qkey_weak, Qvalue_weak, Qnon_weak, Q_type; + +typedef struct hentry +{ + Lisp_Object key; + Lisp_Object value; +} hentry; + +struct Lisp_Hash_Table +{ + struct lcrecord_header header; + size_t size; + size_t count; + size_t rehash_count; + double rehash_size; + double rehash_threshold; + size_t golden_ratio; + hash_table_hash_function_t hash_function; + hash_table_test_function_t test_function; + hentry *hentries; + enum hash_table_weakness weakness; + Lisp_Object next_weak; /* Used to chain together all of the weak + hash tables. Don't mark through this. */ +}; +typedef struct Lisp_Hash_Table Lisp_Hash_Table; + +#define HENTRY_CLEAR_P(hentry) ((*(EMACS_UINT*)(&((hentry)->key))) == 0) +#define CLEAR_HENTRY(hentry) \ + ((*(EMACS_UINT*)(&((hentry)->key))) = 0, \ + (*(EMACS_UINT*)(&((hentry)->value))) = 0) + +#define HASH_TABLE_DEFAULT_SIZE 16 +#define HASH_TABLE_DEFAULT_REHASH_SIZE 1.3 +#define HASH_TABLE_MIN_SIZE 10 + +#define HASH_CODE(key, ht) \ + (((((ht)->hash_function ? (ht)->hash_function (key) : LISP_HASH (key)) \ + * (ht)->golden_ratio) \ + % (ht)->size)) + +#define KEYS_EQUAL_P(key1, key2, testfun) \ + (EQ ((key1), (key2)) || ((testfun) && (testfun) ((key1), (key2)))) + +#define LINEAR_PROBING_LOOP(probe, entries, size) \ + for (; \ + !HENTRY_CLEAR_P (probe) || \ + (probe == entries + size ? \ + (probe = entries, !HENTRY_CLEAR_P (probe)) : 0); \ + probe++) + +#ifndef ERROR_CHECK_HASH_TABLE +# ifdef ERROR_CHECK_TYPECHECK +# define ERROR_CHECK_HASH_TABLE 1 +# else +# define ERROR_CHECK_HASH_TABLE 0 +# endif +#endif + +#if ERROR_CHECK_HASH_TABLE +static void +check_hash_table_invariants (Lisp_Hash_Table *ht) +{ + assert (ht->count < ht->size); + assert (ht->count <= ht->rehash_count); + assert (ht->rehash_count < ht->size); + assert ((double) ht->count * ht->rehash_threshold - 1 <= (double) ht->rehash_count); + assert (HENTRY_CLEAR_P (ht->hentries + ht->size)); +} +#else +#define check_hash_table_invariants(ht) +#endif + +/* We use linear probing instead of double hashing, despite its lack + of blessing by Knuth and company, because, as a result of the + increasing discrepancy between CPU speeds and memory speeds, cache + behavior is becoming increasingly important, e.g: + + For a trivial loop, the penalty for non-sequential access of an array is: + - a factor of 3-4 on Pentium Pro 200 Mhz + - a factor of 10 on Ultrasparc 300 Mhz */ + +/* Return a suitable size for a hash table, with at least SIZE slots. */ +static size_t +hash_table_size (size_t requested_size) +{ + /* Return some prime near, but greater than or equal to, SIZE. + Decades from the time of writing, someone will have a system large + enough that the list below will be too short... */ + static CONST size_t primes [] = + { + 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031, + 1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783, + 19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941, + 204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519, + 1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301, + 10445899, 13579681, 17653589, 22949669, 29834603, 38784989, + 50420551, 65546729, 85210757, 110774011, 144006217, 187208107, + 243370577, 316381771, 411296309, 534685237, 695090819, 903618083, + 1174703521, 1527114613, 1985248999, 2580823717UL, 3355070839UL + }; + /* We've heard of binary search. */ + int low, high; + for (low = 0, high = countof (primes) - 1; high - low > 1;) + { + /* Loop Invariant: size < primes [high] */ + int mid = (low + high) / 2; + if (primes [mid] < requested_size) + low = mid; + else + high = mid; + } + return primes [high]; +} + + +#if 0 /* I don't think these are needed any more. + If using the general lisp_object_equal_*() functions + causes efficiency problems, these can be resurrected. --ben */ +/* equality and hash functions for Lisp strings */ +int +lisp_string_equal (Lisp_Object str1, Lisp_Object str2) +{ + /* This is wrong anyway. You can't use strcmp() on Lisp strings, + because they can contain zero characters. */ + return !strcmp ((char *) XSTRING_DATA (str1), (char *) XSTRING_DATA (str2)); +} + +static hashcode_t +lisp_string_hash (Lisp_Object obj) +{ + return hash_string (XSTRING_DATA (str), XSTRING_LENGTH (str)); +} + +#endif /* 0 */ + +static int +lisp_object_eql_equal (Lisp_Object obj1, Lisp_Object obj2) +{ + return EQ (obj1, obj2) || (FLOATP (obj1) && internal_equal (obj1, obj2, 0)); +} + +static hashcode_t +lisp_object_eql_hash (Lisp_Object obj) +{ + return FLOATP (obj) ? internal_hash (obj, 0) : LISP_HASH (obj); +} + +static int +lisp_object_equal_equal (Lisp_Object obj1, Lisp_Object obj2) +{ + return internal_equal (obj1, obj2, 0); +} + +static hashcode_t +lisp_object_equal_hash (Lisp_Object obj) +{ + return internal_hash (obj, 0); +} + + +static Lisp_Object +mark_hash_table (Lisp_Object obj) +{ + Lisp_Hash_Table *ht = XHASH_TABLE (obj); + + /* If the hash table is weak, we don't want to mark the keys and + values (we scan over them after everything else has been marked, + and mark or remove them as necessary). */ + if (ht->weakness == HASH_TABLE_NON_WEAK) + { + hentry *e, *sentinel; + + for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++) + if (!HENTRY_CLEAR_P (e)) + { + mark_object (e->key); + mark_object (e->value); + } + } + return Qnil; +} + +/* Equality of hash tables. Two hash tables are equal when they are of + the same weakness and test function, they have the same number of + elements, and for each key in the hash table, the values are `equal'. + + This is similar to Common Lisp `equalp' of hash tables, with the + difference that CL requires the keys to be compared with the test + function, which we don't do. Doing that would require consing, and + consing is a bad idea in `equal'. Anyway, our method should provide + the same result -- if the keys are not equal according to the test + function, then Fgethash() in hash_table_equal_mapper() will fail. */ +static int +hash_table_equal (Lisp_Object hash_table1, Lisp_Object hash_table2, int depth) +{ + Lisp_Hash_Table *ht1 = XHASH_TABLE (hash_table1); + Lisp_Hash_Table *ht2 = XHASH_TABLE (hash_table2); + hentry *e, *sentinel; + + if ((ht1->test_function != ht2->test_function) || + (ht1->weakness != ht2->weakness) || + (ht1->count != ht2->count)) + return 0; + + depth++; + + for (e = ht1->hentries, sentinel = e + ht1->size; e < sentinel; e++) + if (!HENTRY_CLEAR_P (e)) + /* Look up the key in the other hash table, and compare the values. */ + { + Lisp_Object value_in_other = Fgethash (e->key, hash_table2, Qunbound); + if (UNBOUNDP (value_in_other) || + !internal_equal (e->value, value_in_other, depth)) + return 0; /* Give up */ + } + + return 1; +} + +/* Printing hash tables. + + This is non-trivial, because we use a readable structure-style + syntax for hash tables. This means that a typical hash table will be + readably printed in the form of: + + #s(hash-table size 2 data (key1 value1 key2 value2)) + + The supported hash table structure keywords and their values are: + `test' (eql (or nil), eq or equal) + `size' (a natnum or nil) + `rehash-size' (a float) + `rehash-threshold' (a float) + `weakness' (nil, t, key or value) + `data' (a list) + + If `print-readably' is non-nil, then a simpler syntax is used; for + instance: + + #<hash-table size 2/13 data (key1 value1 key2 value2) 0x874d> + + The data is truncated to four pairs, and the rest is shown with + `...'. This printer does not cons. */ + + +/* Print the data of the hash table. This maps through a Lisp + hash table and prints key/value pairs using PRINTCHARFUN. */ +static void +print_hash_table_data (Lisp_Hash_Table *ht, Lisp_Object printcharfun) +{ + int count = 0; + hentry *e, *sentinel; + + write_c_string (" data (", printcharfun); + + for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++) + if (!HENTRY_CLEAR_P (e)) + { + if (count > 0) + write_c_string (" ", printcharfun); + if (!print_readably && count > 3) + { + write_c_string ("...", printcharfun); + break; + } + print_internal (e->key, printcharfun, 1); + write_c_string (" ", printcharfun); + print_internal (e->value, printcharfun, 1); + count++; + } + + write_c_string (")", printcharfun); +} + +static void +print_hash_table (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) +{ + Lisp_Hash_Table *ht = XHASH_TABLE (obj); + char buf[128]; + + write_c_string (print_readably ? "#s(hash-table" : "#<hash-table", + printcharfun); + + /* These checks have a kludgy look to them, but they are safe. + Due to nature of hashing, you cannot use arbitrary + test functions anyway. */ + if (!ht->test_function) + write_c_string (" test eq", printcharfun); + else if (ht->test_function == lisp_object_equal_equal) + write_c_string (" test equal", printcharfun); + else if (ht->test_function == lisp_object_eql_equal) + DO_NOTHING; + else + abort (); + + if (ht->count || !print_readably) + { + if (print_readably) + sprintf (buf, " size %lu", (unsigned long) ht->count); + else + sprintf (buf, " size %lu/%lu", + (unsigned long) ht->count, + (unsigned long) ht->size); + write_c_string (buf, printcharfun); + } + + if (ht->weakness != HASH_TABLE_NON_WEAK) + { + sprintf (buf, " weakness %s", + (ht->weakness == HASH_TABLE_WEAK ? "t" : + ht->weakness == HASH_TABLE_KEY_WEAK ? "key" : + ht->weakness == HASH_TABLE_VALUE_WEAK ? "value" : + "you-d-better-not-see-this")); + write_c_string (buf, printcharfun); + } + + if (ht->count) + print_hash_table_data (ht, printcharfun); + + if (print_readably) + write_c_string (")", printcharfun); + else + { + sprintf (buf, " 0x%x>", ht->header.uid); + write_c_string (buf, printcharfun); + } +} + +static void +finalize_hash_table (void *header, int for_disksave) +{ + if (!for_disksave) + { + Lisp_Hash_Table *ht = (Lisp_Hash_Table *) header; + + xfree (ht->hentries); + ht->hentries = 0; + } +} + +static const struct lrecord_description hentry_description_1[] = { + { XD_LISP_OBJECT, offsetof(hentry, key), 2 }, + { XD_END } +}; + +static const struct struct_description hentry_description = { + sizeof(hentry), + hentry_description_1 +}; + +const struct lrecord_description hash_table_description[] = { + { XD_SIZE_T, offsetof(Lisp_Hash_Table, size) }, + { XD_STRUCT_PTR, offsetof(Lisp_Hash_Table, hentries), XD_INDIRECT(0, 1), &hentry_description }, + { XD_LO_LINK, offsetof(Lisp_Hash_Table, next_weak) }, + { XD_END } +}; + +DEFINE_LRECORD_IMPLEMENTATION ("hash-table", hash_table, + mark_hash_table, print_hash_table, + finalize_hash_table, + /* #### Implement hash_table_hash()! */ + hash_table_equal, 0, + hash_table_description, + Lisp_Hash_Table); + +static Lisp_Hash_Table * +xhash_table (Lisp_Object hash_table) +{ + if (!gc_in_progress) + CHECK_HASH_TABLE (hash_table); + check_hash_table_invariants (XHASH_TABLE (hash_table)); + return XHASH_TABLE (hash_table); +} + + +/************************************************************************/ +/* Creation of Hash Tables */ +/************************************************************************/ + +/* Creation of hash tables, without error-checking. */ +static double +hash_table_rehash_threshold (Lisp_Hash_Table *ht) +{ + return + ht->rehash_threshold > 0.0 ? ht->rehash_threshold : + ht->size > 4096 && !ht->test_function ? 0.7 : 0.6; +} + +static void +compute_hash_table_derived_values (Lisp_Hash_Table *ht) +{ + ht->rehash_count = (size_t) + ((double) ht->size * hash_table_rehash_threshold (ht)); + ht->golden_ratio = (size_t) + ((double) ht->size * (.6180339887 / (double) sizeof (Lisp_Object))); +} + +Lisp_Object +make_general_lisp_hash_table (enum hash_table_test test, + size_t size, + double rehash_size, + double rehash_threshold, + enum hash_table_weakness weakness) +{ + Lisp_Object hash_table; + Lisp_Hash_Table *ht = alloc_lcrecord_type (Lisp_Hash_Table, &lrecord_hash_table); + + ht->rehash_size = rehash_size; + ht->rehash_threshold = rehash_threshold; + ht->weakness = weakness; + + switch (test) + { + case HASH_TABLE_EQ: + ht->test_function = 0; + ht->hash_function = 0; + break; + + case HASH_TABLE_EQL: + ht->test_function = lisp_object_eql_equal; + ht->hash_function = lisp_object_eql_hash; + break; + + case HASH_TABLE_EQUAL: + ht->test_function = lisp_object_equal_equal; + ht->hash_function = lisp_object_equal_hash; + break; + + default: + abort (); + } + + if (ht->rehash_size <= 0.0) + ht->rehash_size = HASH_TABLE_DEFAULT_REHASH_SIZE; + if (size < HASH_TABLE_MIN_SIZE) + size = HASH_TABLE_MIN_SIZE; + if (rehash_threshold < 0.0) + rehash_threshold = 0.75; + ht->size = + hash_table_size ((size_t) ((double) size / hash_table_rehash_threshold (ht)) + 1); + ht->count = 0; + compute_hash_table_derived_values (ht); + + /* We leave room for one never-occupied sentinel hentry at the end. */ + ht->hentries = xnew_array (hentry, ht->size + 1); + + { + hentry *e, *sentinel; + for (e = ht->hentries, sentinel = e + ht->size; e <= sentinel; e++) + CLEAR_HENTRY (e); + } + + XSETHASH_TABLE (hash_table, ht); + + if (weakness == HASH_TABLE_NON_WEAK) + ht->next_weak = Qunbound; + else + ht->next_weak = Vall_weak_hash_tables, Vall_weak_hash_tables = hash_table; + + return hash_table; +} + +Lisp_Object +make_lisp_hash_table (size_t size, + enum hash_table_weakness weakness, + enum hash_table_test test) +{ + return make_general_lisp_hash_table + (test, size, HASH_TABLE_DEFAULT_REHASH_SIZE, -1.0, weakness); +} + +/* Pretty reading of hash tables. + + Here we use the existing structures mechanism (which is, + unfortunately, pretty cumbersome) for validating and instantiating + the hash tables. The idea is that the side-effect of reading a + #s(hash-table PLIST) object is creation of a hash table with desired + properties, and that the hash table is returned. */ + +/* Validation functions: each keyword provides its own validation + function. The errors should maybe be continuable, but it is + unclear how this would cope with ERRB. */ +static int +hash_table_size_validate (Lisp_Object keyword, Lisp_Object value, + Error_behavior errb) +{ + if (NATNUMP (value)) + return 1; + + maybe_signal_error (Qwrong_type_argument, list2 (Qnatnump, value), + Qhash_table, errb); + return 0; +} + +static size_t +decode_hash_table_size (Lisp_Object obj) +{ + return NILP (obj) ? HASH_TABLE_DEFAULT_SIZE : XINT (obj); +} + +static int +hash_table_weakness_validate (Lisp_Object keyword, Lisp_Object value, + Error_behavior errb) +{ + if (EQ (value, Qnil)) return 1; + if (EQ (value, Qt)) return 1; + if (EQ (value, Qkey)) return 1; + if (EQ (value, Qvalue)) return 1; + + /* Following values are obsolete as of 19990901 in xemacs-21.2 */ + if (EQ (value, Qnon_weak)) return 1; + if (EQ (value, Qweak)) return 1; + if (EQ (value, Qkey_weak)) return 1; + if (EQ (value, Qvalue_weak)) return 1; + + maybe_signal_simple_error ("Invalid hash table weakness", + value, Qhash_table, errb); + return 0; +} + +static enum hash_table_weakness +decode_hash_table_weakness (Lisp_Object obj) +{ + if (EQ (obj, Qnil)) return HASH_TABLE_NON_WEAK; + if (EQ (obj, Qt)) return HASH_TABLE_WEAK; + if (EQ (obj, Qkey)) return HASH_TABLE_KEY_WEAK; + if (EQ (obj, Qvalue)) return HASH_TABLE_VALUE_WEAK; + + /* Following values are obsolete as of 19990901 in xemacs-21.2 */ + if (EQ (obj, Qnon_weak)) return HASH_TABLE_NON_WEAK; + if (EQ (obj, Qweak)) return HASH_TABLE_WEAK; + if (EQ (obj, Qkey_weak)) return HASH_TABLE_KEY_WEAK; + if (EQ (obj, Qvalue_weak)) return HASH_TABLE_VALUE_WEAK; + + signal_simple_error ("Invalid hash table weakness", obj); + return HASH_TABLE_NON_WEAK; /* not reached */ +} + +static int +hash_table_test_validate (Lisp_Object keyword, Lisp_Object value, + Error_behavior errb) +{ + if (EQ (value, Qnil)) return 1; + if (EQ (value, Qeq)) return 1; + if (EQ (value, Qequal)) return 1; + if (EQ (value, Qeql)) return 1; + + maybe_signal_simple_error ("Invalid hash table test", + value, Qhash_table, errb); + return 0; +} + +static enum hash_table_test +decode_hash_table_test (Lisp_Object obj) +{ + if (EQ (obj, Qnil)) return HASH_TABLE_EQL; + if (EQ (obj, Qeq)) return HASH_TABLE_EQ; + if (EQ (obj, Qequal)) return HASH_TABLE_EQUAL; + if (EQ (obj, Qeql)) return HASH_TABLE_EQL; + + signal_simple_error ("Invalid hash table test", obj); + return HASH_TABLE_EQ; /* not reached */ +} + +static int +hash_table_rehash_size_validate (Lisp_Object keyword, Lisp_Object value, + Error_behavior errb) +{ + if (!FLOATP (value)) + { + maybe_signal_error (Qwrong_type_argument, list2 (Qfloatp, value), + Qhash_table, errb); + return 0; + } + + { + double rehash_size = XFLOAT_DATA (value); + if (rehash_size <= 1.0) + { + maybe_signal_simple_error + ("Hash table rehash size must be greater than 1.0", + value, Qhash_table, errb); + return 0; + } + } + + return 1; +} + +static double +decode_hash_table_rehash_size (Lisp_Object rehash_size) +{ + return NILP (rehash_size) ? -1.0 : XFLOAT_DATA (rehash_size); +} + +static int +hash_table_rehash_threshold_validate (Lisp_Object keyword, Lisp_Object value, + Error_behavior errb) +{ + if (!FLOATP (value)) + { + maybe_signal_error (Qwrong_type_argument, list2 (Qfloatp, value), + Qhash_table, errb); + return 0; + } + + { + double rehash_threshold = XFLOAT_DATA (value); + if (rehash_threshold <= 0.0 || rehash_threshold >= 1.0) + { + maybe_signal_simple_error + ("Hash table rehash threshold must be between 0.0 and 1.0", + value, Qhash_table, errb); + return 0; + } + } + + return 1; +} + +static double +decode_hash_table_rehash_threshold (Lisp_Object rehash_threshold) +{ + return NILP (rehash_threshold) ? -1.0 : XFLOAT_DATA (rehash_threshold); +} + +static int +hash_table_data_validate (Lisp_Object keyword, Lisp_Object value, + Error_behavior errb) +{ + int len; + + GET_EXTERNAL_LIST_LENGTH (value, len); + + if (len & 1) + { + maybe_signal_simple_error + ("Hash table data must have alternating key/value pairs", + value, Qhash_table, errb); + return 0; + } + return 1; +} + +/* The actual instantiation of a hash table. This does practically no + error checking, because it relies on the fact that the paranoid + functions above have error-checked everything to the last details. + If this assumption is wrong, we will get a crash immediately (with + error-checking compiled in), and we'll know if there is a bug in + the structure mechanism. So there. */ +static Lisp_Object +hash_table_instantiate (Lisp_Object plist) +{ + Lisp_Object hash_table; + Lisp_Object test = Qnil; + Lisp_Object size = Qnil; + Lisp_Object rehash_size = Qnil; + Lisp_Object rehash_threshold = Qnil; + Lisp_Object weakness = Qnil; + Lisp_Object data = Qnil; + + while (!NILP (plist)) + { + Lisp_Object key, value; + key = XCAR (plist); plist = XCDR (plist); + value = XCAR (plist); plist = XCDR (plist); + + if (EQ (key, Qtest)) test = value; + else if (EQ (key, Qsize)) size = value; + else if (EQ (key, Qrehash_size)) rehash_size = value; + else if (EQ (key, Qrehash_threshold)) rehash_threshold = value; + else if (EQ (key, Qweakness)) weakness = value; + else if (EQ (key, Qdata)) data = value; + else if (EQ (key, Qtype))/*obsolete*/ weakness = value; + else + abort (); + } + + /* Create the hash table. */ + hash_table = make_general_lisp_hash_table + (decode_hash_table_test (test), + decode_hash_table_size (size), + decode_hash_table_rehash_size (rehash_size), + decode_hash_table_rehash_threshold (rehash_threshold), + decode_hash_table_weakness (weakness)); + + /* I'm not sure whether this can GC, but better safe than sorry. */ + { + struct gcpro gcpro1; + GCPRO1 (hash_table); + + /* And fill it with data. */ + while (!NILP (data)) + { + Lisp_Object key, value; + key = XCAR (data); data = XCDR (data); + value = XCAR (data); data = XCDR (data); + Fputhash (key, value, hash_table); + } + UNGCPRO; + } + + return hash_table; +} + +static void +structure_type_create_hash_table_structure_name (Lisp_Object structure_name) +{ + struct structure_type *st; + + st = define_structure_type (structure_name, 0, hash_table_instantiate); + define_structure_type_keyword (st, Qtest, hash_table_test_validate); + define_structure_type_keyword (st, Qsize, hash_table_size_validate); + define_structure_type_keyword (st, Qrehash_size, hash_table_rehash_size_validate); + define_structure_type_keyword (st, Qrehash_threshold, hash_table_rehash_threshold_validate); + define_structure_type_keyword (st, Qweakness, hash_table_weakness_validate); + define_structure_type_keyword (st, Qdata, hash_table_data_validate); + + /* obsolete as of 19990901 in xemacs-21.2 */ + define_structure_type_keyword (st, Qtype, hash_table_weakness_validate); +} + +/* Create a built-in Lisp structure type named `hash-table'. + We make #s(hashtable ...) equivalent to #s(hash-table ...), + for backward compatibility. + This is called from emacs.c. */ +void +structure_type_create_hash_table (void) +{ + structure_type_create_hash_table_structure_name (Qhash_table); + structure_type_create_hash_table_structure_name (Qhashtable); /* compat */ +} + + +/************************************************************************/ +/* Definition of Lisp-visible methods */ +/************************************************************************/ + +DEFUN ("hash-table-p", Fhash_table_p, 1, 1, 0, /* +Return t if OBJECT is a hash table, else nil. +*/ + (object)) +{ + return HASH_TABLEP (object) ? Qt : Qnil; +} + +DEFUN ("make-hash-table", Fmake_hash_table, 0, MANY, 0, /* +Return a new empty hash table object. +Use Common Lisp style keywords to specify hash table properties. + (make-hash-table &key test size rehash-size rehash-threshold weakness) + +Keyword :test can be `eq', `eql' (default) or `equal'. +Comparison between keys is done using this function. +If speed is important, consider using `eq'. +When storing strings in the hash table, you will likely need to use `equal'. + +Keyword :size specifies the number of keys likely to be inserted. +This number of entries can be inserted without enlarging the hash table. + +Keyword :rehash-size must be a float greater than 1.0, and specifies +the factor by which to increase the size of the hash table when enlarging. + +Keyword :rehash-threshold must be a float between 0.0 and 1.0, +and specifies the load factor of the hash table which triggers enlarging. + +Non-standard keyword :weakness can be `nil' (default), `t', `key' or `value'. + +A weak hash table is one whose pointers do not count as GC referents: +for any key-value pair in the hash table, if the only remaining pointer +to either the key or the value is in a weak hash table, then the pair +will be removed from the hash table, and the key and value collected. +A non-weak hash table (or any other pointer) would prevent the object +from being collected. + +A key-weak hash table is similar to a fully-weak hash table except that +a key-value pair will be removed only if the key remains unmarked +outside of weak hash tables. The pair will remain in the hash table if +the key is pointed to by something other than a weak hash table, even +if the value is not. + +A value-weak hash table is similar to a fully-weak hash table except +that a key-value pair will be removed only if the value remains +unmarked outside of weak hash tables. The pair will remain in the +hash table if the value is pointed to by something other than a weak +hash table, even if the key is not. +*/ + (int nargs, Lisp_Object *args)) +{ + int i = 0; + Lisp_Object test = Qnil; + Lisp_Object size = Qnil; + Lisp_Object rehash_size = Qnil; + Lisp_Object rehash_threshold = Qnil; + Lisp_Object weakness = Qnil; + + while (i + 1 < nargs) + { + Lisp_Object keyword = args[i++]; + Lisp_Object value = args[i++]; + + if (EQ (keyword, Q_test)) test = value; + else if (EQ (keyword, Q_size)) size = value; + else if (EQ (keyword, Q_rehash_size)) rehash_size = value; + else if (EQ (keyword, Q_rehash_threshold)) rehash_threshold = value; + else if (EQ (keyword, Q_weakness)) weakness = value; + else if (EQ (keyword, Q_type))/*obsolete*/ weakness = value; + else signal_simple_error ("Invalid hash table property keyword", keyword); + } + + if (i < nargs) + signal_simple_error ("Hash table property requires a value", args[i]); + +#define VALIDATE_VAR(var) \ +if (!NILP (var)) hash_table_##var##_validate (Q##var, var, ERROR_ME); + + VALIDATE_VAR (test); + VALIDATE_VAR (size); + VALIDATE_VAR (rehash_size); + VALIDATE_VAR (rehash_threshold); + VALIDATE_VAR (weakness); + + return make_general_lisp_hash_table + (decode_hash_table_test (test), + decode_hash_table_size (size), + decode_hash_table_rehash_size (rehash_size), + decode_hash_table_rehash_threshold (rehash_threshold), + decode_hash_table_weakness (weakness)); +} + +DEFUN ("copy-hash-table", Fcopy_hash_table, 1, 1, 0, /* +Return a new hash table containing the same keys and values as HASH-TABLE. +The keys and values will not themselves be copied. +*/ + (hash_table)) +{ + CONST Lisp_Hash_Table *ht_old = xhash_table (hash_table); + Lisp_Hash_Table *ht = alloc_lcrecord_type (Lisp_Hash_Table, &lrecord_hash_table); + + copy_lcrecord (ht, ht_old); + + ht->hentries = xnew_array (hentry, ht_old->size + 1); + memcpy (ht->hentries, ht_old->hentries, (ht_old->size + 1) * sizeof (hentry)); + + XSETHASH_TABLE (hash_table, ht); + + if (! EQ (ht->next_weak, Qunbound)) + { + ht->next_weak = Vall_weak_hash_tables; + Vall_weak_hash_tables = hash_table; + } + + return hash_table; +} + +static void +resize_hash_table (Lisp_Hash_Table *ht, size_t new_size) +{ + hentry *old_entries, *new_entries, *old_sentinel, *new_sentinel, *e; + size_t old_size; + + old_size = ht->size; + ht->size = new_size; + + old_entries = ht->hentries; + + ht->hentries = xnew_array (hentry, new_size + 1); + new_entries = ht->hentries; + + old_sentinel = old_entries + old_size; + new_sentinel = new_entries + new_size; + + for (e = new_entries; e <= new_sentinel; e++) + CLEAR_HENTRY (e); + + compute_hash_table_derived_values (ht); + + for (e = old_entries; e < old_sentinel; e++) + if (!HENTRY_CLEAR_P (e)) + { + hentry *probe = new_entries + HASH_CODE (e->key, ht); + LINEAR_PROBING_LOOP (probe, new_entries, new_size) + ; + *probe = *e; + } + + if (!DUMPEDP (old_entries)) + xfree (old_entries); +} + +void +reorganize_hash_table (Lisp_Hash_Table *ht) +{ + resize_hash_table (ht, ht->size); +} + +static void +enlarge_hash_table (Lisp_Hash_Table *ht) +{ + size_t new_size = + hash_table_size ((size_t) ((double) ht->size * ht->rehash_size)); + resize_hash_table (ht, new_size); +} + +static hentry * +find_hentry (Lisp_Object key, CONST Lisp_Hash_Table *ht) +{ + hash_table_test_function_t test_function = ht->test_function; + hentry *entries = ht->hentries; + hentry *probe = entries + HASH_CODE (key, ht); + + LINEAR_PROBING_LOOP (probe, entries, ht->size) + if (KEYS_EQUAL_P (probe->key, key, test_function)) + break; + + return probe; +} + +DEFUN ("gethash", Fgethash, 2, 3, 0, /* +Find hash value for KEY in HASH-TABLE. +If there is no corresponding value, return DEFAULT (which defaults to nil). +*/ + (key, hash_table, default_)) +{ + CONST Lisp_Hash_Table *ht = xhash_table (hash_table); + hentry *e = find_hentry (key, ht); + + return HENTRY_CLEAR_P (e) ? default_ : e->value; +} + +DEFUN ("puthash", Fputhash, 3, 3, 0, /* +Hash KEY to VALUE in HASH-TABLE. +*/ + (key, value, hash_table)) +{ + Lisp_Hash_Table *ht = xhash_table (hash_table); + hentry *e = find_hentry (key, ht); + + if (!HENTRY_CLEAR_P (e)) + return e->value = value; + + e->key = key; + e->value = value; + + if (++ht->count >= ht->rehash_count) + enlarge_hash_table (ht); + + return value; +} + +/* Remove hentry pointed at by PROBE. + Subsequent entries are removed and reinserted. + We don't use tombstones - too wasteful. */ +static void +remhash_1 (Lisp_Hash_Table *ht, hentry *entries, hentry *probe) +{ + size_t size = ht->size; + CLEAR_HENTRY (probe); + probe++; + ht->count--; + + LINEAR_PROBING_LOOP (probe, entries, size) + { + Lisp_Object key = probe->key; + hentry *probe2 = entries + HASH_CODE (key, ht); + LINEAR_PROBING_LOOP (probe2, entries, size) + if (EQ (probe2->key, key)) + /* hentry at probe doesn't need to move. */ + goto continue_outer_loop; + /* Move hentry from probe to new home at probe2. */ + *probe2 = *probe; + CLEAR_HENTRY (probe); + continue_outer_loop: continue; + } +} + +DEFUN ("remhash", Fremhash, 2, 2, 0, /* +Remove the entry for KEY from HASH-TABLE. +Do nothing if there is no entry for KEY in HASH-TABLE. +*/ + (key, hash_table)) +{ + Lisp_Hash_Table *ht = xhash_table (hash_table); + hentry *e = find_hentry (key, ht); + + if (HENTRY_CLEAR_P (e)) + return Qnil; + + remhash_1 (ht, ht->hentries, e); + return Qt; +} + +DEFUN ("clrhash", Fclrhash, 1, 1, 0, /* +Remove all entries from HASH-TABLE, leaving it empty. +*/ + (hash_table)) +{ + Lisp_Hash_Table *ht = xhash_table (hash_table); + hentry *e, *sentinel; + + for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++) + CLEAR_HENTRY (e); + ht->count = 0; + + return hash_table; +} + +/************************************************************************/ +/* Accessor Functions */ +/************************************************************************/ + +DEFUN ("hash-table-count", Fhash_table_count, 1, 1, 0, /* +Return the number of entries in HASH-TABLE. +*/ + (hash_table)) +{ + return make_int (xhash_table (hash_table)->count); +} + +DEFUN ("hash-table-test", Fhash_table_test, 1, 1, 0, /* +Return the test function of HASH-TABLE. +This can be one of `eq', `eql' or `equal'. +*/ + (hash_table)) +{ + hash_table_test_function_t fun = xhash_table (hash_table)->test_function; + + return (fun == lisp_object_eql_equal ? Qeql : + fun == lisp_object_equal_equal ? Qequal : + Qeq); +} + +DEFUN ("hash-table-size", Fhash_table_size, 1, 1, 0, /* +Return the size of HASH-TABLE. +This is the current number of slots in HASH-TABLE, whether occupied or not. +*/ + (hash_table)) +{ + return make_int (xhash_table (hash_table)->size); +} + +DEFUN ("hash-table-rehash-size", Fhash_table_rehash_size, 1, 1, 0, /* +Return the current rehash size of HASH-TABLE. +This is a float greater than 1.0; the factor by which HASH-TABLE +is enlarged when the rehash threshold is exceeded. +*/ + (hash_table)) +{ + return make_float (xhash_table (hash_table)->rehash_size); +} + +DEFUN ("hash-table-rehash-threshold", Fhash_table_rehash_threshold, 1, 1, 0, /* +Return the current rehash threshold of HASH-TABLE. +This is a float between 0.0 and 1.0; the maximum `load factor' of HASH-TABLE, +beyond which the HASH-TABLE is enlarged by rehashing. +*/ + (hash_table)) +{ + return make_float (hash_table_rehash_threshold (xhash_table (hash_table))); +} + +DEFUN ("hash-table-weakness", Fhash_table_weakness, 1, 1, 0, /* +Return the weakness of HASH-TABLE. +This can be one of `nil', `t', `key' or `value'. +*/ + (hash_table)) +{ + switch (xhash_table (hash_table)->weakness) + { + case HASH_TABLE_WEAK: return Qt; + case HASH_TABLE_KEY_WEAK: return Qkey; + case HASH_TABLE_VALUE_WEAK: return Qvalue; + default: return Qnil; + } +} + +/* obsolete as of 19990901 in xemacs-21.2 */ +DEFUN ("hash-table-type", Fhash_table_type, 1, 1, 0, /* +Return the type of HASH-TABLE. +This can be one of `non-weak', `weak', `key-weak' or `value-weak'. +*/ + (hash_table)) +{ + switch (xhash_table (hash_table)->weakness) + { + case HASH_TABLE_WEAK: return Qweak; + case HASH_TABLE_KEY_WEAK: return Qkey_weak; + case HASH_TABLE_VALUE_WEAK: return Qvalue_weak; + default: return Qnon_weak; + } +} + +/************************************************************************/ +/* Mapping Functions */ +/************************************************************************/ +DEFUN ("maphash", Fmaphash, 2, 2, 0, /* +Map FUNCTION over entries in HASH-TABLE, calling it with two args, +each key and value in HASH-TABLE. + +FUNCTION may not modify HASH-TABLE, with the one exception that FUNCTION +may remhash or puthash the entry currently being processed by FUNCTION. +*/ + (function, hash_table)) +{ + CONST Lisp_Hash_Table *ht = xhash_table (hash_table); + CONST hentry *e, *sentinel; + + for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++) + if (!HENTRY_CLEAR_P (e)) + { + Lisp_Object args[3], key; + again: + key = e->key; + args[0] = function; + args[1] = key; + args[2] = e->value; + Ffuncall (countof (args), args); + /* Has FUNCTION done a remhash? */ + if (!EQ (key, e->key) && !HENTRY_CLEAR_P (e)) + goto again; + } + + return Qnil; +} + +/* Map *C* function FUNCTION over the elements of a lisp hash table. */ +void +elisp_maphash (maphash_function_t function, + Lisp_Object hash_table, void *extra_arg) +{ + CONST Lisp_Hash_Table *ht = XHASH_TABLE (hash_table); + CONST hentry *e, *sentinel; + + for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++) + if (!HENTRY_CLEAR_P (e)) + { + Lisp_Object key; + again: + key = e->key; + if (function (key, e->value, extra_arg)) + return; + /* Has FUNCTION done a remhash? */ + if (!EQ (key, e->key) && !HENTRY_CLEAR_P (e)) + goto again; + } +} + +/* Remove all elements of a lisp hash table satisfying *C* predicate PREDICATE. */ +void +elisp_map_remhash (maphash_function_t predicate, + Lisp_Object hash_table, void *extra_arg) +{ + Lisp_Hash_Table *ht = XHASH_TABLE (hash_table); + hentry *e, *entries, *sentinel; + + for (e = entries = ht->hentries, sentinel = e + ht->size; e < sentinel; e++) + if (!HENTRY_CLEAR_P (e)) + { + again: + if (predicate (e->key, e->value, extra_arg)) + { + remhash_1 (ht, entries, e); + if (!HENTRY_CLEAR_P (e)) + goto again; + } + } +} + + +/************************************************************************/ +/* garbage collecting weak hash tables */ +/************************************************************************/ + +/* Complete the marking for semi-weak hash tables. */ +int +finish_marking_weak_hash_tables (void) +{ + Lisp_Object hash_table; + int did_mark = 0; + + for (hash_table = Vall_weak_hash_tables; + !NILP (hash_table); + hash_table = XHASH_TABLE (hash_table)->next_weak) + { + CONST Lisp_Hash_Table *ht = XHASH_TABLE (hash_table); + CONST hentry *e = ht->hentries; + CONST hentry *sentinel = e + ht->size; + + if (! marked_p (hash_table)) + /* The hash table is probably garbage. Ignore it. */ + continue; + + /* Now, scan over all the pairs. For all pairs that are + half-marked, we may need to mark the other half if we're + keeping this pair. */ +#define MARK_OBJ(obj) \ +do { if (!marked_p (obj)) mark_object (obj), did_mark = 1; } while (0) + + switch (ht->weakness) + { + case HASH_TABLE_KEY_WEAK: + for (; e < sentinel; e++) + if (!HENTRY_CLEAR_P (e)) + if (marked_p (e->key)) + MARK_OBJ (e->value); + break; + + case HASH_TABLE_VALUE_WEAK: + for (; e < sentinel; e++) + if (!HENTRY_CLEAR_P (e)) + if (marked_p (e->value)) + MARK_OBJ (e->key); + break; + + case HASH_TABLE_KEY_CAR_WEAK: + for (; e < sentinel; e++) + if (!HENTRY_CLEAR_P (e)) + if (!CONSP (e->key) || marked_p (XCAR (e->key))) + { + MARK_OBJ (e->key); + MARK_OBJ (e->value); + } + break; + + case HASH_TABLE_VALUE_CAR_WEAK: + for (; e < sentinel; e++) + if (!HENTRY_CLEAR_P (e)) + if (!CONSP (e->value) || marked_p (XCAR (e->value))) + { + MARK_OBJ (e->key); + MARK_OBJ (e->value); + } + break; + + default: + break; + } + } + + return did_mark; +} + +void +prune_weak_hash_tables (void) +{ + Lisp_Object hash_table, prev = Qnil; + for (hash_table = Vall_weak_hash_tables; + !NILP (hash_table); + hash_table = XHASH_TABLE (hash_table)->next_weak) + { + if (! marked_p (hash_table)) + { + /* This hash table itself is garbage. Remove it from the list. */ + if (NILP (prev)) + Vall_weak_hash_tables = XHASH_TABLE (hash_table)->next_weak; + else + XHASH_TABLE (prev)->next_weak = XHASH_TABLE (hash_table)->next_weak; + } + else + { + /* Now, scan over all the pairs. Remove all of the pairs + in which the key or value, or both, is unmarked + (depending on the weakness of the hash table). */ + Lisp_Hash_Table *ht = XHASH_TABLE (hash_table); + hentry *entries = ht->hentries; + hentry *sentinel = entries + ht->size; + hentry *e; + + for (e = entries; e < sentinel; e++) + if (!HENTRY_CLEAR_P (e)) + { + again: + if (!marked_p (e->key) || !marked_p (e->value)) + { + remhash_1 (ht, entries, e); + if (!HENTRY_CLEAR_P (e)) + goto again; + } + } + + prev = hash_table; + } + } +} + +/* Return a hash value for an array of Lisp_Objects of size SIZE. */ + +hashcode_t +internal_array_hash (Lisp_Object *arr, int size, int depth) +{ + int i; + unsigned long hash = 0; + + if (size <= 5) + { + for (i = 0; i < size; i++) + hash = HASH2 (hash, internal_hash (arr[i], depth + 1)); + return hash; + } + + /* just pick five elements scattered throughout the array. + A slightly better approach would be to offset by some + noise factor from the points chosen below. */ + for (i = 0; i < 5; i++) + hash = HASH2 (hash, internal_hash (arr[i*size/5], depth + 1)); + + return hash; +} + +/* Return a hash value for a Lisp_Object. This is for use when hashing + objects with the comparison being `equal' (for `eq', you can just + use the Lisp_Object itself as the hash value). You need to make a + tradeoff between the speed of the hash function and how good the + hashing is. In particular, the hash function needs to be FAST, + so you can't just traipse down the whole tree hashing everything + together. Most of the time, objects will differ in the first + few elements you hash. Thus, we only go to a short depth (5) + and only hash at most 5 elements out of a vector. Theoretically + we could still take 5^5 time (a big big number) to compute a + hash, but practically this won't ever happen. */ + +hashcode_t +internal_hash (Lisp_Object obj, int depth) +{ + if (depth > 5) + return 0; + if (CONSP (obj)) + { + /* no point in worrying about tail recursion, since we're not + going very deep */ + return HASH2 (internal_hash (XCAR (obj), depth + 1), + internal_hash (XCDR (obj), depth + 1)); + } + if (STRINGP (obj)) + { + return hash_string (XSTRING_DATA (obj), XSTRING_LENGTH (obj)); + } + if (VECTORP (obj)) + { + return HASH2 (XVECTOR_LENGTH (obj), + internal_array_hash (XVECTOR_DATA (obj), + XVECTOR_LENGTH (obj), + depth + 1)); + } + if (LRECORDP (obj)) + { + CONST struct lrecord_implementation + *imp = XRECORD_LHEADER_IMPLEMENTATION (obj); + if (imp->hash) + return imp->hash (obj, depth); + } + + return LISP_HASH (obj); +} + +DEFUN ("sxhash", Fsxhash, 1, 1, 0, /* +Return a hash value for OBJECT. +(equal obj1 obj2) implies (= (sxhash obj1) (sxhash obj2)). +*/ + (object)) +{ + return make_int (internal_hash (object, 0)); +} + +#if 0 +xxDEFUN ("internal-hash-value", Finternal_hash_value, 1, 1, 0, /* +Hash value of OBJECT. For debugging. +The value is returned as (HIGH . LOW). +*/ + (object)) +{ + /* This function is pretty 32bit-centric. */ + unsigned long hash = internal_hash (object, 0); + return Fcons (hash >> 16, hash & 0xffff); +} +#endif + + +/************************************************************************/ +/* initialization */ +/************************************************************************/ + +void +syms_of_elhash (void) +{ + DEFSUBR (Fhash_table_p); + DEFSUBR (Fmake_hash_table); + DEFSUBR (Fcopy_hash_table); + DEFSUBR (Fgethash); + DEFSUBR (Fremhash); + DEFSUBR (Fputhash); + DEFSUBR (Fclrhash); + DEFSUBR (Fmaphash); + DEFSUBR (Fhash_table_count); + DEFSUBR (Fhash_table_test); + DEFSUBR (Fhash_table_size); + DEFSUBR (Fhash_table_rehash_size); + DEFSUBR (Fhash_table_rehash_threshold); + DEFSUBR (Fhash_table_weakness); + DEFSUBR (Fhash_table_type); /* obsolete */ + DEFSUBR (Fsxhash); +#if 0 + DEFSUBR (Finternal_hash_value); +#endif + + defsymbol (&Qhash_tablep, "hash-table-p"); + defsymbol (&Qhash_table, "hash-table"); + defsymbol (&Qhashtable, "hashtable"); + defsymbol (&Qweakness, "weakness"); + defsymbol (&Qvalue, "value"); + defsymbol (&Qrehash_size, "rehash-size"); + defsymbol (&Qrehash_threshold, "rehash-threshold"); + + defsymbol (&Qweak, "weak"); /* obsolete */ + defsymbol (&Qkey_weak, "key-weak"); /* obsolete */ + defsymbol (&Qvalue_weak, "value-weak"); /* obsolete */ + defsymbol (&Qnon_weak, "non-weak"); /* obsolete */ + + defkeyword (&Q_test, ":test"); + defkeyword (&Q_size, ":size"); + defkeyword (&Q_rehash_size, ":rehash-size"); + defkeyword (&Q_rehash_threshold, ":rehash-threshold"); + defkeyword (&Q_weakness, ":weakness"); + defkeyword (&Q_type, ":type"); /* obsolete */ +} + +void +vars_of_elhash (void) +{ + /* This must NOT be staticpro'd */ + Vall_weak_hash_tables = Qnil; + pdump_wire_list (&Vall_weak_hash_tables); +}