comparison src/elhash.c @ 489:4a8bb4aa9740

[xemacs-hg @ 2001-04-30 08:49:24 by martinb] hash table mapping
author martinb
date Mon, 30 Apr 2001 08:49:26 +0000
parents 5aa1854ad537
children 11b53bb7daf5
comparison
equal deleted inserted replaced
488:1e7b510d04f6 489:4a8bb4aa9740
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */ 21 Boston, MA 02111-1307, USA. */
22 22
23 /* Synched up with: Not in FSF. */ 23 /* Synched up with: Not in FSF. */
24 24
25 /* This file implements the hash table lisp object type.
26
27 The hash table technique used is "linear probing". Collisions are
28 resolved by putting the item in the next empty place in the array
29 following the collision. Finding a hash entry performs a linear
30 search in the cluster starting at the hash value.
31
32 On deletions from the hash table, the entries immediately following
33 the deleted entry are re-entered in the hash table. We do not have
34 a special way to mark deleted entries (known as "tombstones").
35
36 At the end of the hash entries ("hentries"), we leave room for an
37 entry that is always empty (the "sentinel").
38
39 The traditional literature on hash table implementation
40 (e.g. Knuth) suggests that too much "primary clustering" occurs
41 with linear probing. However, this literature was written when
42 locality of reference was not a factor. The discrepancy between
43 CPU speeds and memory speeds is increasing, and the speed of access
44 to memory is highly dependent on memory caches which work best when
45 there is high locality of data reference. Random access to memory
46 is up to 20 times as expensive as access to the nearest address
47 (and getting worse). So linear probing makes sense.
48
49 But the representation doesn't actually matter that much with the
50 current elisp engine. Funcall is sufficiently slow that the choice
51 of hash table implementation is noise. */
52
25 #include <config.h> 53 #include <config.h>
26 #include "lisp.h" 54 #include "lisp.h"
27 #include "bytecode.h" 55 #include "bytecode.h"
28 #include "elhash.h" 56 #include "elhash.h"
57 #include "opaque.h"
29 58
30 Lisp_Object Qhash_tablep; 59 Lisp_Object Qhash_tablep;
31 static Lisp_Object Qhashtable, Qhash_table; 60 static Lisp_Object Qhashtable, Qhash_table;
32 static Lisp_Object Qweakness, Qvalue, Qkey_or_value, Qkey_and_value; 61 static Lisp_Object Qweakness, Qvalue, Qkey_or_value, Qkey_and_value;
33 static Lisp_Object Vall_weak_hash_tables; 62 static Lisp_Object Vall_weak_hash_tables;
104 assert (HENTRY_CLEAR_P (ht->hentries + ht->size)); 133 assert (HENTRY_CLEAR_P (ht->hentries + ht->size));
105 } 134 }
106 #else 135 #else
107 #define check_hash_table_invariants(ht) 136 #define check_hash_table_invariants(ht)
108 #endif 137 #endif
109
110 /* We use linear probing instead of double hashing, despite its lack
111 of blessing by Knuth and company, because, as a result of the
112 increasing discrepancy between CPU speeds and memory speeds, cache
113 behavior is becoming increasingly important, e.g:
114
115 For a trivial loop, the penalty for non-sequential access of an array is:
116 - a factor of 3-4 on Pentium Pro 200 Mhz
117 - a factor of 10 on Ultrasparc 300 Mhz */
118 138
119 /* Return a suitable size for a hash table, with at least SIZE slots. */ 139 /* Return a suitable size for a hash table, with at least SIZE slots. */
120 static size_t 140 static size_t
121 hash_table_size (size_t requested_size) 141 hash_table_size (size_t requested_size)
122 { 142 {
371 write_c_string (buf, printcharfun); 391 write_c_string (buf, printcharfun);
372 } 392 }
373 } 393 }
374 394
375 static void 395 static void
396 free_hentries (hentry *hentries, size_t size)
397 {
398 #if ERROR_CHECK_HASH_TABLE
399 /* Ensure a crash if other code uses the discarded entries afterwards. */
400 hentry *e, *sentinel;
401
402 for (e = hentries, sentinel = e + size; e < sentinel; e++)
403 * (unsigned long *) e = 0xdeadbeef;
404 #endif
405
406 if (!DUMPEDP (hentries))
407 xfree (hentries);
408 }
409
410 static void
376 finalize_hash_table (void *header, int for_disksave) 411 finalize_hash_table (void *header, int for_disksave)
377 { 412 {
378 if (!for_disksave) 413 if (!for_disksave)
379 { 414 {
380 Lisp_Hash_Table *ht = (Lisp_Hash_Table *) header; 415 Lisp_Hash_Table *ht = (Lisp_Hash_Table *) header;
381 416 free_hentries (ht->hentries, ht->size);
382 xfree (ht->hentries);
383 ht->hentries = 0; 417 ht->hentries = 0;
384 } 418 }
385 } 419 }
386 420
387 static const struct lrecord_description hentry_description_1[] = { 421 static const struct lrecord_description hentry_description_1[] = {
940 LINEAR_PROBING_LOOP (probe, new_entries, new_size) 974 LINEAR_PROBING_LOOP (probe, new_entries, new_size)
941 ; 975 ;
942 *probe = *e; 976 *probe = *e;
943 } 977 }
944 978
945 if (!DUMPEDP (old_entries)) 979 free_hentries (old_entries, old_size);
946 xfree (old_entries);
947 } 980 }
948 981
949 /* After a hash table has been saved to disk and later restored by the 982 /* After a hash table has been saved to disk and later restored by the
950 portable dumper, it contains the same objects, but their addresses 983 portable dumper, it contains the same objects, but their addresses
951 and thus their HASH_CODEs have changed. */ 984 and thus their HASH_CODEs have changed. */
1169 } 1202 }
1170 1203
1171 /************************************************************************/ 1204 /************************************************************************/
1172 /* Mapping Functions */ 1205 /* Mapping Functions */
1173 /************************************************************************/ 1206 /************************************************************************/
1207
1208 /* We need to be careful when mapping over hash tables because the
1209 hash table might be modified during the mapping operation:
1210 - by the mapping function
1211 - by gc (if the hash table is weak)
1212
1213 So we make a copy of the hentries at the beginning of the mapping
1214 operation, and iterate over the copy. */
1215 static Lisp_Object
1216 maphash_unwind (Lisp_Object unwind_obj)
1217 {
1218 void *ptr = (void *) get_opaque_ptr (unwind_obj);
1219 xfree (ptr);
1220 free_opaque_ptr (unwind_obj);
1221 return Qnil;
1222 }
1223
1224 /* Return a malloced array of alternating key/value pairs from HT. */
1225 static Lisp_Object *
1226 copy_compress_hentries (const Lisp_Hash_Table *ht)
1227 {
1228 Lisp_Object * const objs =
1229 /* If the hash table is empty, ht->count could be 0. */
1230 xnew_array (Lisp_Object, 2 * (ht->count > 0 ? ht->count : 1));
1231 const hentry *e, *sentinel;
1232 Lisp_Object *pobj;
1233
1234 for (e = ht->hentries, sentinel = e + ht->size, pobj = objs; e < sentinel; e++)
1235 if (!HENTRY_CLEAR_P (e))
1236 {
1237 *(pobj++) = e->key;
1238 *(pobj++) = e->value;
1239 }
1240
1241 type_checking_assert (pobj == objs + 2 * ht->count);
1242
1243 return objs;
1244 }
1245
1174 DEFUN ("maphash", Fmaphash, 2, 2, 0, /* 1246 DEFUN ("maphash", Fmaphash, 2, 2, 0, /*
1175 Map FUNCTION over entries in HASH-TABLE, calling it with two args, 1247 Map FUNCTION over entries in HASH-TABLE, calling it with two args,
1176 each key and value in HASH-TABLE. 1248 each key and value in HASH-TABLE.
1177 1249
1178 FUNCTION may not modify HASH-TABLE, with the one exception that FUNCTION 1250 FUNCTION must not modify HASH-TABLE, with the one exception that FUNCTION
1179 may remhash or puthash the entry currently being processed by FUNCTION. 1251 may remhash or puthash the entry currently being processed by FUNCTION.
1180 */ 1252 */
1181 (function, hash_table)) 1253 (function, hash_table))
1182 { 1254 {
1183 const Lisp_Hash_Table *ht = xhash_table (hash_table); 1255 const Lisp_Hash_Table * const ht = xhash_table (hash_table);
1256 Lisp_Object * const objs = copy_compress_hentries (ht);
1257 Lisp_Object args[3];
1258 const Lisp_Object *pobj, *end;
1259 int speccount = specpdl_depth ();
1260 struct gcpro gcpro1;
1261
1262 record_unwind_protect (maphash_unwind, make_opaque_ptr ((void *)objs));
1263 GCPRO1 (objs[0]);
1264 gcpro1.nvars = 2 * ht->count;
1265
1266 args[0] = function;
1267
1268 for (pobj = objs, end = pobj + 2 * ht->count; pobj < end; pobj += 2)
1269 {
1270 args[1] = pobj[0];
1271 args[2] = pobj[1];
1272 Ffuncall (countof (args), args);
1273 }
1274
1275 unbind_to (speccount, Qnil);
1276 UNGCPRO;
1277
1278 return Qnil;
1279 }
1280
1281 /* Map *C* function FUNCTION over the elements of a non-weak lisp hash table.
1282 FUNCTION must not modify HASH-TABLE, with the one exception that FUNCTION
1283 may puthash the entry currently being processed by FUNCTION.
1284 Mapping terminates if FUNCTION returns something other than 0. */
1285 void
1286 elisp_maphash_unsafe (maphash_function_t function,
1287 Lisp_Object hash_table, void *extra_arg)
1288 {
1289 const Lisp_Hash_Table *ht = XHASH_TABLE (hash_table);
1184 const hentry *e, *sentinel; 1290 const hentry *e, *sentinel;
1185 1291
1186 for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++) 1292 for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++)
1187 if (!HENTRY_CLEAR_P (e)) 1293 if (!HENTRY_CLEAR_P (e))
1188 { 1294 if (function (e->key, e->value, extra_arg))
1189 Lisp_Object args[3], key; 1295 return;
1190 again: 1296 }
1191 key = e->key; 1297
1192 args[0] = function; 1298 /* Map *C* function FUNCTION over the elements of a lisp hash table.
1193 args[1] = key; 1299 It is safe for FUNCTION to modify HASH-TABLE.
1194 args[2] = e->value; 1300 Mapping terminates if FUNCTION returns something other than 0. */
1195 Ffuncall (countof (args), args);
1196 /* Has FUNCTION done a remhash? */
1197 if (!EQ (key, e->key) && !HENTRY_CLEAR_P (e))
1198 goto again;
1199 }
1200
1201 return Qnil;
1202 }
1203
1204 /* #### If the Lisp function being called does a puthash and this
1205 #### causes the hash table to be resized, the results will be quite
1206 #### random and we will likely crash. To fix this, either set a
1207 #### flag in the hash table while we're mapping and signal an error
1208 #### when new entries are added, or fix things to make this
1209 #### operation work properly, like this: Store two hash tables in
1210 #### each hash table object -- the second one is written to when
1211 #### you do a puthash inside of a mapping operation, and the
1212 #### various operations need to check both hash tables for entries.
1213 #### As soon as the last maphash over a particular hash table
1214 #### object terminates, the entries in the second table are added
1215 #### to the first (using an unwind-protect). --ben */
1216
1217 /* Map *C* function FUNCTION over the elements of a lisp hash table. */
1218 void 1301 void
1219 elisp_maphash (maphash_function_t function, 1302 elisp_maphash (maphash_function_t function,
1220 Lisp_Object hash_table, void *extra_arg) 1303 Lisp_Object hash_table, void *extra_arg)
1221 { 1304 {
1222 const Lisp_Hash_Table *ht = XHASH_TABLE (hash_table); 1305 const Lisp_Hash_Table * const ht = xhash_table (hash_table);
1223 const hentry *e, *sentinel; 1306 Lisp_Object * const objs = copy_compress_hentries (ht);
1224 1307 const Lisp_Object *pobj, *end;
1225 for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++) 1308 int speccount = specpdl_depth ();
1226 if (!HENTRY_CLEAR_P (e)) 1309 struct gcpro gcpro1;
1227 { 1310
1228 Lisp_Object key; 1311 record_unwind_protect (maphash_unwind, make_opaque_ptr ((void *)objs));
1229 again: 1312 GCPRO1 (objs[0]);
1230 key = e->key; 1313 gcpro1.nvars = 2 * ht->count;
1231 if (function (key, e->value, extra_arg)) 1314
1232 return; 1315 for (pobj = objs, end = pobj + 2 * ht->count; pobj < end; pobj += 2)
1233 /* Has FUNCTION done a remhash? */ 1316 if (function (pobj[0], pobj[1], extra_arg))
1234 if (!EQ (key, e->key) && !HENTRY_CLEAR_P (e)) 1317 break;
1235 goto again; 1318
1236 } 1319 unbind_to (speccount, Qnil);
1237 } 1320 UNGCPRO;
1238 1321 }
1239 /* Remove all elements of a lisp hash table satisfying *C* predicate PREDICATE. */ 1322
1323 /* Remove all elements of a lisp hash table satisfying *C* predicate PREDICATE.
1324 PREDICATE must not modify HASH-TABLE. */
1240 void 1325 void
1241 elisp_map_remhash (maphash_function_t predicate, 1326 elisp_map_remhash (maphash_function_t predicate,
1242 Lisp_Object hash_table, void *extra_arg) 1327 Lisp_Object hash_table, void *extra_arg)
1243 { 1328 {
1244 Lisp_Hash_Table *ht = XHASH_TABLE (hash_table); 1329 const Lisp_Hash_Table * const ht = xhash_table (hash_table);
1245 hentry *e, *entries, *sentinel; 1330 Lisp_Object * const objs = copy_compress_hentries (ht);
1246 1331 const Lisp_Object *pobj, *end;
1247 for (e = entries = ht->hentries, sentinel = e + ht->size; e < sentinel; e++) 1332 int speccount = specpdl_depth ();
1248 if (!HENTRY_CLEAR_P (e)) 1333 struct gcpro gcpro1;
1249 { 1334
1250 again: 1335 record_unwind_protect (maphash_unwind, make_opaque_ptr ((void *)objs));
1251 if (predicate (e->key, e->value, extra_arg)) 1336 GCPRO1 (objs[0]);
1252 { 1337 gcpro1.nvars = 2 * ht->count;
1253 remhash_1 (ht, entries, e); 1338
1254 if (!HENTRY_CLEAR_P (e)) 1339 for (pobj = objs, end = pobj + 2 * ht->count; pobj < end; pobj += 2)
1255 goto again; 1340 if (predicate (pobj[0], pobj[1], extra_arg))
1256 } 1341 Fremhash (pobj[0], hash_table);
1257 } 1342
1343 unbind_to (speccount, Qnil);
1344 UNGCPRO;
1258 } 1345 }
1259 1346
1260 1347
1261 /************************************************************************/ 1348 /************************************************************************/
1262 /* garbage collecting weak hash tables */ 1349 /* garbage collecting weak hash tables */