Mercurial > hg > xemacs-beta
comparison src/elhash.c @ 489:4a8bb4aa9740
[xemacs-hg @ 2001-04-30 08:49:24 by martinb]
hash table mapping
author | martinb |
---|---|
date | Mon, 30 Apr 2001 08:49:26 +0000 |
parents | 5aa1854ad537 |
children | 11b53bb7daf5 |
comparison
equal
deleted
inserted
replaced
488:1e7b510d04f6 | 489:4a8bb4aa9740 |
---|---|
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
21 Boston, MA 02111-1307, USA. */ | 21 Boston, MA 02111-1307, USA. */ |
22 | 22 |
23 /* Synched up with: Not in FSF. */ | 23 /* Synched up with: Not in FSF. */ |
24 | 24 |
25 /* This file implements the hash table lisp object type. | |
26 | |
27 The hash table technique used is "linear probing". Collisions are | |
28 resolved by putting the item in the next empty place in the array | |
29 following the collision. Finding a hash entry performs a linear | |
30 search in the cluster starting at the hash value. | |
31 | |
32 On deletions from the hash table, the entries immediately following | |
33 the deleted entry are re-entered in the hash table. We do not have | |
34 a special way to mark deleted entries (known as "tombstones"). | |
35 | |
36 At the end of the hash entries ("hentries"), we leave room for an | |
37 entry that is always empty (the "sentinel"). | |
38 | |
39 The traditional literature on hash table implementation | |
40 (e.g. Knuth) suggests that too much "primary clustering" occurs | |
41 with linear probing. However, this literature was written when | |
42 locality of reference was not a factor. The discrepancy between | |
43 CPU speeds and memory speeds is increasing, and the speed of access | |
44 to memory is highly dependent on memory caches which work best when | |
45 there is high locality of data reference. Random access to memory | |
46 is up to 20 times as expensive as access to the nearest address | |
47 (and getting worse). So linear probing makes sense. | |
48 | |
49 But the representation doesn't actually matter that much with the | |
50 current elisp engine. Funcall is sufficiently slow that the choice | |
51 of hash table implementation is noise. */ | |
52 | |
25 #include <config.h> | 53 #include <config.h> |
26 #include "lisp.h" | 54 #include "lisp.h" |
27 #include "bytecode.h" | 55 #include "bytecode.h" |
28 #include "elhash.h" | 56 #include "elhash.h" |
57 #include "opaque.h" | |
29 | 58 |
30 Lisp_Object Qhash_tablep; | 59 Lisp_Object Qhash_tablep; |
31 static Lisp_Object Qhashtable, Qhash_table; | 60 static Lisp_Object Qhashtable, Qhash_table; |
32 static Lisp_Object Qweakness, Qvalue, Qkey_or_value, Qkey_and_value; | 61 static Lisp_Object Qweakness, Qvalue, Qkey_or_value, Qkey_and_value; |
33 static Lisp_Object Vall_weak_hash_tables; | 62 static Lisp_Object Vall_weak_hash_tables; |
104 assert (HENTRY_CLEAR_P (ht->hentries + ht->size)); | 133 assert (HENTRY_CLEAR_P (ht->hentries + ht->size)); |
105 } | 134 } |
106 #else | 135 #else |
107 #define check_hash_table_invariants(ht) | 136 #define check_hash_table_invariants(ht) |
108 #endif | 137 #endif |
109 | |
110 /* We use linear probing instead of double hashing, despite its lack | |
111 of blessing by Knuth and company, because, as a result of the | |
112 increasing discrepancy between CPU speeds and memory speeds, cache | |
113 behavior is becoming increasingly important, e.g: | |
114 | |
115 For a trivial loop, the penalty for non-sequential access of an array is: | |
116 - a factor of 3-4 on Pentium Pro 200 Mhz | |
117 - a factor of 10 on Ultrasparc 300 Mhz */ | |
118 | 138 |
119 /* Return a suitable size for a hash table, with at least SIZE slots. */ | 139 /* Return a suitable size for a hash table, with at least SIZE slots. */ |
120 static size_t | 140 static size_t |
121 hash_table_size (size_t requested_size) | 141 hash_table_size (size_t requested_size) |
122 { | 142 { |
371 write_c_string (buf, printcharfun); | 391 write_c_string (buf, printcharfun); |
372 } | 392 } |
373 } | 393 } |
374 | 394 |
375 static void | 395 static void |
396 free_hentries (hentry *hentries, size_t size) | |
397 { | |
398 #if ERROR_CHECK_HASH_TABLE | |
399 /* Ensure a crash if other code uses the discarded entries afterwards. */ | |
400 hentry *e, *sentinel; | |
401 | |
402 for (e = hentries, sentinel = e + size; e < sentinel; e++) | |
403 * (unsigned long *) e = 0xdeadbeef; | |
404 #endif | |
405 | |
406 if (!DUMPEDP (hentries)) | |
407 xfree (hentries); | |
408 } | |
409 | |
410 static void | |
376 finalize_hash_table (void *header, int for_disksave) | 411 finalize_hash_table (void *header, int for_disksave) |
377 { | 412 { |
378 if (!for_disksave) | 413 if (!for_disksave) |
379 { | 414 { |
380 Lisp_Hash_Table *ht = (Lisp_Hash_Table *) header; | 415 Lisp_Hash_Table *ht = (Lisp_Hash_Table *) header; |
381 | 416 free_hentries (ht->hentries, ht->size); |
382 xfree (ht->hentries); | |
383 ht->hentries = 0; | 417 ht->hentries = 0; |
384 } | 418 } |
385 } | 419 } |
386 | 420 |
387 static const struct lrecord_description hentry_description_1[] = { | 421 static const struct lrecord_description hentry_description_1[] = { |
940 LINEAR_PROBING_LOOP (probe, new_entries, new_size) | 974 LINEAR_PROBING_LOOP (probe, new_entries, new_size) |
941 ; | 975 ; |
942 *probe = *e; | 976 *probe = *e; |
943 } | 977 } |
944 | 978 |
945 if (!DUMPEDP (old_entries)) | 979 free_hentries (old_entries, old_size); |
946 xfree (old_entries); | |
947 } | 980 } |
948 | 981 |
949 /* After a hash table has been saved to disk and later restored by the | 982 /* After a hash table has been saved to disk and later restored by the |
950 portable dumper, it contains the same objects, but their addresses | 983 portable dumper, it contains the same objects, but their addresses |
951 and thus their HASH_CODEs have changed. */ | 984 and thus their HASH_CODEs have changed. */ |
1169 } | 1202 } |
1170 | 1203 |
1171 /************************************************************************/ | 1204 /************************************************************************/ |
1172 /* Mapping Functions */ | 1205 /* Mapping Functions */ |
1173 /************************************************************************/ | 1206 /************************************************************************/ |
1207 | |
1208 /* We need to be careful when mapping over hash tables because the | |
1209 hash table might be modified during the mapping operation: | |
1210 - by the mapping function | |
1211 - by gc (if the hash table is weak) | |
1212 | |
1213 So we make a copy of the hentries at the beginning of the mapping | |
1214 operation, and iterate over the copy. */ | |
1215 static Lisp_Object | |
1216 maphash_unwind (Lisp_Object unwind_obj) | |
1217 { | |
1218 void *ptr = (void *) get_opaque_ptr (unwind_obj); | |
1219 xfree (ptr); | |
1220 free_opaque_ptr (unwind_obj); | |
1221 return Qnil; | |
1222 } | |
1223 | |
1224 /* Return a malloced array of alternating key/value pairs from HT. */ | |
1225 static Lisp_Object * | |
1226 copy_compress_hentries (const Lisp_Hash_Table *ht) | |
1227 { | |
1228 Lisp_Object * const objs = | |
1229 /* If the hash table is empty, ht->count could be 0. */ | |
1230 xnew_array (Lisp_Object, 2 * (ht->count > 0 ? ht->count : 1)); | |
1231 const hentry *e, *sentinel; | |
1232 Lisp_Object *pobj; | |
1233 | |
1234 for (e = ht->hentries, sentinel = e + ht->size, pobj = objs; e < sentinel; e++) | |
1235 if (!HENTRY_CLEAR_P (e)) | |
1236 { | |
1237 *(pobj++) = e->key; | |
1238 *(pobj++) = e->value; | |
1239 } | |
1240 | |
1241 type_checking_assert (pobj == objs + 2 * ht->count); | |
1242 | |
1243 return objs; | |
1244 } | |
1245 | |
1174 DEFUN ("maphash", Fmaphash, 2, 2, 0, /* | 1246 DEFUN ("maphash", Fmaphash, 2, 2, 0, /* |
1175 Map FUNCTION over entries in HASH-TABLE, calling it with two args, | 1247 Map FUNCTION over entries in HASH-TABLE, calling it with two args, |
1176 each key and value in HASH-TABLE. | 1248 each key and value in HASH-TABLE. |
1177 | 1249 |
1178 FUNCTION may not modify HASH-TABLE, with the one exception that FUNCTION | 1250 FUNCTION must not modify HASH-TABLE, with the one exception that FUNCTION |
1179 may remhash or puthash the entry currently being processed by FUNCTION. | 1251 may remhash or puthash the entry currently being processed by FUNCTION. |
1180 */ | 1252 */ |
1181 (function, hash_table)) | 1253 (function, hash_table)) |
1182 { | 1254 { |
1183 const Lisp_Hash_Table *ht = xhash_table (hash_table); | 1255 const Lisp_Hash_Table * const ht = xhash_table (hash_table); |
1256 Lisp_Object * const objs = copy_compress_hentries (ht); | |
1257 Lisp_Object args[3]; | |
1258 const Lisp_Object *pobj, *end; | |
1259 int speccount = specpdl_depth (); | |
1260 struct gcpro gcpro1; | |
1261 | |
1262 record_unwind_protect (maphash_unwind, make_opaque_ptr ((void *)objs)); | |
1263 GCPRO1 (objs[0]); | |
1264 gcpro1.nvars = 2 * ht->count; | |
1265 | |
1266 args[0] = function; | |
1267 | |
1268 for (pobj = objs, end = pobj + 2 * ht->count; pobj < end; pobj += 2) | |
1269 { | |
1270 args[1] = pobj[0]; | |
1271 args[2] = pobj[1]; | |
1272 Ffuncall (countof (args), args); | |
1273 } | |
1274 | |
1275 unbind_to (speccount, Qnil); | |
1276 UNGCPRO; | |
1277 | |
1278 return Qnil; | |
1279 } | |
1280 | |
1281 /* Map *C* function FUNCTION over the elements of a non-weak lisp hash table. | |
1282 FUNCTION must not modify HASH-TABLE, with the one exception that FUNCTION | |
1283 may puthash the entry currently being processed by FUNCTION. | |
1284 Mapping terminates if FUNCTION returns something other than 0. */ | |
1285 void | |
1286 elisp_maphash_unsafe (maphash_function_t function, | |
1287 Lisp_Object hash_table, void *extra_arg) | |
1288 { | |
1289 const Lisp_Hash_Table *ht = XHASH_TABLE (hash_table); | |
1184 const hentry *e, *sentinel; | 1290 const hentry *e, *sentinel; |
1185 | 1291 |
1186 for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++) | 1292 for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++) |
1187 if (!HENTRY_CLEAR_P (e)) | 1293 if (!HENTRY_CLEAR_P (e)) |
1188 { | 1294 if (function (e->key, e->value, extra_arg)) |
1189 Lisp_Object args[3], key; | 1295 return; |
1190 again: | 1296 } |
1191 key = e->key; | 1297 |
1192 args[0] = function; | 1298 /* Map *C* function FUNCTION over the elements of a lisp hash table. |
1193 args[1] = key; | 1299 It is safe for FUNCTION to modify HASH-TABLE. |
1194 args[2] = e->value; | 1300 Mapping terminates if FUNCTION returns something other than 0. */ |
1195 Ffuncall (countof (args), args); | |
1196 /* Has FUNCTION done a remhash? */ | |
1197 if (!EQ (key, e->key) && !HENTRY_CLEAR_P (e)) | |
1198 goto again; | |
1199 } | |
1200 | |
1201 return Qnil; | |
1202 } | |
1203 | |
1204 /* #### If the Lisp function being called does a puthash and this | |
1205 #### causes the hash table to be resized, the results will be quite | |
1206 #### random and we will likely crash. To fix this, either set a | |
1207 #### flag in the hash table while we're mapping and signal an error | |
1208 #### when new entries are added, or fix things to make this | |
1209 #### operation work properly, like this: Store two hash tables in | |
1210 #### each hash table object -- the second one is written to when | |
1211 #### you do a puthash inside of a mapping operation, and the | |
1212 #### various operations need to check both hash tables for entries. | |
1213 #### As soon as the last maphash over a particular hash table | |
1214 #### object terminates, the entries in the second table are added | |
1215 #### to the first (using an unwind-protect). --ben */ | |
1216 | |
1217 /* Map *C* function FUNCTION over the elements of a lisp hash table. */ | |
1218 void | 1301 void |
1219 elisp_maphash (maphash_function_t function, | 1302 elisp_maphash (maphash_function_t function, |
1220 Lisp_Object hash_table, void *extra_arg) | 1303 Lisp_Object hash_table, void *extra_arg) |
1221 { | 1304 { |
1222 const Lisp_Hash_Table *ht = XHASH_TABLE (hash_table); | 1305 const Lisp_Hash_Table * const ht = xhash_table (hash_table); |
1223 const hentry *e, *sentinel; | 1306 Lisp_Object * const objs = copy_compress_hentries (ht); |
1224 | 1307 const Lisp_Object *pobj, *end; |
1225 for (e = ht->hentries, sentinel = e + ht->size; e < sentinel; e++) | 1308 int speccount = specpdl_depth (); |
1226 if (!HENTRY_CLEAR_P (e)) | 1309 struct gcpro gcpro1; |
1227 { | 1310 |
1228 Lisp_Object key; | 1311 record_unwind_protect (maphash_unwind, make_opaque_ptr ((void *)objs)); |
1229 again: | 1312 GCPRO1 (objs[0]); |
1230 key = e->key; | 1313 gcpro1.nvars = 2 * ht->count; |
1231 if (function (key, e->value, extra_arg)) | 1314 |
1232 return; | 1315 for (pobj = objs, end = pobj + 2 * ht->count; pobj < end; pobj += 2) |
1233 /* Has FUNCTION done a remhash? */ | 1316 if (function (pobj[0], pobj[1], extra_arg)) |
1234 if (!EQ (key, e->key) && !HENTRY_CLEAR_P (e)) | 1317 break; |
1235 goto again; | 1318 |
1236 } | 1319 unbind_to (speccount, Qnil); |
1237 } | 1320 UNGCPRO; |
1238 | 1321 } |
1239 /* Remove all elements of a lisp hash table satisfying *C* predicate PREDICATE. */ | 1322 |
1323 /* Remove all elements of a lisp hash table satisfying *C* predicate PREDICATE. | |
1324 PREDICATE must not modify HASH-TABLE. */ | |
1240 void | 1325 void |
1241 elisp_map_remhash (maphash_function_t predicate, | 1326 elisp_map_remhash (maphash_function_t predicate, |
1242 Lisp_Object hash_table, void *extra_arg) | 1327 Lisp_Object hash_table, void *extra_arg) |
1243 { | 1328 { |
1244 Lisp_Hash_Table *ht = XHASH_TABLE (hash_table); | 1329 const Lisp_Hash_Table * const ht = xhash_table (hash_table); |
1245 hentry *e, *entries, *sentinel; | 1330 Lisp_Object * const objs = copy_compress_hentries (ht); |
1246 | 1331 const Lisp_Object *pobj, *end; |
1247 for (e = entries = ht->hentries, sentinel = e + ht->size; e < sentinel; e++) | 1332 int speccount = specpdl_depth (); |
1248 if (!HENTRY_CLEAR_P (e)) | 1333 struct gcpro gcpro1; |
1249 { | 1334 |
1250 again: | 1335 record_unwind_protect (maphash_unwind, make_opaque_ptr ((void *)objs)); |
1251 if (predicate (e->key, e->value, extra_arg)) | 1336 GCPRO1 (objs[0]); |
1252 { | 1337 gcpro1.nvars = 2 * ht->count; |
1253 remhash_1 (ht, entries, e); | 1338 |
1254 if (!HENTRY_CLEAR_P (e)) | 1339 for (pobj = objs, end = pobj + 2 * ht->count; pobj < end; pobj += 2) |
1255 goto again; | 1340 if (predicate (pobj[0], pobj[1], extra_arg)) |
1256 } | 1341 Fremhash (pobj[0], hash_table); |
1257 } | 1342 |
1343 unbind_to (speccount, Qnil); | |
1344 UNGCPRO; | |
1258 } | 1345 } |
1259 | 1346 |
1260 | 1347 |
1261 /************************************************************************/ | 1348 /************************************************************************/ |
1262 /* garbage collecting weak hash tables */ | 1349 /* garbage collecting weak hash tables */ |