# HG changeset patch # User Henry S. Thompson # Date 1735842908 0 # Node ID 87a35540104b8e85a9c92afc4452df8badc2e24d # Parent 650383a798e5680ea0270bd1f128797de444ed0c time the unpickling diff -r 650383a798e5 -r 87a35540104b lib/python/cc/lmh/test_lookup1.py --- a/lib/python/cc/lmh/test_lookup1.py Thu Jan 02 18:30:03 2025 +0000 +++ b/lib/python/cc/lmh/test_lookup1.py Thu Jan 02 18:35:08 2025 +0000 @@ -1,11 +1,18 @@ #!/usr/bin/python3 from isal import igzip -import re, pickle + +import re, pickle, timeit +global d, handle PAT = re.compile(b'\{"url": "([^"]*)",.*, "filename": ".*/segments/[0-9]*\.([0-9][0-9]?)/') +handle = None +d = [0] + with open('results/CC-MAIN-2019-35/warc_lmhx/ks_0-9.pickle', 'rb') as handle: - d = pickle.load(handle) # this takes ~20 seconds + t = timeit.Timer('d[0] = pickle.load(handle)', globals = globals()) + t.timeit(number=1) + d = d[0] print(len(d)) N = 0 @@ -34,5 +41,5 @@ else: raise ValueError(props) cdx_out.write(l) -print(N,hits) +print('%s entries, %s given lastmod'%(N,hits))