Mercurial > hg > cc > cirrus_work
changeset 249:87a35540104b
time the unpickling
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Thu, 02 Jan 2025 18:35:08 +0000 |
parents | 650383a798e5 |
children | 417103100fd0 |
files | lib/python/cc/lmh/test_lookup1.py |
diffstat | 1 files changed, 10 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/lib/python/cc/lmh/test_lookup1.py Thu Jan 02 18:30:03 2025 +0000
+++ b/lib/python/cc/lmh/test_lookup1.py Thu Jan 02 18:35:08 2025 +0000
@@ -1,11 +1,18 @@
 #!/usr/bin/python3
 from isal import igzip
-import re, pickle
+
+import re, pickle, timeit
+global d, handle
 
 PAT = re.compile(b'\{"url": "([^"]*)",.*, "filename": ".*/segments/[0-9]*\.([0-9][0-9]?)/')
 
+handle = None
+d = [0]
+
 with open('results/CC-MAIN-2019-35/warc_lmhx/ks_0-9.pickle', 'rb') as handle:
-  d = pickle.load(handle) # this takes ~20 seconds
+  t = timeit.Timer('d[0] = pickle.load(handle)', globals = globals())
+  t.timeit(number=1)
+  d = d[0]
 print(len(d))
 
 N = 0
@@ -34,5 +41,5 @@
     else:
       raise ValueError(props)
     cdx_out.write(l)
-print(N,hits)
+print('%s entries, %s given lastmod'%(N,hits))
 
```