changeset 245:1d6fe71f13f4

test big dict for associating lm timestamp with cc timestamp+uri
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 01 Jan 2025 23:02:35 +0000
parents ce5b2c1da222
children 666069efb0c6
files lib/python/cc/lmh/test_hash.py
diffstat 1 files changed, 43 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/python/cc/lmh/test_hash.py	Wed Jan 01 23:02:35 2025 +0000
@@ -0,0 +1,43 @@
+#!/usr/bin/python3
+import sys, timeit
+global n, d
+
+def doit(n,fn):
+  """Fill global d with s+u -> lm from at most n lines (all if n is None) of
+  whitespace-separated k,s,u,lm records in fn (stdin if None); print len(d)."""
+  global d
+  d = dict()
+  with (sys.stdin if fn is None else open(fn)) as f:  # NB closes stdin too, so -r > 1 needs -f
+    for c,line in enumerate(f):
+      if (n is not None) and (c >= n):  # limit checked first: exactly n lines, 0 means none
+        break
+      (k,s,u,lm) = line.split()  # k is not used
+      if (kk:=s+u) in d:
+        print('dup',kk,d[kk],lm,file=sys.stderr)
+      d[kk]=lm  # on a duplicate key the later lm wins
+  print(len(d))
+
+repeat = 1  # number of timing repetitions (-r)
+n = None    # line limit passed to doit (-n); None means no limit
+fn = None   # input file passed to doit (-f); None means stdin
+
+while len(sys.argv) > 1:
+  a = sys.argv.pop(1)
+  if a == '-r' and len(sys.argv) > 1:
+    repeat = int(sys.argv.pop(1))
+  elif a == '-n' and len(sys.argv) > 1:
+    n = int(sys.argv.pop(1))
+  elif a == '-f' and len(sys.argv) > 1:
+    fn = sys.argv.pop(1)
+  else:
+    # Unknown option or missing value: usage on stderr, apart from the timings
+    print('Usage: cat k...tsv | test.py [-r repeat] [-n lines] [-f file]',file=sys.stderr)
+    sys.exit(1)
+
+# Time building the dict; doit() leaves the result in the module-level d
+t = timeit.Timer('doit(n,fn)',globals={'doit':doit,'n':n,'fn':fn})
+print(t.repeat(repeat,1))
+
+# Time 100000 lookups of one fixed key; raises KeyError if the input lacks it
+t = timeit.Timer('d["20190825142846http://71.43.189.10/dermorph/"]',globals={'d':d})
+print(t.repeat(repeat,100000))