changeset 246:666069efb0c6

output bytes, pickle and save dict if -p, trim lm value to int
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 02 Jan 2025 14:51:00 +0000
parents 1d6fe71f13f4
children 7737da0ccb8c
files lib/python/cc/lmh/test_hash.py
diffstat 1 files changed, 13 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/lib/python/cc/lmh/test_hash.py	Wed Jan 01 23:02:35 2025 +0000
+++ b/lib/python/cc/lmh/test_hash.py	Thu Jan 02 14:51:00 2025 +0000
@@ -6,12 +6,12 @@
   global d
   d = dict()
   c = 0
-  with (sys.stdin if fn is None else open(fn)) as f:
+  with (sys.stdin.buffer if fn is None else open(fn,'rb')) as f:
     for l in f:
       (k,s,u,lm) = l.split()
       if (kk:=s+u) in d:
         print('dup',kk,d[kk],lm,file=sys.stderr)
-      d[kk]=lm
+      d[kk]=lm[:-2]
       if (n is not None) and (c > n):
         break
       c += 1
@@ -20,6 +20,7 @@
 repeat = 1
 n = None
 fn = None
+pick = None
 
 while len(sys.argv) > 1:
   a = sys.argv.pop(1)
@@ -32,12 +33,20 @@
   if a == '-f':
     fn = sys.argv.pop(1)
     continue
-  print('Usage: cat k...tsv | test.py [-r repeat] [-n lines] [-f file]')
+  if a == '-p':
+    pick = sys.argv.pop(1)
+    continue
+  print('Usage: cat k...tsv | test.py [-r repeat] [-n lines] [-f file] [-p file]')
   exit(1)
 
 t = timeit.Timer('doit(n,fn)',globals={'doit':doit,'n':n,'fn':fn})
 
 print(t.repeat(repeat,1))
 
-t = timeit.Timer('d["20190825142846http://71.43.189.10/dermorph/"]',globals={'d':d})
+if pick is not None:
+  import pickle
+  with open(pick, 'wb') as handle:
+    pickle.dump(d, handle, protocol=pickle.HIGHEST_PROTOCOL)
+
+t = timeit.Timer("d[b'20190825142846http://71.43.189.10/dermorph/']",globals={'d':d})
 print(t.repeat(repeat,100000))