changeset 278:1f2063a6417a default tip

parameterise the range of cdbs and segments, sometimes fast, sometimes not ???
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 26 Feb 2025 19:52:22 +0000
parents 018866252464
children
files lib/python/cc/lmh/test_cdbp.py
diffstat 1 files changed, 38 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/lib/python/cc/lmh/test_cdbp.py	Wed Feb 19 17:49:31 2025 +0000
+++ b/lib/python/cc/lmh/test_cdbp.py	Wed Feb 26 19:52:22 2025 +0000
@@ -1,7 +1,11 @@
 #!/usr/bin/python3
 # cython: profile=False
-'''Usage: uz .../cdx-....gz | test_lookup3.py cdbpat | igzip -c > cdc-...gz
-cdbpat identifies a set of 17 CDB files E.g. .../cdb/ks_%d-%d.cdb'''
+'''Usage: uz .../cdx-....gz | test_cdbp.py cdbpat N M S E | igzip -c > cdc-...gz
+cdbpat identifies a set of 17 CDB files covering 100 segments
+E.g. .../cdb/ks_%d-%d.cdb
+N:M gives a Python range (N <= i < M) of indices into those CDB files to actually load
+S:E gives a Python range (S <= seg < E) of segments to restrict lookups to
+'''
 
 import cython, typing, timeit, re, sys
 from db import CCdb
@@ -20,6 +24,10 @@
 
   D: int = 17
   d: int = int(100/(D-1))
+  N0: int = int(sys.argv[2])
+  N1: int = int(sys.argv[3])
+  S: int = int(sys.argv[4])
+  E: int = int(sys.argv[5])
 
   CC: List[CCdb]
   mv: List[char[::1]]
@@ -28,41 +36,44 @@
   bb: List[(int,int)] = list(zip(list(range(0,100,d)),
                                  list(range(5,100,d))+[99]))
 
+  ff = [(open(sys.argv[1]%(b,e),'rb') if (i >= N0 and i < N1) else None) for i,(b,e) in enumerate(bb)]
+  CC = [(None if f is None else CCdb()) for f in ff]
 
-  ff = [open(sys.argv[1]%(b,e),'rb') for b,e in bb]
-  CC = [CCdb() for i in range(D)]
-  for i in range(D):
-    CC[i].init(ff[i].fileno())
+  for C,f in zip(CC,ff):
+    if C is not None:
+      C.init(f.fileno())
 
   for l in cdx_in:
     key: cython.bytes
     cdate: cython.bytes
     props: cython.bytes
+    k: cython.bytes
+    seg: int
     res: int
+    i: int
     key, cdate, props = l.split(b' ',maxsplit=2)
     if (m:=PAT.search(props)):
+      N += 1
       seg = int(m[2])
-      N += 1
-      i: int = int(seg / d)
-      k: cython.bytes = cdate+m[1]
-      if (seg == 0) & (res := CC[i].find(k)) == 1:
-        hits += 1
-        cdx_out.write(key)
-        cdx_out.write(b' ')
-        cdx_out.write(cdate)
-        cdx_out.write(b' ')
-        cdx_out.write(memoryview(props)[:-2])
-        cdx_out.write(b', "lastmod": "')
-        cdx_out.flush()
-        CC[i].write(1)
-        cdx_out.write(b'"}\n')
-        continue
-      elif (seg != 0) | (res == 0):
-        if res == 0:
-          print(k,key,props,seg,i,file=sys.stderr)
-        pass
-      else:
-        raise ValueError((key,props,seg,i))
+      if (seg >= S and seg < E):
+        i = int(seg / d)
+        k = cdate+m[1]
+        if (res := CC[i].find(k)) == 1:
+          hits += 1
+          cdx_out.write(key)
+          cdx_out.write(b' ')
+          cdx_out.write(cdate)
+          cdx_out.write(b' ')
+          cdx_out.write(memoryview(props)[:-2])
+          cdx_out.write(b', "lastmod": "')
+          cdx_out.flush()
+          CC[i].write(1)
+          cdx_out.write(b'"}\n')
+          continue
+        elif res == 0:
+          pass
+        else:
+          raise ValueError((key,props,seg,i))
     else:
       raise ValueError(props)
     cdx_out.write(l)