# HG changeset patch # User Henry S. Thompson # Date 1740599542 0 # Node ID 1f2063a6417a7ed33f1d3034c6b519efd44628c6 # Parent 01886625246469d764a297d448fdd7935e0426d3 parameterise the range of cdbs and segments, sometimes fast, sometimes not ??? diff -r 018866252464 -r 1f2063a6417a lib/python/cc/lmh/test_cdbp.py --- a/lib/python/cc/lmh/test_cdbp.py Wed Feb 19 17:49:31 2025 +0000 +++ b/lib/python/cc/lmh/test_cdbp.py Wed Feb 26 19:52:22 2025 +0000 @@ -1,7 +1,11 @@ #!/usr/bin/python3 # cython: profile=False -'''Usage: uz .../cdx-....gz | test_lookup3.py cdbpat | igzip -c > cdc-...gz -cdbpat identifies a set of 17 CDB files E.g. .../cdb/ks_%d-%d.cdb''' +'''Usage: uz .../cdx-....gz | test_cdbp.py cdbpat N M S E | igzip -c > cdc-...gz +cdbpat identifies a set of 17 CDB files covering 100 segments +E.g. .../cdb/ks_%d-%d.cdb +N:M gives a Python range of them to actually load +S:E gives a Python range of segments to restrict to +''' import cython, typing, timeit, re, sys from db import CCdb @@ -20,6 +24,10 @@ D: int = 17 d: int = int(100/(D-1)) + N0: int = int(sys.argv[2]) + N1: int = int(sys.argv[3]) + S: int = int(sys.argv[4]) + E: int = int(sys.argv[5]) CC: List[CCdb] mv: List[char[::1]] @@ -28,41 +36,44 @@ bb: List[(int,int)] = list(zip(list(range(0,100,d)), list(range(5,100,d))+[99])) + ff = [(open(sys.argv[1]%(b,e),'rb') if (i >= N0 and i < N1) else None) for i,(b,e) in enumerate(bb)] + CC = [(None if f is None else CCdb()) for f in ff] - ff = [open(sys.argv[1]%(b,e),'rb') for b,e in bb] - CC = [CCdb() for i in range(D)] - for i in range(D): - CC[i].init(ff[i].fileno()) + for C,f in zip(CC,ff): + if C is not None: + C.init(f.fileno()) for l in cdx_in: key: cython.bytes cdate: cython.bytes props: cython.bytes + k: cython.bytes + seg: int res: int + i: int key, cdate, props = l.split(b' ',maxsplit=2) if (m:=PAT.search(props)): + N += 1 seg = int(m[2]) - N += 1 - i: int = int(seg / d) - k: cython.bytes = cdate+m[1] - if (seg == 0) & (res := CC[i].find(k)) == 1: - hits += 1 
- cdx_out.write(key) - cdx_out.write(b' ') - cdx_out.write(cdate) - cdx_out.write(b' ') - cdx_out.write(memoryview(props)[:-2]) - cdx_out.write(b', "lastmod": "') - cdx_out.flush() - CC[i].write(1) - cdx_out.write(b'"}\n') - continue - elif (seg != 0) | (res == 0): - if res == 0: - print(k,key,props,seg,i,file=sys.stderr) - pass - else: - raise ValueError((key,props,seg,i)) + if (seg >= S and seg < E): + i = int(seg / d) + k = cdate+m[1] + if (res := CC[i].find(k)) == 1: + hits += 1 + cdx_out.write(key) + cdx_out.write(b' ') + cdx_out.write(cdate) + cdx_out.write(b' ') + cdx_out.write(memoryview(props)[:-2]) + cdx_out.write(b', "lastmod": "') + cdx_out.flush() + CC[i].write(1) + cdx_out.write(b'"}\n') + continue + elif res == 0: + pass + else: + raise ValueError((key,props,seg,i)) else: raise ValueError(props) cdx_out.write(l)