Mercurial > hg > cc > cirrus_work
changeset 261:d3db9e4a0533 default tip
finally get test code separated from db.pyx to work
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Fri, 24 Jan 2025 15:07:00 +0000 |
parents | 5b2e675ac556 |
children | |
files | lib/python/cc/lmh/nndb.pyx lib/python/cc/lmh/setup_nn.py |
diffstat | 2 files changed, 51 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# distutils: extra_objects = cdb_min/libcdb.a
# sources = [cdb_min/cdb.c, cdb_min/error.c, cdb_min/open_read.c, cdb_min/seek_cur.c, cdb_min/open_trunc.c, cdb_min/seek_set.c, cdb_min/byte_copy.c, cdb_min/byte_diff.c, cdb_min/error_str.c, cdb_min/uint32_unpack.c, cdb_min/cdb_hash.c]
# distutils: include_dirs = cdb_min

# lib/python/cc/lmh/nndb.pyx
#
# Smoke test and micro-benchmark for looking up one key in a cdb file
# through db.CCdb.  Every top-level statement runs when the module is
# imported (see the example invocation below): first a single, verbose
# lookup of the probe key, then a timeit run of the same lookup repeated
# nreps times.
#
# NOTE(review): the "sources" line above has no "distutils:" prefix, so it
# is presumably an ordinary comment recording what libcdb.a is built from
# rather than an active build directive -- confirm.
# NOTE(review): the nesting of the statements inside testMe was
# reconstructed from a rendering that had lost its line breaks and
# indentation; check it against the repository copy.

cimport cdb
cimport db

# Usage: ... cdb-path probe nreps
#   sys.argv[1]  cdb-path  file opened for binary reading and passed to CC.init()
#   sys.argv[2]  probe     key to look up (encoded as UTF-8 before use)
#   sys.argv[3]  nreps     repetition count handed to timeit

# E.g. python3 -c 'import nndb' ~/results/CC-MAIN-2019-35/warc_lmhx/ks_0.cdb 20190825142846http://71.43.189.10/dermorph/ 10000000

import sys, timeit

# Single module-level CCdb instance shared by testMe() and cfind().
cdef db.CCdb CC = db.CCdb()

# Look up `probe` once in the cdb file named by sys.argv[1], printing
# progress and the outcome to stdout.
#
#   probe    key to search for, as bytes
#   returns  the module-level CC, after CC.init() has been called on the file
#
# Nothing is caught here: a failing open(), or a missing or non-integer
# sys.argv[3], propagates as the usual Python exception.
cdef db.CCdb testMe(bytes probe):
    global CC
    res: int = -1
    cdef cdb.Cdb* c_cdb
    print('testing... %s %s x %s'%(sys.argv[1],probe,int(sys.argv[3])))
    with open(sys.argv[1],'rb') as dbf:
        # CC.init() receives the raw file descriptor.  Its result is used
        # below as something with a length that can be sliced and written
        # to a binary stream -- presumably a view onto the file's contents;
        # confirm in db.pyx.
        mv = CC.init(dbf.fileno())
        c_cdb = CC._c_cdb
        # The pointer is shown as an integer purely as a debugging aid.
        # NOTE(review): <long> is narrower than a pointer on LLP64
        # platforms; <size_t> would be the safer cast there.
        print("mv %s cdb %s"%(len(mv),<long>(c_cdb)))
        cdb.cdb_findstart(c_cdb)
        res=CC.find(probe)
        if res == 1:
            # A result of 1 is treated as "found".  o and l are then used
            # as the start offset and length of the slice of mv to print.
            print(res,o:=cdb.cdb_pos(), l:=cdb.cdb_len())
            sys.stdout.buffer.write(mv[o:o+l])
            sys.stdout.buffer.write(b'\n')
        else:
            # Any other result is reported as a failed lookup.
            print('losing %s'%res)
    print('tested')
    return CC

# Python-visible wrapper around CC.find(), so that the timeit statement
# string at the bottom of the file can call the lookup by name.
#
#   p        key to look up, as bytes
#   returns  whatever CC.find() returns (testMe treats 1 as a hit)
def cfind(p: bytes) -> int:
    global CC
    return CC.find(p)

# Verbose one-off lookup; the CC it returns is not used here.
testMe(sys.argv[2].encode('utf8'))

# Kept as a module-level name so that timeit can resolve it via globals().
probe = sys.argv[2].encode("utf8")

# Time nreps calls of cfind(probe) and print the total elapsed seconds.
print(timeit.timeit('cfind(probe)',
                    number=int(sys.argv[3]), globals=globals()))