Mercurial > hg > cc > cirrus_work
changeset 262:f511788e4a9d default tip
works with big (ks_0-9.60.cdb) cdb file
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 27 Jan 2025 21:19:18 +0000 |
parents | d3db9e4a0533 |
children | |
files | lib/python/cc/lmh/cdb.pxd lib/python/cc/lmh/odb.pyx |
diffstat | 2 files changed, 56 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/lib/python/cc/lmh/cdb.pxd Fri Jan 24 15:07:00 2025 +0000
+++ b/lib/python/cc/lmh/cdb.pxd Mon Jan 27 21:19:18 2025 +0000
@@ -15,11 +15,11 @@
   int cdb_findnext(Cdb* cdb,char *,unsigned int)
   int cdb_find(Cdb* cdb,char *,unsigned int)
-  int cdb_pos();
+  unsigned int cdb_pos();
   int cdb_len();
   char *cdb_mmap();
-  int cdb_msize();
+  unsigned int cdb_msize();
 cdef extern from "error.h":
   int errno;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/python/cc/lmh/odb.pyx Mon Jan 27 21:19:18 2025 +0000
@@ -0,0 +1,54 @@
+# distutils: extra_objects = cdb_min/libcdb.a
+# distutils: include_dirs = cdb_min
+
+cdef unsigned int res
+
+cimport cdb
+import sys, timeit
+
+cdef class Cdb:
+  cdef cdb.Cdb* _c_cdb
+
+
+  def __cinit__(self):
+    self._c_cdb = cdb.cdb_new()
+
+  cdef int find(self: Cdb, p: bytes):
+    return cdb.cdb_find(self._c_cdb, p, len(p))
+
+  cdef char[::1] init(self: Cdb, fno: int):
+    cdb.cdb_init(self._c_cdb,fno)
+    cdef char[::1] _mview = <char[:cdb.cdb_msize():1]>(cdb.cdb_mmap())
+    return _mview
+
+probe: bytes = sys.argv[2].encode('utf8')
+
+cdef Cdb CC
+
+cdef Cdb testMe():
+  global probe
+  cdef unsigned int res
+  cdef unsigned int o
+  cdef unsigned int l
+  print('testing...')
+  cd = Cdb()
+  with open(sys.argv[1],'rb') as dbf:
+    mv = cd.init(dbf.fileno())
+    cdb.cdb_findstart(cd._c_cdb)
+    if cd.find(probe) > 0:
+      print(o:=cdb.cdb_pos(), l:=cdb.cdb_len())
+      sys.stdout.buffer.write(mv[o:o+l])
+      sys.stdout.buffer.write(b'\n')
+    else:
+      print(0)
+  print('tested')
+  return cd
+
+def cfind(p: bytes) -> int:
+  global CC
+  return cdb.cdb_find(CC._c_cdb, p, len(p))
+
+CC = testMe()
+
+print(timeit.timeit('cfind(probe)',
+                    number=int(sys.argv[3]), globals=globals()))