changeset 262:f511788e4a9d default tip

works with big (ks_0-9.60.cdb) cdb file
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 27 Jan 2025 21:19:18 +0000
parents d3db9e4a0533
children
files lib/python/cc/lmh/cdb.pxd lib/python/cc/lmh/odb.pyx
diffstat 2 files changed, 56 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/lib/python/cc/lmh/cdb.pxd	Fri Jan 24 15:07:00 2025 +0000
+++ b/lib/python/cc/lmh/cdb.pxd	Mon Jan 27 21:19:18 2025 +0000
@@ -15,11 +15,11 @@
     int cdb_findnext(Cdb* cdb,char *,unsigned int)
     int cdb_find(Cdb* cdb,char *,unsigned int)
 
-    int cdb_pos();
+    unsigned int cdb_pos();
     int cdb_len();
 
     char *cdb_mmap();
-    int cdb_msize();
+    unsigned int cdb_msize();
 
 cdef extern from "error.h":
   int errno;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/python/cc/lmh/odb.pyx	Mon Jan 27 21:19:18 2025 +0000
@@ -0,0 +1,54 @@
+# distutils: extra_objects = cdb_min/libcdb.a
+# distutils: include_dirs = cdb_min
+
+cdef unsigned int res  # module-level C variable; not referenced by the other module-level code in this hunk
+
+cimport cdb
+import sys, timeit
+
+cdef class Cdb:  # extension type wrapping one C-level cdb.Cdb handle
+  cdef cdb.Cdb* _c_cdb  # set in __cinit__; NOTE(review): no __dealloc__ is visible here -- confirm whether cdb_new()'s result must be released
+  
+
+  def __cinit__(self):  # obtain the handle from cdb.cdb_new() when the object is created
+    self._c_cdb = cdb.cdb_new()
+
+  cdef int find(self: Cdb, p: bytes):  # look up key p (passed with its byte length); returns cdb_find's int result unchanged
+    return cdb.cdb_find(self._c_cdb, p, len(p))
+
+  cdef char[::1] init(self: Cdb, fno: int):  # fno: file descriptor (the caller below passes dbf.fileno()); returns a contiguous char view
+    cdb.cdb_init(self._c_cdb,fno)  # NOTE(review): any result/status from cdb_init is ignored here
+    cdef char[::1] _mview = <char[:cdb.cdb_msize():1]>(cdb.cdb_mmap())  # cast the pointer from cdb_mmap() to a view of cdb_msize() chars
+    return _mview
+
+probe: bytes = sys.argv[2].encode('utf8')  # lookup key: second command-line argument, UTF-8 encoded
+
+cdef Cdb CC  # module-level Cdb; assigned from testMe() further down and read by cfind()
+
+cdef Cdb testMe():  # open the file named by argv[1], look up probe once, print what was found, return the Cdb
+  global probe
+  cdef unsigned int res  # declared but never used in this function
+  cdef unsigned int o  # receives cdb.cdb_pos(); used as the start offset into mv
+  cdef unsigned int l  # receives cdb.cdb_len() (declared int in cdb.pxd); used as the slice length
+  print('testing...')
+  cd = Cdb()
+  with open(sys.argv[1],'rb') as dbf:
+    mv = cd.init(dbf.fileno())  # char view built from cdb_mmap()/cdb_msize()
+    cdb.cdb_findstart(cd._c_cdb)
+    if cd.find(probe) > 0:  # a positive result is treated as "key found"
+      print(o:=cdb.cdb_pos(), l:=cdb.cdb_len())  # binds o and l while printing them
+      sys.stdout.buffer.write(mv[o:o+l])  # write the l chars starting at offset o of the view
+      sys.stdout.buffer.write(b'\n')
+    else:
+      print(0)
+  print('tested')
+  return cd  # NOTE(review): returned after dbf is closed; presumably the mapping stays valid without the fd -- confirm
+
+def cfind(p: bytes) -> int:  # Python-callable lookup of key p on the global CC; this is what the timeit statement invokes
+  global CC
+  return cdb.cdb_find(CC._c_cdb, p, len(p))  # same C call that Cdb.find makes, result returned as-is
+
+CC = testMe()  # runs the one-off lookup at module load and keeps the resulting Cdb for cfind()
+
+print(timeit.timeit('cfind(probe)',  # print the total time for argv[3] repetitions of the lookup
+               number=int(sys.argv[3]), globals=globals()))