changeset 261:d3db9e4a0533 default tip

finally get test code separated from db.pyx to work
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 24 Jan 2025 15:07:00 +0000
parents 5b2e675ac556
children
files lib/python/cc/lmh/nndb.pyx lib/python/cc/lmh/setup_nn.py
diffstat 2 files changed, 51 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/python/cc/lmh/nndb.pyx	Fri Jan 24 15:07:00 2025 +0000
@@ -0,0 +1,45 @@
+# distutils: extra_objects = cdb_min/libcdb.a
+# sources = [cdb_min/cdb.c, cdb_min/error.c, cdb_min/open_read.c, cdb_min/seek_cur.c, cdb_min/open_trunc.c, cdb_min/seek_set.c, cdb_min/byte_copy.c, cdb_min/byte_diff.c, cdb_min/error_str.c, cdb_min/uint32_unpack.c, cdb_min/cdb_hash.c]
+# distutils: include_dirs = cdb_min
+
+cimport cdb
+cimport db
+
+# Usage: python3 -c 'import nndb' <cdb-path> <probe> <nreps>
+
+# E.g. python3 -c 'import nndb' ~/results/CC-MAIN-2019-35/warc_lmhx/ks_0.cdb 20190825142846http://71.43.189.10/dermorph/ 10000000
+
+import sys, timeit
+
+cdef db.CCdb CC = db.CCdb()
+
+cdef db.CCdb testMe(bytes probe):  # one verbose lookup of probe in the cdb file named by sys.argv[1]; returns CC
+  global CC  # module-level CCdb instance, shared with cfind()
+  res: int = -1  # receives the result of CC.find(); 1 is treated as a hit below
+  cdef cdb.Cdb* c_cdb
+  print('testing... %s %s x %s'%(sys.argv[1],probe,int(sys.argv[3])))
+  with open(sys.argv[1],'rb') as dbf:  # NOTE(review): dbf is closed on exit, yet CC is used afterwards by cfind() -- confirm CC.init() does not need the descriptor kept open
+    mv = CC.init(dbf.fileno())  # mv is only measured and sliced here; presumably a buffer over the file contents -- confirm in db.pyx
+    c_cdb = CC._c_cdb
+    print("mv %s cdb %s"%(len(mv),<long>(c_cdb)))  # debug output: buffer length and the struct pointer shown as an integer
+    cdb.cdb_findstart(c_cdb)
+    res=CC.find(probe)
+    if res == 1:
+      print(res,o:=cdb.cdb_pos(), l:=cdb.cdb_len())  # o and l are used as the offset and length of the value within mv
+      sys.stdout.buffer.write(mv[o:o+l])
+      sys.stdout.buffer.write(b'\n')
+    else:
+      print('losing %s'%res)
+  print('tested')
+  return CC
+
+def cfind(p: bytes) -> int:  # Python-visible wrapper so the timeit statement below can call CC.find()
+  global CC  # the module-level CCdb that testMe() initialised
+  return CC.find(p)
+
+testMe(sys.argv[2].encode('utf8'))  # runs at import time: one verbose lookup, which also calls CC.init() before the timing run
+
+probe = sys.argv[2].encode("utf8")  # module-level name resolved by the timeit statement through globals()
+
+print(timeit.timeit('cfind(probe)',  # prints the total time in seconds for all repetitions
+               number=int(sys.argv[3]), globals=globals()))  # repetition count comes from the third command-line argument
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/python/cc/lmh/setup_nn.py	Fri Jan 24 15:07:00 2025 +0000
@@ -0,0 +1,6 @@
+from setuptools import Extension, setup
+from Cython.Build import cythonize
+
+setup(  # build configuration for the nndb extension
+    ext_modules = cythonize([Extension("nndb", ["nndb.pyx"])])  # compile nndb.pyx into an extension module named "nndb"
+)