# HG changeset patch # User Henry S. Thompson # Date 1738673452 0 # Node ID 24ca6ab32e474a1f534712189e0e189e8a675953 # Parent 0c814f07865a9ab84eaee0411e694b6ecc080795 malloc diff -r 0c814f07865a -r 24ca6ab32e47 lurid3/notes.txt --- a/lurid3/notes.txt Fri Jan 31 13:26:07 2025 +0000 +++ b/lurid3/notes.txt Tue Feb 04 12:50:52 2025 +0000 @@ -1191,11 +1191,11 @@ Oops, that was ndb, and nndb doesn't work! Things to try next: - 1) Build a bigger .cdb w. as close to 4GB as possible - 2) Shift to a shared library for cdb-0.75 + 1) Build a bigger .cdb w. as close to 4GB as possible Done + 2) Shift to a shared library for cdb-0.75 Done 3) Get rid of the single fixed Cdb struct instance and malloc it as - required - 3a) Remove debugging output and recompile everything + required Done + 3a) Remove debugging output and recompile everything Done 4) Build and test the real harness to process .cdx files using .cdb Try 50% more, e.g. approx. 1.5 segments @@ -1287,6 +1287,42 @@ vs '(X:=X+1) if cfind(probe)==1 else None', setup = 'global X' + +For this test, size of db doesn't matter: + >: python3 -c 'import nndb' ~/results/CC-MAIN-2019-35/warc_lmhx/ks_0-9.60.cdb 20190825142846http://71.43.189.10/dermorph/ 10000000 + testing... /work/dc007/dc007/hst/results/CC-MAIN-2019-35/warc_lmhx/ks_0-9.60.cdb b'20190825142846http://71.43.189.10/dermorph/' x 10000000 + 1 2488 10 + 1564555978 + tested + 1.9914793614298105 + 2.711402891203761 10000000 + + >: python3 -c 'import nndb' ~/results/CC-MAIN-2019-35/warc_lmhx/ks_0.cdb 20190825142846http://71.43.189.10/dermorph/ 10000000 + testing... /work/dc007/dc007/hst/results/CC-MAIN-2019-35/warc_lmhx/ks_0.cdb b'20190825142846http://71.43.189.10/dermorph/' x 10000000 + 1 2488 10 + 1564555978 + tested + 2.015953341498971 + 2.798953704535961 10000000 + +Now using non-static Cdb, it's slower??? Cirrus load is low :-(. + + >: python3 -c 'import nndb' ~/results/CC-MAIN-2019-35/warc_lmhx/ks_0.cdb 20190825142846http://71.43.189.10/dermorph/ 10000000 + testing... /work/dc007/dc007/hst/results/CC-MAIN-2019-35/warc_lmhx/ks_0.cdb b'20190825142846http://71.43.189.10/dermorph/' x 10000000 + 1 2488 10 + 1564555978 + tested + 2.474294847997953 + 3.2440936170023633 10000000 + +At least it works: + + >: python3 -c 'import get2' cdb/rts-tmp/sv.cdb cdb/rts-tmp/12.cdb two discard/tcp + two cdb/rts-tmp/sv.cdb missing + discard/tcp cdb/rts-tmp/sv.cdb 9 + two cdb/rts-tmp/12.cdb Goodbye + discard/tcp cdb/rts-tmp/12.cdb missing + ================ Try it with the existing _per segment_ index we have for 2019-35