changeset 258:1b87a7345e39

prepare a ks..tsv file for indexing into a cdb
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 24 Jan 2025 15:01:42 +0000
parents 3ac7e5ec07f9
children 41ed24203e14
files lib/python/cc/lmh/ks2cdb.py
diffstat 1 files changed, 43 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/python/cc/lmh/ks2cdb.py	Fri Jan 24 15:01:42 2025 +0000
@@ -0,0 +1,43 @@
+#!/usr/bin/python3
+import sys, timeit
+global fn, cn, n  # has no effect at module level; fn and cn are assigned during option parsing below
+
+n = None  # record limit handed to dump; None means no limit, the -n option replaces it
+
+def dump(n,fn,cn):
+  i = 0
+  with open(fn,'rb') as f, open(cn,'wb') as c:
+    for l in f:
+      (k,s,u,lm) = l.split()
+      kk=s+u
+      lmi=lm[:-2]
+      c.write(b'+')
+      c.write(b'%d'%len(kk))
+      c.write(b',')
+      c.write(b'%d'%len(lmi))
+      c.write(b':')
+      c.write(kk)
+      c.write(b'->')
+      c.write(lmi)
+      c.write(b'\n')
+      if (n is not None) and (i > n):
+        break
+      i += 1
+    c.write(b'\n')
+  print(i)
+
+while len(sys.argv) > 1:
+  a = sys.argv.pop(1)
+  if a == '-n':
+    n = int(sys.argv.pop(1))
+    continue
+  if a == '-f':
+    fn = sys.argv.pop(1)
+    continue
+  if a == '-c':
+    cn = sys.argv.pop(1)
+    continue
+  print('Usage: ks2cdb.py [-n n] -f file -c file')
+  exit(1)
+
+dump(n,fn,cn)