Mercurial > hg > cc > cirrus_work
changeset 106:6104acc1345b
first try
author | Henry Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 14 Sep 2023 19:27:23 +0100 |
parents | 9403c02d5034 |
children | 40c460fed99f |
files | bin/build_idx.py |
diffstat | 1 files changed, 31 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/python3
'''Turn a merge_nnn.log file into a cluster.idx file

We cheat and use the old cluster.idx to save having to read
all the cdx-....gz files: the surt, datestamp, file name and count
columns are copied from the old index, while the offset and length
columns are recomputed from the lengths listed in the merge logs.

Run as a script it reads ./cluster.idx and ./merge_<n>.log and
writes ./new.idx.
'''
import sys


def _open_merge_log(n):
    '''Open merge_<n>.log in the current directory for reading.'''
    return open('merge_%d.log' % n, 'r')


def build_idx(oidx, nidx, open_log=_open_merge_log):
    '''Rewrite the index lines of oidx into nidx with fresh offsets/lengths.

    oidx     -- iterable of old index lines, each with exactly six
                whitespace-separated fields:
                surt, datestamp, file, offset, length, cnt
    nidx     -- writable text stream receiving the new index lines
    open_log -- callable taking the 1-based log number and returning an
                open text stream; defaults to opening merge_<n>.log

    Every time the file column differs from the file currently being
    processed, the next log in sequence is opened (log n+1 goes with
    cdx-00<nnn>.gz).  Its first line is a two-field header whose first
    field must equal the log number; each following line holds one
    integer, used as the length of the next index record.  Offsets are
    the running sum of those lengths, restarting at 0 for each file.

    Raises ValueError if a log header carries the wrong number (or if a
    line does not have the expected number of fields).  Exits with
    status 1, after a message on stderr, if a log runs out of lines
    before the old index does.
    '''
    i = -1
    curpos = 0
    # None never equals a file name, so the first record always opens
    # the first log; no placeholder file handle is needed.
    target = None
    log = None
    try:
        for ol in oidx:
            # Unpacking also checks that the line has exactly six fields;
            # the old offset and length are deliberately discarded.
            surt, datestamp, fname, _offset, _length, cnt = ol.split()
            if fname != target:
                # NOTE: files are assumed to appear in numeric order with
                # no gaps; the new name is not compared with the value
                # actually found in the file column.
                i += 1
                target = "cdx-00%03d.gz" % i
                if log is not None:
                    log.close()
                    log = None
                curpos = 0
                log = open_log(i + 1)
                hdr = log.readline()
                (j, _f) = hdr.split()
                sys.stderr.write(hdr)
                if int(j) != i + 1:
                    raise ValueError("wrong file: i=%s, j=%s" % (i, j))
            nl = log.readline()
            if not nl:
                sys.stderr.write('quiting early: %s\n' % i)
                # sys.exit rather than the site-provided exit(), which is
                # not available when site is disabled (python -S).
                sys.exit(1)
            nlen = int(nl)
            nidx.write("%s %s\t%s\t%s\t%s\t%s\n"
                       % (surt, datestamp, fname, curpos, nlen, cnt))
            curpos += nlen
    finally:
        # Close the last log on every exit path: normal completion,
        # early exit and errors alike.
        if log is not None:
            log.close()


def main():
    '''Read cluster.idx and write new.idx in the current directory.'''
    with open('cluster.idx', 'r') as oidx, open('new.idx', 'w') as nidx:
        build_idx(oidx, nidx)


if __name__ == '__main__':
    main()