comparison bin/build_idx.py @ 106:6104acc1345b

first try
author Henry Thompson <ht@markup.co.uk>
date Thu, 14 Sep 2023 19:27:23 +0100
parents
children
comparison
equal deleted inserted replaced
105:9403c02d5034 106:6104acc1345b
#!/usr/bin/python3
'''Turn a merge_nnn.log file into a cluster.idx file
We cheat and use the old cluster.idx to save having to read
all the cdx-....gz files'''
import io
import sys


def build_index(old_idx='cluster.idx', new_idx='new.idx',
                log_pattern='merge_%d.log'):
    '''Rewrite the offset/length columns of an index from merge logs.

    Every line of old_idx is split on whitespace into six fields:
    surt, datestamp, file, offset, length, cnt.  Each time the file
    field differs from the currently expected "cdx-00NNN.gz" name, the
    next merge log (log_pattern % (N+1)) is opened; its header line is
    echoed to stderr and its first field must equal N+1.  After that,
    one line is read from the log per index line and parsed as an
    integer length.  The output line keeps surt, datestamp, file and
    cnt, but takes its length from the log and its offset from the
    running sum of the lengths seen so far for that file.

    old_idx     -- path of the existing index to read
    new_idx     -- path of the index to write (truncated if it exists)
    log_pattern -- %-format taking the 1-based log number

    Returns 0 when every index line was rewritten, 1 if a merge log ran
    out of lines first (a message is written to stderr and the output
    written so far is kept).

    Raises ValueError if a log header carries the wrong number, or if an
    index line, header or length line does not parse.
    '''
    i = -1
    curpos = 0
    # Sentinel: for i == -1 this is "cdx-00-01.gz", so the first index
    # line is expected to differ from it and open the first log.
    target = "cdx-00%03d.gz" % i
    # Empty in-memory stream standing in for "no log opened yet": it
    # supports readline()/close() like a real file, without needing
    # /dev/null to exist on the platform.
    log = io.StringIO()
    with open(old_idx, 'r') as oidx, open(new_idx, 'w') as nidx:
        try:
            for ol in oidx:
                (surt, datestamp, file, _offset, _length, cnt) = ol.split()
                if file != target:
                    # Move on to the next cdx file and its merge log.
                    # NOTE(review): nothing checks that file now equals
                    # the new target; a gap in the numbering would pull
                    # lengths from the wrong log -- confirm inputs are
                    # always consecutive.
                    i += 1
                    target = "cdx-00%03d.gz" % i
                    log.close()
                    curpos = 0
                    log = open(log_pattern % (i + 1), 'r')
                    hdr = log.readline()
                    (j, _f) = hdr.split()
                    sys.stderr.write(hdr)
                    if int(j) != i + 1:
                        raise ValueError("wrong file: i=%s, j=%s" % (i, j))
                nl = log.readline()
                if not nl:
                    # The log has fewer lines than the index has entries.
                    sys.stderr.write('quiting early: %s\n' % i)
                    return 1
                nlen = int(nl)
                nidx.write("%s %s\t%s\t%s\t%s\t%s\n" % (surt, datestamp, file,
                                                       curpos, nlen, cnt))
                curpos += nlen
        finally:
            # Close the last log on every exit path (normal end, early
            # return, or exception); closing twice is harmless.
            log.close()
    return 0


if __name__ == '__main__':
    sys.exit(build_index())