#!/usr/bin/python3
'''Turn the merge_nnn.log files into a new cluster.idx file.

We cheat and use the old cluster.idx to save having to read
all the cdx-....gz files: every field of each old index line is kept
except the offset and length, which are recomputed from the lengths
listed in the merge logs.
'''
import sys


def rebuild_index(old_idx='cluster.idx', new_idx='new.idx',
                  log_pattern='merge_%d.log', err=None):
    '''Write new_idx from old_idx, taking record lengths from the merge logs.

    Each line of old_idx must split into six whitespace-separated fields:
    surt, datestamp, file, offset, length, cnt.  The lines are expected to
    be grouped by file, in the order cdx-00000.gz, cdx-00001.gz, ...  Each
    time the file field differs from the current target, the next merge log
    (log_pattern % 1, then 2, ...) is opened.  Its first line is a header
    of two fields, the first of which must be the log's own number; every
    following line is an integer used as the length of the corresponding
    index record (presumably the compressed block size -- confirm against
    whatever produces the logs).  Offsets are the running sum of those
    lengths and restart at 0 for every log.

    old_idx     -- path of the existing index to read
    new_idx     -- path of the index to write (truncated if it exists)
    log_pattern -- %-format taking the 1-based merge log number
    err         -- stream for progress/diagnostics; sys.stderr when None

    Returns 0 on success, or 1 if a merge log ran out of lengths before
    the index lines that refer to it were exhausted (whatever was written
    so far is left in new_idx).

    Raises ValueError if a log header names a different number than
    expected, or if a line does not have the expected fields.
    '''
    if err is None:
        err = sys.stderr
    shard = -1
    curpos = 0
    # Deliberately matches no real file name, so the first index line
    # triggers opening the first merge log.
    target = "cdx-00%03d.gz" % shard
    log = None
    with open(old_idx, 'r') as oidx, open(new_idx, 'w') as nidx:
        try:
            for entry in oidx:
                # offset and length are unpacked only to enforce the
                # six-field layout; their values are being replaced.
                surt, datestamp, file, _offset, _length, cnt = entry.split()
                if file != target:
                    # Move on to the next shard and its merge log.
                    shard += 1
                    target = "cdx-00%03d.gz" % shard
                    if log is not None:
                        log.close()
                    curpos = 0
                    log = open(log_pattern % (shard + 1), 'r')
                    hdr = log.readline()
                    j, _name = hdr.split()
                    err.write(hdr)
                    if int(j) != shard + 1:
                        raise ValueError(
                            "wrong file: i=%s, j=%s" % (shard, j))
                # With no log open yet there is nothing to read, which is
                # reported the same way as an exhausted log.
                nl = log.readline() if log is not None else ''
                if not nl:
                    err.write('quiting early: %s\n' % shard)
                    return 1
                nlen = int(nl)
                nidx.write("%s %s\t%s\t%s\t%s\t%s\n"
                           % (surt, datestamp, file, curpos, nlen, cnt))
                curpos += nlen
        finally:
            # Close the current merge log on every exit path.
            if log is not None:
                log.close()
    return 0


if __name__ == '__main__':
    sys.exit(rebuild_index())