Mercurial > hg > cc > cirrus_work
comparison bin/build_idx.py @ 106:6104acc1345b
first try
author | Henry Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 14 Sep 2023 19:27:23 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
105:9403c02d5034 | 106:6104acc1345b |
---|---|
#!/usr/bin/python3
'''Turn a merge_nnn.log file into a cluster.idx file
We cheat and use the old cluster.idx to save having to read
all the cdx-....gz files'''
import sys


def build_index(old_idx='cluster.idx', new_idx='new.idx',
                log_pattern='merge_%d.log'):
    '''Rewrite old_idx into new_idx with offsets/lengths taken from merge logs.

    Every row of old_idx must split into six whitespace-separated fields:
    surt, datestamp, file, offset, length, cnt.  The old offset and length
    are discarded.  Each time the file field changes, the next merge log
    (log_pattern % 1, log_pattern % 2, ...) is opened; its first line is a
    two-field header whose first field must equal the log's number, and
    each following line is one integer that becomes the new length of the
    corresponding index row (presumably the block length -- confirm
    against whatever writes the merge logs).  The new offset is the
    running sum of those lengths, restarting at 0 for every log.

    The header line of each merge log is echoed to stderr as a progress
    indication.

    Raises ValueError if a row or header has the wrong number of fields,
    if a length is not an integer, or if a log header carries the wrong
    number.  Calls sys.exit(1) if a merge log has fewer length lines than
    the old index has rows for that file.
    '''
    with open(old_idx, 'r') as oidx, open(new_idx, 'w') as nidx:
        i = -1          # zero-based number of the cdx file being processed
        curpos = 0      # offset of the next block within the current cdx file
        target = None   # no file seen yet, so the first row opens a log
        log = None
        try:
            for ol in oidx:
                (surt, datestamp, file, _offset, _length, cnt) = ol.split()
                if file != target:
                    # Moved on to the next cdx file: switch merge logs.
                    i += 1
                    target = "cdx-00%03d.gz" % i
                    if log is not None:
                        log.close()
                    curpos = 0
                    log = open(log_pattern % (i + 1), 'r')
                    hdr = log.readline()
                    # Unpack two names so a malformed header still raises.
                    (j, _name) = hdr.split()
                    sys.stderr.write(hdr)
                    if int(j) != i + 1:
                        raise ValueError("wrong file: i=%s, j=%s" % (i, j))
                nl = log.readline()
                if not nl:
                    # The log ran out before the old index did.
                    sys.stderr.write('quiting early: %s\n' % i)
                    sys.exit(1)
                nlen = int(nl)
                nidx.write("%s %s\t%s\t%s\t%s\t%s\n"
                           % (surt, datestamp, file, curpos, nlen, cnt))
                curpos += nlen
        finally:
            # Close the last merge log on every exit path, including
            # the ValueError and sys.exit cases above.
            if log is not None:
                log.close()


if __name__ == '__main__':
    build_index()