view bin/build_idx.py @ 110:a0ea1e4a714d

pass in debug flag(s) to merge_date.py
author Henry Thompson <ht@markup.co.uk>
date Tue, 19 Sep 2023 19:40:58 +0100
parents 6104acc1345b
children
line wrap: on
line source

#!/usr/bin/python3
'''Turn a merge_nnn.log file into a cluster.idx file
   We cheat and use the old cluster.idx to save having to read
   all the cdx-....gz files'''
import sys

def build_idx(old_idx='cluster.idx', new_idx='new.idx', log_pattern='merge_%d.log'):
  '''Write a new cluster index using lengths read from the merge logs.

  Every line of old_idx is split on whitespace into six fields
  (surt, datestamp, file, offset, length, cnt).  surt, datestamp,
  file and cnt are copied through; offset and length are replaced.
  The index lines must refer to cdx-00000.gz, cdx-00001.gz, ... in
  that order.  For the N-th such file (counting from 0) the log
  log_pattern%(N+1) is opened: its first line is a two-field header
  whose first field must be N+1 (the header is echoed to stderr),
  and each following line holds one integer, consumed one per index
  line.  That integer becomes the new length field (presumably the
  size of the corresponding compressed block -- confirm against
  whatever writes the logs), and the running total of the integers
  already consumed for the current file becomes the new offset field.

  old_idx:     path of the existing index to read
  new_idx:     path of the index to write (created or truncated)
  log_pattern: %-format string taking the 1-based log number

  Raises ValueError if the index refers to files out of sequence, if
  a log header does not have exactly two fields, or if its number is
  not the expected one.  Calls sys.exit(1), after a message on
  stderr, if a log has fewer entries than the index has lines for
  that file.
  '''
  i = -1         # 0-based number of the cdx file being processed
  curpos = 0     # sum of the lengths already written for that file
  target = None  # name of that file; None until the first line is seen
  log = None     # currently open merge log, if any
  with open(old_idx,'r') as oidx, open(new_idx,'w') as nidx:
    try:
      for ol in oidx:
        (surt, datestamp, fname, _offset, _length, cnt) = ol.split()
        if fname != target:
          # Moving on to the next cdx file, so switch to the next log
          i += 1
          target = "cdx-00%03d.gz"%i
          if fname != target:
            # Without this check an out-of-sequence index would open a
            #  fresh log for every line and silently write nonsense
            raise ValueError("unexpected file: wanted %s, got %s"%(target, fname))
          if log is not None:
            log.close()
          curpos = 0
          log = open(log_pattern%(i+1),'r')
          hdr = log.readline()
          # Unpacking also enforces that the header has exactly two fields
          (j, _) = hdr.split()
          sys.stderr.write(hdr)
          if int(j) != i+1:
            raise ValueError("wrong file: i=%s, j=%s"%(i,j))
        nl = log.readline()
        if not nl:
          # Log ran out before the index lines for this file did
          sys.stderr.write('quiting early: %s\n'%i)
          sys.exit(1)
        nlen = int(nl)
        nidx.write("%s %s\t%s\t%s\t%s\t%s\n"%(surt, datestamp, fname, curpos, nlen, cnt))
        curpos += nlen
    finally:
      # Close the last log on every exit path, normal or not
      if log is not None:
        log.close()

if __name__ == '__main__':
  build_idx()