Mercurial > hg > cc > cirrus_work
changeset 149:34562e621f6d
try to get the counts right, particularly when re-merging
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 04 Oct 2023 18:53:55 +0100 |
parents | ded66be0238c |
children | 4c499fc47ea7 85343fe48f69 |
files | lib/python/cc/lmh/merge_date.py |
diffstat | 1 files changed, 17 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/lib/python/cc/lmh/merge_date.py Wed Oct 04 18:51:56 2023 +0100
+++ b/lib/python/cc/lmh/merge_date.py Wed Oct 04 18:53:55 2023 +0100
@@ -61,7 +61,7 @@
 NF = open('/dev/null','w')
 XF = open('/dev/null','rb')
-if False:
+if MERGED:
  MF = open(MERGED,'r')
  PREV_DCNT = 0
 else:
@@ -81,14 +81,18 @@
  if MF and REDOING:
  oo = ML.split()
  oo = [oo[0]]+[int(o) for o in oo[1:]]
- if oo != (no:=[NN, XCNT, WCNT, DCNT]):
+ if oo != (no:=[NN, XCNT, WCNT, DCNT-1]):
  print(*('%s:\t%s<>%s'%vv for vv in zip(('NN', 'XCNT', 'WCNT', 'DCNT'),oo,no)),
  sep='\n',file=sys.stderr)
  REDOING=False
  if FN != -1:
  print(NN, flush=True) # so we can compress it
- print(NN, XCNT, WCNT, DCNT,sep='\t',file=sys.stderr,flush=True)
+ print(NN, XCNT, WCNT,
+ DCNT-1, # we've read one more date than we've actually
+ # used (even if we're finishing up, because
+ # we increment DCNT even on EOF)
+ sep='\t',file=sys.stderr,flush=True)
  time.sleep(0.1) # so they flush?
  FN += 1
  if MF:
@@ -106,12 +110,14 @@
  mo = ML.split()
  NN = mo[0]
  (XCNT, WCNT, DCNT) = [int(o) for o in mo[1:]]
- # file col. 4 is 1 ahead of the game
- for i in range((DCNT-1)-PREV_DCNT):
+ # file col. 4 is 1 ahead of the game ??
+ for i in range(DCNT-PREV_DCNT):
  dl = DF.readline()
+ DCNT+=1 # So the DCNT-1s above will be correct
  # hack because the first date of the next x file has
  # already been read and split
- DKEY, DDATE, DURL, DTIME = dl.split(b'\t')
+ DKEY, DDATE, DURL, DTIME = \
+ (("", "", "", 0) if dl == b'' else dl.split(b'\t'))
  PREV_DCNT = DCNT
  # We've skipped this one, go around again,
  # the existing XF will still be at EOF
@@ -139,13 +145,16 @@
 def nextDate(dn):
  global DEBUG, DF, DCNT, XCNT
  dl = DF.readline()
+ DCNT += 1
  if dl == b'':
- # write out the last of the last index file, if any
+ # Write out the last of the last index file, if any
+ # Note that we increment DCNT even in this case, so that the decrement
+ # that happens when we write out the summary line in nextLine on EOF
+ # will give the right answer.
  return "", "", "", 0
  if DEBUG:
  sys.stderr.write("dl%s: %s\n"%(dn,dl))
  dkey, ddate, durl, dtime = dl.split(b'\t')
- DCNT += 1
  return dkey, ddate, durl, dtime
 with open(sys.argv[1], 'rb') as DF: