# HG changeset patch # User Henry S. Thompson # Date 1695750943 -3600 # Node ID f52783faf3ee96afbacb995259ea0bea5e40a7bd # Parent 5b952d16838c461277d16bf9db34c0f6179bb648 a bit more logging diff -r 5b952d16838c -r f52783faf3ee bin/merge_date.py --- a/bin/merge_date.py Tue Sep 26 18:55:11 2023 +0100 +++ b/bin/merge_date.py Tue Sep 26 18:55:43 2023 +0100 @@ -42,7 +42,7 @@ FN = 0 -XCNT = 0 +XCNT = WCNT = 0 DCNT = 0 XF = igzip.IGzipFile(filename=XPATH%0) @@ -50,7 +50,7 @@ def nextLine(): '''Move on to next index file if current has run out''' - global FN, NF, NPATH, NN, XF, XPATH, XCNT, DCNT + global FN, NF, NPATH, NN, XF, XPATH, XCNT, DCNT, WCNT while True: xl=XF.readline() XCNT += 1 @@ -60,15 +60,17 @@ XF.close() NF.close() print(NN, flush=True) # so we can compress it - time.sleep(0.1) # so it flushes? + print(NN, XCNT, WCNT, DCNT,sep='\t',file=sys.stderr,flush=True) + time.sleep(0.1) # so they flush? XN=XPATH%FN if not os.path.exists(XN): return None XF = igzip.IGzipFile(filename=XN) NF = open((NN:=NPATH%FN), 'wb') xl = XF.readline() - XCNT = 1 + WCNT = XCNT = 1 if WARC.search(xl): + WCNT += 1 return xl else: NF.write(xl)