changeset 117:f52783faf3ee

a bit more logging
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 26 Sep 2023 18:55:43 +0100
parents 5b952d16838c
children 9d14e7c32737
files bin/merge_date.py
diffstat 1 files changed, 6 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/bin/merge_date.py	Tue Sep 26 18:55:11 2023 +0100
+++ b/bin/merge_date.py	Tue Sep 26 18:55:43 2023 +0100
@@ -42,7 +42,7 @@
 
 FN = 0
 
-XCNT = 0
+XCNT = WCNT = 0
 DCNT = 0
 
 XF = igzip.IGzipFile(filename=XPATH%0)
@@ -50,7 +50,7 @@
 
 def nextLine():
   '''Move on to next index file if current has run out'''
-  global FN, NF, NPATH, NN, XF, XPATH, XCNT, DCNT
+  global FN, NF, NPATH, NN, XF, XPATH, XCNT, DCNT, WCNT
   while True:
     xl=XF.readline()
     XCNT += 1
@@ -60,15 +60,17 @@
       XF.close()
       NF.close()
       print(NN, flush=True) # so we can compress it
-      time.sleep(0.1) # so it flushes?
+      print(NN, XCNT, WCNT, DCNT,sep='\t',file=sys.stderr,flush=True)
+      time.sleep(0.1) # so they flush?
       XN=XPATH%FN
       if not os.path.exists(XN):
         return None
       XF = igzip.IGzipFile(filename=XN)
       NF = open((NN:=NPATH%FN), 'wb')
       xl = XF.readline()
-      XCNT = 1
+      WCNT = XCNT = 1
     if WARC.search(xl):
+      WCNT += 1
       return xl
     else:
       NF.write(xl)