changeset 90:c1a70532444c

flip loops
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 31 Aug 2023 14:14:21 +0100
parents a62580816f1c
children 460f0599e8cd
files bin/merge_date.py
diffstat 1 files changed, 46 insertions(+), 43 deletions(-) [+]
line wrap: on
line diff
--- a/bin/merge_date.py	Wed Aug 30 21:49:43 2023 +0100
+++ b/bin/merge_date.py	Thu Aug 31 14:14:21 2023 +0100
@@ -7,55 +7,58 @@
 ksvstream consists of tab-separated key, CC date and Unix timestamp
 ''' # '
 
-import sys, io, os
+import sys, io, os, os.path
 from isal import igzip
 
-xpath = "%s/cdx-00%%0.3d.gz"%sys.argv[2]
-npath = "%s/cdx-00%%0.3d.gz"%sys.argv[3]
+XPATH = "%s/cdx-00%%0.3d.gz"%sys.argv[2]  # input index file name template under sys.argv[2]; filled in with a file number
+NPATH = "%s/cdx-00%%0.3d.gz"%sys.argv[3]  # output file name template under sys.argv[3], same numbering as XPATH
 
-#print(sys.argv[3],npath,file=sys.stderr)
+#print(sys.argv[3],NPATH,file=sys.stderr)
 
 os.makedirs(sys.argv[3], exist_ok=True)
 
-fn = -1
-xf = igzip.IGzipFile(filename=xpath%0)
-nf = open(npath%0, 'wb')
+FN = 0  # number of the index file currently open; substituted into XPATH/NPATH
+
+XF = igzip.IGzipFile(filename=XPATH%0)  # current gzipped input index file, read line by line
+NF = open(NPATH%0,'wb')  # current output file; opened as plain binary, not through igzip
+
+XL = b''  # NOTE(review): not referenced by the added lines of this hunk -- confirm whether still needed
 
-df = open(sys.argv[1], 'rb')
+def nextLine():
+  '''Return the next index line, advancing to the next index file at EOF; None when no file remains'''
+  global FN, NF, NPATH, XF, XPATH
+  xl=XF.readline()
+  if xl == b'':
+    # current index file is exhausted: close it and its finished output before moving on
+    XF.close()
+    NF.close()
+    FN += 1
+    xn=XPATH%FN
+    if not os.path.exists(xn):
+      return None
+    XF = igzip.IGzipFile(filename=xn)
+    NF = open(NPATH%FN, 'wb')
+    xl = XF.readline()
+  return xl
 
-xl = b''
-xkey = xdate = None
-
-for dl in df:
+with open(sys.argv[1], 'rb') as df:
+  dl = df.readline()
   (dkey, ddate, dtime) = dl.split(b'\t')
-  while dkey != xkey or ddate != xdate:
-    try:
-      if xl == b'':
-        # need to move to next index file
-        nf.close()
-        fn += 1
-        try:
-          xf = igzip.IGzipFile(filename=xpath%fn)
-        except Exception as e:
-          print("No more index input for %s: %s\nUnmatched:      |%s|%s|\n"
-                "Last index line: |%s|%s|"%(fn,e,dkey,ddate,xkey,xdate),
-                sys.stderr)
-          exit(1)
-        xl = xf.readline()
-        nf = open(npath%fn, 'wb')
-        #print('xl',xl,file=sys.stderr)
-        (xkey, xdate, xprops) = xl.split(b' ', maxsplit=2)
-        continue
-      else:
-        (xkey, xdate, xprops) = xl.split(b' ', maxsplit=2)
-    except:
-      breakpoint()
-    nf.write(xl)
-    xl = xf.readline()
-  nf.write(xkey)
-  nf.write(b' ')
-  nf.write(xdate)
-  nf.write(b' ')
-  nf.write(xprops[:-2])
-  nf.write(b', "lastmod": "%d"}\n'%int(dtime[:-3]))
-  xl=xf.readline()
+
+  while (xl:=nextLine()) is not None:  # one pass over every index line, across all index files
+    (xkey, xdate, xprops) = xl.split(b' ', maxsplit=2)
+    if dkey != xkey or ddate != xdate:
+      NF.write(xl)  # not the pending key+date: copy the index line through unchanged
+      continue
+    NF.write(xkey)
+    NF.write(b' ')
+    NF.write(xdate)
+    NF.write(b' ')
+    NF.write(xprops[:-2])  # drop the last two bytes (presumably the closing "}" and newline -- confirm)
+    NF.write(b', "lastmod": "%d"}\n'%int(dtime[:-3]))  # dtime minus its last 3 bytes, as an integer
+    dl = df.readline()
+    if dl == b'':  # df is opened 'rb', so end of file is b'' (a str '' never compares equal)
+      # dates exhausted: nothing can match any more, so the rest of the index is copied unchanged
+      dkey = ddate = None
+    else:
+      (dkey, ddate, dtime) = dl.split(b'\t')