diff lib/python/cdx_segment.py @ 88:464d2dfb99c9

new
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 13 Apr 2021 17:02:09 +0000
parents b6a5999d8e06
children
line wrap: on
line diff
--- a/lib/python/cdx_segment.py	Tue Mar 16 16:20:02 2021 +0000
+++ b/lib/python/cdx_segment.py	Tue Apr 13 17:02:09 2021 +0000
@@ -53,26 +53,39 @@
       sys.stderr.write("bogus: ",afn,l)
       e+=1
 
-mt=datetime.now()
-print(mt,"copying",ifn,"%s ok, %d bogus, %d seconds so far"%(':'.join(map(str,n.values())),
-                                                              e,(mt-st).seconds),file=sys.stderr)
-# Randomise to try to avoid contention
-for s in sample(segdirs,100):
-  for r in rr:
-    of=ss[r][s]
-    of.flush()
-    o=of.fileno()
-    fsync(o)
-    with AtomicOpen("%s/%s/orig/cdx/%s/cdx"%(adir,s,r),"rb+") as df:
-      d=df.fileno()
-      while True:
-        data = read(o,131072)
-        if data == b'':  # end of file reached
-            break
-        write(d,data)
-    of.close()
+if True:
+  # See note below, will have to copy entire result to /beegfs at shell level
+  for rr in ss.values():
+    for s in rr.values():
+      s.close()
+else:
+  # The following fails, in that there are occasional small gaps in the result
+  #  I've given up trying to figure out why...
+  # Randomise to try to avoid contention
+  mt=datetime.now()
+  print(mt,"copying",ifn,"%s ok, %d bogus, %d seconds so far"%(':'.join(map(str,n.values())),
+                                                               e,(mt-st).seconds),file=sys.stderr)
 
-res=system("rm -r %s"%ifn)
+  for s in sample(segdirs,100):
+    for r in rr:
+      of=ss[r][s]
+      of.flush()
+      o=of.fileno()
+      fsync(o)
+      opos=lseek(o,0,SEEK_SET)
+      with AtomicOpen("%s/%s/orig/cdx/%s/cdx"%(adir,s,r),"rb+") as df:
+        d=df.fileno()
+        dpos=lseek(d,0,SEEK_END)
+        print(of.name,opos,df.name,dpos,file=sys.stderr)
+        while True:
+          data = read(o,131072)
+          if data == b'':  # end of file reached
+              break
+          write(d,data)
+      of.close()
+
+  res=0 #system("rm -r %s"%ifn)
 
 et=datetime.now()
-print(et,"finished",ifn,res,"%d seconds total"%((et-st).seconds),file=sys.stderr)
+print(et,"finished",ifn,"%s ok, %d bogus, %d seconds total"%(':'.join(map(str,n.values())),
+                                                             e,(et-st).seconds),file=sys.stderr)