Mercurial > hg > cc > cirrus_home
diff lib/python/cdx_segment.py @ 88:464d2dfb99c9
new
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Tue, 13 Apr 2021 17:02:09 +0000 |
parents | b6a5999d8e06 |
children |
line wrap: on
line diff
--- a/lib/python/cdx_segment.py Tue Mar 16 16:20:02 2021 +0000 +++ b/lib/python/cdx_segment.py Tue Apr 13 17:02:09 2021 +0000 @@ -53,26 +53,39 @@ sys.stderr.write("bogus: ",afn,l) e+=1 -mt=datetime.now() -print(mt,"copying",ifn,"%s ok, %d bogus, %d seconds so far"%(':'.join(map(str,n.values())), - e,(mt-st).seconds),file=sys.stderr) -# Randomise to try to avoid contention -for s in sample(segdirs,100): - for r in rr: - of=ss[r][s] - of.flush() - o=of.fileno() - fsync(o) - with AtomicOpen("%s/%s/orig/cdx/%s/cdx"%(adir,s,r),"rb+") as df: - d=df.fileno() - while True: - data = read(o,131072) - if data == b'': # end of file reached - break - write(d,data) - of.close() +if True: + # See note below, will have to copy entire result to /beegfs at shell level + for rr in ss.values(): + for s in rr.values(): + s.close() +else: + # The following fails, in that there are occasional small gaps in the result + # I've given up trying to figure out why... + # Randomise to try to avoid contention + mt=datetime.now() + print(mt,"copying",ifn,"%s ok, %d bogus, %d seconds so far"%(':'.join(map(str,n.values())), + e,(mt-st).seconds),file=sys.stderr) -res=system("rm -r %s"%ifn) + for s in sample(segdirs,100): + for r in rr: + of=ss[r][s] + of.flush() + o=of.fileno() + fsync(o) + opos=lseek(o,0,SEEK_SET) + with AtomicOpen("%s/%s/orig/cdx/%s/cdx"%(adir,s,r),"rb+") as df: + d=df.fileno() + dpos=lseek(d,0,SEEK_END) + print(of.name,opos,df.name,dpos,file=sys.stderr) + while True: + data = read(o,131072) + if data == b'': # end of file reached + break + write(d,data) + of.close() + + res=0 #system("rm -r %s"%ifn) et=datetime.now() -print(et,"finished",ifn,res,"%d seconds total"%((et-st).seconds),file=sys.stderr) +print(et,"finished",ifn,"%s ok, %d bogus, %d seconds total"%(':'.join(map(str,n.values())), + e,(et-st).seconds),file=sys.stderr)