comparison lib/python/cdx_segment.py @ 88:464d2dfb99c9

new
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 13 Apr 2021 17:02:09 +0000
parents b6a5999d8e06
children
comparison
equal deleted inserted replaced
87:b6a5999d8e06 88:464d2dfb99c9
51 n[r]+=1 51 n[r]+=1
52 else: 52 else:
53 sys.stderr.write("bogus: ",afn,l) 53 sys.stderr.write("bogus: ",afn,l)
54 e+=1 54 e+=1
55 55
56 mt=datetime.now() 56 if True:
57 print(mt,"copying",ifn,"%s ok, %d bogus, %d seconds so far"%(':'.join(map(str,n.values())), 57 # See note below, will have to copy entire result to /beegfs at shell level
58 e,(mt-st).seconds),file=sys.stderr) 58 for rr in ss.values():
59 # Randomise to try to avoid contention 59 for s in rr.values():
60 for s in sample(segdirs,100): 60 s.close()
61 for r in rr: 61 else:
62 of=ss[r][s] 62 # The following fails, in that there are occasional small gaps in the result
63 of.flush() 63 # I've given up trying to figure out why...
64 o=of.fileno() 64 # Randomise to try to avoid contention
65 fsync(o) 65 mt=datetime.now()
66 with AtomicOpen("%s/%s/orig/cdx/%s/cdx"%(adir,s,r),"rb+") as df: 66 print(mt,"copying",ifn,"%s ok, %d bogus, %d seconds so far"%(':'.join(map(str,n.values())),
67 d=df.fileno() 67 e,(mt-st).seconds),file=sys.stderr)
68 while True:
69 data = read(o,131072)
70 if data == b'': # end of file reached
71 break
72 write(d,data)
73 of.close()
74 68
75 res=system("rm -r %s"%ifn) 69 for s in sample(segdirs,100):
70 for r in rr:
71 of=ss[r][s]
72 of.flush()
73 o=of.fileno()
74 fsync(o)
75 opos=lseek(o,0,SEEK_SET)
76 with AtomicOpen("%s/%s/orig/cdx/%s/cdx"%(adir,s,r),"rb+") as df:
77 d=df.fileno()
78 dpos=lseek(d,0,SEEK_END)
79 print(of.name,opos,df.name,dpos,file=sys.stderr)
80 while True:
81 data = read(o,131072)
82 if data == b'': # end of file reached
83 break
84 write(d,data)
85 of.close()
86
87 res=0 #system("rm -r %s"%ifn)
76 88
77 et=datetime.now() 89 et=datetime.now()
78 print(et,"finished",ifn,res,"%d seconds total"%((et-st).seconds),file=sys.stderr) 90 print(et,"finished",ifn,"%s ok, %d bogus, %d seconds total"%(':'.join(map(str,n.values())),
91 e,(et-st).seconds),file=sys.stderr)