comparison bin/ix.py @ 109:15abf4aab307

approved Popen version using .communicate
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 22 Apr 2021 21:10:02 +0000
parents 9e5b117dc461
children f148c2366faa
comparison
equal deleted inserted replaced
108:9e5b117dc461 109:15abf4aab307
42 if whole: 42 if whole:
43 # try external unzip using Popen 43 # try external unzip using Popen
44 file.seek(offset) 44 file.seek(offset)
45 bv=memoryview(buf)[:length] 45 bv=memoryview(buf)[:length]
46 nb=file.readinto(bv) 46 nb=file.readinto(bv)
47 file.close()
47 if nb!=length: 48 if nb!=length:
48 print("losing",file.name,length,nb,file=sys.stderr) 49 print("losing",file.name,length,nb,file=sys.stderr)
49 if options.zipped: 50 if options.zipped:
50 BINOUT.write(bv) 51 BINOUT.write(bv)
51 else: 52 else:
54 #clear_bytes=io.BytesIO(uv) 55 #clear_bytes=io.BytesIO(uv)
55 p = Popen(["/lustre/home/dc007/hst/gentoo/usr/bin/igzip", 56 p = Popen(["/lustre/home/dc007/hst/gentoo/usr/bin/igzip",
56 "-dc"], 57 "-dc"],
57 stdin=PIPE, 58 stdin=PIPE,
58 stdout=None) 59 stdout=None)
59 p.stdin.write(bv) 60 fout, ferr = p.communicate(bv)
60 p.stdin.close()
61 res=p.wait() 61 res=p.wait()
62 if res!=0: 62 if res!=0:
63 print('pipe failed',res,p.stderr.decode()) 63 print('pipe failed',res,ferr.decode())
64 exit(2) 64 exit(2)
65 file.close()
66 return 65 return
67 with igzip.IGzipFile(fileobj=gzip_chunk) as gzip_fin: 66 with igzip.IGzipFile(fileobj=gzip_chunk) as gzip_fin:
68 while True: 67 while True:
69 l=gzip_fin.readinto(uv) 68 l=gzip_fin.readinto(uv)
70 if not l: 69 if not l:
71 break 70 break
72 BINOUT.write(memoryview(uv)[:l]) 71 BINOUT.write(memoryview(uv)[:l])
73 file.close()
74 72
75 def main(): 73 def main():
76 parser = argparse.ArgumentParser( 74 parser = argparse.ArgumentParser(
77 description='''Extract records from warc files given length, offset and file triples. 75 description='''Extract records from warc files given length, offset and file triples.
78 Input one triple on command line, or 76 Input one triple on command line, or