Mercurial > hg > cc > cirrus_home
comparison bin/ix.py @ 109:15abf4aab307
approved Popen version using .communicate
field | value |
---|---|
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
date | Thu, 22 Apr 2021 21:10:02 +0000 |
parents | 9e5b117dc461 |
children | f148c2366faa |
comparison legend: equal | deleted | inserted | replaced
108:9e5b117dc461 | 109:15abf4aab307 |
---|---|
42 if whole: | 42 if whole: |
43 # try external unzip using Popen | 43 # try external unzip using Popen |
44 file.seek(offset) | 44 file.seek(offset) |
45 bv=memoryview(buf)[:length] | 45 bv=memoryview(buf)[:length] |
46 nb=file.readinto(bv) | 46 nb=file.readinto(bv) |
47 file.close() | |
47 if nb!=length: | 48 if nb!=length: |
48 print("losing",file.name,length,nb,file=sys.stderr) | 49 print("losing",file.name,length,nb,file=sys.stderr) |
49 if options.zipped: | 50 if options.zipped: |
50 BINOUT.write(bv) | 51 BINOUT.write(bv) |
51 else: | 52 else: |
54 #clear_bytes=io.BytesIO(uv) | 55 #clear_bytes=io.BytesIO(uv) |
55 p = Popen(["/lustre/home/dc007/hst/gentoo/usr/bin/igzip", | 56 p = Popen(["/lustre/home/dc007/hst/gentoo/usr/bin/igzip", |
56 "-dc"], | 57 "-dc"], |
57 stdin=PIPE, | 58 stdin=PIPE, |
58 stdout=None) | 59 stdout=None) |
59 p.stdin.write(bv) | 60 fout, ferr = p.communicate(bv) |
60 p.stdin.close() | |
61 res=p.wait() | 61 res=p.wait() |
62 if res!=0: | 62 if res!=0: |
63 print('pipe failed',res,p.stderr.decode()) | 63 print('pipe failed',res,ferr.decode()) |
64 exit(2) | 64 exit(2) |
65 file.close() | |
66 return | 65 return |
67 with igzip.IGzipFile(fileobj=gzip_chunk) as gzip_fin: | 66 with igzip.IGzipFile(fileobj=gzip_chunk) as gzip_fin: |
68 while True: | 67 while True: |
69 l=gzip_fin.readinto(uv) | 68 l=gzip_fin.readinto(uv) |
70 if not l: | 69 if not l: |
71 break | 70 break |
72 BINOUT.write(memoryview(uv)[:l]) | 71 BINOUT.write(memoryview(uv)[:l]) |
73 file.close() | |
74 | 72 |
75 def main(): | 73 def main(): |
76 parser = argparse.ArgumentParser( | 74 parser = argparse.ArgumentParser( |
77 description='''Extract records from warc files given length, offset and file triples. | 75 description='''Extract records from warc files given length, offset and file triples. |
78 Input one triple on command line, or | 76 Input one triple on command line, or |