Mercurial > hg > cc > cirrus_home
comparison bin/ix.py @ 110:f148c2366faa
back to IGzipFile
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Thu, 22 Apr 2021 21:31:03 +0000 |
parents | 15abf4aab307 |
children | 3119bca71181 |
comparison
equal
deleted
inserted
replaced
109:15abf4aab307 | 110:f148c2366faa |
---|---|
7 In all cases by 'filename' is meant crawlid/segmentid/filename | 7 In all cases by 'filename' is meant crawlid/segmentid/filename |
8 | 8 |
9 Note that if no output flag(s) is/are given, the whole WARC record will be output, more efficiently than would be the case if -whb is given.''' | 9 Note that if no output flag(s) is/are given, the whole WARC record will be output, more efficiently than would be the case if -whb is given.''' |
10 | 10 |
11 import sys, argparse, regex, os, shutil, io, gzip, time | 11 import sys, argparse, regex, os, shutil, io, gzip, time |
12 #from isal import igzip | 12 from isal import igzip |
13 from subprocess import Popen, PIPE | 13 #from subprocess import Popen, PIPE |
14 #import asyncio | 14 #import asyncio |
15 | 15 |
16 HACK_USAGE=regex.compile('\[-x\]\n\s*\[length\] \[offset\] \[filename\]') | 16 HACK_USAGE=regex.compile('\[-x\]\n\s*\[length\] \[offset\] \[filename\]') |
17 BINOUT=sys.stdout.buffer | 17 BINOUT=sys.stdout.buffer |
18 FPAT="/%s/%s/orig/warc/%s" | 18 FPAT="/%s/%s/orig/warc/%s" |
48 if nb!=length: | 48 if nb!=length: |
49 print("losing",file.name,length,nb,file=sys.stderr) | 49 print("losing",file.name,length,nb,file=sys.stderr) |
50 if options.zipped: | 50 if options.zipped: |
51 BINOUT.write(bv) | 51 BINOUT.write(bv) |
52 else: | 52 else: |
53 #gzip_chunk = io.BytesIO(bv) | 53 gzip_chunk = io.BytesIO(bv) |
54 uv=memoryview(buf)[length:] | 54 uv=memoryview(buf)[length:] |
55 #clear_bytes=io.BytesIO(uv) | 55 #clear_bytes=io.BytesIO(uv) |
56 p = Popen(["/lustre/home/dc007/hst/gentoo/usr/bin/igzip", | |
57 "-dc"], | |
58 stdin=PIPE, | |
59 stdout=None) | |
60 fout, ferr = p.communicate(bv) | |
61 res=p.wait() | |
62 if res!=0: | |
63 print('pipe failed',res,ferr.decode()) | |
64 exit(2) | |
65 return | |
66 with igzip.IGzipFile(fileobj=gzip_chunk) as gzip_fin: | 56 with igzip.IGzipFile(fileobj=gzip_chunk) as gzip_fin: |
67 while True: | 57 while True: |
68 l=gzip_fin.readinto(uv) | 58 l=gzip_fin.readinto(uv) |
69 if not l: | 59 if not l: |
70 break | 60 break |