comparison bin/lmh_warc.py @ 42:689a0e311cd2

make warc.py a library, separate out testing
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 05 Jul 2023 15:37:16 +0100
parents
children 11a886a84a49
comparison
equal deleted inserted replaced
41:fa43c318749b 42:689a0e311cd2
1 import re
# Header-line patterns, applied to raw bytes buffers.  Each captures the field
# value (group 1) of a CR-terminated header line.
#
# Field names are matched case-insensitively and the whitespace around the
# value is optional and excluded from the capture: both HTTP and WARC define
# header field names as case-insensitive, and servers do send e.g.
# "last-modified:" or omit the space after the colon.
TUPAT = re.compile(
    rb'^WARC-Target-URI:[ \t]*(.*?)[ \t]*\r',
    re.MULTILINE | re.IGNORECASE,
)
LMPAT = re.compile(
    rb'^Last-Modified:[ \t]*(.*?)[ \t]*\r',
    re.MULTILINE | re.IGNORECASE,
)
4
def showmeLMH(wtype, buf, part, out=None):
    """Write one ``URI[<TAB>Last-Modified]`` line per record.

    Intended as the callback handed to ``warc(...)``, which calls it once
    for each requested part of a record.

    When ``part`` is 1, the WARC-Target-URI value is extracted from ``buf``
    (via ``TUPAT``) and remembered in the module-level ``URI``.  For any
    other ``part``, ``buf`` is searched for a Last-Modified header (via
    ``LMPAT``) and a line is written: the remembered URI, then a tab and
    the header value if one was found, then a newline.

    Args:
        wtype: Record type supplied by the caller; not used here.
        buf: Bytes-like buffer holding the current part of the record.
        part: Part indicator; 1 selects the URI-capturing branch
            (presumably the WARC header block -- confirm against ``warc``).
        out: Optional binary stream to write to.  When omitted, a
            module-level ``OUT`` is used if one is defined, otherwise
            ``sys.stdout.buffer``.

    Raises:
        ValueError: If ``part`` is 1 and ``buf`` has no WARC-Target-URI line.
    """
    global URI
    if part == 1:
        if (m := TUPAT.search(buf)):
            URI = m[1]
        else:
            raise ValueError(b"No target URI in %s ??" % buf)
        return

    if out is None:
        # This file never defines OUT, so a bare reference would raise
        # NameError; honour it only if some other code has provided it.
        out = globals().get('OUT')
    if out is None:
        import sys
        out = sys.stdout.buffer

    m = LMPAT.search(buf)
    out.write(URI)
    if m:
        out.write(b'\t')
        out.write(m[1])
    out.write(b'\n')
19
# Drive the run: hand showmeLMH to warc() as the callback, restricted to
# b'response' records, with parts=3.
# NOTE(review): `warc` is not imported anywhere in the visible file --
# presumably it should come from the warc library module; confirm.
# NOTE(review): parts=3 looks like a bitmask selecting parts 1 and 2 --
# confirm against warc()'s definition.
warc(
    showmeLMH,
    [b'response'],
    parts=3,
)
21