Mercurial > hg > cc > cirrus_work
changeset 84:c18c307cc325
merge, including pointless fix wrt pq
author | Henry Thompson <ht@markup.co.uk> |
---|---|
date | Mon, 21 Aug 2023 13:06:20 -0400 |
parents | d92bd8527718 (current diff) 120d90b47d74 (diff) |
children | 1daa8e444cfe |
files | bin/sort_date.py |
diffstat | 1 files changed, 11 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/bin/lmh_warc.py Sat Aug 19 16:33:23 2023 -0400
+++ b/bin/lmh_warc.py Mon Aug 21 13:06:20 2023 -0400
@@ -2,22 +2,32 @@
 import re,warc,sys
 TUPAT=re.compile(b'^WARC-Target-URI: (.*?)\r',re.MULTILINE)
+DPAT=re.compile(b'^WARC-Date: (.*?)\r',re.MULTILINE)
 LMPAT=re.compile(b'^Last-Modified: (.*?)\r',re.MULTILINE)
+DTAB=bytearray(range(256))
+DDEL=b'TZ-:'
+
 OUT=open(sys.stdout.fileno(),'wb')
 def showmeLMH(wtype,buf,part):
-  global URI
+  global URI, DATE
   if part==1:
     if (m:=TUPAT.search(buf)):
       URI=m[1]
     else:
       raise ValueError(b"No target URI in %s ??"%buf)
+    if (md:=DPAT.search(buf)):
+      DATE=md[1]
+    else:
+      raise ValueError(b"No date in %s ??"%buf)
   else:
     mm=LMPAT.search(buf)
     OUT.write(URI)
     if mm:
       OUT.write(b'\t')
+      OUT.write(DATE.translate(DTAB,DDEL))
+      OUT.write(b'\t')
       OUT.write(mm[1])
     OUT.write(b'\n')