Mercurial > hg > cc > azure
changeset 40:4cf6bc21f683
start work on python version of tW.sh
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Fri, 30 Nov 2018 13:43:36 +0000 |
parents | bb09db2afe6b |
children | 3313edbab3b0 |
files | workers/bin/_timedWhich.py |
diffstat | 1 files changed, 18 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python3
"""Tally response records per URI scheme, split by Last-Modified presence.

Reads lines from standard input (decoded as latin1, so any byte sequence
decodes without error).  A line is counted when it contains a
``"WARC-Target-URI":"http...`` or ``"https...`` field followed later on the
same line by ``msgtype=response``.  Each counted line is attributed to its
URI scheme and placed in one of two tallies, depending on whether a
``"Last-Modified":"..."`` field appears after the response marker.

The two tallies are printed as dict literals on two lines, prefixed with
``with`` and ``w/o`` respectively.
"""
import io
import re
import sys
from collections import Counter

# Captures the scheme (http/https).  The ``.*`` is greedy, so the match ends
# at the LAST ``msgtype=response`` on the line.
RESPONSE_URI = re.compile(r'"WARC-Target-URI":"(https?):.*msgtype=response')
LAST_MODIFIED = re.compile(r'"Last-Modified":"([^"]*)"')


def tally_last_modified(lines):
    """Count matching lines per URI scheme.

    Args:
        lines: iterable of text lines to scan.

    Returns:
        A ``(with_lm, without_lm)`` pair of plain dicts mapping scheme
        (``"http"`` or ``"https"``) to the number of response lines that do,
        respectively do not, carry a Last-Modified field after the response
        marker.  Schemes appear in first-seen order.
    """
    with_lm = Counter()
    without_lm = Counter()
    for line in lines:
        response = RESPONSE_URI.search(line)
        if response is None:
            continue
        scheme = response.group(1)
        # Only look past the response marker: a Last-Modified field earlier
        # on the line does not count.
        if LAST_MODIFIED.search(line, response.end()) is None:
            without_lm[scheme] += 1
        else:
            with_lm[scheme] += 1
    # Plain dicts keep the printed report identical to a hand-rolled tally.
    return dict(with_lm), dict(without_lm)


def format_report(with_lm, without_lm):
    """Return the two-line report for the given tallies."""
    return "with %s\nw/o %s" % (with_lm, without_lm)


def main():
    """Tally the lines on standard input and print the report."""
    uin = io.TextIOWrapper(sys.stdin.buffer, encoding='latin1')
    with_lm, without_lm = tally_last_modified(uin)
    print(format_report(with_lm, without_lm))


if __name__ == "__main__":
    main()