#!/usr/bin/env python3
# Recovered from an HG changeset patch:
#   User Henry S. Thompson
#   Date 1543585416 0
#   Node ID 4cf6bc21f68358b4a8eb6f1c39166551a40f9260
#   Parent bb09db2afe6bb189af3b7c0b5aae012f7ef0e5a2
#   start work on python version of tW.sh
#   diff -r bb09db2afe6b -r 4cf6bc21f683 workers/bin/_timedWhich.py
"""Count, per URI scheme, response records with and without Last-Modified.

Reads text lines from standard input.  A line is counted when it contains
``"WARC-Target-URI":"http:`` or ``"WARC-Target-URI":"https:`` followed,
later on the same line, by ``msgtype=response``.  Such a line is tallied
under its scheme as "with" if a ``"Last-Modified":"..."`` field occurs
after that match, and as "w/o" otherwise.  The two tallies are printed to
standard output when input is exhausted.
"""
import io
import re
import sys

# Captures the scheme (http or https).  The greedy ``.*`` means the match
# ends at the LAST ``msgtype=response`` on the line.
_RESPONSE_URI = re.compile('"WARC-Target-URI":"(https?):.*msgtype=response')
_LAST_MODIFIED = re.compile('"Last-Modified":"([^"]*)"')


def count_last_modified(lines):
    """Tally matching lines by scheme, split on presence of Last-Modified.

    Args:
        lines: Iterable of ``str`` lines to scan.

    Returns:
        A ``(with_lm, without_lm)`` pair of dicts mapping scheme
        (``'http'`` / ``'https'``) to a line count.  Schemes that never
        occur are absent from the dicts; keys appear in first-seen order.
    """
    with_lm = {}
    without_lm = {}
    for line in lines:
        hit = _RESPONSE_URI.search(line)
        if hit is None:
            # Not a http(s) response line: ignored entirely.
            continue
        scheme = hit.group(1)
        # Only look for Last-Modified AFTER the end of the response match;
        # an occurrence earlier on the line does not count.
        if _LAST_MODIFIED.search(line, hit.end()) is None:
            without_lm[scheme] = without_lm.get(scheme, 0) + 1
        else:
            with_lm[scheme] = with_lm.get(scheme, 0) + 1
    return with_lm, without_lm


def format_report(with_lm, without_lm):
    """Return the two-line report string for the given tallies."""
    return "with %s\nw/o %s" % (with_lm, without_lm)


def main():
    """Read stdin as latin1 text, count, and print the report."""
    # latin1 maps every byte value to a character, so decoding cannot fail
    # on arbitrary input bytes (presumably why it was chosen -- confirm).
    uin = io.TextIOWrapper(sys.stdin.buffer, encoding='latin1')
    with_lm, without_lm = count_last_modified(uin)
    print(format_report(with_lm, without_lm))


if __name__ == "__main__":
    main()