view workers/bin/_timedWhich.py @ 40:4cf6bc21f683

start work on python version of tW.sh
author Henry S. Thompson <ht@markup.co.uk>
date Fri, 30 Nov 2018 13:43:36 +0000
parents
children 1d776e96c16a
line wrap: on
line source

#!/usr/bin/env python3
import re,sys,io

# Matches a line carrying a WARC-Target-URI field whose URI scheme is http or
# https (captured as group 1) followed, somewhere later on the same line, by
# 'msgtype=response'.  The '.*' is greedy, so the match ends just after the
# LAST 'msgtype=response' on the line.
p1=re.compile('"WARC-Target-URI":"(https?):.*msgtype=response')
# Matches a Last-Modified field and captures its (possibly empty) value.
p2=re.compile('"Last-Modified":"([^"]*)"')

def count_by_scheme(lines):
  """Tally response lines per URI scheme, split on Last-Modified presence.

  Each line is first searched with p1; lines that do not match are ignored.
  For a matching line, p2 is searched starting at the end of the p1 match,
  so only a Last-Modified field appearing after 'msgtype=response' counts.

  Args:
    lines: an iterable of str, one input line per item.

  Returns:
    A pair (w, wo) of plain dicts mapping the captured scheme ('http' or
    'https') to a line count: w for lines with a Last-Modified field,
    wo for lines without one.
  """
  w={}
  wo={}
  for l in lines:
    m=p1.search(l)
    if m is None:
      continue
    k=m.group(1)
    # Pick the tally to bump: 'with' if a Last-Modified follows, else 'without'
    tally=wo if p2.search(l,m.end()) is None else w
    tally[k]=tally.get(k,0)+1
  return w,wo

def main():
  """Read lines from standard input and print the two tallies."""
  # latin1 maps every byte value to a character, so decoding never fails
  # regardless of what bytes the input contains.
  uin=io.TextIOWrapper(sys.stdin.buffer,encoding='latin1')
  w,wo=count_by_scheme(uin)
  print("with %s\nw/o %s"%(w,wo))

if __name__=='__main__':
  main()