Mercurial > hg > cc > azure
view workers/bin/_timedWhich.py @ 43:c2b72d29a3ee
update to use _timedWhich.py
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Fri, 30 Nov 2018 18:37:40 +0000 |
parents | 1d776e96c16a |
children | 1342f6669352 |
line wrap: on
line source
#!/usr/bin/env python3
"""Tally what remains of Last-Modified values once the usual date parts are removed.

Reads text lines from standard input.  A line is considered only if it
contains a "WARC-Target-URI" entry with an http or https URI followed,
later on the same line, by the text msgtype=response.  For each such line
the first "Last-Modified" value found after that point is split into
components; weekday names, day-of-month numbers, clock times, numeric UTC
offsets, GMT and am/pm markers are discarded and whatever is left is
counted, separately for http and https.

Output (standard output), for http and then https:
  * one line "<scheme> TAB TAB <number of lines with no Last-Modified>"
  * one line "<scheme> TAB <residue> TAB <count>" per distinct residue,
    in order of first appearance.

NOTE(review): the input looks like one JSON-ish record per line; that is
an assumption drawn from the quoted keys in the patterns -- confirm against
whatever produces the input.
"""
import io
import re
import sys
from collections import Counter

# Scheme of the target URI; the greedy ".*" pushes the end of the match to
# the last "msgtype=response" on the line.
TARGET_URI = re.compile(r'"WARC-Target-URI":"(https?):.*msgtype=response')
# The quoted Last-Modified value (may be empty).
LAST_MODIFIED = re.compile(r'"Last-Modified":"([^"]*)"')
# Component separator: a run of commas/spaces/tabs, optionally preceded by
# a full stop (so an abbreviation such as "Sept." loses its dot).
SEPARATOR = re.compile(r'\.?[, \t]+')
# Components that are dropped from the residue: weekday names, gmt, am/pm,
# clock times (with optional fraction and trailing word characters), signed
# four-digit offsets, one- or two-digit numbers, and a lone colon.
LOSERS = re.compile(
    r'(mon|tue|wed|thu|fri|sat|sun)(day)?|gmt|[ap]m'
    r'|\d\d?:\d\d:\d\d(\.\d*)?\w*|[-+]\d\d\d\d|\d\d?|:',
    re.I)
# A trailing "Word/Word" component (e.g. "Europe/London"), dropped if last.
ODD_ZONE = re.compile(r'[A-Z]\w+/[A-Z]\w+')
# Values consisting solely of one token with this prefix are lumped together.
PROXY_CACHE_PREFIX = 'serve-proxy-cache:'

HTTP = 0
HTTPS = 1
SCHEME_NAMES = ['http', 'https']


def last_modified_residue(value):
    """Return the part of a Last-Modified *value* that is not routine date text.

    The value is split on SEPARATOR.  A single-component value starting with
    PROXY_CACHE_PREFIX is reported as just that prefix.  Otherwise a final
    ODD_ZONE component is removed, every component fully matched by LOSERS
    is removed, and the rest are joined with single spaces.  Empty
    components (from leading/trailing separators) are kept, as before.
    """
    parts = SEPARATOR.split(value)
    if len(parts) == 1 and parts[0].startswith(PROXY_CACHE_PREFIX):
        return PROXY_CACHE_PREFIX
    # split() always yields at least one component, so parts[-1] is safe.
    if ODD_ZONE.fullmatch(parts[-1]):
        parts.pop()
    return ' '.join(part for part in parts if not LOSERS.fullmatch(part))


def tally(lines):
    """Count Last-Modified residues in *lines*, per scheme.

    Returns a pair ``(counts, missing)``, both indexed by HTTP / HTTPS:
    ``counts[scheme]`` is a Counter mapping residue -> occurrences and
    ``missing[scheme]`` is the number of matching lines on which no
    Last-Modified value was found after the TARGET_URI match.
    Lines that do not match TARGET_URI are ignored.
    """
    counts = [Counter(), Counter()]
    missing = [0, 0]
    for line in lines:
        target = TARGET_URI.search(line)
        if target is None:
            continue
        scheme = HTTP if target.group(1) == 'http' else HTTPS
        # Only look at text after "msgtype=response", as the original did.
        stamp = LAST_MODIFIED.search(line, target.end())
        if stamp is None:
            missing[scheme] += 1
        else:
            counts[scheme][last_modified_residue(stamp.group(1))] += 1
    return counts, missing


def report(counts, missing, out=None):
    """Write the tab-separated summary for http then https to *out*.

    *out* defaults to the current ``sys.stdout`` (``print``'s own default).
    """
    for scheme in (HTTP, HTTPS):
        name = SCHEME_NAMES[scheme]
        print("%s\t\t%s" % (name, missing[scheme]), file=out)
        for residue, count in counts[scheme].items():
            print("%s\t%s\t%s" % (name, residue, count), file=out)


def main():
    """Run the tally over standard input and print the report."""
    # latin1 maps every byte to a code point, so decoding can never fail
    # whatever bytes the input contains.
    uin = io.TextIOWrapper(sys.stdin.buffer, encoding='latin1')
    counts, missing = tally(uin)
    report(counts, missing)


if __name__ == '__main__':
    main()