Mercurial > hg > cc > azure
changeset 42:1d776e96c16a
works on one file
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Fri, 30 Nov 2018 15:41:02 +0000 |
parents | 3313edbab3b0 |
children | c2b72d29a3ee |
files | workers/bin/_timedWhich.py |
diffstat | 1 files changed, 26 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/workers/bin/_timedWhich.py Fri Nov 30 13:44:50 2018 +0000
+++ b/workers/bin/_timedWhich.py Fri Nov 30 15:41:02 2018 +0000
@@ -4,15 +4,35 @@
 uin=io.TextIOWrapper(sys.stdin.buffer,encoding='latin1')
 p1=re.compile('"WARC-Target-URI":"(https?):.*msgtype=response')
 p2=re.compile('"Last-Modified":"([^"]*)"')
-w={}
-wo={}
+sep=re.compile('\.?[, \t]+')
+losers=re.compile('(mon|tue|wed|thu|fri|sat|sun)(day)?|gmt|[ap]m|\d\d?:\d\d:\d\d(\.\d*)?\w*|[-+]\d\d\d\d|\d\d?|:',re.I)
+oddz=re.compile('[A-Z]\w+/[A-Z]\w+')
+HTTP=0
+HTTPS=1
+tab=[{},{}]
+nd=[0,0] # no date
+sn=['http','https']
 for l in uin:
   m=p1.search(l)
   if m:
-    k=m.group(1)
+    k=HTTP if m.group(1)=='http' else HTTPS
     m=p2.search(l,m.end())
     if m is None:
-      wo[k]=wo.get(k,0)+1
+      nd[k]+=1
     else:
-      w[k]=w.get(k,0)+1
-print("with %s\nw/o %s"%(w,wo))
+      t=tab[k]
+      lm=m.group(1)
+      lmc=sep.split(lm)
+      if len(lmc)==1 and lmc[0].startswith('serve-proxy-cache:'):
+        r='serve-proxy-cache:'
+      else:
+        if oddz.fullmatch(lmc[-1]):
+          lmc.pop()
+        r=' '.join(c for c in lmc if not losers.fullmatch(c))
+      t[r]=t.get(r,0)+1
+for h in (HTTP,HTTPS):
+  print("%s\t\t%s"%(sn[h],nd[h]))
+  for (k,v) in tab[h].items():
+    print("%s\t%s\t%s"%(sn[h],k,v))
+
+