Mercurial > hg > cc > azure
view workers/bin/_timedWhich.py @ 48:3b951980206d
using ptimedWhich.sh, _timedWhich.py
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Mon, 10 Dec 2018 14:51:52 +0000 |
parents | 7a4e49689935 |
children |
line wrap: on
line source
#!/usr/bin/env python3
"""Tabulate normalised Last-Modified values per URI scheme.

Reads lines from standard input (decoded as latin1).  Every line that has a
"WARC-Target-URI" field followed by ``msgtype=response`` counts as a
response.  For each response the "Last-Modified" value found after that
point is reduced to a short key (day names, day-of-month numbers, times and
timezone markers are dropped) and counted per URI scheme.

Output on stdout, one line per entry, tab separated:

    <scheme> TAB TAB <count>      responses with no Last-Modified field
    <scheme> TAB <key> TAB <count>

A summary "# <lines> lines, <responses> responses" goes to stderr.
"""
import io
import re
import sys

# URI scheme of a response line; the match ends after 'msgtype=response'.
p1 = re.compile(r'"WARC-Target-URI":"(\w*):.*msgtype=response')
# Raw Last-Modified value.
p2 = re.compile(r'"Last-Modified":"([^"]*)"')
# Token separator: optional '.', then a run of commas, spaces or tabs.
sep = re.compile(r'\.?[, \t]+')
# Tokens dropped from the key: day names, GMT (with optional offset), am/pm,
# hh:mm:ss-style times, '{ts', signed 4-digit offsets, 1-2 digit numbers and
# a bare ':'.
losers = re.compile(
    r'(mon|fri|sun)(day)?|tue(sday)?|wed(nesday)?|thu(rsday)?|sat(urday)?'
    r'|gmt([+-][\d:]+)?|[ap]m|\d\d?:\d\d:(\d\d(\.\d*)?\w*|rd)|\{ts'
    r'|[-+]\d\d\d\d|\d\d?|:',
    re.I)
# Odd trailing token that is discarded before filtering.
oddlast = re.compile(r'\d\w+[A-Z]{3,4}|[A-Z]\w+/[A-Z]\w+')

HTTP = 0
HTTPS = 1


def scheme_index(scheme, sn, tab, nd):
    """Return the table slot for *scheme*, registering it when unseen.

    *sn* maps lower-cased scheme names to slot numbers; *tab* and *nd* are
    the per-slot tables and are extended in step with *sn*.
    """
    key = scheme.lower()
    k = sn.get(key)
    if k is None:
        # The new slot is the next free index of the tables.  The original
        # len(sn)+1 pointed past the end of tab/nd and raised IndexError.
        k = len(tab)
        sn[key] = k
        tab.append({})
        nd.append(0)
    return k


def normalise_last_modified(lm):
    """Reduce a raw Last-Modified value to the key it is counted under."""
    lmc = sep.split(lm)
    if len(lmc) == 1 and lmc[0].startswith('serve-proxy-cache:'):
        return 'serve-proxy-cache:'
    if len(lmc) > 14 and lmc[-2].endswith(')'):
        # e.g. Sun, 23 Apr 2017 11:10(02017Sun, 23 Apr 2017 11:10:29 +0300Sun, 23 Apr 2017 11:10:29 +030017) GMT
        # Keep only the leading date; endswith() is safe on an empty token.
        lmc = lmc[:-12]
    elif oddlast.fullmatch(lmc[-1]):
        lmc.pop()
    return ' '.join(c for c in lmc if not losers.fullmatch(c))


def tabulate(lines):
    """Count Last-Modified keys per scheme over an iterable of lines.

    Returns ``(sn, tab, nd, n_lines, n_responses)`` where *sn* maps scheme
    name to slot, ``tab[slot]`` maps key to count and ``nd[slot]`` counts
    responses without a Last-Modified field.
    """
    sn = {'http': HTTP, 'https': HTTPS}
    tab = [{}, {}]
    nd = [0, 0]  # no date
    n_lines = n_responses = 0
    for line in lines:
        n_lines += 1
        m = p1.search(line)
        if not m:
            continue
        n_responses += 1
        k = scheme_index(m.group(1), sn, tab, nd)
        # Only look for Last-Modified after the msgtype=response marker.
        m = p2.search(line, m.end())
        if m is None:
            nd[k] += 1
        else:
            r = normalise_last_modified(m.group(1))
            t = tab[k]
            t[r] = t.get(r, 0) + 1
    return sn, tab, nd, n_lines, n_responses


def main():
    """Tabulate stdin and print the per-scheme counts."""
    uin = io.TextIOWrapper(sys.stdin.buffer, encoding='latin1')
    sn, tab, nd, i, j = tabulate(uin)
    for l, h in sn.items():
        if nd[h] > 0:
            print("%s\t\t%s" % (l, nd[h]))
        for (k, v) in tab[h].items():
            print("%s\t%s\t%s" % (l, k, v))
    print("# %s lines, %s responses" % (i, j), file=sys.stderr)


if __name__ == '__main__':
    main()