Mercurial > hg > cc > azure
annotate workers/bin/_timedWhich.py @ 42:1d776e96c16a
works on one file
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Fri, 30 Nov 2018 15:41:02 +0000 |
parents | 4cf6bc21f683 |
children | 1342f6669352 |
rev | line source |
---|---|
40
4cf6bc21f683
start work on python version of tW.sh
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
#!/usr/bin/env python3
"""Tally response lines by URI scheme and reduced Last-Modified value.

Reads text lines from standard input (decoded as Latin-1).  A line is
counted when it contains a ``"WARC-Target-URI":"http(s):...`` field that
is later followed on the same line by ``msgtype=response``.  For each
such line the ``"Last-Modified"`` value found after that point is reduced
to a short key by ``date_key`` and counted per scheme; lines with no such
value are counted separately per scheme.

Output (tab-separated, one block per scheme, http first):
    <scheme><TAB><TAB><count of lines without Last-Modified>
    <scheme><TAB><key><TAB><count>
"""
import io
import re
import sys

# A counted line: captures the URI scheme ('http' or 'https').  The greedy
# '.*' means the match ends at the last 'msgtype=response' on the line.
p1 = re.compile(r'"WARC-Target-URI":"(https?):.*msgtype=response')
# The Last-Modified header value (anything up to the closing quote).
p2 = re.compile(r'"Last-Modified":"([^"]*)"')
# Token separator inside a Last-Modified value: an optional '.', then a run
# of commas, spaces or tabs.
sep = re.compile(r'\.?[, \t]+')
# Tokens dropped from the key: weekday names, 'gmt', am/pm, clock times,
# signed four-digit offsets, one- or two-digit numbers and a bare ':'.
# NOTE(review): for conventional HTTP dates this presumably leaves the
# month name and four-digit year -- confirm against real data.
losers = re.compile(r'(mon|tue|wed|thu|fri|sat|sun)(day)?|gmt|[ap]m|\d\d?:\d\d:\d\d(\.\d*)?\w*|[-+]\d\d\d\d|\d\d?|:', re.I)
# A trailing 'Word/Word' token (capitalised on both sides of the slash),
# removed before filtering.
oddz = re.compile(r'[A-Z]\w+/[A-Z]\w+')

HTTP = 0
HTTPS = 1
sn = ['http', 'https']  # scheme names, indexed by HTTP / HTTPS


def date_key(lm):
    """Reduce a Last-Modified value *lm* to the key it is tallied under.

    A value that is a single token starting with 'serve-proxy-cache:' maps
    to exactly 'serve-proxy-cache:'.  Otherwise the value is split on
    ``sep``, a final token matching ``oddz`` is discarded, every token
    fully matching ``losers`` is discarded, and the survivors are joined
    with single spaces.
    """
    tokens = sep.split(lm)
    if len(tokens) == 1 and tokens[0].startswith('serve-proxy-cache:'):
        return 'serve-proxy-cache:'
    if oddz.fullmatch(tokens[-1]):
        tokens.pop()
    return ' '.join(tok for tok in tokens if not losers.fullmatch(tok))


def tally(lines):
    """Count the lines of *lines* that match ``p1``.

    Returns ``(tab, nd)`` where ``tab[scheme]`` maps each key produced by
    ``date_key`` to its count and ``nd[scheme]`` counts matching lines with
    no Last-Modified value; both are indexed by ``HTTP`` / ``HTTPS``.
    """
    tab = [{}, {}]
    nd = [0, 0]  # no date
    for line in lines:
        hit = p1.search(line)
        if not hit:
            continue
        scheme = HTTP if hit.group(1) == 'http' else HTTPS
        # Only look for Last-Modified after the end of the p1 match.
        found = p2.search(line, hit.end())
        if found is None:
            nd[scheme] += 1
            continue
        key = date_key(found.group(1))
        counts = tab[scheme]
        counts[key] = counts.get(key, 0) + 1
    return tab, nd


def report(tab, nd, out=None):
    """Print the tallies, http first then https.

    *out* is the file to write to; ``None`` means standard output.
    Keys are emitted in the order they were first seen.
    """
    for h in (HTTP, HTTPS):
        print("%s\t\t%s" % (sn[h], nd[h]), file=out)
        for (k, v) in tab[h].items():
            print("%s\t%s\t%s" % (sn[h], k, v), file=out)


def main():
    """Tally standard input and print the report."""
    # Latin-1 decodes every byte value, so arbitrary input bytes never
    # raise a decoding error.
    uin = io.TextIOWrapper(sys.stdin.buffer, encoding='latin1')
    tab, nd = tally(uin)
    report(tab, nd)


if __name__ == '__main__':
    main()