Mercurial > hg > cc > azure
annotate workers/bin/_timedWhich.py @ 68:1f04bce6ead7 default tip
Use basefile instead of transferfile, and remove cleanup: belt and braces with respect to the loss of sac_schemes.py in 15% of 1000_k3;
this is as used in a_2.
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 04 Jun 2020 20:44:44 +0000 |
parents | 7a4e49689935 |
children |
rev | line source |
---|---|
40
4cf6bc21f683
start work on python version of tW.sh
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
4cf6bc21f683
start work on python version of tW.sh
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
2 import re,sys,io |
4cf6bc21f683
start work on python version of tW.sh
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
3 |
4cf6bc21f683
start work on python version of tW.sh
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
4 uin=io.TextIOWrapper(sys.stdin.buffer,encoding='latin1') |
46
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
5 p1=re.compile('"WARC-Target-URI":"(\w*):.*msgtype=response') |
40
4cf6bc21f683
start work on python version of tW.sh
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
6 p2=re.compile('"Last-Modified":"([^"]*)"') |
42 | 7 sep=re.compile('\.?[, \t]+') |
44
1342f6669352
knock off a few more relatively common cases
Henry S. Thompson <ht@markup.co.uk>
parents:
42
diff
changeset
|
8 losers=re.compile('(mon|fri|sun)(day)?|tue(sday)?|wed(nesday)?|thu(rsday)?|sat(urday)?|gmt([+-][\d:]+)?|[ap]m|\d\d?:\d\d:(\d\d(\.\d*)?\w*|rd)|\{ts|[-+]\d\d\d\d|\d\d?|:',re.I) |
1342f6669352
knock off a few more relatively common cases
Henry S. Thompson <ht@markup.co.uk>
parents:
42
diff
changeset
|
9 oddlast=re.compile('\d\w+[A-Z]{3,4}|[A-Z]\w+/[A-Z]\w+') |
42 | 10 HTTP=0 |
11 HTTPS=1 | |
12 tab=[{},{}] | |
13 nd=[0,0] # no date | |
46
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
14 sn={'http':HTTP,'https':HTTPS} |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
15 i=j=0 |
40
4cf6bc21f683
start work on python version of tW.sh
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
16 for l in uin: |
46
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
17 i+=1 |
40
4cf6bc21f683
start work on python version of tW.sh
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
18 m=p1.search(l) |
4cf6bc21f683
start work on python version of tW.sh
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
19 if m: |
46
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
20 j+=1 |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
21 scheme=m.group(1) |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
22 if scheme=='http': |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
23 k=HTTP |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
24 elif scheme=='https': |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
25 k=HTTPS |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
26 else: |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
27 scheme=scheme.lower() |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
28 try: |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
29 k=sn[scheme] |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
30 except KeyError: |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
31 k=len(sn)+1 |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
32 sn[scheme]=k |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
33 tab.append(dict()) |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
34 nd.append(0) |
40
4cf6bc21f683
start work on python version of tW.sh
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
35 m=p2.search(l,m.end()) |
4cf6bc21f683
start work on python version of tW.sh
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
36 if m is None: |
42 | 37 nd[k]+=1 |
40
4cf6bc21f683
start work on python version of tW.sh
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
38 else: |
42 | 39 t=tab[k] |
40 lm=m.group(1) | |
41 lmc=sep.split(lm) | |
42 if len(lmc)==1 and lmc[0].startswith('serve-proxy-cache:'): | |
43 r='serve-proxy-cache:' | |
45 | 44 elif len(lmc)>14 and lmc[-2][-1]==')': |
45 # e.g. Sun, 23 Apr 2017 11:10(02017Sun, 23 Apr 2017 11:10:29 +0300Sun, 23 Apr 2017 11:10:29 +030017) GMT | |
46 lmc=lmc[:-12] | |
42 | 47 else: |
44
1342f6669352
knock off a few more relatively common cases
Henry S. Thompson <ht@markup.co.uk>
parents:
42
diff
changeset
|
48 if oddlast.fullmatch(lmc[-1]): |
42 | 49 lmc.pop() |
50 r=' '.join(c for c in lmc if not losers.fullmatch(c)) | |
51 t[r]=t.get(r,0)+1 | |
46
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
52 for l,h in sn.items(): |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
53 if nd[h]>0: |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
54 print("%s\t\t%s"%(l,nd[h])) |
42 | 55 for (k,v) in tab[h].items(): |
46
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
56 print("%s\t%s\t%s"%(l,k,v)) |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
57 print("# %s lines, %s responses"%(i,j),file=sys.stderr) |
42 | 58 |
59 | |
46
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
60 |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
61 |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
62 |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
63 |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
64 |
7a4e49689935
finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
45
diff
changeset
|
65 |