Mercurial > hg > cc > azure
comparison master/bin/fixDates.py @ 24:b4e3beb2227e
improved error handling,
does totalling now too
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Wed, 07 Nov 2018 14:15:56 +0000 |
parents | 0f4a0f4e38d4 |
children | dd19cf97b6dd |
comparison
equal
deleted
inserted
replaced
23:cc065b2a2543 | 24:b4e3beb2227e |
---|---|
1 /home/cc/lib/python/fixDates.py | 1 #!/usr/bin/env python3 |
import re
import sys

from dateparser import parse

# Matches lines that are already close to normal form:
#   <scheme> <month> <year> <count>
# where <scheme> is http or https, <year> starts with 19 or 20 and
# <count> is a run of digits at the end of the line.
ok=re.compile('(https?) ([^ ]*) ((?:19|20)..) ([0-9][0-9]*)$')
#parseable=re.compile('.*[-:/]\w+[-:/]|\w+\s\d{4}|\d{10}')

# Parsed years outside [MIN_YEAR, MAX_YEAR] are folded into fixed buckets.
MIN_YEAR = 1970
MAX_YEAR = 2019

bogons = 0  # number of input lines that had to be discarded
tab = {}    # (scheme, year, month) -> summed count
months = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
          'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}


def _bucket(d):
    """Return the (year, month) bucket for a successfully parsed date *d*.

    Years before MIN_YEAR go to (0, 1); years after MAX_YEAR go to
    (MAX_YEAR, 1); everything else keeps its own year and month.
    """
    if d.year < MIN_YEAR:
        return (0, 1)
    if d.year > MAX_YEAR:
        return (MAX_YEAR, 1)
    return (d.year, d.month)


for line in sys.stdin:
    ff = ok.match(line)
    if ff is not None:
        # Fast path: the line already has the four expected fields.
        scheme = ff.group(1)
        try:
            # More alphas than numerics, so try the month-name table first.
            try:
                month = months[ff.group(2)]
            except KeyError:
                month = int(ff.group(2))
            year = int(ff.group(3))
        except ValueError:
            # Unusual month or year field: fall back to dateparser.
            d = parse("%s %s" % (ff.group(2), ff.group(3)))
            if d is None:
                print(5, ff.group(1), ff.group(2), ff.group(3),
                      file=sys.stderr)
                year, month = 0, 0
            else:
                year, month = _bucket(d)
        count = int(ff.group(4))
        key = (scheme, year, month)
        tab[key] = tab.get(key, 0) + count
        continue

    # Slow path: <scheme>[:] [free-form date text ...] <count>
    cols = line.split()
    if not cols:
        # Blank line: previously crashed on cols[0]; treat it as a bogon.
        print(1, '', line, file=sys.stderr)
        bogons += 1
        continue
    scheme = cols[0]
    # The scheme can arrive as "http:" with nothing else (there was no
    # last-mod header) or as bare "http" (the last-mod consisted entirely
    # of TZ info, which then gets deleted), so strip a trailing colon.
    if scheme[-1] == ':':
        scheme = scheme[0:-1]
    if scheme not in ('http', 'https'):
        print(1, scheme, line, file=sys.stderr)
        bogons += 1
        continue
    try:
        cols = cols[1:]
        count = int(cols.pop())
    except (IndexError, ValueError):
        # Either nothing followed the scheme or the last field is no count.
        print(2, cols, file=sys.stderr)
        bogons += 1
        continue
    if cols == []:
        # Scheme and count only: no date information at all.
        key = (scheme, 0, 0)
        tab[key] = tab.get(key, 0) + count
        continue

    text = ' '.join(cols)
    try:
        d = parse(text)
    except Exception as e:
        # dateparser itself blew up on this text; report and discard.
        print(4, e, text, file=sys.stderr)
        bogons += 1
        continue
    if d is None:
        # Unparseable date text: report it and count it under (0, 2).
        print(3, d, text, count, file=sys.stderr)
        key = (scheme, 0, 2)
    else:
        key = (scheme,) + _bucket(d)
    tab[key] = tab.get(key, 0) + count

# Emit one tab-separated row per (scheme, year, month) bucket, then the
# number of discarded lines on stderr.
for (scheme, year, month), count in tab.items():
    print(scheme, year, month, count, sep='\t')
print(bogons, file=sys.stderr)