Mercurial > hg > cc > azure
comparison master/bin/fixDates.py @ 39:bb09db2afe6b
try to fix a few more niggling bugs
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 29 Nov 2018 15:14:46 +0000 |
parents | ad6eff2bc6f9 |
children | 3313edbab3b0 |
comparison
equal
deleted
inserted
replaced
38:beae6309d4ec | 39:bb09db2afe6b |
---|---|
30 #print(l,end='') | 30 #print(l,end='') |
31 scheme=HTTP if ff.group(1)=='http' else HTTPS | 31 scheme=HTTP if ff.group(1)=='http' else HTTPS |
32 count=None | 32 count=None |
33 try: | 33 try: |
34 # More alphas then numerics... | 34 # More alphas then numerics... |
35 count=int(ff.group(4)) | |
35 try: | 36 try: |
36 month=months[ff.group(2)] | 37 month=months[ff.group(2)] |
37 except KeyError: | 38 except KeyError: |
38 month=int(ff.group(2)) | 39 month=int(ff.group(2)) |
39 year=int(ff.group(3)) | 40 year=int(ff.group(3)) |
40 count=int(ff.group(4)) | |
41 except: | 41 except: |
42 # Unusual month or year field | 42 # Unusual month or year field |
43 try: | 43 try: |
44 d=parse("%s %s"%(ff.group(2),ff.group(3)))#,languages=['en']) | 44 # day 1 is because w/o it today's day is used as the default, which may |
45 # fail if it's e.g. 31 March today and the string is "April 2017" | |
46 d=parse("1 %s %s"%(ff.group(2),ff.group(3)))#,languages=['en']) | |
45 if d is None or count is None: | 47 if d is None or count is None: |
46 print(5,ff.group(1),ff.group(2),ff.group(3),ff.group(4), | 48 print(5,ff.group(1),ff.group(2),ff.group(3),ff.group(4), |
47 file=sys.stderr) | 49 file=sys.stderr) |
48 bogons+=1 | 50 bogons+=1 |
49 continue | 51 continue |
55 continue | 57 continue |
56 else: | 58 else: |
57 month=d.month | 59 month=d.month |
58 year=d.year | 60 year=d.year |
59 except Exception as e: | 61 except Exception as e: |
60 print(6,e,l,file=sys.stderr) | 62 print(6,ff.group(1),e,l,file=sys.stderr) |
61 bogons+=1 | 63 bogons+=1 |
62 continue | 64 continue |
63 else: | 65 else: |
64 cols=l.split() | 66 cols=l.split() |
65 scheme=cols[0] | 67 scheme=cols[0] |
74 scheme=HTTP if scheme=='http' else HTTPS | 76 scheme=HTTP if scheme=='http' else HTTPS |
75 try: | 77 try: |
76 cols=cols[1:] | 78 cols=cols[1:] |
77 count=int(cols.pop()) | 79 count=int(cols.pop()) |
78 except: | 80 except: |
79 print(2,count,l,file=sys.stderr) | 81 print(2,sn[scheme],count,l,file=sys.stderr) |
80 bogons+=1 | 82 bogons+=1 |
81 continue | 83 continue |
82 if cols==[]: | 84 if cols==[]: |
83 nd[scheme]+=count | 85 nd[scheme]+=count |
84 continue | 86 continue |
85 else: | 87 else: |
86 l=' '.join(cols) | 88 l=' '.join(cols) |
87 try: | 89 try: |
88 d=parse(l)#,languages=['en'])) | 90 d=parse(l)#,languages=['en'])) |
89 if d is None: | 91 if d is None: |
90 print(3,d,l,count,file=sys.stderr) | 92 print(3,sn[scheme],l,count,file=sys.stderr) |
91 bogons+=1 | 93 bogons+=1 |
92 continue | 94 continue |
93 elif d.year<1900 or d.year>2100: | 95 elif d.year<1900 or d.year>2100: |
94 # Jan 0001 does show up, so log these as early / late | 96 # Jan 0001 does show up, so log these as early / late |
95 (ed if d.year<1900 else ld)[scheme]+=count | 97 (ed if d.year<1900 else ld)[scheme]+=count |
96 continue | 98 continue |
97 else: | 99 else: |
98 year=d.year | 100 year=d.year |
99 month=d.month | 101 month=d.month |
100 except Exception as e: | 102 except Exception as e: |
101 print(4,e,l,count,file=sys.stderr) | 103 print(4,sn[scheme],e,l,count,file=sys.stderr) |
102 bogons+=1 | 104 bogons+=1 |
103 continue | 105 continue |
104 # log it | 106 # log it |
105 yy=tab[scheme] | 107 yy=tab[scheme] |
106 y=year-1900 | 108 y=year-1900 |
125 mm=yy[y] | 127 mm=yy[y] |
126 if mm is not None: | 128 if mm is not None: |
127 for m in range(1,13): | 129 for m in range(1,13): |
128 if mm[m]!=0: | 130 if mm[m]!=0: |
129 print(sn[s],mn[m],y+1900,mm[m],sep='\t') | 131 print(sn[s],mn[m],y+1900,mm[m],sep='\t') |
130 print(n,bogons,file=sys.stderr) | 132 print('#3 %s %s %s'%(strftime('%Y-%m-%d %H:%M:%S'), |
131 print('#3 %s'%strftime('%Y-%m-%d %H:%M:%S'),file=sys.stderr) | 133 n,bogons), |
134 file=sys.stderr) | |
132 | 135 |
133 | 136 |
134 | 137 |