comparison master/bin/fixDates.py @ 39:bb09db2afe6b

try to fix a few more niggling bugs
author Henry S. Thompson <ht@markup.co.uk>
date Thu, 29 Nov 2018 15:14:46 +0000
parents ad6eff2bc6f9
children 3313edbab3b0
comparison
equal deleted inserted replaced
38:beae6309d4ec 39:bb09db2afe6b
30 #print(l,end='') 30 #print(l,end='')
31 scheme=HTTP if ff.group(1)=='http' else HTTPS 31 scheme=HTTP if ff.group(1)=='http' else HTTPS
32 count=None 32 count=None
33 try: 33 try:
34 # More alphas then numerics... 34 # More alphas then numerics...
35 count=int(ff.group(4))
35 try: 36 try:
36 month=months[ff.group(2)] 37 month=months[ff.group(2)]
37 except KeyError: 38 except KeyError:
38 month=int(ff.group(2)) 39 month=int(ff.group(2))
39 year=int(ff.group(3)) 40 year=int(ff.group(3))
40 count=int(ff.group(4))
41 except: 41 except:
42 # Unusual month or year field 42 # Unusual month or year field
43 try: 43 try:
44 d=parse("%s %s"%(ff.group(2),ff.group(3)))#,languages=['en']) 44 # day 1 is because w/o it the default is today's is used, which may
45 # fail if it's e.g. 31 March today and the string is "April 2017"
46 d=parse("1 %s %s"%(ff.group(2),ff.group(3)))#,languages=['en'])
45 if d is None or count is None: 47 if d is None or count is None:
46 print(5,ff.group(1),ff.group(2),ff.group(3),ff.group(4), 48 print(5,ff.group(1),ff.group(2),ff.group(3),ff.group(4),
47 file=sys.stderr) 49 file=sys.stderr)
48 bogons+=1 50 bogons+=1
49 continue 51 continue
55 continue 57 continue
56 else: 58 else:
57 month=d.month 59 month=d.month
58 year=d.year 60 year=d.year
59 except Exception as e: 61 except Exception as e:
60 print(6,e,l,file=sys.stderr) 62 print(6,ff.group(1),e,l,file=sys.stderr)
61 bogons+=1 63 bogons+=1
62 continue 64 continue
63 else: 65 else:
64 cols=l.split() 66 cols=l.split()
65 scheme=cols[0] 67 scheme=cols[0]
74 scheme=HTTP if scheme=='http' else HTTPS 76 scheme=HTTP if scheme=='http' else HTTPS
75 try: 77 try:
76 cols=cols[1:] 78 cols=cols[1:]
77 count=int(cols.pop()) 79 count=int(cols.pop())
78 except: 80 except:
79 print(2,count,l,file=sys.stderr) 81 print(2,sn[scheme],count,l,file=sys.stderr)
80 bogons+=1 82 bogons+=1
81 continue 83 continue
82 if cols==[]: 84 if cols==[]:
83 nd[scheme]+=count 85 nd[scheme]+=count
84 continue 86 continue
85 else: 87 else:
86 l=' '.join(cols) 88 l=' '.join(cols)
87 try: 89 try:
88 d=parse(l)#,languages=['en'])) 90 d=parse(l)#,languages=['en']))
89 if d is None: 91 if d is None:
90 print(3,d,l,count,file=sys.stderr) 92 print(3,sn[scheme],l,count,file=sys.stderr)
91 bogons+=1 93 bogons+=1
92 continue 94 continue
93 elif d.year<1900 or d.year>2100: 95 elif d.year<1900 or d.year>2100:
94 # Jan 0001 does show up, so log these as early / late 96 # Jan 0001 does show up, so log these as early / late
95 (ed if d.year<1900 else ld)[scheme]+=count 97 (ed if d.year<1900 else ld)[scheme]+=count
96 continue 98 continue
97 else: 99 else:
98 year=d.year 100 year=d.year
99 month=d.month 101 month=d.month
100 except Exception as e: 102 except Exception as e:
101 print(4,e,l,count,file=sys.stderr) 103 print(4,sn[scheme],e,l,count,file=sys.stderr)
102 bogons+=1 104 bogons+=1
103 continue 105 continue
104 # log it 106 # log it
105 yy=tab[scheme] 107 yy=tab[scheme]
106 y=year-1900 108 y=year-1900
125 mm=yy[y] 127 mm=yy[y]
126 if mm is not None: 128 if mm is not None:
127 for m in range(1,13): 129 for m in range(1,13):
128 if mm[m]!=0: 130 if mm[m]!=0:
129 print(sn[s],mn[m],y+1900,mm[m],sep='\t') 131 print(sn[s],mn[m],y+1900,mm[m],sep='\t')
130 print(n,bogons,file=sys.stderr) 132 print('#3 %s %s %s'%(strftime('%Y-%m-%d %H:%M:%S'),
131 print('#3 %s'%strftime('%Y-%m-%d %H:%M:%S'),file=sys.stderr) 133 n,bogons),
134 file=sys.stderr)
132 135
133 136
134 137