Mercurial > hg > cc > azure
changeset 32:9342f6269edf
rewritten to be faster, maybe, and avoid earlier bug
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Tue, 20 Nov 2018 10:31:05 +0000 |
parents | 580cc12c9712 |
children | 4c117ee8ed75 |
files | master/bin/fixDates.py |
diffstat | 1 files changed, 46 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/master/bin/fixDates.py Mon Nov 19 18:33:17 2018 +0000
+++ b/master/bin/fixDates.py Tue Nov 20 10:31:05 2018 +0000
@@ -5,18 +5,24 @@
 #parseable=re.compile('.*[-:/]\w+[-:/]|\w+\s\d{4}|\d{10}')
 from dateparser import parse
+n=0
 bogons=0
+HTTP=0
+HTTPS=1
+sn=['http','https']
 http_ytab=list(201*[None]) # 1900--2100
 https_ytab=list(201*[None])
-http_yzero=list(13*[0])
-https_yzero=list(13*[0])
-months={'Jan':1,'Feb':2,'Mar':3,'Apr':4,'May':5,'Jun':6,
- 'Jul':7,'Aug':8,'Sep':9,'Oct':10,'Nov':11,'Dec':12}
+tab=[http_ytab,https_ytab]
+nd=[0,0]
+mn=[None,'Jan','Feb','Mar','Apr','May','Jun',
+ 'Jul','Aug','Sep','Oct','Nov','Dec']
+months=dict(zip(mn[1:],range(1,13)))
 for l in sys.stdin:
+ n+=1
 ff=ok.match(l)
 if ff is not None:
 #print(l,end='')
- scheme=ff.group(1)
+ scheme=HTTP if ff.group(1)=='http' else HTTPS
 count=None
 try:
 # More alphas then numerics...
@@ -59,29 +65,31 @@
 print(1,scheme,l,file=sys.stderr)
 bogons+=1
 continue
+ scheme=HTTP if scheme=='http' else HTTPS
 try:
 cols=cols[1:]
 count=int(cols.pop())
 except:
- print(2,cols,file=sys.stderr)
+ print(2,sn[scheme],cols,count,file=sys.stderr)
 bogons+=1
 continue
 if cols==[]:
- year=month=0
+ nd[scheme]+=count
+ continue
 else:
 l=' '.join(cols)
 try:
 d=parse(l)
 if d is None:
 print(3,d,l,count,file=sys.stderr)
- year=0
- month=2
- elif d.year<1900:
- year=0
- month=3
- elif d.year>3499:
- year=3499
- month=2
+ bogons+=1
+ continue
+ elif d.year<1900 or d.year>2100:
+ # Shouldn't happen
+ print(8,ff.group(1),ff.group(2),ff.group(3),ff.group(4),
+ file=sys.stderr)
+ bogons+=1
+ continue
 else:
 year=d.year
 month=d.month
@@ -89,10 +97,28 @@
 print(4,e,l,file=sys.stderr)
 bogons+=1
 continue
- # file it
-if tab=={}:
+ # log it
+ yy=tab[scheme]
+ y=year-1900
+ if yy[y] is None:
+ yy[y]=mm=array('L',13*[0])
+ else:
+ mm=yy[y]
+ mm[month]+=count
+if n==0:
 # ssh screwed up
 exit(1)
-for ((s,m,y),c) in tab.items():
- print(s,m,y,c,sep='\t')
-print(bogons,file=sys.stderr)
+for s in (HTTP,HTTPS):
+ if nd[s]!=0:
+ print(sn[s],0,0,nd[s],sep='\t')
+ yy=tab[s]
+ for y in range(201):
+ mm=yy[y]
+ if mm is not None:
+ for m in range(1,13):
+ if mm[m]!=0:
+ print(sn[s],mn[m],y+1900,mm[m],sep='\t')
+print(n,bogons,file=sys.stderr)
+
+
+
```