Mercurial > hg > cc > azure
changeset 31:580cc12c9712
partway to rework after failure of mergedWhich.x64700
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Mon, 19 Nov 2018 18:33:17 +0000 |
parents | 9275e2a8b5e2 |
children | 9342f6269edf |
files | master/bin/fixDates.py |
diffstat | 1 files changed, 60 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/master/bin/fixDates.py Mon Nov 19 18:32:30 2018 +0000 +++ b/master/bin/fixDates.py Mon Nov 19 18:33:17 2018 +0000 @@ -1,11 +1,15 @@ #!/usr/bin/env python3 import sys,re +from array import array ok=re.compile('(https?) ([^ ]*) ((?:19|20)..) ([0-9][0-9]*)$') #parseable=re.compile('.*[-:/]\w+[-:/]|\w+\s\d{4}|\d{10}') from dateparser import parse bogons=0 -tab={} +http_ytab=list(201*[None]) # 1900--2100 +https_ytab=list(201*[None]) +http_yzero=list(13*[0]) +https_yzero=list(13*[0]) months={'Jan':1,'Feb':2,'Mar':3,'Apr':4,'May':5,'Jun':6, 'Jul':7,'Aug':8,'Sep':9,'Oct':10,'Nov':11,'Dec':12} for l in sys.stdin: @@ -13,6 +17,7 @@ if ff is not None: #print(l,end='') scheme=ff.group(1) + count=None try: # More alphas then numerics... try: @@ -20,68 +25,71 @@ except KeyError: month=int(ff.group(2)) year=int(ff.group(3)) + count=int(ff.group(4)) except: # Unusual month or year field try: d=parse("%s %s"%(ff.group(2),ff.group(3))) - if d is None: - print(5,ff.group(1),ff.group(2),ff.group(3),file=sys.stderr) - year=0 - month=0 - elif d.year<1970: - year=0 - month=1 - elif d.year>2019: - month=1 - year=2019 + if d is None or count is None: + print(5,ff.group(1),ff.group(2),ff.group(3),ff.group(4), + file=sys.stderr) + bogons+=1 + continue + elif d.year<1900 or d.year>2100: + # Shouldn't happen + print(7,ff.group(1),ff.group(2),ff.group(3),ff.group(4), + file=sys.stderr) + bogons+=1 + continue else: month=d.month year=d.year except Exception as e: print(6,e,l,file=sys.stderr) bogons+=1 - count=int(ff.group(4)) - key=(scheme,year,month) - tab[key]=tab.get(key,0)+count - continue - cols=l.split() - scheme=cols[0] - if scheme[-1]==':': - scheme=scheme[0:-1] - if scheme not in ('http','https'): - # The last 3 lines are needed because we get both http: with nothing else, when there was no last-mod - # header, or http by itself, when the last-mod consisted entirely of TZ info, which then gets deleted - print(1,scheme,l,file=sys.stderr) - bogons+=1 - continue - try: - cols=cols[1:] - count=int(cols.pop()) - except: - print(2,cols,file=sys.stderr) - bogons+=1 - continue - if cols==[]: - key=(scheme,0,0) - tab[key]=tab.get(key,0)+count - continue - l=' '.join(cols) - try: - d=parse(l) - if d is None: - print(3,d,l,count,file=sys.stderr) - year=0 - month=2 - elif d.year<1970: - key=(scheme,0,1) - elif d.year>2019: - key=(scheme,2019,1) + continue + else: + cols=l.split() + scheme=cols[0] + if scheme[-1]==':': + scheme=scheme[0:-1] + if scheme not in ('http','https'): + # The last 3 lines are needed because we get both http: with nothing else, when there was no last-mod + # header, or http by itself, when the last-mod consisted entirely of TZ info, which then gets deleted + print(1,scheme,l,file=sys.stderr) + bogons+=1 + continue + try: + cols=cols[1:] + count=int(cols.pop()) + except: + print(2,cols,file=sys.stderr) + bogons+=1 + continue + if cols==[]: + year=month=0 else: - key=(scheme,d.year,d.month) - tab[key]=tab.get(key,0)+count - except Exception as e: - print(4,e,l,file=sys.stderr) - bogons+=1 + l=' '.join(cols) + try: + d=parse(l) + if d is None: + print(3,d,l,count,file=sys.stderr) + year=0 + month=2 + elif d.year<1900: + year=0 + month=3 + elif d.year>3499: + year=3499 + month=2 + else: + year=d.year + month=d.month + except Exception as e: + print(4,e,l,file=sys.stderr) + bogons+=1 + continue + # file it if tab=={}: # ssh screwed up exit(1)