Mercurial > hg > cc > azure
view master/bin/fixDates.py @ 31:580cc12c9712
partway to rework after failure of mergedWhich.x64700
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Mon, 19 Nov 2018 18:33:17 +0000 |
parents | dd19cf97b6dd |
children | 9342f6269edf |
line wrap: on
line source
#!/usr/bin/env python3
"""Normalise ``scheme date-text count`` lines into per-(scheme, month, year) totals.

Input is read line by line (stdin by default).  Every usable line starts
with ``http`` or ``https`` (optionally with a trailing ``:``), may carry
some date text, and ends with an integer count.  Per comments in the
original script the date text appears to come from last-modified headers.

Output (stdout by default) is one tab-separated ``scheme month year count``
row per distinct key.  Numbered diagnostics for rejected or odd lines, and
finally the number of rejected lines, go to stderr.

Diagnostic codes written to stderr:
  1  scheme is not http/https (includes blank lines)
  2  no trailing integer count
  3  free-form date text could not be parsed (still filed, year 0 month 2)
  4  date parser raised on free-form date text
  5  date parser returned nothing for an odd month/year field
  6  date parser raised on an odd month/year field
  7  parsed year outside 1900..2100 for an odd month/year field
"""
import sys
import re
from array import array

# <scheme> <month-ish token> <year starting 19 or 20> <count>
ok=re.compile('(https?) ([^ ]*) ((?:19|20)..) ([0-9][0-9]*)$')
#parseable=re.compile('.*[-:/]\w+[-:/]|\w+\s\d{4}|\d{10}')

# NOTE(review): the four tables below are not used anywhere in this script;
# they look like scaffolding for the in-progress rework -- confirm before
# removing.
http_ytab=list(201*[None]) # 1900--2100
https_ytab=list(201*[None])
http_yzero=list(13*[0])
https_yzero=list(13*[0])

months={'Jan':1,'Feb':2,'Mar':3,'Apr':4,'May':5,'Jun':6,
        'Jul':7,'Aug':8,'Sep':9,'Oct':10,'Nov':11,'Dec':12}


def _from_match(ff, line, date_parser, err):
    """Turn a line matched by ``ok`` into ``((scheme, month, year), count)``.

    Returns None, after writing a diagnostic to *err*, when the line has to
    be rejected.
    """
    scheme, month_text, year_text, count_text = ff.groups()
    # The count is converted first: the pattern guarantees it is all digits,
    # and the fallback below must still have it when month/year are odd.
    count = int(count_text)
    try:
        # More alphas than numerics...
        try:
            month = months[month_text]
        except KeyError:
            month = int(month_text)
        year = int(year_text)
    except ValueError:
        # Unusual month or year field: let the date parser have a go.
        try:
            d = date_parser("%s %s" % (month_text, year_text))
        except Exception as e:
            print(6, e, line, file=err)
            return None
        if d is None:
            print(5, scheme, month_text, year_text, count_text, file=err)
            return None
        if d.year < 1900 or d.year > 2100:
            # Shouldn't happen
            print(7, scheme, month_text, year_text, count_text, file=err)
            return None
        month = d.month
        year = d.year
    return (scheme, month, year), count


def _from_loose(line, date_parser, err):
    """Turn a line NOT matched by ``ok`` into ``((scheme, month, year), count)``.

    Returns None, after writing a diagnostic to *err*, when the line has to
    be rejected.
    """
    cols = line.split()
    # A blank line has no columns at all; treat it as a bad scheme rather
    # than crashing on cols[0].
    scheme = cols[0] if cols else ''
    # We get both "http:" with nothing else, when there was no last-mod
    # header, and "http" by itself, when the last-mod consisted entirely of
    # TZ info, which then gets deleted -- so drop a trailing colon.
    if scheme.endswith(':'):
        scheme = scheme[:-1]
    if scheme not in ('http', 'https'):
        print(1, scheme, line, file=err)
        return None
    rest = cols[1:]
    try:
        count = int(rest.pop())
    except (IndexError, ValueError):
        # Nothing after the scheme, or the last column is not an integer.
        print(2, rest, file=err)
        return None
    if not rest:
        # Scheme and count only: no date information at all.
        return (scheme, 0, 0), count
    text = ' '.join(rest)
    try:
        d = date_parser(text)
    except Exception as e:
        print(4, e, text, file=err)
        return None
    if d is None:
        # Reported but still filed, under a marker key.
        print(3, d, text, count, file=err)
        year, month = 0, 2
    elif d.year < 1900:
        year, month = 0, 3
    elif d.year > 3499:
        year, month = 3499, 2
    else:
        year, month = d.year, d.month
    return (scheme, month, year), count


def main(lines=None, out=None, err=None, date_parser=None):
    """Aggregate counts per (scheme, month, year) and print the table.

    lines        iterable of input lines; defaults to sys.stdin
    out          stream for the result table; defaults to sys.stdout
    err          stream for diagnostics; defaults to sys.stderr
    date_parser  callable taking a string and returning None or an object
                 with ``year`` and ``month`` attributes; defaults to
                 ``dateparser.parse``

    Returns the process exit status: 1 if nothing at all was filed,
    otherwise 0.
    """
    if lines is None:
        lines = sys.stdin
    if out is None:
        out = sys.stdout
    if err is None:
        err = sys.stderr
    if date_parser is None:
        # Imported here, before any input is consumed, so a missing package
        # still fails up front while callers can inject their own parser.
        from dateparser import parse as date_parser

    tab = {}
    bogons = 0
    for line in lines:
        ff = ok.match(line)
        if ff is not None:
            filed = _from_match(ff, line, date_parser, err)
        else:
            filed = _from_loose(line, date_parser, err)
        if filed is None:
            bogons += 1
            continue
        # file it: lines that normalise to the same key have their counts
        # summed.
        key, count = filed
        tab[key] = tab.get(key, 0) + count

    if not tab:
        # ssh screwed up
        return 1
    for (s, m, y), c in tab.items():
        print(s, m, y, c, sep='\t', file=out)
    print(bogons, file=err)
    return 0


if __name__ == "__main__":
    sys.exit(main())