Mercurial > hg > cc > cirrus_work
view bin/sort_date.py @ 73:e8c667bf8965
compute timestamps, key and sort lmh lines
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Tue, 08 Aug 2023 17:47:27 +0100 |
parents | |
children | 177f7df2bf46 |
line wrap: on
line source
#!/usr/bin/python3
# Assumes you have used grep -v $'\t' on input for speed
# Recommended to also sed '/GMT$/s/\([^ ]\)GMT$/\1 GMT/'
#  to fix a common 'bad' timestamp (~ .2% of inputs)
"""Compute timestamps and sort keys for 'URI<TAB>date' lines and print them sorted.

Usage: sort_date.py FILE

Each line of FILE is expected to hold a URI, a tab, and a date/time string
in any form email.utils.parsedate_to_datetime accepts.  For every usable
line a 'key<TAB>epoch-seconds' line is written to stdout, ordered by key.
Lines that cannot be processed are reported on stderr and left out.
"""
import email.utils
import sys
from datetime import timezone
from operator import itemgetter
from urllib.parse import urlsplit


def keyed(l):
    """Convert one 'URI<TAB>date' line into a (key, epoch) tuple.

    The key is built from the URI's network location and path: the
    dot-separated host labels are reversed and joined with commas, an
    explicit ':port' is kept after them, then ')' and the path follow.
    For example 'http://www.example.com:8080/a/b' gives
    'com,example,www:8080)/a/b'.

    Args:
        l: one input line, with or without its trailing newline.

    Returns:
        (key, epoch) where epoch is a float of seconds since the Unix
        epoch, or None if the line has no tab, the date cannot be parsed
        or the URI cannot be split.  A diagnostic line is written to
        stderr in each of those cases.
    """
    uri, tab, dateTime = l.partition('\t')
    if not tab:
        # Previously this raised ValueError outside the try and aborted the
        # whole run; treat it like any other unusable line instead.
        print(l.rstrip(), 'no tab separator', sep='\t', file=sys.stderr)
        return None
    try:
        when = email.utils.parsedate_to_datetime(dateTime)
        if when.tzinfo is None:
            # A '-0000' or absent zone yields a naive datetime, which
            # timestamp() would interpret in the machine's local time.
            # Pin it to UTC so the result does not depend on where we run.
            when = when.replace(tzinfo=timezone.utc)
        epoch = when.timestamp()
        parts = urlsplit(uri)
        nl = parts.netloc
        if ':' in nl:
            # Exactly one colon is expected (host:port); anything else
            # (userinfo, IPv6 literal) fails the unpack and is reported below.
            host, port = nl.split(':')
            labels = host.split('.')
            labels.reverse()
            return ('%s:%s)%s' % (','.join(labels), port, parts.path), epoch)
        labels = nl.split('.')
        labels.reverse()
        return ('%s)%s' % (','.join(labels), parts.path), epoch)
    except Exception as e:
        # Deliberately broad: this is best-effort bulk processing, so any
        # per-line failure is logged with the offending date and skipped.
        print(dateTime.rstrip(), e, sep='\t', file=sys.stderr)
        return None


def main():
    """Read the file named by sys.argv[1] and print its keyed lines sorted by key."""
    with open(sys.argv[1], "r") as ff:
        pairs = [kk for l in ff if (kk := keyed(l)) is not None]
    # Sort on the key only (stable), so equal keys keep their input order.
    pairs.sort(key=itemgetter(0))
    for key, epoch in pairs:
        print(key, epoch, sep='\t')


if __name__ == "__main__":
    main()