#!/usr/bin/python3
# Extracted from HG changeset e8c667bf8965250e2841b9a0704dad29eabf52a6
# (parent fd9bcd7596060c00083bc237eb2dbfe2a7bec4d8), which added
# bin/sort_date.py: "compute timestamps, key and sort lmh lines".
# User Henry S. Thompson, date 1691513247 -3600.
#
# Assumes you have used grep -v $'\t' on input for speed
# Recommended to also sed '/GMT$/s/\([^ ]\)GMT$/\1 GMT/'
#  to fix a common 'bad' timestamp (~ .2% of inputs)
"""Key and sort ``uri<TAB>date`` lines.

Each input line is turned into a key built from the URI (host labels
reversed and comma-joined, optional ``:port``, a ``)`` and the path) paired
with the date converted to a POSIX timestamp.  The pairs are sorted by key
and written to standard output as ``key<TAB>timestamp``.
"""
import email.utils
import sys
from urllib.parse import urlsplit


def keyed(l):
    """Convert one ``uri<TAB>date`` line into a ``(key, epoch)`` tuple.

    The date is parsed with ``email.utils.parsedate_to_datetime`` and
    converted with ``.timestamp()``.  The key is the URI's network location
    with its dot-separated labels reversed and joined by commas, followed by
    ``:port`` when the netloc contains a colon, then ``)`` and the URI path.

    Returns ``None`` when the line cannot be processed; in that case a
    tab-separated diagnostic (offending text, error) is written to stderr.
    Note that a netloc containing more than one ``:`` fails the two-way
    unpack below and is therefore reported and skipped as well.
    """
    try:
        uri, dateTime = l.split('\t', 1)
    except ValueError as e:
        # No tab on the line: report and skip instead of aborting the run.
        print(l.rstrip(), e, sep='\t', file=sys.stderr)
        return None
    try:
        epoch = email.utils.parsedate_to_datetime(dateTime).timestamp()
        parts = urlsplit(uri)
        nl = parts.netloc
        if ':' in nl:
            # Exactly one colon is expected; anything else raises ValueError.
            host, port = nl.split(':')
            suffix = ':' + port
        else:
            host, suffix = nl, ''
        labels = host.split('.')
        labels.reverse()
        return (','.join(labels) + suffix + ')' + parts.path, epoch)
    except Exception as e:
        # Deliberately broad: this is the per-line best-effort boundary, so
        # any bad date or URI is logged and the line is dropped.
        print(dateTime.rstrip(), e, sep='\t', file=sys.stderr)
        return None


def main(argv=None):
    """Read the file named by ``argv[1]``, key every line and print sorted.

    ``argv`` defaults to ``sys.argv``.  Lines for which ``keyed`` returns
    ``None`` are omitted.  Output lines are ``key<TAB>epoch``, ordered by
    key only.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit('usage: sort_date.py FILE')
    with open(argv[1], "r") as ff:
        pairs = [kk for l in ff if (kk := keyed(l)) is not None]
    # Sort on the key alone so entries with equal keys keep input order.
    pairs.sort(key=lambda x: x[0])
    for key, epoch in pairs:
        print(key, epoch, sep='\t')


if __name__ == "__main__":
    main()