view bin/sort_date.py @ 73:e8c667bf8965

compute timestamps, key and sort lmh lines
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Tue, 08 Aug 2023 17:47:27 +0100
parents
children 177f7df2bf46
line wrap: on
line source

#!/usr/bin/python3
# Assumes you have used grep -v $'\t' on input for speed
# Recommended to also sed '/GMT$/s/\([^ ]\)GMT$/\1 GMT/'
#  to fix a common 'bad' timestamp (~ .2% of inputs)
import email.utils
import sys
from urllib.parse import urlsplit

def keyed(l):
  """Turn one 'URI<TAB>date' line into a (sort key, epoch seconds) pair.

  The key is built from the URI's netloc and path only: the netloc's
  dot-separated labels are reversed and comma-joined, followed by ':port'
  when the netloc contains a colon, then ')' and the path.  For example
  'http://www.example.com:8080/a' yields 'com,example,www:8080)/a'.
  The date is parsed with email.utils.parsedate_to_datetime and converted
  to a POSIX timestamp (a float).

  Returns None, after writing a tab-separated diagnostic to stderr, when
  the line cannot be processed: there is no tab, the date does not parse,
  or the netloc contains more than one colon.
  """
  # partition() splits at the first tab exactly as split('\t',1) does, but
  # never raises: a line with no tab used to escape as an uncaught
  # ValueError and abort the whole run instead of being skipped.
  uri, tab, dateTime = l.partition('\t')
  if not tab:
    print(l.rstrip(), 'no tab-separated timestamp', sep='\t', file=sys.stderr)
    return None
  try:
    epoch = email.utils.parsedate_to_datetime(dateTime).timestamp()
    parts = urlsplit(uri)
    nl = parts.netloc
    if ':' in nl:
      # Two-name unpacking deliberately rejects netlocs with several colons
      # (the ValueError is reported by the handler below).
      host, port = nl.split(':')
    else:
      host, port = nl, None
    labels = ','.join(reversed(host.split('.')))
    if port is None:
      return ('%s)%s'%(labels, parts.path),epoch)
    # An empty port ('host:') still keeps its colon in the key.
    return ('%s:%s)%s'%(labels, port, parts.path),epoch)
  except Exception as e:
    # Best effort: report the offending timestamp text and skip the line.
    print(dateTime.rstrip(),e,sep='\t',file=sys.stderr)
    return None

# Driver: read the file named by the first command-line argument, key each
# line, drop the lines keyed() rejected, and print the survivors ordered by
# key as 'key<TAB>epoch'.
with open(sys.argv[1],"r") as infile:
  entries = []
  for line in infile:
    entry = keyed(line)
    if entry is not None:
      entries.append(entry)
  # Sort on the key string only; list.sort is stable, so lines with equal
  # keys keep their input order.
  entries.sort(key=lambda pair: pair[0])
  for key, epoch in entries:
    print(key, epoch, sep='\t')