# HG changeset patch
# User Henry S. Thompson
# Date 1691542892 -3600
# Node ID 177f7df2bf465b477f2ef690f41379191e155f1b
# Parent 432915a28952fccc36113548bc186227e2959b3c
handle %-encoded utf-8 as idna

diff -r 432915a28952 -r 177f7df2bf46 bin/sort_date.py
--- a/bin/sort_date.py Tue Aug 08 17:48:29 2023 +0100
+++ b/bin/sort_date.py Wed Aug 09 02:01:32 2023 +0100
@@ -4,7 +4,15 @@
 # to fix a common 'bad' timestamp (~ .2% of inputs)
 import email.utils
 import sys
-from urllib.parse import urlsplit
+from urllib.parse import urlsplit, unquote
+
+def auth(s):
+  if '%' in s:
+    kk=[(unquote(k).encode('idna')).decode('ascii') for k in s.split('.')]
+  else:
+    kk=s.split('.')
+  kk.reverse()
+  return ','.join(kk)
 
 def keyed(l):
   uri, dateTime = l.split('\t',1)
@@ -14,12 +22,10 @@
     nl = parts.netloc
     if ':' in nl:
       pa,pp=nl.split(':')
-      (kk:=pa.split('.')).reverse()
-      return ('%s:%s)%s'%(','.join(kk), pp, parts.path),epoch)
+      return ('%s:%s)%s'%(auth(pa), pp, parts.path),epoch)
     else:
-      (kk:=nl.split('.')).reverse()
-      return ('%s)%s'%(','.join(kk), parts.path),epoch)
-  except Exception as e:
+      return ('%s)%s'%(auth(nl), parts.path),epoch)
+  except (TypeError,IndexError,ValueError) as e:
     print(dateTime.rstrip(),e,sep='\t',file=sys.stderr)
     return
 