changeset 82:7bbb14f6e394

merge
author Henry Thompson <ht@markup.co.uk>
date Sat, 19 Aug 2023 16:02:29 -0400
parents e115f2e89af6 (diff) bf09a1d80d7b (current diff)
children d92bd8527718
files bin/sort_date.py
diffstat 2 files changed, 3 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
Binary file all_warc_lmh_out.tar.gz has changed
--- a/bin/sort_date.py	Fri Aug 18 18:25:54 2023 +0100
+++ b/bin/sort_date.py	Sat Aug 19 16:02:29 2023 -0400
@@ -39,12 +39,13 @@
       epoch = 32535215999.0
     parts = urlsplit(uri)
     nl = parts.netloc
+    pq = '?%s'%parts.query if parts.query else '';
     #print('nl',nl,file=sys.stderr)
     if ':' in nl:
       pa,pp=nl.split(':')
-      return ('%s:%s)%s'%(auth(pa), pp, parts.path),epoch)
+      return ('%s:%s)%s%s'%(auth(pa), pp, parts.path, pq),epoch)
     else:
-      return ('%s)%s'%(auth(nl), parts.path),epoch)
+      return ('%s)%s%s'%(auth(nl), parts.path, pq),epoch)
   except (TypeError,IndexError,ValueError) as e:
     print(dateTime.rstrip(),e,sep='\t',file=sys.stderr)
     return