changeset 134:adabcffc7d68

tweaks to get all tests through #14
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 29 Sep 2023 15:13:51 +0100
parents 3682ef4d2169
children df89f0052f3d
files lib/python/cc/lmh/sort_date.py
diffstat 1 files changed, 9 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/lib/python/cc/lmh/sort_date.py	Thu Sep 28 18:31:23 2023 +0100
+++ b/lib/python/cc/lmh/sort_date.py	Fri Sep 29 15:13:51 2023 +0100
@@ -109,7 +109,7 @@
     # Surely this could be simpler!
     url.query = quote(OGU.unquote_to_bytes(query).decode('utf-8',
                                                          errors='percent'),
-                      safe='''!"$&'()*+,-./:;<=>?@[\]^_`{|}~\x7f'''  #'
+                      safe='''!"$&'()*+,-./:;<=>?@[\]^_`{|}~\x7f'''  #')
                         ).encode('ascii')
     # </change>
   
@@ -143,7 +143,7 @@
       else:
         host = escapeOnce(host.lower())
 
-      url.host = host
+    url.host = host
 
     path = OGU.unescapeRepeatedly(url.path)
 
@@ -151,7 +151,7 @@
       path = OGU.normalizePath(path)
     # else path is free-form sort of thing, not /directory/thing
     # <change>
-    url.path = escapeOnce(OGU.unescapeRepeatedly(path),
+    path = escapeOnce(OGU.unescapeRepeatedly(path),
                       safe=b'''!"$&'()*+,-./:;<=>?@[\]^_`{|}~\x7f'''  #')
                       ).replace(b'\x7f',b'\\x7f')
     # Wrt \x7f (DEL), see "biz,televida)" case                        )
@@ -159,6 +159,10 @@
     #  will need to be handled, which would require a regexp
     # </change>
     # <change>
+    url.path=quote(OGU.unquote_to_bytes(path).decode('utf-8',
+                                                     errors='percent'),
+                   safe='''!"$&'()*+,-./:;<=>?@[\]^_`{|}~\x7f'''  #')
+                     ).encode('ascii')
     # </change>
   return surt.IAURLCanonicalizer.canonicalize(url, **options)
 
@@ -180,6 +184,8 @@
 #(.+)(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$",
 
 def cdx_key(uristring):
+  if '\\' in uristring:
+    uristring=bytes(uristring,'utf-8').decode('unicode_escape')
   _surt = surt.surt(uristring, canonicalizer=fixGoogleCanon)
   return WPAT.sub(')',_surt)