# HG changeset patch # User Henry S. Thompson # Date 1695996831 -3600 # Node ID adabcffc7d68c83fa7f25c714bbf26d7cd93ef1f # Parent 3682ef4d21690bd00adeb897d374c6ebcd242051 tweaks to get all tests through #14 diff -r 3682ef4d2169 -r adabcffc7d68 lib/python/cc/lmh/sort_date.py --- a/lib/python/cc/lmh/sort_date.py Thu Sep 28 18:31:23 2023 +0100 +++ b/lib/python/cc/lmh/sort_date.py Fri Sep 29 15:13:51 2023 +0100 @@ -109,7 +109,7 @@ # Surely this could be simpler! url.query = quote(OGU.unquote_to_bytes(query).decode('utf-8', errors='percent'), - safe='''!"$&'()*+,-./:;<=>?@[\]^_`{|}~\x7f''' #' + safe='''!"$&'()*+,-./:;<=>?@[\]^_`{|}~\x7f''' #') ).encode('ascii') # @@ -143,7 +143,7 @@ else: host = escapeOnce(host.lower()) - url.host = host + url.host = host path = OGU.unescapeRepeatedly(url.path) @@ -151,7 +151,7 @@ path = OGU.normalizePath(path) # else path is free-form sort of thing, not /directory/thing # - url.path = escapeOnce(OGU.unescapeRepeatedly(path), + path = escapeOnce(OGU.unescapeRepeatedly(path), safe=b'''!"$&'()*+,-./:;<=>?@[\]^_`{|}~\x7f''' #') ).replace(b'\x7f',b'\\x7f') # Wrt \x7f (DEL), see "biz,televida)" case ) @@ -159,6 +159,10 @@ # will need to be handled, which would require a regexp # # + url.path=quote(OGU.unquote_to_bytes(path).decode('utf-8', + errors='percent'), + safe='''!"$&'()*+,-./:;<=>?@[\]^_`{|}~\x7f''' #') + ).encode('ascii') # return surt.IAURLCanonicalizer.canonicalize(url, **options) @@ -180,6 +184,8 @@ #(.+)(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$", def cdx_key(uristring): + if '\\' in uristring: + uristring=bytes(uristring,'utf-8').decode('unicode_escape') _surt = surt.surt(uristring, canonicalizer=fixGoogleCanon) return WPAT.sub(')',_surt)