Mercurial > hg > cc > cirrus_work
changeset 134:adabcffc7d68
tweaks to get all tests through #14
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Fri, 29 Sep 2023 15:13:51 +0100 |
parents | 3682ef4d2169 |
children | df89f0052f3d |
files | lib/python/cc/lmh/sort_date.py |
diffstat | 1 file changed, 9 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/lib/python/cc/lmh/sort_date.py Thu Sep 28 18:31:23 2023 +0100 +++ b/lib/python/cc/lmh/sort_date.py Fri Sep 29 15:13:51 2023 +0100 @@ -109,7 +109,7 @@ # Surely this could be simpler! url.query = quote(OGU.unquote_to_bytes(query).decode('utf-8', errors='percent'), - safe='''!"$&'()*+,-./:;<=>?@[\]^_`{|}~\x7f''' #' + safe='''!"$&'()*+,-./:;<=>?@[\]^_`{|}~\x7f''' #') ).encode('ascii') # </change> @@ -143,7 +143,7 @@ else: host = escapeOnce(host.lower()) - url.host = host + url.host = host path = OGU.unescapeRepeatedly(url.path) @@ -151,7 +151,7 @@ path = OGU.normalizePath(path) # else path is free-form sort of thing, not /directory/thing # <change> - url.path = escapeOnce(OGU.unescapeRepeatedly(path), + path = escapeOnce(OGU.unescapeRepeatedly(path), safe=b'''!"$&'()*+,-./:;<=>?@[\]^_`{|}~\x7f''' #') ).replace(b'\x7f',b'\\x7f') # Wrt \x7f (DEL), see "biz,televida)" case ) @@ -159,6 +159,10 @@ # will need to be handled, which would require a regexp # </change> # <change> + url.path=quote(OGU.unquote_to_bytes(path).decode('utf-8', + errors='percent'), + safe='''!"$&'()*+,-./:;<=>?@[\]^_`{|}~\x7f''' #') + ).encode('ascii') # </change> return surt.IAURLCanonicalizer.canonicalize(url, **options) @@ -180,6 +184,8 @@ #(.+)(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$", def cdx_key(uristring): + if '\\' in uristring: + uristring=bytes(uristring,'utf-8').decode('unicode_escape') _surt = surt.surt(uristring, canonicalizer=fixGoogleCanon) return WPAT.sub(')',_surt)