Mercurial > hg > cc > cirrus_work
changeset 112:827eadc72122
more faithful regexps and non-byte uri output
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 25 Sep 2023 23:53:13 +0100 |
parents | ab3d547f3e76 |
children | 4a52585a1aac |
files | bin/sort_date.py |
diffstat | 1 files changed, 7 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/bin/sort_date.py Fri Sep 22 15:27:28 2023 +0100
+++ b/bin/sort_date.py Mon Sep 25 23:53:13 2023 +0100
@@ -75,11 +75,11 @@
 # Hack this to reproduce the Java bug
 surt.URLRegexTransformer._RES_QUERY_SESSIONID = [
-  re.compile(b"(.*&)(?:jsessionid=[0-9a-z]{32})(?:&(.*))?$", re.I),
-  re.compile(b"(.*&)(?:phpsessid=[0-9a-z]{32})(?:&(.*))?$", re.I),
-  re.compile(b"(.*[a-z]*)(?:sid=[0-9a-z]{32})(?:&(.*))?$", re.I),
-  re.compile(b"(.*&)(?:aspsessionid[a-z]{8}=[a-zA-Z]{24})(?:&(.*))?$", re.I),
-  re.compile(b"(.*&)(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$", re.I),
+  re.compile(b"(.+)(?:jsessionid=[0-9a-z]{32})(?:&(.*))?$", re.I),
+  re.compile(b"(.+)(?:phpsessid=[0-9a-z]{32})(?:&(.*))?$", re.I),
+  re.compile(b"(.+)(?:sid=[0-9a-z]{32})(?:&(.*))?$", re.I),
+  re.compile(b"(.+)(?:aspsessionid[a-z]{8}=[a-zA-Z]{24})(?:&(.*))?$", re.I),
+  re.compile(b"(.+)(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$", re.I),
 ]
 # Above based on this from broken Java code:
@@ -120,6 +120,6 @@
 ctk=cmp_to_key(locale.strcoll)
 for key, ts in sorted((kk for l in ff if (kk:=keyed(l)) is not None), key=lambda x:x[0]):
-  print(key[0],key[1],key[2].encode('ascii',errors='java_unicode'),
+  print(key[0],key[1],
+        key[2].encode('ascii',errors='java_unicode').decode('ascii'),
         ts,sep='\t')
-
```