# HG changeset patch # User Henry S. Thompson # Date 1695682393 -3600 # Node ID 827eadc721227af6f60d92183e67344258a8be93 # Parent ab3d547f3e76dcd694fa0a15c0daf1feb4da2bfc more faithful regexps and non-byte uri output diff -r ab3d547f3e76 -r 827eadc72122 bin/sort_date.py --- a/bin/sort_date.py Fri Sep 22 15:27:28 2023 +0100 +++ b/bin/sort_date.py Mon Sep 25 23:53:13 2023 +0100 @@ -75,11 +75,11 @@ # Hack this to reproduce the Java bug surt.URLRegexTransformer._RES_QUERY_SESSIONID = [ - re.compile(b"(.*&)(?:jsessionid=[0-9a-z]{32})(?:&(.*))?$", re.I), - re.compile(b"(.*&)(?:phpsessid=[0-9a-z]{32})(?:&(.*))?$", re.I), - re.compile(b"(.*[a-z]*)(?:sid=[0-9a-z]{32})(?:&(.*))?$", re.I), - re.compile(b"(.*&)(?:aspsessionid[a-z]{8}=[a-zA-Z]{24})(?:&(.*))?$", re.I), - re.compile(b"(.*&)(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$", re.I), + re.compile(b"(.+)(?:jsessionid=[0-9a-z]{32})(?:&(.*))?$", re.I), + re.compile(b"(.+)(?:phpsessid=[0-9a-z]{32})(?:&(.*))?$", re.I), + re.compile(b"(.+)(?:sid=[0-9a-z]{32})(?:&(.*))?$", re.I), + re.compile(b"(.+)(?:aspsessionid[a-z]{8}=[a-zA-Z]{24})(?:&(.*))?$", re.I), + re.compile(b"(.+)(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$", re.I), ] # Above based on this from broken Java code: @@ -120,6 +120,6 @@ ctk=cmp_to_key(locale.strcoll) for key, ts in sorted((kk for l in ff if (kk:=keyed(l)) is not None), key=lambda x:x[0]): - print(key[0],key[1],key[2].encode('ascii',errors='java_unicode'), + print(key[0],key[1], + key[2].encode('ascii',errors='java_unicode').decode('ascii'), ts,sep='\t') -