# HG changeset patch
# User Henry Thompson
# Date 1695148114 -3600
# Node ID 9f7a35bf07f9a44c7b6ed818ff0007f2ee68e339
# Parent  40c460fed99f54ee195ce0c35df33dff8a28bf19
one more sid fix, match java unicode escaping in path

diff -r 40c460fed99f -r 9f7a35bf07f9 bin/sort_date.py
--- a/bin/sort_date.py	Sun Sep 17 15:18:11 2023 +0100
+++ b/bin/sort_date.py	Tue Sep 19 19:28:34 2023 +0100
@@ -27,6 +27,22 @@
 
 codecs.register_error('percent',percent_encode)
 
+def _u_esc(c):
+    r'''Format code point c as \uXXXX, or as \UXXXXXXXX if it is >= 0x10000'''
+    if c<65536:
+        return '\\u%04X'%c
+    else:
+        return '\\U%08X'%c
+
+def java_unicode_encode(ude):
+    r'''codecs error handler: like backslashreplace but use uppercase
+    and \u00NN instead of \xnn.  ude is the UnicodeEncodeError; returns
+    (replacement text, position at which to resume encoding)'''
+    return (''.join(_u_esc(ord(c)) for c in ude.object[ude.start:ude.end]),
+            ude.end)
+
+codecs.register_error('java_unicode',java_unicode_encode)
+
 # From RFC-3986:
 # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 # sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
@@ -64,7 +80,7 @@
 surt.URLRegexTransformer._RES_QUERY_SESSIONID = [
     re.compile(b"(.*&)(?:jsessionid=[0-9a-z]{32})(?:&(.*))?$", re.I),
     re.compile(b"(.*&)(?:phpsessid=[0-9a-z]{32})(?:&(.*))?$", re.I),
-    re.compile(b"(.*&[a-z]*)(?:sid=[0-9a-z]{32})(?:&(.*))?$", re.I),
+    re.compile(b"(.*[a-z]*)(?:sid=[0-9a-z]{32})(?:&(.*))?$", re.I),
     re.compile(b"(.*&)(?:aspsessionid[a-z]{8}=[a-zA-Z]{24})(?:&(.*))?$", re.I),
     re.compile(b"(.*&)(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$", re.I),
 ]
@@ -107,5 +123,8 @@
     ctk=cmp_to_key(locale.strcoll)
     for key, ts in sorted((kk for l in ff if (kk:=keyed(l)) is not None),
                           key=lambda x:x[0]):
-        print(key[0],key[1],key[2],ts,sep='\t')
+        # the handler is looked up by its registered name (a string);
+        # decode back to str so print doesn't emit a b'...' repr
+        print(key[0],key[1],
+              key[2].encode('ascii',errors='java_unicode').decode('ascii'),
+              ts,sep='\t')
 