Mercurial > hg > cc > cirrus_work
diff bin/merge_date.py @ 107:40c460fed99f
Still working on sessionID problems
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Sun, 17 Sep 2023 15:18:11 +0100 |
parents | e606c609f813 |
children | 52c6a9b0fc8c |
line wrap: on
line diff
--- a/bin/merge_date.py Thu Sep 14 19:27:23 2023 +0100 +++ b/bin/merge_date.py Sun Sep 17 15:18:11 2023 +0100 @@ -27,12 +27,13 @@ ISESSION = re.compile(SESSION.pattern,flags=re.I) URL=re.compile(b'\{"url": "([^"]*)"') -# Above based on this from fixed Java code: -#(?:jsessionid=[0-9a-zA-Z]{32})(?:&(.*))?$", "jsessionid=", 1, 2), -#(?:phpsessid=[0-9a-zA-Z]{32})(?:&(.*))?$", "phpsessid=", 1, 2), -#(?:sid=[0-9a-zA-Z]{32})(?:&(.*))?$", "sid=", 1, 2), -#(?:ASPSESSIONID[a-zA-Z]{8}=[a-zA-Z]{24})(?:&(.*))?$", "aspsessionid", 1, 2), -#(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$", +# Above based on this from broken Java code: +# https://github.com/iipc/webarchive-commons/commit/5cfff50a03263208520ca2d260229eefb2aec2f7 +#(.+)(?:jsessionid=[0-9a-zA-Z]{32})(?:&(.*))?$", "jsessionid=", 1, 2), +#(.+)(?:phpsessid=[0-9a-zA-Z]{32})(?:&(.*))?$", "phpsessid=", 1, 2), +#(.+)(?:sid=[0-9a-zA-Z]{32})(?:&(.*))?$", "sid=", 1, 2), +#(.+)(?:ASPSESSIONID[a-zA-Z]{8}=[a-zA-Z]{24})(?:&(.*))?$", "aspsessionid", 1, 2), +#(.+)(?:cfid=[^&]+&cftoken=[^&]+)(?:&(.*))?$", #print(sys.argv[3],NPATH,file=sys.stderr) @@ -112,12 +113,13 @@ if messyD: noMatch = (ddate != xdate or not dkey.startswith(xkey1) or - dkey!=xkey1 or + (xkey2 is not None and dkey!=xkey2) or durl!=xurl) if messyU: # better match if noMatch: - raise ValueError("Fail: xkey: %s\n" + raise ValueError("Fail1: md: %s mu: %s\n" + " xkey: %s\n" " dkey: %s\n" " xdate: %s\n" " ddate: %s\n" @@ -126,7 +128,7 @@ "dfq: %s\n" "k1, k2: |%s|%s|\n" "FN: %s XCNT: %s DCNT: %s\n" - "xl: %s"%(xkey, dkey, xdate, ddate, xurl, durl, + "xl: %s"%(messyD, messyU, xkey, dkey, xdate, ddate, xurl, durl, (b'\n '.join(dfq)).decode('ascii'), xkey1, xkey2, FN, XCNT, DCNT, xl)) # fall through to the ordinary (non-messy) match case @@ -156,14 +158,15 @@ durl!=xurl): if DEBUG and dkey and xkey.decode('ascii')>(dkey.decode('ascii')): - raise ValueError("Fail: xkey: %s\n" + raise ValueError("Fail2: md: %s mu: %s\n" + " xkey: %s\n" " dkey: %s\n" " xdate: %s\n" " ddate: %s\n" 
"dfq: %s\n" "k1, k2: |%s|%s|\n" "FN: %s XCNT: %s DCNT: %s\n" - "xl: %s"%(xkey, dkey, xdate, ddate, + "xl: %s"%(messyD, messyU, xkey, dkey, xdate, ddate, (b'\n '.join(dfq)).decode('ascii'), xkey1, xkey2, FN, XCNT, DCNT, xl)) NF.write(xl)