Mercurial > hg > cc > cirrus_work
changeset 146:83fbd652a014
iterate WPAT fix with improved pattern
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Tue, 03 Oct 2023 17:44:59 +0100 |
parents | 170844e51987 |
children | 1c2f477fb064 |
files | lib/python/cc/lmh/sort_date.py |
diffstat | 1 files changed, 4 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/lib/python/cc/lmh/sort_date.py Tue Oct 03 17:43:52 2023 +0100
+++ b/lib/python/cc/lmh/sort_date.py Tue Oct 03 17:44:59 2023 +0100
@@ -16,7 +16,7 @@
 import re, codecs
 from itertools import chain
 
-WPAT = re.compile('(,www\\d*)+\\)')
+WPAT = re.compile(r'(,www\d*)(:\d+)?\)')
 
 # Thanks to https://stackoverflow.com/a/8776871
 import locale
@@ -188,7 +188,9 @@
   if '\\u' in uristring:
     uristring=bytes(uristring,'utf-8').decode('unicode_escape')
   _surt = surt.surt(uristring, canonicalizer=fixGoogleCanon)
-  return WPAT.sub(')',_surt)
+  while WPAT.search(_surt):
+    _surt = WPAT.sub(r'\2)',_surt)
+  return _surt
 
 def keyed(l):
   uri, cc_stamp, dateTime = l.split('\t',2)