changeset 44:1342f6669352

knock off a few more relatively common cases
author Henry S. Thompson <ht@markup.co.uk>
date Sat, 01 Dec 2018 12:13:34 +0000
parents c2b72d29a3ee
children 21152d241e1a
files workers/bin/_timedWhich.py
diffstat 1 files changed, 3 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/workers/bin/_timedWhich.py	Fri Nov 30 18:37:40 2018 +0000
+++ b/workers/bin/_timedWhich.py	Sat Dec 01 12:13:34 2018 +0000
@@ -5,8 +5,8 @@
 p1=re.compile('"WARC-Target-URI":"(https?):.*msgtype=response')
 p2=re.compile('"Last-Modified":"([^"]*)"')
 sep=re.compile('\.?[, \t]+')
-losers=re.compile('(mon|tue|wed|thu|fri|sat|sun)(day)?|gmt|[ap]m|\d\d?:\d\d:\d\d(\.\d*)?\w*|[-+]\d\d\d\d|\d\d?|:',re.I)
-oddz=re.compile('[A-Z]\w+/[A-Z]\w+')
+losers=re.compile('(mon|fri|sun)(day)?|tue(sday)?|wed(nesday)?|thu(rsday)?|sat(urday)?|gmt([+-][\d:]+)?|[ap]m|\d\d?:\d\d:(\d\d(\.\d*)?\w*|rd)|\{ts|[-+]\d\d\d\d|\d\d?|:',re.I)
+oddlast=re.compile('\d\w+[A-Z]{3,4}|[A-Z]\w+/[A-Z]\w+')
 HTTP=0
 HTTPS=1
 tab=[{},{}]
@@ -26,7 +26,7 @@
       if len(lmc)==1 and lmc[0].startswith('serve-proxy-cache:'):
         r='serve-proxy-cache:'
       else:
-        if oddz.fullmatch(lmc[-1]):
+        if oddlast.fullmatch(lmc[-1]):
           lmc.pop()
         r=' '.join(c for c in lmc if not losers.fullmatch(c))
       t[r]=t.get(r,0)+1