# HG changeset patch # User Henry S. Thompson # Date 1689769182 -3600 # Node ID b14187ccfb463d9768d28b8a1429a5b888a4a1ac # Parent 9837840f332829fedc8828b94ac731e66a9eaf35 revert to just showing first LM diff -r 9837840f3328 -r b14187ccfb46 bin/lmh_warc.py --- a/bin/lmh_warc.py Fri Jul 14 17:39:14 2023 +0100 +++ b/bin/lmh_warc.py Wed Jul 19 13:19:42 2023 +0100 @@ -1,4 +1,6 @@ -import re,swarc,sys +#!/usr/bin/env python3 + +import re,warc,sys TUPAT=re.compile(b'^WARC-Target-URI: (.*?)\r',re.MULTILINE) LMPAT=re.compile(b'^Last-Modified: (.*?)\r',re.MULTILINE) @@ -12,13 +14,12 @@ else: raise ValueError(b"No target URI in %s ??"%buf) else: - mm=LMPAT.findall(buf) + mm=LMPAT.search(buf) OUT.write(URI) if mm: - for m in mm: - OUT.write(b'\t') - OUT.write(m) + OUT.write(b'\t') + OUT.write(mm[1]) OUT.write(b'\n') -swarc.warc(sys.argv[1],showmeLMH,[b'response'],parts=3) +warc.warc(sys.argv[1],showmeLMH,[b'response'],parts=3)