# HG changeset patch
# User Henry S. Thompson
# Date 1746727226 -3600
# Node ID 1c11117bb01bbe7945affad3d1fa737b29e2291b
# Parent  83c7ecd61ecff45227a62cc5b50bbe5395862ca3
just starting

diff -r 83c7ecd61ecf -r 1c11117bb01b lib/python/cc/lmh/new_key.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/python/cc/lmh/new_key.py	Thu May 08 19:00:26 2025 +0100
@@ -0,0 +1,26 @@
+#!/usr/bin/python3
+'''Extract/construct a cut-down key for cdb
+
+Reads lines from stdin and prints, for each one, the four fields
+captured by C_PAT separated by single spaces.  Stops with exit status 1
+at the first line that does not match.
+'''
+import re
+import sys
+
+# Captures, in order: the second space-separated field, the rest of the
+# "url" value after the leading "http", the 1-2 digit suffix after a '.'
+# in the "filename" path component that precedes the subdirectory, and
+# that subdirectory name (warc, robotstxt or crawldiagnostics).
+# Raw string so that \. reaches the regex engine without triggering
+# Python's invalid-escape-sequence warning.
+C_PAT = re.compile(r'[^ ]* ([^ ]*) .*{"url": "http([^"]*).*"filename": "[^"]*\.([0-9][0-9]?)/(warc|robotstxt|crawldiagnostics)/')
+
+for line in sys.stdin:
+    if (m := C_PAT.match(line)):
+        print(m[1], m[2], m[3], m[4])
+    else:
+        # Report on stderr so the diagnostic cannot be mistaken for a key
+        # by whatever consumes stdout.
+        print('oops', line, file=sys.stderr)
+        sys.exit(1)