view bin/count_warc.py @ 87:df231c95e4aa

final keystroke fixes, note _lacks_ multi-www fix, for which see sort_date.py
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 30 Aug 2023 11:10:54 +0100
parents 083229195d12
children
line wrap: on
line source

#!/usr/bin/env python3
import warc,sys

# Binary handle on standard output so countme can emit bytes directly.
# closefd=False: the descriptor is shared with sys.stdout, so closing or
# finalising OUT must flush its buffer but leave the descriptor open.
OUT=open(sys.stdout.fileno(),'wb',closefd=False)

# An optional leading -d turns on debugging; it is removed from sys.argv so
# the remaining arguments keep the same positions with or without it.
# Comparing a slice (rather than indexing) avoids an IndexError when the
# script is run with no arguments at all.
if (debug:=(sys.argv[1:2]==['-d'])):
  sys.argv.pop(1)

def countme(wtype,buf,part):
  """Write len(buf) to OUT as a decimal number followed by a newline.

  wtype and part are accepted but not used here; presumably they are part
  of the callback signature expected by warc.warc (confirm against the
  warc module).  When the module-level debug flag is set, the debugger is
  entered before anything is written.
  """
  if debug:
    breakpoint()
  length_line = b"%d\n" % len(buf)
  OUT.write(length_line)

# Command line, once any leading -d has been removed from sys.argv:
#   count_warc.py WARC_FILE PARTS
# Check the argument count up front so a missing argument produces a usage
# message on stderr (exit status 1) rather than an IndexError traceback.
if len(sys.argv) < 3:
  sys.exit("usage: %s [-d] WARC_FILE PARTS" % sys.argv[0])

# Hand the file to warc.warc with countme as the callback, restricted to
# b'response' records; PARTS is passed through as the integer parts= option.
warc.warc(sys.argv[1],countme,[b'response'],parts=int(sys.argv[2]),debug=debug)