changeset 44:083229195d12

just count part length
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 05 Jul 2023 17:51:44 +0100
parents 69be1131bcc5
children 212da3fe3b19
files bin/count_warc.py
diffstat 1 files changed, 17 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/count_warc.py	Wed Jul 05 17:51:44 2023 +0100
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+import warc,sys
+
+OUT=open(sys.stdout.fileno(),'wb')  # binary-mode file object on stdout's descriptor; countme writes bytes to it
+
+if (debug:=(sys.argv[1]=='-d')):  # optional leading -d flag sets debug; NOTE(review): IndexError if run with no arguments
+  sys.argv.pop(1)  # drop the flag so the remaining arguments are read as argv[1] and argv[2] below
+
+def countme(wtype,buf,part):  # callback handed to warc.warc below; only buf is used, wtype and part are ignored
+  if debug:
+    breakpoint()  # stop in the debugger on every call when the script was started with -d
+  OUT.write(b"%d\n"%len(buf))  # emit len(buf) as one decimal line on binary stdout
+
+#warc(showme,[b'response','warcinfo','request','metadata'],int(sys.argv[2]))
+#warc(showme,[b'response'],whole=True)
+
+warc.warc(sys.argv[1],countme,[b'response'],parts=int(sys.argv[2]),debug=debug)  # argv[1] is passed through, argv[2] is converted to int for parts=; NOTE(review): warc is a project module not visible here - confirm argument meanings