diff workers/bin/count1.sh @ 10:2fbefb8d1a9e

wrun.sh: usage catchup; invoke.sh: force terminal allocation on workers; test1.sh: support control of the number of worker processes that are spawned, support -t to turn off random delay at startup; count1.sh: actually do the counting in subprocs to avoid disk contention
author Henry S. Thompson <ht@markup.co.uk>
date Mon, 08 Oct 2018 13:17:23 +0000
parents 5db6015689a2
children 36b5d379909a
line wrap: on
line diff
--- a/workers/bin/count1.sh	Tue Oct 02 10:52:45 2018 +0000
+++ b/workers/bin/count1.sh	Mon Oct 08 13:17:23 2018 +0000
@@ -1,6 +1,6 @@
 #!/bin/bash
 echo "# $ID"
-jq '.Envelope|.["WARC-Header-Metadata"]["WARC-Target-URI"]'|cut -f 1 -d ':'
+jq '.Envelope|.["WARC-Header-Metadata"]["WARC-Target-URI"]'|cut -f 1 -d ':'|awk '{c[$1]+=1} END {for (k in c) {print k, c[k]}}'