# HG changeset patch # User Henry S. Thompson # Date 1539170826 0 # Node ID 36b5d379909a4eced2c6a2d62d78e4732c706f0e # Parent 2fbefb8d1a9e3e78b3c05ee52413c13f98f1600c a bit more info in logs diff -r 2fbefb8d1a9e -r 36b5d379909a workers/bin/count1.sh --- a/workers/bin/count1.sh Mon Oct 08 13:17:23 2018 +0000 +++ b/workers/bin/count1.sh Wed Oct 10 11:27:06 2018 +0000 @@ -1,6 +1,7 @@ #!/bin/bash -echo "# $ID" +echo "# $(date) > $ID.$1" jq '.Envelope|.["WARC-Header-Metadata"]["WARC-Target-URI"]'|cut -f 1 -d ':'|awk '{c[$1]+=1} END {for (k in c) {print k, c[k]}}' +echo "# $(date) < $ID.$1" diff -r 2fbefb8d1a9e -r 36b5d379909a workers/bin/test1.sh --- a/workers/bin/test1.sh Mon Oct 08 13:17:23 2018 +0000 +++ b/workers/bin/test1.sh Wed Oct 10 11:27:06 2018 +0000 @@ -60,13 +60,13 @@ do url="https://commoncrawl.s3.amazonaws.com/$s" export ID=$id - echo $(date) "running |$proc|$home|$pause|$wp|$id|" >> $log + echo \# $(date) "running |$proc|$home|$pause|$wp|$id|" >> $log # Experimental retry loop tryRead "$url" crawl$id if [ -s crawl$id ] then - echo \# $id $(wc -l crawl$id) >> $log - parallel --round-robin --pipe -j $wp "count1.sh >> $res/{#} 2>>$res/errs{#}" < crawl$id || echo "ppfailed $? ${PIPESTATUS[@]}" 1>&2 + echo \# $(date) $id $(wc -l crawl$id) >> $log + parallel --round-robin --pipe -j $wp "count1.sh {#} >> $res/{#} 2>>$res/errs{#}" < crawl$id || echo "ppfailed $? ${PIPESTATUS[@]}" 1>&2 else echo "crawl$id empty" 1>&2 fi