Mercurial > hg > cc > azure
changeset 11:36b5d379909a
a bit more info in logs
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Wed, 10 Oct 2018 11:27:06 +0000 |
parents | 2fbefb8d1a9e |
children | be1034183e03 |
files | workers/bin/count1.sh workers/bin/test1.sh |
diffstat | 2 files changed, 5 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/workers/bin/count1.sh Mon Oct 08 13:17:23 2018 +0000
+++ b/workers/bin/count1.sh Wed Oct 10 11:27:06 2018 +0000
@@ -1,6 +1,7 @@
 #!/bin/bash
-echo "# $ID"
+echo "# $(date) > $ID.$1"
 jq '.Envelope|.["WARC-Header-Metadata"]["WARC-Target-URI"]'|cut -f 1 -d ':'|awk '{c[$1]+=1} END {for (k in c) {print k, c[k]}}'
+echo "# $(date) < $ID.$1"
--- a/workers/bin/test1.sh Mon Oct 08 13:17:23 2018 +0000
+++ b/workers/bin/test1.sh Wed Oct 10 11:27:06 2018 +0000
@@ -60,13 +60,13 @@
 do
 url="https://commoncrawl.s3.amazonaws.com/$s"
 export ID=$id
- echo $(date) "running |$proc|$home|$pause|$wp|$id|" >> $log
+ echo \# $(date) "running |$proc|$home|$pause|$wp|$id|" >> $log
 # Experimental retry loop
 tryRead "$url" crawl$id
 if [ -s crawl$id ]
 then
- echo \# $id $(wc -l crawl$id) >> $log
- parallel --round-robin --pipe -j $wp "count1.sh >> $res/{#} 2>>$res/errs{#}" < crawl$id || echo "ppfailed $? ${PIPESTATUS[@]}" 1>&2
+ echo \# $(date) $id $(wc -l crawl$id) >> $log
+ parallel --round-robin --pipe -j $wp "count1.sh {#} >> $res/{#} 2>>$res/errs{#}" < crawl$id || echo "ppfailed $? ${PIPESTATUS[@]}" 1>&2
 else
 echo "crawl$id empty" 1>&2
 fi