changeset 11:36b5d379909a

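a bit more info in logs: timestamps on the test1.sh log lines, and timestamped start/end marker lines (tagged with $ID and the parallel job number) in count1.sh output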
author Henry S. Thompson <ht@markup.co.uk>
date Wed, 10 Oct 2018 11:27:06 +0000
parents 2fbefb8d1a9e
children be1034183e03
files workers/bin/count1.sh workers/bin/test1.sh
diffstat 2 files changed, 5 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/workers/bin/count1.sh	Mon Oct 08 13:17:23 2018 +0000
+++ b/workers/bin/count1.sh	Wed Oct 10 11:27:06 2018 +0000
@@ -1,6 +1,7 @@
 #!/bin/bash
-echo "# $ID"
+echo "# $(date) > $ID.$1"
 jq '.Envelope|.["WARC-Header-Metadata"]["WARC-Target-URI"]'|cut -f 1 -d ':'|awk '{c[$1]+=1} END {for (k in c) {print k, c[k]}}'
+echo "# $(date) < $ID.$1"
 
 
 
--- a/workers/bin/test1.sh	Mon Oct 08 13:17:23 2018 +0000
+++ b/workers/bin/test1.sh	Wed Oct 10 11:27:06 2018 +0000
@@ -60,13 +60,13 @@
 do
  url="https://commoncrawl.s3.amazonaws.com/$s"
  export ID=$id
- echo $(date) "running |$proc|$home|$pause|$wp|$id|" >> $log
+ echo \# $(date) "running |$proc|$home|$pause|$wp|$id|" >> $log
  # Experimental retry loop
  tryRead "$url" crawl$id
  if [ -s crawl$id ]
  then
-  echo \# $id $(wc -l crawl$id) >> $log
-  parallel --round-robin --pipe -j $wp "count1.sh >> $res/{#} 2>>$res/errs{#}" < crawl$id || echo "ppfailed $? ${PIPESTATUS[@]}" 1>&2
+  echo \# $(date) $id $(wc -l crawl$id) >> $log
+  parallel --round-robin --pipe -j $wp "count1.sh {#} >> $res/{#} 2>>$res/errs{#}" < crawl$id || echo "ppfailed $? ${PIPESTATUS[@]}" 1>&2
  else
   echo "crawl$id empty" 1>&2
  fi