Mercurial > hg > cc > azure
diff master/src/wecu/run_sac.sh @ 58:a3edba8dab11
move to right place in tree
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 28 May 2020 09:56:42 +0000 |
parents | master/wecu/run_sac.sh@ac1a20e627a9 |
children | 8332faef25e1 |
line wrap: on
line diff
#!/bin/bash
# run_sac.sh (master/src/wecu/run_sac.sh)
#
# Run sac_mapper.py over every entry of `input_paths` on a set of remote
# hosts via GNU parallel, then pipe the combined output through a local
# sac_reducer.py.
#
# Usage: run_sac.sh CORES HOSTS WORKDIR [ARG...]
#   CORES    passed to parallel --jobs
#   HOSTS    file passed to parallel --sshloginfile
#   WORKDIR  passed to parallel --workdir
#   ARG...   remaining arguments; interpolated into the remote
#            ./sac_mapper.py command line and handed to sac_reducer.py
#
# Files (relative to the current directory):
#   input_paths  read by parallel (-a); each entry is appended to
#                https://commoncrawl.s3.amazonaws.com/ and fetched with curl
#   allout       removed at start, then receives a copy (tee -a) of
#                everything parallel writes to stdout

if (( $# < 3 )); then
  printf 'Usage: %s CORES HOSTS WORKDIR [ARG...]\n' "${0##*/}" >&2
  exit 2
fi

cores=$1
hosts=$2
wd=$3
shift 3

# Locate the mapper up front: an empty result would otherwise make
# --transferfile swallow the following option as its argument.
mapper=$(command -v sac_mapper.py) || {
  printf '%s: sac_mapper.py not found on PATH\n' "${0##*/}" >&2
  exit 1
}

# Insert "/./" immediately before the file name (e.g. /a/b/./sac_mapper.py).
# GNU parallel treats the part after "/./" as relative to the work dir, so
# the remote command can invoke the copy as ./sac_mapper.py.
transfer_spec="${mapper%/*}/./${mapper##*/}"

rm -f -- allout

# Remote pipeline per input entry:
#   hostname        -> stderr
#   curl | unpigz   -> fetch and decompress the object
#   tee >(wc -l)    -> line count of the decompressed stream to stderr
#   ./sac_mapper.py -> stdout and stderr merged (2>&1)
# $* is expanded locally into the command string, so the extra arguments
# are re-parsed by the shell that runs the command.
#
# Locally, lines containing 'Authorized uses only' are dropped before the
# reducer (presumably a login banner -- confirm).
# NOTE(review): "$*" hands the reducer all extra arguments joined into ONE
# argument; kept as in the original -- confirm whether "$@" was intended.
parallel -v \
  --sshloginfile "$hosts" \
  --retries 3 \
  --transferfile "$transfer_spec" \
  --will-cite \
  --jobs "$cores" \
  --workdir "$wd" \
  -a input_paths \
  "hostname 1>&2 ; export PYTHONIOENCODING=utf-8; curl -s -N https://commoncrawl.s3.amazonaws.com/{} | unpigz -dp 1 -c | tee >(wc -l 1>&2) | ./sac_mapper.py $* 2>&1" \
  | tee -a allout \
  | grep -v 'Authorized uses only' \
  | sac_reducer.py "$*"