Mercurial > hg > cc > azure
changeset 59:8332faef25e1
get quoting and arg positions right
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 28 May 2020 09:58:38 +0000 |
parents | a3edba8dab11 |
children | 5fdca5baa4e9 |
files | master/src/wecu/run_sac.sh |
diffstat | 1 files changed, 17 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/master/src/wecu/run_sac.sh Thu May 28 09:56:42 2020 +0000
+++ b/master/src/wecu/run_sac.sh Thu May 28 09:58:38 2020 +0000
@@ -1,4 +1,5 @@
 #!/bin/bash
+# Usage: run_sac.sh numcores hostsFilename workDir resType patType patterns
 cores=$1
 hosts=$2
 wd=$3
@@ -7,6 +8,19 @@
 shift
 rm -f allout
+# Get quoting right...
+worker () {
+ f=$1
+ shift
+ shift # we don't need/want the resType either
+ hostname 1>&2
+ export PYTHONIOENCODING=utf-8
+ curl -s -N https://commoncrawl.s3.amazonaws.com/$f | \
+ unpigz -dp 1 -c | tee >(wc -l 1>&2) | ./sac_mapper.py "$@" 2>&1
+}
+
+export -f worker
+
 parallel -v \
  --sshloginfile $hosts \
  --retries 3 \
@@ -15,5 +29,6 @@
  --jobs $cores \
  --workdir $wd \
  -a input_paths \
- "hostname 1>&2 ; export PYTHONIOENCODING=utf-8; curl -s -N https://commoncrawl.s3.amazonaws.com/{} | unpigz -dp 1 -c | tee >(wc -l 1>&2) | ./sac_mapper.py $* 2>&1" | tee -a allout | grep -v 'Authorized uses only' | \
- sac_reducer.py "$*"
+ --env worker \
+ worker '{}' "$@" | tee -a allout | grep -v 'Authorized uses only' | \
+ sac_reducer.py "$@"
```