comparison master/src/wecu/run_sac.sh @ 59:8332faef25e1

get quoting and arg positions right
author Henry S. Thompson <ht@markup.co.uk>
date Thu, 28 May 2020 09:58:38 +0000
parents a3edba8dab11
children 5fdca5baa4e9
comparison
equal deleted inserted replaced
58:a3edba8dab11 59:8332faef25e1
1 #!/bin/bash 1 #!/bin/bash
2 # Usage: run_sac.sh numcores hostsFilename workDir resType patType patterns
2 cores=$1 3 cores=$1
3 hosts=$2 4 hosts=$2
4 wd=$3 5 wd=$3
5 shift 6 shift
6 shift 7 shift
7 shift 8 shift
8 rm -f allout 9 rm -f allout
10
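11 # Per-input-file pipeline, wrapped in a function so the trailing args are passed to sac_mapper.py as "$@" rather than interpolated into one command string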
12 worker () {
13 f=$1
14 shift
15 shift # also drop resType: only patType and the patterns are passed on to sac_mapper.py
16 hostname 1>&2
17 export PYTHONIOENCODING=utf-8
18 curl -s -N https://commoncrawl.s3.amazonaws.com/$f | \
19 unpigz -dp 1 -c | tee >(wc -l 1>&2) | ./sac_mapper.py "$@" 2>&1
20 }
21
22 export -f worker
9 23
10 parallel -v \ 24 parallel -v \
11 --sshloginfile $hosts \ 25 --sshloginfile $hosts \
12 --retries 3 \ 26 --retries 3 \
13 --transferfile $(which sac_mapper.py|sed 's/sac_/.\/sac_/') \ 27 --transferfile $(which sac_mapper.py|sed 's/sac_/.\/sac_/') \
14 --will-cite \ 28 --will-cite \
15 --jobs $cores \ 29 --jobs $cores \
16 --workdir $wd \ 30 --workdir $wd \
17 -a input_paths \ 31 -a input_paths \
18 "hostname 1>&2 ; export PYTHONIOENCODING=utf-8; curl -s -N https://commoncrawl.s3.amazonaws.com/{} | unpigz -dp 1 -c | tee >(wc -l 1>&2) | ./sac_mapper.py $* 2>&1" | tee -a allout | grep -v 'Authorized uses only' | \ 32 --env worker \
19 sac_reducer.py "$*" 33 worker '{}' "$@" | tee -a allout | grep -v 'Authorized uses only' | \
34 sac_reducer.py "$@"