Mercurial > hg > cc > azure
comparison master/src/wecu/run_sac.sh @ 59:8332faef25e1
get quoting and arg positions right
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 28 May 2020 09:58:38 +0000 |
parents | a3edba8dab11 |
children | 5fdca5baa4e9 |
comparison
equal
deleted
inserted
replaced
58:a3edba8dab11 | 59:8332faef25e1 |
---|---|
1 #!/bin/bash | 1 #!/bin/bash |
| 2 # Usage: run_sac.sh numcores hostsFilename workDir resType patType patterns |
2 cores=$1 | 3 cores=$1 |
3 hosts=$2 | 4 hosts=$2 |
4 wd=$3 | 5 wd=$3 |
5 shift | 6 shift |
6 shift | 7 shift |
7 shift | 8 shift |
8 rm -f allout | 9 rm -f allout |
| 10 |
| 11 # Get quoting right... |
| 12 worker () { |
| 13 f=$1 |
| 14 shift |
| 15 shift # we don't need/want the resType either |
| 16 hostname 1>&2 |
| 17 export PYTHONIOENCODING=utf-8 |
| 18 curl -s -N https://commoncrawl.s3.amazonaws.com/$f | \ |
| 19 unpigz -dp 1 -c | tee >(wc -l 1>&2) | ./sac_mapper.py "$@" 2>&1 |
| 20 } |
| 21 |
| 22 export -f worker |
9 | 23 |
10 parallel -v \ | 24 parallel -v \ |
11 --sshloginfile $hosts \ | 25 --sshloginfile $hosts \ |
12 --retries 3 \ | 26 --retries 3 \ |
13 --transferfile $(which sac_mapper.py|sed 's/sac_/.\/sac_/') \ | 27 --transferfile $(which sac_mapper.py|sed 's/sac_/.\/sac_/') \ |
14 --will-cite \ | 28 --will-cite \ |
15 --jobs $cores \ | 29 --jobs $cores \ |
16 --workdir $wd \ | 30 --workdir $wd \ |
17 -a input_paths \ | 31 -a input_paths \ |
18 "hostname 1>&2 ; export PYTHONIOENCODING=utf-8; curl -s -N https://commoncrawl.s3.amazonaws.com/{} | unpigz -dp 1 -c | tee >(wc -l 1>&2) | ./sac_mapper.py $* 2>&1" | tee -a allout | grep -v 'Authorized uses only' | \ | 32 --env worker \ |
19 sac_reducer.py "$*" | 33 worker '{}' "$@" | tee -a allout | grep -v 'Authorized uses only' | \ |
| 34 sac_reducer.py "$@" |