Mercurial > hg > cc > azure
view master/src/wecu/run_sac.sh @ 59:8332faef25e1
get quoting and arg positions right
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 28 May 2020 09:58:38 +0000 |
parents | a3edba8dab11 |
children | 5fdca5baa4e9 |
line wrap: on
line source
#!/bin/bash
# Usage: run_sac.sh numcores hostsFilename workDir resType patType patterns
#
# Fans the `worker` function out with GNU parallel, taking job arguments from
# the file ./input_paths, and pipes the combined output into sac_reducer.py.
#
#   numcores       passed to parallel --jobs
#   hostsFilename  passed to parallel --sshloginfile
#   workDir        passed to parallel --workdir
#   resType patType patterns
#                  passed unchanged to sac_reducer.py; each worker drops
#                  resType and hands the remainder to ./sac_mapper.py
#
# Side effects: removes ./allout, then appends the raw parallel output to it.

if (( $# < 3 )); then
  printf 'Usage: %s numcores hostsFilename workDir resType patType patterns\n' \
    "${0##*/}" >&2
  exit 2
fi

cores=$1
hosts=$2
wd=$3
shift 3   # "$@" is now: resType patType patterns

# Locate the mapper up front so a missing script is reported clearly instead
# of producing a malformed parallel command line.
mapper_path=$(command -v sac_mapper.py)
if [[ "$mapper_path" != */* ]]; then
  printf '%s: sac_mapper.py not found on PATH\n' "${0##*/}" >&2
  exit 1
fi
# Insert "/./" immediately before the file name: GNU parallel treats the part
# of a --transferfile path after "/./" as relative to the work dir, so the
# mapper is available to the worker as ./sac_mapper.py. Splitting on the last
# "/" (rather than on the first "sac_") keeps this correct even when a
# directory component happens to contain "sac_".
mapper_transfer="${mapper_path%/*}/./${mapper_path##*/}"

rm -f -- allout

# Get quoting right...
#######################################
# Job body invoked by parallel (exported below so parallel can ship it).
# Arguments:
#   $1    path appended to https://commoncrawl.s3.amazonaws.com/
#   $2    resType (discarded; the mapper does not take it)
#   $3..  patType and patterns, forwarded to ./sac_mapper.py
# Outputs:
#   stdout: mapper stdout and stderr (merged by 2>&1)
#   stderr: the host name and the line count of the decompressed input
#######################################
worker () {
  local f=$1
  shift
  shift # we don't need/want the resType either
  hostname >&2
  export PYTHONIOENCODING=utf-8
  curl -s -N "https://commoncrawl.s3.amazonaws.com/$f" \
    | unpigz -dp 1 -c \
    | tee >(wc -l >&2) \
    | ./sac_mapper.py "$@" 2>&1
}
export -f worker

# Raw output is appended to allout before filtering; lines containing
# 'Authorized uses only' (presumably ssh login banners -- confirm) are dropped
# before the reducer sees them.
parallel -v \
  --sshloginfile "$hosts" \
  --retries 3 \
  --transferfile "$mapper_transfer" \
  --will-cite \
  --jobs "$cores" \
  --workdir "$wd" \
  -a input_paths \
  --env worker \
  worker '{}' "$@" \
  | tee -a allout \
  | grep -v 'Authorized uses only' \
  | sac_reducer.py "$@"