Mercurial > hg > cc > azure
diff master/src/wecu/run_mapreduce.sh @ 58:a3edba8dab11
move to right place in tree
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 28 May 2020 09:56:42 +0000 |
parents | master/wecu/run_mapreduce.sh@ac1a20e627a9 |
children |
line wrap: on
line diff
# run_mapreduce.sh (master/src/wecu/run_mapreduce.sh)
#
# Fans a streaming command out over remote hosts with GNU parallel, then
# sorts the combined output and feeds it to a second command locally.
#
# Usage: run_mapreduce.sh CORES MAPPER REDUCER
#   $1 CORES    value handed to `parallel --jobs`; -1 (or omitted) means
#               "use the number stored in ./cores.txt".
#   $2 MAPPER   shipped to the hosts with --transferfile and appended to the
#               remote curl | unpigz pipeline as the final stage.
#   $3 REDUCER  also shipped with --transferfile, but evaluated on this
#               machine against the sorted output of all jobs.
#
# Files read from the current directory:
#   cores.txt    default job count (only consulted when CORES is -1)
#   hosts        ssh login file for parallel (--sshloginfile)
#   input_paths  one path per line; each replaces {} in the download URL

# Default to -1 so a missing first argument does not make `[` complain about
# a non-integer operand.
requested=${1:--1}
if [ "$requested" -ne -1 ]; then
  cores=$requested
else
  # Only touch cores.txt when it is actually needed.
  cores=$(cat cores.txt)
fi

# Each job streams one object, decompresses it with a single unpigz thread and
# pipes it into the mapper. stderr is merged into stdout so that lines
# containing 'Authorized uses only' (presumably an ssh login banner -- confirm)
# can be filtered out before sorting. $2 is deliberately interpolated into the
# remote command string; $3 is quoted so eval receives it unmodified by word
# splitting or glob expansion.
time parallel \
  --sshloginfile hosts \
  --transferfile "$2" \
  --transferfile "$3" \
  --will-cite \
  --jobs "$cores" \
  --retries 3 \
  --workdir "$PWD" \
  -a input_paths \
  "curl -s -N 'https://commoncrawl.s3.amazonaws.com/{}' | unpigz -dp 1 -c | $2" 2>&1 \
  | grep -v 'Authorized uses only' \
  | sort \
  | eval "$3"