Mercurial > hg > cc > azure
diff master/wecu/run.sh @ 57:ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
sac not quite working yet
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Wed, 27 May 2020 20:54:34 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/master/wecu/run.sh Wed May 27 20:54:34 2020 +0000
@@ -0,0 +1,15 @@
+cores=`cat cores.txt`
+
+time parallel \
+    --sshloginfile hosts \
+    --transferfile mapper.py \
+    --transferfile reducer.py \
+    --will-cite \
+    --retries 3 \
+    --jobs $cores \
+    --workdir $PWD \
+    -a input_paths \
+    'curl -s -N "https://commoncrawl.s3.amazonaws.com/{}" | unpigz -dp 1 -c | ./mapper.py' | \
+    sort | \
+    ./reducer.py
+