Mercurial > hg > cc > azure
annotate master/wecu/run.sh @ 57:ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
sac not quite working yet
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Wed, 27 May 2020 20:54:34 +0000 |
parents | |
children |
rev | line source |
---|---|
57
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
1 cores=`cat cores.txt` |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
2 |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
3 time parallel \ |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
4 --sshloginfile hosts \ |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
5 --transferfile mapper.py \ |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
6 --transferfile reducer.py \ |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
7 --will-cite \ |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
8 --retries 3 \ |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
9 --jobs $cores \ |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
10 --workdir $PWD \ |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
11 -a input_paths \ |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
12 'curl -s -N "https://commoncrawl.s3.amazonaws.com/{}" | unpigz -dp 1 -c | ./mapper.py' | \ |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
13 sort | \ |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
14 ./reducer.py |
ac1a20e627a9
from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited,
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
15 |