view master/src/wecu/run_mapreduce.sh @ 68:1f04bce6ead7 default tip

use basefile instead of transferfile, and remove cleanup: belt and braces wrt lossage of sac_schemes.py in 15% of 1000_k3, this as used in a_2
author Henry S. Thompson <ht@markup.co.uk>
date Thu, 04 Jun 2020 20:44:44 +0000
parents a3edba8dab11
children
line wrap: on
line source

# Decide how many jobs GNU parallel is asked to run (the value ends up in
# $cores, which is handed to `parallel --jobs`).
#
#   $1 - optional override. Any integer other than -1 is used as given.
#        -1, an empty/missing value, or a non-integer selects the contents
#        of ./cores.txt instead.
#
# Prints the chosen value on stdout.
#
# The previous `[ "$1" -ne "-1" ]` reached the cores.txt fallback for a
# missing or non-numeric $1 only because `[` failed with "integer expression
# expected". The override is now validated first, so that case is handled
# silently and on purpose, and cores.txt is only read when it is needed.
#
# The body is a subshell `( ... )` so the helper variables do not leak into
# the rest of the script.
resolve_cores() (
    requested=${1:-}
    # Strip one optional sign so the remainder can be checked for digits only.
    unsigned=${requested#[-+]}
    case "$unsigned" in
        '' | *[!0-9]*)
            # Not an integer: there is nothing to override with.
            ;;
        *)
            if [ "$requested" -ne -1 ]; then
                printf '%s\n' "$requested"
                return 0
            fi
            ;;
    esac
    cat cores.txt
)

cores=$(resolve_cores "${1:-}")

# Map / sort / reduce over every path listed in ./input_paths.
#
#   $2 - mapper command. It is shipped to the workers with --transferfile and
#        spliced, deliberately unquoted, into the remote command line below.
#   $3 - reducer command. It is shipped with --transferfile as well, but in
#        this pipeline it is executed here, on the sorted mapper output.
#
# GNU parallel distributes the input paths over the logins listed in ./hosts,
# running up to $cores jobs and retrying a failed job up to 3 times. Each job
# streams one object from the Common Crawl S3 bucket with curl, decompresses
# it with unpigz and pipes the result into the mapper.
#
# parallel's stderr is merged into its stdout (2>&1) so that lines containing
# 'Authorized uses only' can be filtered out before sorting.
# NOTE(review): presumably that text is an SSH login banner -- confirm.
#
# `time` prefixes the pipeline to report how long the run took.
#
# "$cores" and "$PWD" are quoted so a working directory containing whitespace
# survives intact, and the reducer is passed to eval as one quoted string so
# it is parsed exactly once (no extra word splitting or glob expansion).
time parallel \
    --sshloginfile hosts \
    --transferfile "$2" \
    --transferfile "$3" \
    --will-cite \
    --jobs "$cores" \
    --retries 3 \
    --workdir "$PWD" \
    -a input_paths \
    "curl -s -N 'https://commoncrawl.s3.amazonaws.com/{}' | unpigz -dp 1 -c | $2" 2>&1 |
    grep -v 'Authorized uses only' |
    sort |
    eval "$3"