diff master/wecu/run_mapreduce.sh @ 57:ac1a20e627a9

from lukasz git repo 2020-05-26 (see ~/src/wecu), then edited, sac not quite working yet
author Henry S. Thompson <ht@markup.co.uk>
date Wed, 27 May 2020 20:54:34 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/master/wecu/run_mapreduce.sh	Wed May 27 20:54:34 2020 +0000
@@ -0,0 +1,37 @@
+# Distributed map/reduce over Common Crawl files using GNU parallel.
+#
+# Usage: run_mapreduce.sh JOBS MAPPER REDUCER
+#   $1  job slots, passed to parallel --jobs. Pass -1 or omit it
+#       to use the value stored in cores.txt instead.
+#   $2  mapper command. It is also shipped to the workers with
+#       --transferfile, so it is presumably the path of a script.
+#   $3  reducer command, shipped likewise and run locally via eval.
+#
+# Reads from the current directory: cores.txt, hosts, input_paths.
+cores=$(cat cores.txt)
+# Defaulting $1 to -1 avoids an integer expression expected error from
+# the test when no argument is given.
+if [ "${1:--1}" -ne -1 ]
+then
+    cores=$1
+fi
+
+# Each line of input_paths replaces {} in the URL. The file is fetched,
+# decompressed and piped through the mapper on a worker. Stderr is merged
+# into stdout so the grep can drop lines containing Authorized uses only,
+# presumably an ssh login banner. The output is then sorted for the reducer.
+# The reducer is evaluated from a quoted string so that its spacing and
+# any glob characters reach eval intact.
+time parallel \
+    --sshloginfile hosts \
+    --transferfile "$2" \
+    --transferfile "$3" \
+    --will-cite \
+    --jobs "$cores" \
+    --retries 3 \
+    --workdir "$PWD" \
+    -a input_paths \
+    "curl -s -N 'https://commoncrawl.s3.amazonaws.com/{}' | unpigz -dp 1 -c | $2" 2>&1 | grep -v 'Authorized uses only' | \
+    sort | \
+    eval "$3"
+