diff master/src/wecu/run_sac.sh @ 58:a3edba8dab11

move to right place in tree
author Henry S. Thompson <ht@markup.co.uk>
date Thu, 28 May 2020 09:56:42 +0000
parents master/wecu/run_sac.sh@ac1a20e627a9
children 8332faef25e1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/master/src/wecu/run_sac.sh	Thu May 28 09:56:42 2020 +0000
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Fan the sac mapper out over worker hosts with GNU parallel and feed the
+# combined output to sac_reducer.py on the local host.
+#
+# Usage: run_sac.sh CORES HOSTS WORKDIR [ARG...]
+#   CORES    value given to parallel --jobs
+#   HOSTS    file given to parallel --sshloginfile
+#   WORKDIR  directory given to parallel --workdir
+#   ARG...   everything else, forwarded to sac_mapper.py and sac_reducer.py
+#
+# Reads one Common Crawl path per line from ./input_paths. Appends the raw
+# parallel output to ./allout, which is deleted at startup.
+[ "$#" -ge 3 ] || { echo "usage: ${0##*/} cores hosts workdir [arg...]" >&2; exit 2; }
+cores=$1
+hosts=$2
+wd=$3
+shift 3
+rm -f allout
+
+# Build the --transferfile argument from the local mapper location. The /./
+# marker goes directly before the file name so that parallel places the
+# mapper at the top of the remote workdir, where the job runs it as
+# ./sac_mapper.py. Splicing on the base name rather than on the first sac_
+# in the path keeps this correct when a parent directory name contains sac_.
+mapper=$(command -v sac_mapper.py)
+[ -n "$mapper" ] || { echo "${0##*/}: sac_mapper.py not found on PATH" >&2; exit 1; }
+mapper=${mapper%/*}/./${mapper##*/}
+
+# Each job logs its hostname and the decompressed line count to stderr, then
+# streams one input file through the mapper with mapper stderr merged into
+# stdout. ARG... is spliced into the remote command text here, but is handed
+# to the reducer joined into one single argument.
+# NOTE review: the grep presumably drops an ssh login banner line - confirm.
+parallel -v \
+    --sshloginfile "$hosts" \
+    --retries 3 \
+    --transferfile "$mapper" \
+    --will-cite \
+    --jobs "$cores" \
+    --workdir "$wd" \
+    -a input_paths \
+    "hostname 1>&2 ; export PYTHONIOENCODING=utf-8; curl -s -N https://commoncrawl.s3.amazonaws.com/{} | unpigz -dp 1 -c | tee >(wc -l 1>&2) | ./sac_mapper.py $* 2>&1" | tee -a allout | grep -v 'Authorized uses only' | \
+    sac_reducer.py "$*"