changeset 59:8332faef25e1

get quoting and arg positions right
author Henry S. Thompson <ht@markup.co.uk>
date Thu, 28 May 2020 09:58:38 +0000
parents a3edba8dab11
children 5fdca5baa4e9
files master/src/wecu/run_sac.sh
diffstat 1 files changed, 17 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/master/src/wecu/run_sac.sh	Thu May 28 09:56:42 2020 +0000
+++ b/master/src/wecu/run_sac.sh	Thu May 28 09:58:38 2020 +0000
@@ -1,4 +1,5 @@
 #!/bin/bash
+# Usage: run_sac.sh numcores hostsFilename workDir resType patType patterns
 cores=$1
 hosts=$2
 wd=$3
@@ -7,6 +8,19 @@
 shift
 rm -f allout
 
+# worker PATH RESTYPE ARGS...: one parallel job; a function so ARGS reach the mapper via "$@" (get quoting right)
+worker () {
+  f=$1 # PATH: appended to the commoncrawl URL fetched below
+  shift # drop PATH from the positional parameters
+  shift # we don't need/want the resType either
+  hostname 1>&2 # host name goes to stderr
+  export PYTHONIOENCODING=utf-8 # Python-level setting: stdin/stdout/stderr encoding
+  curl -s -N https://commoncrawl.s3.amazonaws.com/$f | \
+   unpigz -dp 1 -c | tee >(wc -l 1>&2) | ./sac_mapper.py "$@" 2>&1 # wc -l count goes to stderr; mapper stderr is merged into stdout
+}
+
+export -f worker
+
 parallel -v \
     --sshloginfile $hosts \
     --retries 3 \
@@ -15,5 +29,6 @@
     --jobs $cores \
     --workdir $wd \
     -a input_paths \
-    "hostname 1>&2 ; export PYTHONIOENCODING=utf-8; curl -s -N https://commoncrawl.s3.amazonaws.com/{} | unpigz -dp 1 -c | tee >(wc -l 1>&2) | ./sac_mapper.py $* 2>&1" | tee -a allout | grep -v 'Authorized uses only' | \
-    sac_reducer.py "$*"
+    --env worker \
+    worker '{}' "$@" | tee -a allout | grep -v 'Authorized uses only' | \
+    sac_reducer.py "$@"