diff master/src/wecu/run_sac.sh @ 64:b91e44355bbf

fix minor argument passing snafus
author Henry S. Thompson <ht@markup.co.uk>
date Wed, 03 Jun 2020 22:08:01 +0000
parents d46c8b12fc04
children e1f61f94b196
line wrap: on
line diff
--- a/master/src/wecu/run_sac.sh	Wed Jun 03 16:40:34 2020 +0000
+++ b/master/src/wecu/run_sac.sh	Wed Jun 03 22:08:01 2020 +0000
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Usage: run_sac.sh numcores hostsFilename workDir mapper keyHandler (-f filter) (-k numKeys) resType patType patterns
+# Usage: run_sac.sh numcores hostsFilename workDir mapper (-h keyHandler) (-f filter) (-k numKeys) resType patType patterns
 echo "$@" 1>cmd
 cores=$1
 hosts=$2
@@ -9,6 +9,12 @@
 shift
 shift
 shift
+if [ "$1" = "-h" ]
+then
+ shift
+ keyHandler="$1"
+ shift
+fi
 if [ "$1" = "-f" ]
 then
  shift
@@ -38,13 +44,15 @@
   shift
   filter="$1"
   shift
+  keyHandler="$1"
+  shift
   shift # we don't need/want the resType either
   me=$(hostname | cut -c 15)
   ff=$(echo $f | cut -f 4,6 -d / | sed 's/CC-MAIN-//;s/\.warc.*$//')
   echo $(date +%Y-%m-%d.%H:%M:%S) $me start $j $ff >>logs/${j}_log
   export PYTHONIOENCODING=utf-8
   { IFS=$'\n' ; stderr=( $( { curl -s -N https://commoncrawl.s3.amazonaws.com/$f | \
-   unpigz -dp 1 -c | $filter ./$mapper "$@" ; } 2>&1 1>res/${j}.tsv ; ) ) ; unset IFS ; }
+   unpigz -dp 1 -c | $filter ./$mapper "$keyHandler" "$@" ; } 2>&1 1>res/${j}.tsv ; ) ) ; unset IFS ; }
   { echo $(date +%Y-%m-%d.%H:%M:%S) $me finished $j $ff
     printf '%s\n' "${stderr[@]}" ; } | sed '2,$s/^/ /' >>logs/${j}_log # hack to try to
       # guarantee atomic entry in the log
@@ -52,6 +60,9 @@
 
 export -f worker
 
+echo worker '{}' '{#}' "$mapper" "$filter" "$keyHandler" "$@" 1>&2
+
+date 1>&2
 parallel \
     --sshloginfile $hosts \
     --retries 3 \
@@ -62,5 +73,7 @@
     -a input_paths \
     --env worker \
     --return 'logs/{#}_log' --return 'res/{#}.tsv' --cleanup \
-    worker '{}' '{#}' "$mapper" "$filter" "$@"
+    worker '{}' '{#}' "$mapper" "$filter" "$keyHandler" "$@"
+res=$?
+echo $(date) $res
 cat res/*.tsv | sac_reducer.py $1 $numKeys