Mercurial > hg > cc > azure
diff master/src/wecu/run_sac.sh @ 63:d46c8b12fc04
support multiple approaches to key combination, use local files to collect results
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Wed, 03 Jun 2020 16:40:34 +0000 |
parents | 892e1c0240e1 |
children | b91e44355bbf |
line wrap: on
line diff
--- a/master/src/wecu/run_sac.sh Tue Jun 02 17:35:07 2020 +0000
+++ b/master/src/wecu/run_sac.sh Wed Jun 03 16:40:34 2020 +0000
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Usage: run_sac.sh numcores hostsFilename workDir mapper (-f filter) (-k numKeys) resType patType patterns
+# Usage: run_sac.sh numcores hostsFilename workDir mapper keyHandler (-f filter) (-k numKeys) resType patType patterns
 echo "$@" 1>cmd
 cores=$1
 hosts=$2
@@ -29,6 +29,7 @@
  set -e
  set -o pipefail
  mkdir -p logs
+ mkdir -p res
  f=$1
  shift
  j=$1
@@ -43,7 +44,7 @@
  echo $(date +%Y-%m-%d.%H:%M:%S) $me start $j $ff >>logs/${j}_log
  export PYTHONIOENCODING=utf-8
  { IFS=$'\n' ; stderr=( $( { curl -s -N https://commoncrawl.s3.amazonaws.com/$f | \
- unpigz -dp 1 -c | $filter ./$mapper "$@" ; } 2>&1 1>&4 ; ) ) ; unset IFS ; } 4>&1
+ unpigz -dp 1 -c | $filter ./$mapper "$@" ; } 2>&1 1>res/${j}.tsv ; ) ) ; unset IFS ; }
  { echo $(date +%Y-%m-%d.%H:%M:%S) $me finished $j $ff
  printf '%s\n' "${stderr[@]}" ; } | sed '2,$s/^/ /' >>logs/${j}_log # hack to try to
  # guarantee atomic entry in the log
@@ -60,6 +61,6 @@
  --workdir $wd \
  -a input_paths \
  --env worker \
- --return 'logs/{#}_log' --cleanup \
- worker '{}' '{#}' "$mapper" "$filter" "$@" | tee >(wc -l 1>&2) |\
- sac_reducer.py $1 $numKeys
+ --return 'logs/{#}_log' --return 'res/{#}.tsv' --cleanup \
+ worker '{}' '{#}' "$mapper" "$filter" "$@"
+cat res/*.tsv | sac_reducer.py $1 $numKeys