Mercurial > hg > cc > azure
changeset 25:1b9329f6b5e1
works on which.16
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Wed, 07 Nov 2018 17:37:27 +0000 |
parents | b4e3beb2227e |
children | 58d46e6983fa |
files | workers/bin/_fixAndMerge.sh workers/bin/fixAndMerge.sh |
diffstat | 2 files changed, 58 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/bin/bash
# File: workers/bin/_fixAndMerge.sh
# Usage: [cat remote filenames] | _fixAndMerge.sh id home pause
#   id     first argument supplied by the caller; stored but not otherwise
#          used by this script
#   home   ssh destination on which the files named on stdin are read
#   pause  number of seconds to sleep before contacting $home
#
# Reads file names on stdin, forwards them over ssh to "xargs cat" running on
# $home, and pipes the concatenated contents through fixDates.py to stdout.

# Without pipefail the pipeline's status is only that of fixDates.py, so a
# failed ssh (host unreachable, missing file) would go unnoticed by the caller.
set -o pipefail

# shellcheck disable=SC2034  # kept so the documented positional interface stays visible
id=$1
home=$2
pause=$3

# Don't all start at once
sleep "${pause:-0}"

# stdin (the list of remote file names) is inherited by ssh and consumed by
# the remote xargs.
ssh "$home" "xargs cat" | fixDates.py
#!/bin/bash
# File: workers/bin/fixAndMerge.sh
# Test script to split timedWhich output files [found in ifile.txt] across threads
# to merge earlier tabulations of http vs. https by last-modified date:
# Usage: fixAndMerge.sh id home [-t] numWorkerProcesses
#   id                  identifier of this run; names /var/data/res<id> and
#                       /var/data/m.<id>
#   home                ssh destination, used both by the workers and for the
#                       final upload
#   -t                  no random wait, just id seconds
#   numWorkerProcesses  passed to "parallel -j"
# remove >>errs once tested
#set -e -o pipefail

# Print the usage line on stderr and stop before any file is touched.
usage() {
  printf 'Usage: %s id home [-t] numWorkerProcesses\n' "${0##*/}" >&2
  exit 2
}

# cheap bad little random number generator
# Prints an integer between 1 and $1 derived from one byte of openssl output.
lrand() {
  local max=$1
  local byte
  byte=$(openssl rand 1 | od -An -tu1 | tr -dc '0-9')
  # Fall back to 0 if no byte could be read, so the arithmetic stays valid
  echo $(( 1 + ${byte:-0} % max ))
}

# EXIT handler: ship everything under /var/data to $home, then tidy up.
cleanup() {
  # Stop at the first failing step, so local results are only deleted after
  # the upload pipeline has succeeded.
  set -e -o pipefail
  cd /var/data
  # -h makes tar archive the files that symlinks point to.
  # The remote steps are chained with && so that a failed cd/mkdir makes the
  # whole upload fail instead of unpacking into the wrong directory.
  tar -czhf - m.* res* |
    ssh -o StrictHostKeyChecking=no -q "$home" \
      "{ cd data && mkdir -p mergedWhich.16 && cd mergedWhich.16 && tar -xzf - ; } 2>>errs"
  rm -rf res* m.*
  # Bare cd returns to $HOME; the input list and pid files are removed there.
  # NOTE(review): this assumes the script was started from $HOME - confirm.
  cd
  rm ifile.txt *.pid
  # NOTE(review): nohup.cc is presumably the launcher's nohup output; it is
  # removed a little later, in the background - confirm with the launcher.
  ( sleep 5 ; rm -f nohup.cc ) &
}

# Validate arguments before creating any state or arming the EXIT handler.
(( $# >= 3 )) || usage
proc=$1
home=$2
shift 2
if [[ $1 == "-t" ]]; then
  shift
  (( $# >= 1 )) || usage
  pause=$proc
else
  pause=$(lrand 10)
fi
wp=$1
res=/var/data/res$proc

echo $$ > test1.pid
touch .running
trap cleanup EXIT

mkdir -p "$res"
log=$res/log
echo "# $(date) running |$proc|$home|$pause|$wp|" >> "$log"
echo "# $(date) $proc $(wc -l ifile.txt)" >> "$log"

# Feed ifile.txt to $wp concurrent _fixAndMerge.sh jobs; {#} is GNU parallel's
# job sequence number and keeps each job's output and error files apart.
parallel --round-robin --pipe -j "$wp" \
  "_fixAndMerge.sh {#} $home $pause > $res/m.{#} 2>>$res/errs{#}" <ifile.txt ||
  echo "ppfailed $? ${PIPESTATUS[*]}" 1>&2
echo "# $(date) subprocs done" >> "$log"

# Sum column 4 over all per-job outputs, keyed by the first three columns.
cat "$res"/m.* |
  awk '{c[$1 "\t" $2 "\t" $3]+=$4} END {for (k in c) {print k "\t" c[k]}}' \
  > "/var/data/m.$proc"
echo "# $(date) subres merged" >> "$log"
rm .running