annotate workers/bin/fixAndMerge.sh @ 27:dd19cf97b6dd

attempt to fix robustness pblms
author Henry S. Thompson <ht@markup.co.uk>
date Sat, 10 Nov 2018 13:20:56 +0000
parents 58d46e6983fa
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
25
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
1 #!/bin/bash
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
2 # Test script to split timedWhich output files [found in ifile.txt] across threads
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
3 # to merge earlier tabulations of http vs. https by last-modified date:
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
4 # Usage: fixAndMerge.sh id home [-t] numWorkerProcesses
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
5 # If -t, no random wait, just id seconds
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
6 # remove >>errs once tested
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
7 #set -e -o pipefail
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# Record this script's PID in test1.pid (relative to the current directory),
# then take the two leading positional arguments: the worker id and the
# destination later handed to ssh and to _fixAndMerge.sh.
# NOTE(review): neither argument is checked for being present or non-empty.
8 echo $$ > test1.pid
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
9 proc=$1
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# Per-worker results directory; it is created further down with mkdir -p.
10 res=/var/data/res$proc
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
11 home=$2
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# Drop id and home so that $1 is now either -t or numWorkerProcesses.
12 shift 2
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# lrand MAX: print a pseudo-random integer of the form 1 + (r % MAX).
# r comes from one byte of `openssl rand`, dumped by `od -d`; the pipeline
# keeps the second space-separated field of od's first output line (the
# field after the offset column).
# NOTE(review): r is derived from a single byte (presumably 0..255 -- confirm
# od's padding on the target platform), so the result is only uniform when
# MAX divides the number of possible r values, and large MAX values can never
# be reached. MAX is not validated; an empty or zero $1 makes the arithmetic
# expansion fail.
13 function lrand {
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
14 # cheap, deliberately low-quality random number generator (see notes above the function)
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
15 echo $(( 1 + ($(openssl rand 1 | od -d | head -1 | tr -s ' ' | cut -f 2 -d ' ') % $1)))
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
16 }
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# Choose the value stored in pause: with -t it is the worker id itself,
# otherwise a value from `lrand 10`. In this script the value is only written
# to the log and forwarded to _fixAndMerge.sh; nothing here sleeps on it.
17 if [ "$1" = "-t" ]
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
18 then
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
19 shift
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
20 pause=$proc
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
21 else
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
22 pause=$(lrand 10)
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
23 fi
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# Remaining argument: numWorkerProcesses from the usage line. It is used
# below both as the divisor for the chunk size and as parallel's -j value.
# NOTE(review): not checked for being a positive integer.
24 wp=$1
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# Marker file in the current directory; removed by the last command of the
# script.
25 touch .running
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# NOTE(review): $res is unquoted here and below; safe only while the worker
# id contains no whitespace or glob characters.
26 mkdir -p $res
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
27 log=$res/log
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# Log lines start with a literal '#' followed by the current date.
28 echo \# $(date) "running |$proc|$home|$pause|$wp|" >> $log
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# NOTE(review): pRes is assigned but never read in the visible script.
29 pRes=0
26
58d46e6983fa -mforce (?) multiple processors to be used
Henry S. Thompson <ht@markup.co.uk>
parents: 25
diff changeset
# Count the lines of ifile.txt (in the current directory); the count is
# logged and used to size the per-job chunks.
30 N=$(wc -l< ifile.txt)
27
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
31 echo \# $(date) $proc $N >> $log
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
# Feed ifile.txt on stdin to parallel (GNU parallel syntax, judging by --pipe
# and {#}), N/wp records per job and at most wp jobs at a time. Each job runs
# _fixAndMerge.sh with its job number, $home, $pause and $log; its stdout goes
# to $res/m.<job number> and its stderr is appended to $res/errs<job number>.
# parallel's own stderr is appended to $res/errs, and a non-zero exit adds a
# "ppfailed" line with the exit status there instead of stopping the script.
# NOTE(review): N / wp is integer division -- it is 0 when N < wp, and the
# arithmetic expansion fails if wp is empty or 0. Neither case is guarded.
32 parallel --pipe -N$((N / wp)) -j $wp "_fixAndMerge.sh {#} $home $pause $log > $res/m.{#} 2>>$res/errs{#}" <ifile.txt 2>>$res/errs|| echo "ppfailed $? ${PIPESTATUS[@]}" >> $res/errs
25
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
33 echo \# $(date) subprocs done >> $log
26
58d46e6983fa -mforce (?) multiple processors to be used
Henry S. Thompson <ht@markup.co.uk>
parents: 25
diff changeset
# Merge the per-job outputs: sum field 4 for every distinct combination of
# fields 1-3 and write "f1<TAB>f2<TAB>f3<TAB>total" lines to /var/data/m.$proc.
# Output order is whatever awk's for-in iteration yields. Only awk's stderr is
# redirected to $res/errs; cat's is not.
34 cat $res/m.* | awk '{c[$1 "\t" $2 "\t" $3]+=$4} END {for (k in c) {print k "\t" c[k]}}' > /var/data/m.$proc 2>> $res/errs
25
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
35 echo \# $(date) subres merged >> $log
27
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
# Ship everything to $home, then delete the local copies.
# Inside /var/data, m.* and res* are archived (tar -h follows symlinks) and
# streamed over ssh; the remote command unpacks the archive in
# data/mergedWhich relative to the remote login directory. stderr of the
# whole { } group is appended to a file named errs, opened relative to the
# directory the script was in before the cd.
# NOTE(review): strict mode is still commented out on the next line, so the
# rm -rf at the end of the group runs even when cd, tar or ssh failed --
# results can be deleted without ever having been copied, and if the cd fails
# the globs are expanded in the original directory instead of /var/data.
# NOTE(review): StrictHostKeyChecking=no disables ssh host key verification.
36 #set -e -o pipefail
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
37 { cd /var/data
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
38 tar -czhf - m.* res* | \
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
39 ssh -o StrictHostKeyChecking=no -q $home "{ cd data
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
40 mkdir -p mergedWhich
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
41 cd mergedWhich
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
42 tar -xzf - ; }"
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
# Removes the results plus anything matching in* and d* in the current
# directory. The { } group runs in the current shell, so the cd above still
# applies here and after the group ends.
43 rm -rf res* m.* in* d*
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
44 } 2>>errs
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
# Return to $HOME (bare cd) and remove the control files there.
# NOTE(review): test1.pid and .running were created relative to the starting
# directory, so this presumably relies on the script being started from
# $HOME -- confirm against the launcher.
45 cd
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
# Removes the input list and every *.pid file in the directory, not just
# test1.pid. No -f, so a missing file produces an error message.
46 rm ifile.txt *.pid
dd19cf97b6dd attempt to fix robustness pblms
Henry S. Thompson <ht@markup.co.uk>
parents: 26
diff changeset
# Delete nohup.cc from a background subshell after 5 seconds; the script does
# not wait for it.
47 ( sleep 5 ; rm -f nohup.cc ) &
25
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# Clear the marker created by `touch .running` near the top of the script.
48 rm .running
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
49
1b9329f6b5e1 works on which.16
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
50