annotate workers/bin/ptimedWhich.sh @ 46:7a4e49689935

finally got logging sorted
author Henry S. Thompson <ht@markup.co.uk>
date Mon, 03 Dec 2018 21:10:02 +0000
parents
children 2a0dab424418
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
46
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
1 #!/bin/bash
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
2 # Test script to split CC WAT files across threads
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
3 # to tabulate http vs. https by last-modified date:
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
4 # Usage: [echo file file_id] | timedWhich.sh id home [-t] numWorkerProcesses
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
5 # If -t, no random wait, just id seconds
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
6 # remove >>errs once tested
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
7 #set -e -o pipefail
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# Leading positional arguments: $1 = numeric id of this instance,
# $2 = ssh destination the results are shipped to on exit.
# Refuse to start without them: otherwise `shift 2` fails silently and the
# rest of the script runs with an empty id and an empty ssh destination.
if [ "$#" -lt 2 ]
then
  echo "Usage: ${0##*/} id home [-t] numWorkerProcesses" 1>&2
  exit 2
fi
# Record our pid in the current directory
echo $$ > test1.pid
proc=$1
# Per-instance results directory
res=/var/data/res$proc
home=$2
# Leave only [-t] numWorkerProcesses in the positional parameters
shift 2
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# lrand N
#   Print a pseudo-random integer in the range 1..N (inclusive) on stdout.
#   N must be a positive decimal integer without leading zeros; otherwise a
#   message is written to stderr and the function returns 1.
#   Randomness comes from two bytes of /dev/urandom (so N up to 65536 is
#   fully covered); bash's $RANDOM is used if that read yields nothing usable.
#   Cheap, slightly biased (plain modulo) -- good enough for jittering sleeps.
function lrand {
  local limit=$1 raw
  if ! [[ $limit =~ ^[1-9][0-9]*$ ]]
  then
    echo "lrand: upper bound must be a positive integer, got '$limit'" 1>&2
    return 1
  fi
  # -An: no offsets, -N2: two bytes, -tu2: one unsigned 16-bit decimal
  raw=$(od -An -N2 -tu2 /dev/urandom 2>/dev/null | tr -d ' \n')
  if ! [[ $raw =~ ^[0-9]+$ ]]
  then
    raw=$RANDOM
  fi
  # 10# forces base 10 so a zero-padded value is never read as octal
  echo $(( 1 + 10#$raw % limit ))
}
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# Start-up delay: with -t it is deterministic (the instance id, in seconds),
# otherwise a random 1..60 seconds so that instances do not all start at once.
if [ "$1" = "-t" ]
then
  shift
  pause=$proc
else
  pause=$(lrand 60)
fi
# Number of parallel worker processes
wp=$1
# wp is later used as a divisor and as a job count, so it has to be a
# positive integer; fail early with a clear message instead of a later
# arithmetic error.
case "$wp" in
  ''|*[!0-9]*|0*)
    echo "numWorkerProcesses must be a positive integer, got '$wp'" 1>&2
    exit 2
    ;;
esac
# Marker file, removed again when the main loop has finished
touch .running
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# tryread
#   Reads "url outfile" pairs from stdin, one per line.  For each pair the
#   url is fetched with curl, decompressed with zcat and piped through
#   _timedWhich.py, whose stdout is written to outfile.
#   A pair is attempted up to 5 times, with a random 1..10 second sleep
#   between attempts (uses lrand).  Progress and the per-stage exit codes of
#   a failed attempt are written to stderr.
#   Returns 0 when every pair succeeded.  Returns 1 as soon as one pair has
#   failed 5 times; remaining input lines are then not processed.
#   pipefail is enabled while working and switched off again on both exits.
function tryread {
  local url out attempt rc
  # Any failing stage (download, decompress, tabulate) must fail the attempt
  set -o pipefail
  # -r: keep backslashes in the input untouched
  while read -r url out
  do
    attempt=0
    until if [ $((attempt+=1)) -gt 5 ]
          then
            echo " tried 5 times w/o success, giving up" 1>&2
            set +o pipefail
            return 1
          fi &&
          echo -n \# $(date) "reading $url ..." 1>&2 &&
          curl -s -S --max-time 60 --insecure -o - "$url" |
            # Log completion only once zcat has consumed the whole download,
            # and hand zcat's own status on to the pipeline.
            { zcat; rc=$?; echo "done at " $(date) 1>&2; exit "$rc"; } |
            _timedWhich.py > "$out"
    do
      # One exit code per pipeline stage of the failed attempt
      echo \# ${PIPESTATUS[@]} 1>&2
      # try to avoid lockstep retries
      sleep $(lrand 10)
      echo \# $(date) retry number $attempt 1>&2
    done
  done
  set +o pipefail
}
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# EXIT handler: ship everything under /var/data matching CC* and res* to
# $home as a gzipped tar stream (unpacked there into data/which), then delete
# the local copies and the bookkeeping files in the home directory.
# set -e -o pipefail makes the handler stop at the first failure, so local
# results are only removed after the transfer pipeline has succeeded.
function cleanup {
  set -e -o pipefail
  cd /var/data
  # $home is quoted so an empty or odd value cannot turn the remote command
  # into the host name; the remote steps are chained with && so nothing is
  # unpacked unless we really are in data/which.
  tar -czhf - CC* res* | \
    ssh -o StrictHostKeyChecking=no -q "$home" \
      "{ cd data && mkdir -p which && cd which && tar -xzf - ; } 2>>errs"
  rm -rf res* CC*
  # Back to the home directory for the bookkeeping files
  cd
  rm ifile.txt *.pid
  # Removed from a background subshell after a short delay
  ( sleep 5 ; rm nohup.cc ) &
}
trap cleanup EXIT
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
# Main driver: turn every path listed in ifile.txt into a "url outfile" pair
# and spread the pairs over $wp parallel tryread workers.
mkdir -p "$res"
log=$res/log
# Don't all start at once
sleep "$pause"
echo \# $(date) > "$log"
pRes=0
# Number of input paths
N=$(wc -l < ifile.txt)
# Records per worker, rounded up: gives at most $wp chunks and never 0
# (plain N / wp is 0 whenever there are fewer inputs than workers).
chunk=$(( (N + wp - 1) / wp ))
if [ "$chunk" -lt 1 ]
then
  chunk=1
fi
# The workers run in fresh bash processes, so the functions must be exported
export -f tryread lrand
while read -r s
do
  url="https://commoncrawl.s3.amazonaws.com/$s"
  # Output name: fields 3, 4, 8 and 13 of the path after splitting on
  # '/' and '-', re-joined with '-'
  cci=$(printf '%s\n' "$s" | tr '/-' ' ' | awk '{print $3,$4,$8,$13}' | tr ' ' \-)
  echo "$url" "/var/data/$cci"
done < ifile.txt 2>> "$res/errs" | \
  parallel --pipe -N"$chunk" -j "$wp" "bash -c \"tryread 2>>$res/errs{#}\"" 2>>"$res/errs" || pRes=$?
# {#} above is parallel's job sequence number: one stderr file per worker job
echo \# $(date) main loop exit code=$pRes >> "$log"
rm .running
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
75
7a4e49689935 finally got logging sorted
Henry S. Thompson <ht@markup.co.uk>
parents:
diff changeset
76