Mercurial > hg > cc > azure
annotate workers/bin/timedWhich.sh @ 65:e1f61f94b196
switch to curl->file, enable retries
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 04 Jun 2020 12:08:29 +0000 |
parents | c2b72d29a3ee |
children |
rev | line source |
---|---|
17
2a2c1fb03c54
first cut at http/https real trial, with month and year last-modified info too
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
#!/bin/bash
# Test script to split CC WAT files across threads
#  to tabulate http vs. https by last-modified date:
# Usage: [echo file file_id] | timedWhich.sh id home [-t] numWorkerProcesses
#  id                 - identifier of this run; names the results directory
#                       /var/data/res<id> and, with -t, is also the start-up
#                       delay in seconds
#  home               - destination handed to ssh when the script exits
#  numWorkerProcesses - value passed to parallel's -j option
# If -t, no random wait, just id seconds
# NOTE: the list of "file file_id" lines is read from ifile.txt in the
#  current directory by the main loop.
# remove >>errs once tested
#set -e -o pipefail

# Refuse to start without id, home and numWorkerProcesses: otherwise
#  'shift 2' fails silently and the rest of the script runs with empty
#  values for the ssh destination and the worker count.
if [ "$#" -lt 3 ]
then
 echo "Usage: $0 id home [-t] numWorkerProcesses" 1>&2
 exit 2
fi
# Record our process id in the current directory
echo $$ > test1.pid
proc=$1
res=/var/data/res$proc
home=$2
# Leave only [-t] numWorkerProcesses in the positional parameters
shift 2
2a2c1fb03c54
first cut at http/https real trial, with month and year last-modified info too
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
# lrand N
#  Print a pseudo-random integer in the range 1..N on stdout.
#  The randomness is a single byte (0-255), so values above 256 are never
#  produced and the distribution is only roughly uniform.
#  Arguments: $1 - upper bound N; anything that is not a positive integer
#                  is treated as 1
#  Returns:   0
function lrand {
 # cheap bad little random number generator
 local limit=$1
 local byte
 # A zero or missing bound would otherwise make the modulo below fail
 [[ $limit =~ ^[1-9][0-9]*$ ]] || limit=1
 # One random byte printed as an unsigned decimal with no offset column
 byte=$(openssl rand 1 2>/dev/null | od -An -tu1 | tr -d ' \n')
 # No openssl (or no output): fall back to bash's own generator so that
 #  callers such as 'sleep $(lrand 10)' always get a number
 [[ $byte =~ ^[0-9]+$ ]] || byte=$RANDOM
 echo $(( 1 + byte % limit ))
}
2a2c1fb03c54
first cut at http/https real trial, with month and year last-modified info too
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
# Pick the start-up delay: with -t it is deterministic (the id given as the
#  first script argument), otherwise a random 1..60 seconds.
case "$1" in
 -t)
  shift
  pause=$proc
  ;;
 *)
  pause=$(lrand 60)
  ;;
esac
# What remains is the number of worker processes
wp=$1
# Marker file that exists for as long as the main loop is running
touch .running
2a2c1fb03c54
first cut at http/https real trial, with month and year last-modified info too
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
# tryRead URL
#  Stream URL through zcat into doit, making at most 5 attempts.
#  pipefail is switched on while trying, so that a failure of curl or zcat
#  (not only of doit) counts as a failed attempt; it is switched off again
#  before returning, on the success path and on the give-up path alike.
#  Arguments: $1 - URL to fetch
#  Outputs:   for every failed attempt, the exit codes of the pipeline
#             stages and a time-stamped "retry number" line on stderr
#  Returns:   0 as soon as one attempt succeeds, 1 after 5 failed attempts
function tryRead {
 local attempt=1
 set -o pipefail
 until curl -s --insecure -o - "$1" | zcat | doit
 do
  echo "${PIPESTATUS[*]}" 1>&2
  # try to avoid lockstep retries
  sleep "$(lrand 10)"
  echo $(date) retry number $attempt 1>&2
  if [ $((attempt+=1)) -gt 5 ]
  then
   echo "tried to read $1 5 times w/o success, giving up" 1>&2
   # Do not leave pipefail on for the rest of the script
   set +o pipefail
   return 1
  fi
 done
 set +o pipefail
}
2a2c1fb03c54
first cut at http/https real trial, with month and year last-modified info too
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
# EXIT handler: ship the results to $home and clean up.
#  - The handler text is double-quoted, so $home is expanded here, when the
#    trap is installed; the globs and the escaped quotes are evaluated only
#    when the handler runs.
#  - Everything matching CC* and res* in /var/data is sent as a gzipped tar
#    stream (symlinks followed) over ssh and unpacked in data/which on the
#    remote side, with remote stderr appended to a file named errs.
#  - The remote commands are chained with && so that a failed 'cd data' or
#    'mkdir' makes the upload fail instead of unpacking somewhere else.
#  - set -e -o pipefail makes a failed cd or upload abort the handler before
#    the local results are removed.
#  - Finally the work list and pid files in the home directory are removed
#    and nohup.cc is deleted five seconds later by a background subshell.
trap "{
 set -e -o pipefail
 cd /var/data
 tar -czhf - CC* res* | \
   ssh -o StrictHostKeyChecking=no -q $home \"{ cd data &&
     mkdir -p which &&
     cd which &&
     tar -xzf - ; } 2>>errs\"
 rm -rf res* CC*
 cd
 rm ifile.txt *.pid
 ( sleep 5 ; rm nohup.cc ) &
}" EXIT
2a2c1fb03c54
first cut at http/https real trial, with month and year last-modified info too
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
# Overall status reported at the end of the run; 0 until something fails
pRes=0
# Per-run results directory and the log file inside it
log=$res/log
mkdir -p $res
# Stagger the start so that the workers don't all begin at once
sleep $pause
# Start a fresh log whose first line is a time stamp
echo '#' $(date) > $log
21
a214fd3a8001
use /var/data, don't store but include subproc _inside_ tryread
Henry S. Thompson <ht@markup.co.uk>
parents:
18
diff
changeset
|
# doit
#  Consume the stream on stdin and hand it to the worker processes.
#  Reads the globals log, id, wp, res and cci.
#  - Appends "# <date> <id> " to $log, followed by the number of lines in
#    the stream (counted by wc in a process substitution fed by tee).
#  - Pipes the stream into parallel, which runs _timedWhich.py with the job
#    number as argument; each job writes to $res/$cci.<job> and appends its
#    stderr to $res/errs<job>.
#  - If that pipeline fails, a "ppfailed" line with the exit status and the
#    per-stage statuses goes to stderr; the function itself then returns
#    the status of that echo.
doit ()
{
 local job="_timedWhich.py {#} > $res/$cci.{#} 2>>$res/errs{#}"
 printf '# %s %s ' "$(date)" "$id" >> $log
 tee >(wc -l >> $log) |
  parallel --round-robin --pipe --block-size 2M -j $wp "$job" ||
  echo "ppfailed $? ${PIPESTATUS[@]}" >&2
}
17
2a2c1fb03c54
first cut at http/https real trial, with month and year last-modified info too
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
# Main loop: one iteration per "path id" line of ifile.txt.
#  s  - object path, appended to https://commoncrawl.s3.amazonaws.com/
#  id - identifier of the item; logged and exported as ID
# stderr of the whole loop is appended to $res/errs.
while read -r s id
do
 url="https://commoncrawl.s3.amazonaws.com/$s"
 # Short name for the item: fields 3, 4, 8 and 13 of the path after
 #  splitting it on '/' and '-', joined with '-'
 cci=$(echo $s | tr '/-' ' ' | awk '{print $3,$4,$8,$13}' |tr ' ' \-)
 export ID=$id
 echo \# $(date) "running |$proc|$home|$pause|$wp|$id|" >> $log
 # Experimental retry loop
 # Remember a download that failed for good: otherwise its status is
 #  overwritten by the cat below and never reaches the log
 tryRead "$url" || pRes=$?
 # Merge the per-job outputs into a single file for this item
 cat $res/$cci.* > /var/data/$cci
done < ifile.txt 2>> $res/errs || pRes=$?
echo \# $(date) main loop exit code=$pRes >> $log
# The run is over: remove the marker file
rm .running
2a2c1fb03c54
first cut at http/https real trial, with month and year last-modified info too
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
76 |
2a2c1fb03c54
first cut at http/https real trial, with month and year last-modified info too
Henry S. Thompson <ht@markup.co.uk>
parents:
diff
changeset
|
77 |