Mercurial > hg > cc > cirrus_work
comparison bin/getcc_multi.aws @ 210:6faed9e5d9c9
Use 2-digit suffixes;
take more care over what we wait for in the outer loop.
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Thu, 11 Jan 2024 16:43:16 +0000 |
parents | 10c87f5c704d |
children |
comparison
equal
deleted
inserted
replaced
209:b6669d78a5d9 | 210:6faed9e5d9c9 |
---|---|
38 echo $(date) start $SEG | 38 echo $(date) start $SEG |
39 s=$(grep -Eow "[0-9]*\.$SEG" $wf | head -1) | 39 s=$(grep -Eow "[0-9]*\.$SEG" $wf | head -1) |
40 mkdir -p $s/orig/warc | 40 mkdir -p $s/orig/warc |
41 fgrep -v -f <(cd $s/orig/warc && ls *.warc.gz || :) <(fgrep -w $s warc.paths) > /tmp/hst/$s | 41 fgrep -v -f <(cd $s/orig/warc && ls *.warc.gz || :) <(fgrep -w $s warc.paths) > /tmp/hst/$s |
42 | 42 |
43 split -a 1 --numeric-suffixes=1 -n l/$nthreads /tmp/hst/$s /tmp/hst/${s}_ | 43 split -a 2 --numeric-suffixes=1 -n l/$nthreads /tmp/hst/$s /tmp/hst/${s}_ |
44 | 44 printf "%02d\n" $(seq 1 $nthreads) | while read i |
45 seq 1 $nthreads | while read i | |
46 do | 45 do |
47 cat /tmp/hst/${s}_$i | { | 46 cat /tmp/hst/${s}_$i | { |
48 printf "thread\t%s\t%s\t%s\n" $i $$ $(ps -o pgid= -p "$$") >> errlog_${SEG}_$i | 47 printf "thread\t%s\t%s\t%s\n" $i $$ $(ps -o pgid= -p "$$") >> errlog_${SEG}_$i |
49 while read f | 48 while read f |
50 do | 49 do |
55 fi | 54 fi |
56 done & | 55 done & |
57 sleep 30 | 56 sleep 30 |
58 } | 57 } |
59 done | 58 done |
60 while pgrep -a aws |grep -c s3; do sleep 60; done | 59 while pgrep -a aws |grep -c "aws s3.*${s}"; do sleep 60; done |
61 echo $(date) end $SEG | 60 echo $(date) end $SEG |