Mercurial > hg > cc > azure
changeset 27:dd19cf97b6dd
attempt to fix robustness problems
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Sat, 10 Nov 2018 13:20:56 +0000 |
parents | 58d46e6983fa |
children | db75ec542c66 |
files | master/bin/fixDates.py workers/bin/_fixAndMerge.sh workers/bin/fixAndMerge.sh |
diffstat | 3 files changed, 46 insertions(+), 32 deletions(-) [+] |
line wrap: on
line diff
--- a/master/bin/fixDates.py Wed Nov 07 19:36:30 2018 +0000 +++ b/master/bin/fixDates.py Sat Nov 10 13:20:56 2018 +0000 @@ -22,20 +22,24 @@ year=int(ff.group(3)) except: # Unusual month or year field - d=parse("%s %s"%(ff.group(2),ff.group(3))) - if d is None: - print(5,ff.group(1),ff.group(2),ff.group(3),file=sys.stderr) - year=0 - month=0 - elif d.year<1970: - year=0 - month=1 - elif d.year>2019: - month=1 - year=2019 - else: - month=d.month - year=d.year + try: + d=parse("%s %s"%(ff.group(2),ff.group(3))) + if d is None: + print(5,ff.group(1),ff.group(2),ff.group(3),file=sys.stderr) + year=0 + month=0 + elif d.year<1970: + year=0 + month=1 + elif d.year>2019: + month=1 + year=2019 + else: + month=d.month + year=d.year + except Exception as e: + print(6,e,l,file=sys.stderr) + bogons+=1 count=int(ff.group(4)) key=(scheme,year,month) tab[key]=tab.get(key,0)+count @@ -75,9 +79,12 @@ else: key=(scheme,d.year,d.month) tab[key]=tab.get(key,0)+count - except Exception(e): + except Exception as e: print(4,e,l,file=sys.stderr) bogons+=1 +if tab=={}: + # ssh screwed up + exit(1) for ((s,m,y),c) in tab.items(): print(s,m,y,c,sep='\t') print(bogons,file=sys.stderr)
--- a/workers/bin/_fixAndMerge.sh Wed Nov 07 19:36:30 2018 +0000 +++ b/workers/bin/_fixAndMerge.sh Sat Nov 10 13:20:56 2018 +0000 @@ -1,8 +1,16 @@ #!/bin/bash -# Usage: [cat remote filenames] | _fixAndMerge.sh id home pause +# Usage: [cat remote filenames] | _fixAndMerge.sh id home pause log id=$1 home=$2 pause=$3 +log=$4 # Don't all start at once sleep $pause -ssh $home "xargs cat" | fixDates.py +echo \#.$id $(date) starting >> $log +cat > /var/data/in$id +echo \#.$id $(date) got list >> $log +until ( rm /var/data/d$id ; xargs -n 100 ssh $home "xargs gzip -c" < /var/data/in$id | gunzip -c >> /var/data/d$id ) do echo retrying 1>&2; sleep $pause; done +echo \#.$id $(date) got data >> $log +fixDates.py < /var/data/d$id +echo \#.$id $(date) done >> $log +
--- a/workers/bin/fixAndMerge.sh Wed Nov 07 19:36:30 2018 +0000 +++ b/workers/bin/fixAndMerge.sh Sat Nov 10 13:20:56 2018 +0000 @@ -23,29 +23,28 @@ fi wp=$1 touch .running -trap "{ - set -e -o pipefail - cd /var/data - tar -czhf - m.* res* | \ - ssh -o StrictHostKeyChecking=no -q $home \"{ cd data - mkdir -p mergedWhich - cd mergedWhich - tar -xzf - ; } 2>>errs\" - rm -rf res* m.* - cd - rm ifile.txt *.pid - ( sleep 5 ; rm -f nohup.cc ) & - }" EXIT mkdir -p $res log=$res/log echo \# $(date) "running |$proc|$home|$pause|$wp|" >> $log pRes=0 N=$(wc -l< ifile.txt) -echo "# $(date) $proc $N" >> $log -parallel --pipe -N$((N / wp)) -j $wp "_fixAndMerge.sh {#} $home $pause > $res/m.{#} 2>>$res/errs{#}" <ifile.txt 2>>$res/errs|| echo "ppfailed $? ${PIPESTATUS[@]}" >> $res/errs +echo \# $(date) $proc $N >> $log +parallel --pipe -N$((N / wp)) -j $wp "_fixAndMerge.sh {#} $home $pause $log > $res/m.{#} 2>>$res/errs{#}" <ifile.txt 2>>$res/errs|| echo "ppfailed $? ${PIPESTATUS[@]}" >> $res/errs echo \# $(date) subprocs done >> $log cat $res/m.* | awk '{c[$1 "\t" $2 "\t" $3]+=$4} END {for (k in c) {print k "\t" c[k]}}' > /var/data/m.$proc 2>> $res/errs echo \# $(date) subres merged >> $log +#set -e -o pipefail +{ cd /var/data + tar -czhf - m.* res* | \ + ssh -o StrictHostKeyChecking=no -q $home "{ cd data + mkdir -p mergedWhich + cd mergedWhich + tar -xzf - ; }" +rm -rf res* m.* in* d* +} 2>>errs +cd +rm ifile.txt *.pid +( sleep 5 ; rm -f nohup.cc ) & rm .running