changeset 27:dd19cf97b6dd

attempt to fix robustness problems
author Henry S. Thompson <ht@markup.co.uk>
date Sat, 10 Nov 2018 13:20:56 +0000
parents 58d46e6983fa
children db75ec542c66
files master/bin/fixDates.py workers/bin/_fixAndMerge.sh workers/bin/fixAndMerge.sh
diffstat 3 files changed, 46 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/master/bin/fixDates.py	Wed Nov 07 19:36:30 2018 +0000
+++ b/master/bin/fixDates.py	Sat Nov 10 13:20:56 2018 +0000
@@ -22,20 +22,24 @@
       year=int(ff.group(3))
     except:
       # Unusual month or year field
-      d=parse("%s %s"%(ff.group(2),ff.group(3)))
-      if d is None:
-        print(5,ff.group(1),ff.group(2),ff.group(3),file=sys.stderr)
-        year=0
-        month=0
-      elif d.year<1970:
-        year=0
-        month=1
-      elif d.year>2019:
-        month=1
-        year=2019
-      else:
-        month=d.month
-        year=d.year
+      try:
+        d=parse("%s %s"%(ff.group(2),ff.group(3)))
+        if d is None:
+          print(5,ff.group(1),ff.group(2),ff.group(3),file=sys.stderr)
+          year=0
+          month=0
+        elif d.year<1970:
+          year=0
+          month=1
+        elif d.year>2019:
+          month=1
+          year=2019
+        else:
+          month=d.month
+          year=d.year
+      except Exception as e:
+        print(6,e,l,file=sys.stderr)
+        bogons+=1
     count=int(ff.group(4))
     key=(scheme,year,month)
     tab[key]=tab.get(key,0)+count
@@ -75,9 +79,12 @@
     else:
       key=(scheme,d.year,d.month)
     tab[key]=tab.get(key,0)+count
-  except Exception(e):
+  except Exception as e:
     print(4,e,l,file=sys.stderr)
     bogons+=1
+if tab=={}:
+  # ssh screwed up
+  exit(1)
 for ((s,m,y),c) in tab.items():
   print(s,m,y,c,sep='\t')
 print(bogons,file=sys.stderr)
--- a/workers/bin/_fixAndMerge.sh	Wed Nov 07 19:36:30 2018 +0000
+++ b/workers/bin/_fixAndMerge.sh	Sat Nov 10 13:20:56 2018 +0000
@@ -1,8 +1,16 @@
 #!/bin/bash
-# Usage: [cat remote filenames] | _fixAndMerge.sh id home pause
+# Usage: [cat remote filenames] | _fixAndMerge.sh id home pause log
 id=$1
 home=$2
 pause=$3
+log=$4
 # Don't all start at once
 sleep $pause
-ssh $home "xargs cat" | fixDates.py
+echo \#.$id $(date) starting >> $log
+cat > /var/data/in$id
+echo \#.$id $(date) got list >> $log
+until ( rm /var/data/d$id ; xargs -n 100 ssh $home "xargs gzip -c" < /var/data/in$id | gunzip -c >> /var/data/d$id ) do echo retrying 1>&2; sleep $pause; done
+echo \#.$id $(date) got data >> $log
+fixDates.py < /var/data/d$id
+echo \#.$id $(date) done >> $log
+
--- a/workers/bin/fixAndMerge.sh	Wed Nov 07 19:36:30 2018 +0000
+++ b/workers/bin/fixAndMerge.sh	Sat Nov 10 13:20:56 2018 +0000
@@ -23,29 +23,28 @@
 fi
 wp=$1
 touch .running
-trap "{ 
-  set -e -o pipefail
-  cd /var/data
-  tar -czhf - m.* res* | \
-   ssh -o StrictHostKeyChecking=no -q $home \"{ cd data
-                    mkdir -p mergedWhich
-                    cd mergedWhich
-                    tar -xzf - ; } 2>>errs\"
-  rm -rf res* m.*
-  cd
-  rm ifile.txt *.pid
-  ( sleep 5 ; rm -f nohup.cc ) &
-  }" EXIT
 mkdir -p $res
 log=$res/log
 echo \# $(date) "running |$proc|$home|$pause|$wp|" >> $log
 pRes=0
 N=$(wc -l< ifile.txt)
-echo "# $(date) $proc $N" >> $log
-parallel --pipe -N$((N / wp)) -j $wp "_fixAndMerge.sh {#} $home $pause > $res/m.{#} 2>>$res/errs{#}" <ifile.txt 2>>$res/errs|| echo "ppfailed $? ${PIPESTATUS[@]}" >> $res/errs
+echo \# $(date) $proc $N >> $log
+parallel --pipe -N$((N / wp)) -j $wp "_fixAndMerge.sh {#} $home $pause $log > $res/m.{#} 2>>$res/errs{#}" <ifile.txt 2>>$res/errs|| echo "ppfailed $? ${PIPESTATUS[@]}" >> $res/errs
 echo \# $(date) subprocs done >> $log
 cat $res/m.* | awk '{c[$1 "\t" $2 "\t" $3]+=$4} END {for (k in c) {print k "\t" c[k]}}' > /var/data/m.$proc 2>> $res/errs
 echo \# $(date) subres merged >> $log
+#set -e -o pipefail
+{ cd /var/data
+ tar -czhf - m.* res* | \
+  ssh -o StrictHostKeyChecking=no -q $home "{ cd data
+		  mkdir -p mergedWhich
+		  cd mergedWhich
+		  tar -xzf - ; }"
+rm -rf res* m.* in* d*
+} 2>>errs
+cd
+rm ifile.txt *.pid
+( sleep 5 ; rm -f nohup.cc ) &
 rm .running