changeset 33:4c117ee8ed75

fixDates, _fixAndMerge, _doFetch: towards rework of date fixup; share.sh, old_invoke.sh: recover the old approach to sharing, which works
author Henry S. Thompson <ht@markup.co.uk>
date Tue, 20 Nov 2018 14:49:07 +0000
parents 9342f6269edf
children ad6eff2bc6f9
files master/bin/fixDates.py master/bin/internal/old_invoke.sh master/bin/share.sh workers/bin/_doFetch.sh workers/bin/_fixAndMerge.sh
diffstat 5 files changed, 65 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/master/bin/fixDates.py	Tue Nov 20 10:31:05 2018 +0000
+++ b/master/bin/fixDates.py	Tue Nov 20 14:49:07 2018 +0000
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import sys,re
 from array import array
+from time import strftime
 ok=re.compile('(https?) ([^ ]*) ((?:19|20)..) ([0-9][0-9]*)$')
 #parseable=re.compile('.*[-:/]\w+[-:/]|\w+\s\d{4}|\d{10}')
 from dateparser import parse
@@ -18,6 +19,9 @@
         'Jul','Aug','Sep','Oct','Nov','Dec']
 months=dict(zip(mn[1:],range(1,13)))
 for l in sys.stdin:
+  if l[0]=='#':
+    print('# %s'%strftime('%Y-%m-%d %H:%M:%S'),file=sys.stderr)
+    continue
   n+=1
   ff=ok.match(l)
   if ff is not None:
@@ -70,7 +74,7 @@
       cols=cols[1:]
       count=int(cols.pop())
     except:
-      print(2,sn[scheme],cols,count,file=sys.stderr)
+      print(2,count,l,file=sys.stderr)
       bogons+=1
       continue
     if cols==[]:
@@ -86,7 +90,7 @@
           continue
         elif d.year<1900 or d.year>2100:
           # Shouldn't happen 
-          print(8,ff.group(1),ff.group(2),ff.group(3),ff.group(4),
+          print(8,sn[scheme],d.month,d.year,count,
                 file=sys.stderr)
           bogons+=1
           continue
@@ -94,7 +98,7 @@
           year=d.year
           month=d.month
       except Exception as e:
-        print(4,e,l,file=sys.stderr)
+        print(4,e,l,count,file=sys.stderr)
         bogons+=1
         continue
   # log it
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/master/bin/internal/old_invoke.sh	Tue Nov 20 14:49:07 2018 +0000
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Helper for ../wrun, q.v.
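+# Usage: old_invoke.sh [-w] [-x] me cmd ifile id port ip [args...]
+#  Runs
+#   cmd [id me] args...
+#  via ssh to ip:port (with -w: under nohup on a forced tty, output to nohup.cc)
+#  If ifile is not /dev/null, it is first copied to ifile.txt on ip via scp
+#  Unless -x, worker id and me are passed as the first args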
+if [ "$1" = "-w" ]
+then
+ shift
+ wait=1
+fi
+if [ "$1" = "-x" ]
+then
+ shift
+ id=
+ me=
+else
+ me=$1
+ id=$4
+fi
+cmd="$2"
+ifile=$3
+port=$5
+ip=$6
+shift 6
+echo "#$(date)#$cmd#$ifile#$id#$port#$ip#$@#" 1>&2
+if [ "$ifile" != "/dev/null" ]
+then
+  echo "# from $ifile" 1>&2
+  scp -P $port $ifile $ip:ifile.txt
+fi || echo scp failed, status=$? 1>&2
+if [ "$wait" ]
+then
+  ssh -tt -p $port $ip "nohup $cmd $id $me ""$@"" > nohup.cc"
+else
+  ssh -p $port $ip "$cmd $id $me ""$@"
+fi || echo ssh failed, status=$? 1>&2
+echo "#$(date)#$id#" 1>&2
--- a/master/bin/share.sh	Tue Nov 20 10:31:05 2018 +0000
+++ b/master/bin/share.sh	Tue Nov 20 14:49:07 2018 +0000
@@ -28,6 +28,6 @@
 az vmss list-instance-connection-info -g $group -n $name | tr -s ',": ' '\t' | \
     tail -n +2 | head -$np |cut -f 3-5 | tee /dev/stderr |\
   while read id ip port
-    do tar -czf - "$@" | "$(dirname "$0")"/internal/invoke.sh -x "" bash /dev/null "" $port $ip -c \""$cmd"\"
+    do tar -czf - "$@" | "$(dirname "$0")"/internal/old_invoke.sh -x "" bash /dev/null "" $port $ip -c \""$cmd"\"
   done
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/workers/bin/_doFetch.sh	Tue Nov 20 14:49:07 2018 +0000
@@ -0,0 +1,15 @@
+#!/bin/bash
+id=$1
+home=$2
+pause=$3
+log=$4
+shift 4
+echo \#.$id fetch $(echo "$@" | wc -w) >> $log
+until ssh $home "xargs gzip -c" "$@"  | gunzip -c > /var/data/d${id}.x 
+ do
+  echo retrying 1>&2
+  sleep $pause
+done
+echo \# # put a group mark in for timing purposes further down the line
+cat /var/data/d${id}.x | tee >(echo \#.$id fetched $(wc -l) >> $log)
+
--- a/workers/bin/_fixAndMerge.sh	Tue Nov 20 10:31:05 2018 +0000
+++ b/workers/bin/_fixAndMerge.sh	Tue Nov 20 14:49:07 2018 +0000
@@ -10,9 +10,8 @@
 cat > /var/data/in$id
 echo \#.$id $(date) got list $(wc -l /var/data/in$id) >> $log
 rm -f /var/data/d$id
-xargs -n 100 _doFetch.sh "$@" < /var/data/in$id >/var/data/d$id
-echo \#.$id $(wc -l /var/data/d$id)
-echo \#.$id $(date) got data >> $log
+xargs -n 16 _doFetch.sh "$@" < /var/data/in$id >/var/data/d$id
+echo \#.$id $(date) got data  $(wc -l /var/data/d$id) >> $log
 fixDates.py < /var/data/d$id
 echo \#.$id $(date) done >> $log