Mercurial > hg > cc > azure
changeset 33:4c117ee8ed75
fixDates, _fixAndMerge, _doFetch:
towards a rework of the date fixup
share.sh, old_invoke.sh:
recover the old approach to sharing, which works
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Tue, 20 Nov 2018 14:49:07 +0000 |
parents | 9342f6269edf |
children | ad6eff2bc6f9 |
files | master/bin/fixDates.py master/bin/internal/old_invoke.sh master/bin/share.sh workers/bin/_doFetch.sh workers/bin/_fixAndMerge.sh |
diffstat | 5 files changed, 65 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/master/bin/fixDates.py Tue Nov 20 10:31:05 2018 +0000 +++ b/master/bin/fixDates.py Tue Nov 20 14:49:07 2018 +0000 @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import sys,re from array import array +from time import strftime ok=re.compile('(https?) ([^ ]*) ((?:19|20)..) ([0-9][0-9]*)$') #parseable=re.compile('.*[-:/]\w+[-:/]|\w+\s\d{4}|\d{10}') from dateparser import parse @@ -18,6 +19,9 @@ 'Jul','Aug','Sep','Oct','Nov','Dec'] months=dict(zip(mn[1:],range(1,13))) for l in sys.stdin: + if l[0]=='#': + print('# %s'%strftime('%Y-%m-%d %H:%M:%S'),file=sys.stderr) + continue n+=1 ff=ok.match(l) if ff is not None: @@ -70,7 +74,7 @@ cols=cols[1:] count=int(cols.pop()) except: - print(2,sn[scheme],cols,count,file=sys.stderr) + print(2,count,l,file=sys.stderr) bogons+=1 continue if cols==[]: @@ -86,7 +90,7 @@ continue elif d.year<1900 or d.year>2100: # Shouldn't happen - print(8,ff.group(1),ff.group(2),ff.group(3),ff.group(4), + print(8,sn[scheme],d.month,d.year,count, file=sys.stderr) bogons+=1 continue @@ -94,7 +98,7 @@ year=d.year month=d.month except Exception as e: - print(4,e,l,file=sys.stderr) + print(4,e,l,count,file=sys.stderr) bogons+=1 continue # log it
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/master/bin/internal/old_invoke.sh Tue Nov 20 14:49:07 2018 +0000 @@ -0,0 +1,40 @@ +#!/bin/bash +# Helper for ../wrun, q.v. +# Usage: invoke.sh [-x] me cmd ifile id port ip [args...] +# Runs +# cmd [id] args... +# via ssh to ip:port +# If ifile is not /dev/null, feed in as stdin +# Unless -x, worker id is passed as first arg +if [ "$1" = "-w" ] +then + shift + wait=1 +fi +if [ "$1" = "-x" ] +then + shift + id= + me= +else + me=$1 + id=$4 +fi +cmd="$2" +ifile=$3 +port=$5 +ip=$6 +shift 6 +echo "#$(date)#$cmd#$ifile#$id#$port#$ip#$@#" 1>&2 +if [ "$ifile" != "/dev/null" ] +then + echo "# from $ifile" 1>&2 + scp -P $port $ifile $ip:ifile.txt +fi || echo scp failed, status=$? 1>&2 +if [ "$wait" ] +then + ssh -tt -p $port $ip "nohup $cmd $id $me ""$@"" > nohup.cc" +else + ssh -p $port $ip "$cmd $id $me ""$@" +fi || echo ssh failed, status=$? 1>&2 +echo "#$(date)#$id#" 1>&2
--- a/master/bin/share.sh Tue Nov 20 10:31:05 2018 +0000 +++ b/master/bin/share.sh Tue Nov 20 14:49:07 2018 +0000 @@ -28,6 +28,6 @@ az vmss list-instance-connection-info -g $group -n $name | tr -s ',": ' '\t' | \ tail -n +2 | head -$np |cut -f 3-5 | tee /dev/stderr |\ while read id ip port - do tar -czf - "$@" | "$(dirname "$0")"/internal/invoke.sh -x "" bash /dev/null "" $port $ip -c \""$cmd"\" + do tar -czf - "$@" | "$(dirname "$0")"/internal/old_invoke.sh -x "" bash /dev/null "" $port $ip -c \""$cmd"\" done
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/workers/bin/_doFetch.sh Tue Nov 20 14:49:07 2018 +0000 @@ -0,0 +1,15 @@ +#!/bin/bash +id=$1 +home=$2 +pause=$3 +log=$4 +shift 4 +echo \#.$id fetch $(echo "$@" | wc -w) >> $log +until ssh $home "xargs gzip -c" "$@" | gunzip -c > /var/data/d${id}.x + do + echo retrying 1>&2 + sleep $pause +done +echo \# # put a group mark in for timing purposes further down the line +cat /var/data/d${id}.x | tee >(echo \#.$id fetched $(wc -l) >> $log) +
--- a/workers/bin/_fixAndMerge.sh Tue Nov 20 10:31:05 2018 +0000 +++ b/workers/bin/_fixAndMerge.sh Tue Nov 20 14:49:07 2018 +0000 @@ -10,9 +10,8 @@ cat > /var/data/in$id echo \#.$id $(date) got list $(wc -l /var/data/in$id) >> $log rm -f /var/data/d$id -xargs -n 100 _doFetch.sh "$@" < /var/data/in$id >/var/data/d$id -echo \#.$id $(wc -l /var/data/d$id) -echo \#.$id $(date) got data >> $log +xargs -n 16 _doFetch.sh "$@" < /var/data/in$id >/var/data/d$id +echo \#.$id $(date) got data $(wc -l /var/data/d$id) >> $log fixDates.py < /var/data/d$id echo \#.$id $(date) done >> $log