changeset 25:1b9329f6b5e1

works on which.16
author Henry S. Thompson <ht@markup.co.uk>
date Wed, 07 Nov 2018 17:37:27 +0000
parents b4e3beb2227e
children 58d46e6983fa
files workers/bin/_fixAndMerge.sh workers/bin/fixAndMerge.sh
diffstat 2 files changed, 58 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/workers/bin/_fixAndMerge.sh	Wed Nov 07 17:37:27 2018 +0000
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Usage: [cat remote filenames] | _fixAndMerge.sh id home pause
+id=$1  # first argument; assigned here but not referenced again in this script
+home=$2  # ssh destination on which the files named on stdin are read
+pause=$3  # value handed to sleep before any work starts
+# Don't all start at once
+sleep $pause
+ssh $home "xargs cat" | fixDates.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/workers/bin/fixAndMerge.sh	Wed Nov 07 17:37:27 2018 +0000
@@ -0,0 +1,50 @@
+#!/bin/bash
+# Test script to split timedWhich output files [found in ifile.txt] across threads
+#   to merge earlier tabulations of http vs. https by last-modified date:
+# Usage: fixAndMerge.sh id home [-t] numWorkerProcesses
+#   If -t, no random wait, just id seconds
+# remove >>errs once tested
+#set -e -o pipefail
+echo $$ > test1.pid  # record the PID of this shell in the current directory
+proc=$1  # first argument (id in the usage line)
+res=/var/data/res$proc  # results directory for this id
+home=$2  # ssh destination used by the worker command and by the EXIT trap
+shift 2  # drop id and home; what remains is [-t] numWorkerProcesses
+function lrand {  # usage: lrand N -- prints an integer from 1 to N
+# cheap bad little random number generator
+echo $(( 1 + ($(openssl rand 1 | od -d | head -1 | tr -s ' ' | cut -f 2 -d ' ') % $1)))  # one byte from openssl, printed as a decimal by od, reduced modulo N
+}
+if [ "$1" = "-t" ]  # optional -t flag selects a fixed pause
+then
+ shift
+ pause=$proc  # with -t the pause is the id itself
+else
+ pause=$(lrand 10)  # otherwise a pause drawn from 1 to 10
+fi
+wp=$1  # number of jobs passed to parallel -j
+touch .running  # marker file; removed by the last command of the script
+trap "{ 
+  set -e -o pipefail
+  cd /var/data
+  tar -czhf - m.* res* | \
+   ssh -o StrictHostKeyChecking=no -q $home \"{ cd data
+                    mkdir -p mergedWhich.16
+                    cd mergedWhich.16
+                    tar -xzf - ; } 2>>errs\"
+  rm -rf res* m.*
+  cd
+  rm ifile.txt *.pid
+  ( sleep 5 ; rm -f nohup.cc ) &
+  }" EXIT  # on exit: tar /var/data m.* and res* over ssh into data/mergedWhich.16 on $home, then remove local results, ifile.txt and *.pid; the string is double-quoted so $home is expanded now, when the trap is set; set -e inside the handler should stop it before the rm lines if the transfer fails
+mkdir -p $res
+log=$res/log  # progress log inside the results directory
+echo \# $(date) "running |$proc|$home|$pause|$wp|" >> $log
+pRes=0  # not referenced elsewhere in this file
+echo "# $(date) $proc $(wc -l ifile.txt)" >> $log
+parallel --round-robin --pipe -j $wp "_fixAndMerge.sh {#} $home $pause > $res/m.{#} 2>>$res/errs{#}" <ifile.txt || echo "ppfailed $? ${PIPESTATUS[@]}" 1>&2  # lines of ifile.txt are dealt round-robin to $wp jobs; job N writes $res/m.N and appends stderr to $res/errsN
+echo \# $(date) subprocs done >> $log
+cat $res/m.* | awk '{c[$1 "\t" $2 "\t" $3]+=$4} END {for (k in c) {print k "\t" c[k]}}' > /var/data/m.$proc  # sum field 4 per distinct (field 1, field 2, field 3) key across all job outputs
+echo \# $(date) subres merged >> $log
+rm .running
+
+