changeset 20:a5dafc1364ed

new
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 29 Sep 2022 16:31:28 +0100
parents cec930a032ef
children cbac7dfe2f24
files bin/mime_by_seg.sh
diffstat 1 files changed, 26 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/mime_by_seg.sh	Thu Sep 29 16:31:28 2022 +0100
@@ -0,0 +1,26 @@
+#!/bin/bash
+# This runs on the compute nodes in nl1_counts inside a _runme doit
+# Args: seg -- the segment whose rows are selected and totalled
+# Computes the by-mime totals for this segment, writing them to <seg>.tsv
+
+pjobs=4 # jobs for the inner parallel; 10 parallel invocations of this script are running
+task=$PARALLEL_SEQ # {1..10}: job sequence number set by the enclosing GNU parallel run
+
+seg=${1:?usage: mime_by_seg.sh SEG} # abort instead of filtering on "" and writing ".tsv"
+
+printf '%s %s as 3.%s on start\n' "$(date)" "$seg" "$task"
+
+# doit IDX: for rows of top21s_IDX.tsv with $2 == $seg, sum $1 per distinct $3 and print "sum key" lines; progress goes to stderr
+doit () {
+ printf '%s start %s.%s %s %s\n' "$(date)" "$task" "$seg" "$1" "$PARALLEL_SEQ" 1>&2
+ awk -v seg="$seg" '$2==seg {ll[$3]+=$1} END {for (l in ll) print ll[l],l}' "top21s_$1.tsv"
+ printf '%s end %s.%s %s %s\n' "$(date)" "$task" "$seg" "$1" "$PARALLEL_SEQ" 1>&2
+}
+
+export -f doit # parallel runs each job in a child shell, so the function must be exported
+export task seg
+
+seq -f '%03g' 0 299 | parallel -j "$pjobs" doit '{}' | uniq_merge.py > "$seg.tsv"
+# NOTE(review): $n, $nodename, $N and $node are never set in this script -- presumably exported by the caller; confirm
+printf '%s task %s.%s on %s:%s.%s end\n' "$(date)" "$n" "$task" "$nodename" "$N" "$node" 1>&2
+