# HG changeset patch
# User Henry S. Thompson
# Date 1664465488 -3600
# Node ID a5dafc1364ed8b8124b481df8a1b08b3a9e43a32
# Parent  cec930a032ef34f260bb954e02a508ecaca37322
new

diff -r cec930a032ef -r a5dafc1364ed bin/mime_by_seg.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/mime_by_seg.sh	Thu Sep 29 16:31:28 2022 +0100
@@ -0,0 +1,52 @@
+#!/bin/bash
+# Compute the by-mime totals for one segment.
+#
+# This runs on the compute nodes in nl1_counts inside a _runme doit.
+#
+# Usage: mime_by_seg.sh SEG
+# Args:
+#   $1 - segment id: rows whose 2nd field equals it are selected, and the
+#        result is written to SEG.tsv in the current directory
+# Environment:
+#   PARALLEL_SEQ - presumably the job number set by the GNU parallel run that
+#                  launches this script; used only in log messages
+#
+# For each of top21s_000.tsv .. top21s_299.tsv, field 1 is summed per distinct
+# field 3 (presumably the mime type) over the selected rows; the resulting
+# "total key" lines from all files are piped through uniq_merge.py.
+
+pjobs=4               # 10 parallel invocations of this are running,
+task=$PARALLEL_SEQ    # {1..10}
+
+# Refuse to run without a segment: an empty $seg would send the output to a
+# file named ".tsv".
+if [[ -z "${1-}" ]]; then
+  echo "usage: ${0##*/} SEG" 1>&2
+  exit 2
+fi
+seg=$1
+
+# $(date) is left unquoted in the log lines so that the timestamp is printed
+# single-spaced.
+# NOTE(review): this start message goes to stdout while the other progress
+# messages go to stderr -- confirm that is intended.
+echo $(date) "$seg as 3.$task on start"
+
+# doit NNN: print "total key" lines for the rows of top21s_NNN.tsv selected by
+# $seg.  parallel runs it in a child shell, hence the exports below; there
+# $PARALLEL_SEQ is presumably the number of the inner job.
+doit () {
+  local i=$1
+  echo $(date) "start $task.$seg $i $PARALLEL_SEQ" 1>&2
+  awk -v seg="$seg" '{if ($2==seg) {ll[$3]+=$1}} END {for (l in ll) print ll[l],l}' "top21s_$i.tsv"
+  echo $(date) "end $task.$seg $i $PARALLEL_SEQ" 1>&2
+}
+
+export -f doit
+export task seg
+
+seq -f '%03g' 0 299 | parallel -j "$pjobs" doit '{}' | uniq_merge.py > "$seg.tsv"
+
+# NOTE(review): n, nodename, N and node are not set anywhere in this script;
+# unless the caller exports them they expand to empty strings -- confirm.
+echo $(date) "task $n.$task on $nodename:$N.$node end" 1>&2