Mercurial > hg > cc > cirrus_work
changeset 20:a5dafc1364ed
new
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Thu, 29 Sep 2022 16:31:28 +0100 |
parents | cec930a032ef |
children | cbac7dfe2f24 |
files | bin/mime_by_seg.sh |
diffstat | 1 files changed, 26 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/bin/bash
# bin/mime_by_seg.sh
# This runs on the compute nodes in nl1_counts inside a _runme doit
# Usage: mime_by_seg.sh SEGMENT
# Args:
#   $1 - segment value; rows of the input files are selected when their
#        second column equals it, and it also names the output file.
# Computes the by-mime totals for this segment
#
# Reads top21s_000.tsv .. top21s_299.tsv from the current directory.  For each
# file, column 1 is summed per distinct column 3 over the rows whose column 2
# equals SEGMENT; the per-file "sum key" lines are piped through uniq_merge.py
# (presumably merging the partial sums per key -- confirm against that script)
# and the result is written to SEGMENT.tsv.
#
# Environment read: PARALLEL_SEQ (set by GNU parallel), plus n, nodename, N
# and node for the final log line (not set here -- see note at the end).
#
# Logging note: $(date) is deliberately left unquoted in the echo lines so the
# emitted log text keeps exactly the same single-space separated form as
# before; all other expansions are quoted.

# Without a segment the awk filter would match rows with an empty column 2 and
# the output would land in a file literally named ".tsv", so refuse early.
if [[ $# -lt 1 || -z $1 ]]; then
  printf 'Usage: %s SEGMENT\n' "${0##*/}" >&2
  exit 2
fi

pjobs=4 # 10 parallel invocations of this are running,
task=$PARALLEL_SEQ # {1..10}

seg=$1

# NOTE(review): the "3." prefix is hard-coded and nothing follows "on";
# this looks like a leftover from a template that printed a node name.
# Left unchanged so existing log consumers see the same text.
echo $(date) "$seg" as "3.$task" on start

# Sum column 1 per column-3 key for the rows of one input file that belong to
# the current segment.
# Globals:   task, seg (read; exported below so the parallel-spawned shell
#            sees them), PARALLEL_SEQ (read; here it is the inner job number)
# Arguments: $1 - three-digit file index, e.g. 007 -> top21s_007.tsv
# Outputs:   "total key" lines on stdout; start/end log lines on stderr
doit () {
  local i=$1
  echo $(date) start "$task.$seg" "$i" "$PARALLEL_SEQ" 1>&2
  awk -v seg="$seg" \
    '$2 == seg { ll[$3] += $1 } END { for (l in ll) print ll[l], l }' \
    "top21s_$i.tsv"
  echo $(date) end "$task.$seg" "$i" "$PARALLEL_SEQ" 1>&2
}

# GNU parallel runs each job in a fresh shell, so the function and the
# variables it reads have to be exported.
export -f doit
export task seg

seq -f '%03g' 0 299 \
  | parallel -j "$pjobs" doit '{}' \
  | uniq_merge.py > "$seg.tsv"

# NOTE(review): n, nodename, N and node are not assigned anywhere in this
# script; they print as empty unless the caller exports them -- confirm
# against the _runme wrapper.
echo $(date) task "$n.$task" on "$nodename:$N.$node" end 1>&2