view bin/lang_by_seg.sh @ 178:c42a5f7c97c5

renamed to by_interval.py
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 24 Nov 2023 20:40:09 +0000
parents dfdb95e5d774
children
line wrap: on
line source

#!/bin/bash
# This runs on the compute nodes in nl1_counts inside a _runme doit
# Args: $1 - the segment whose rows (column 2 of each top21s_NNN.tsv) are totalled
# Computes the by-language totals for this segment

# Number of doit jobs this invocation runs concurrently.
# 10 parallel invocations of this script are running at the same time.
pjobs=4
# Sequence number of this invocation within the enclosing GNU parallel run.
task=$PARALLEL_SEQ # {1..10}

# The segment to total. Fail early when it is missing: an empty value would
# otherwise match no rows and send the result to a file named ".tsv".
seg=${1:?usage: ${0##*/} SEGMENT}

echo "$(date) $seg as 3.$task on start"

# Print the per-language totals of one input file for the current segment.
# Globals:   seg (read) - segment to select
#            task, PARALLEL_SEQ (read) - only used in the log lines
# Arguments: $1 - file index; the input is top21s_$1.tsv in the current directory
# Outputs:   stdout: one "TOTAL LANGUAGE" line per language, in unspecified order
#            stderr: timestamped start and end log lines
# Returns:   awk's exit status, so a missing or unreadable input is reported
doit () {
  local i=$1 rc=0
  echo "$(date) start $task.$seg $i $PARALLEL_SEQ" 1>&2
  # For rows whose column 2 equals the segment, add column 1 to the running
  # total kept per column-3 value, then print every total with its key.
  awk -v seg="$seg" '{if ($2==seg) {ll[$3]+=$1}} END {for (l in ll) print ll[l],l}' "top21s_$i.tsv" || rc=$?
  echo "$(date) end $task.$seg $i $PARALLEL_SEQ" 1>&2
  return "$rc"
}

# Make doit, and the variables it reads, visible to the shells that
# GNU parallel starts for each job.
export -f doit
export task seg

# Run doit once for each zero-padded index 000..299, $pjobs at a time, and pipe
# the combined output through uniq_merge.py (not shown here; presumably it
# merges the per-file totals) into the result file for this segment.
seq -f '%03g' 0 299 | parallel -j "$pjobs" doit '{}' | uniq_merge.py > "$seg.tsv"

# NOTE(review): n, nodename, N and node are not assigned anywhere in this
# file; presumably they are inherited from the calling environment - confirm.
echo "$(date) task $n.$task on $nodename:$N.$node end" 1>&2