comparison bin/lang_by_seg.sh @ 11:dfdb95e5d774

catch-up
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 28 Jul 2022 14:45:35 +0100
parents
children
comparison
equal deleted inserted replaced
10:cf982df00cbd 11:dfdb95e5d774
1 #!/bin/bash
2 # This runs on the compute nodes in nl1_counts inside a _runme doit
3 # Args: $1 - the segment to total (compared with the 2nd field of each top21s_NNN.tsv row)
4 # Computes the by-language totals for this segment
5
# Number of doit jobs this invocation lets parallel run at the same time.
# (Per the original note, 10 invocations of this script run concurrently.)
pjobs=4
# Sequence number of this invocation, taken from PARALLEL_SEQ
# ({1..10} per the original note).
task=$PARALLEL_SEQ

# Segment to total: input rows are selected by comparing their 2nd field
# with this value.
seg=$1

# Start-of-run marker; written to stdout, as before.
# shellcheck disable=SC2046 # date output is word-split on purpose so the
# log line stays single-space separated; the variables are quoted.
echo $(date) "$seg" as "3.$task" on start
12
#######################################
# Emit the by-language totals of segment $seg found in one input file.
# Must stay self-contained: it is run via `export -f`, so it cannot rely on
# helper functions that are not exported as well.
# Globals:   seg (read), task (read), PARALLEL_SEQ (read)
# Arguments: $1 - index selecting the input file top21s_<index>.tsv
# Outputs:   stdout - one "<total> <key>" line per distinct 3rd-field value
#                     among the rows whose 2nd field equals $seg, where
#                     <total> is the sum of those rows' 1st fields
#                     (line order is whatever awk's for-in yields);
#            stderr - a start and an end progress line
# Returns:   status of the final echo (awk failures are not propagated)
#######################################
doit () {
  # local: keep the index out of the caller's scope
  local i=$1
  # shellcheck disable=SC2046 # date output is word-split on purpose so the
  # log line stays single-space separated; the variables are quoted.
  echo $(date) start "$task.$seg" "$i" "$PARALLEL_SEQ" 1>&2
  # Quoted so a value containing whitespace cannot be split into extra awk
  # arguments (which would make awk treat the wrong word as its program).
  awk -v seg="$seg" '{if ($2==seg) {ll[$3]+=$1}} END {for (l in ll) print ll[l],l}' "top21s_$i.tsv"
  # shellcheck disable=SC2046 # see above
  echo $(date) end "$task.$seg" "$i" "$PARALLEL_SEQ" 1>&2
}
19
# doit is executed by parallel in separate shells, so the function and the
# variables it reads have to be exported to be visible there.
export -f doit
export task seg

# Run doit once for each of the 300 zero-padded indices 000..299, at most
# $pjobs at a time, pass the combined output through uniq_merge.py and store
# the result in <seg>.tsv.
# NOTE(review): presumably uniq_merge.py merges the per-file totals into one
# total per language - confirm against that script.
seq -f '%03g' 0 299 | parallel -j "$pjobs" doit '{}' | uniq_merge.py > "$seg.tsv"

# NOTE(review): n, nodename, N and node are not assigned anywhere in this
# script; presumably they are inherited from the calling environment -
# confirm, otherwise they expand to empty strings in this message.
# shellcheck disable=SC2046 # date output is word-split on purpose so the
# log line stays single-space separated; the variables are quoted.
echo $(date) task "$n.$task" on "$nodename:$N.$node" end 1>&2
26