Mercurial > hg > cc > cirrus_work
comparison bin/lang_by_seg.sh @ 11:dfdb95e5d774
catch-up
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Thu, 28 Jul 2022 14:45:35 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
10:cf982df00cbd | 11:dfdb95e5d774 |
---|---|
#!/bin/bash
# Computes the by-language totals for one segment.
# This runs on the compute nodes in nl1_counts inside a _runme doit.
# Args:
#   $1 - segment identifier; stored in $seg and used both as the row filter
#        and as the stem of the output file (<seg>.tsv)
# Environment:
#   PARALLEL_SEQ - copied into $task and used to label log lines

# -j value for the inner parallel run. Per the original note, 10 parallel
# invocations of this script are running at the same time.
pjobs=4
task=$PARALLEL_SEQ # {1..10}

seg=$1

# Start-of-run marker, written to stdout.
# NOTE(review): the "3." prefix is hard-coded; its meaning is not visible here.
echo "$(date) $seg as 3.$task on start"
12 | |
# Sum the counts for segment $seg found in one input shard, grouped by the
# third column.
# Globals:   seg (read), task (read), PARALLEL_SEQ (read)
# Arguments: $1 - shard suffix; the input file is top21s_<suffix>.tsv in the
#                 current directory
# Outputs:   "<total> <key>" lines on stdout, in awk's for-in order (i.e.
#            unspecified); start/end progress lines on stderr
# Returns:   awk's exit status, so a missing or unreadable shard is reported
#            to the caller instead of being masked by the final echo
doit () {
  local i=$1
  local rc=0
  echo "$(date) start $task.$seg $i $PARALLEL_SEQ" 1>&2
  # Columns as used here: $1 = count, $2 = segment, $3 = grouping key
  # (presumably the language, going by the script's stated purpose).
  awk -v seg="$seg" \
    '{if ($2==seg) {ll[$3]+=$1}} END {for (l in ll) print ll[l],l}' \
    "top21s_$i.tsv" || rc=$?
  echo "$(date) end $task.$seg $i $PARALLEL_SEQ" 1>&2
  return "$rc"
}
19 | |
# Make doit, and the variables it reads, visible to the shells that parallel
# spawns for each job.
export -f doit
export task seg

# Feed the shard suffixes 000..299 to doit, $pjobs jobs at a time; the
# combined per-shard output is piped through uniq_merge.py into <seg>.tsv.
printf '%03d\n' {0..299} | parallel -j "$pjobs" doit '{}' | uniq_merge.py > "${seg}.tsv"

# End-of-run marker on stderr.
# NOTE(review): n, nodename, N and node are not assigned anywhere in the
# visible script; unless the calling environment exports them they expand to
# empty strings here. Confirm against the _runme wrapper.
echo "$(date) task $n.$task on $nodename:$N.$node end" 1>&2
26 |