annotate bin/lang_by_seg.sh @ 204:81ca65d44241

normalise % counts by non-empty bases only
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 06 Dec 2023 13:33:25 +0000
parents dfdb95e5d774
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/bin/bash
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2 # This runs on the compute nodes in nl1_counts inside a _runme doit
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
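3 # Args: $1 - the segment whose counts are to be totalled (rows are selected by comparing it with column 2 of each top21s_NNN.tsv)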
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
4 # Computes the by-language totals for this segment
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
5
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
6 pjobs=4 # jobs GNU parallel runs at once inside this invocation; 10 parallel invocations of this are running (presumably why this is kept small -- TODO confirm)
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
7 task=$PARALLEL_SEQ # {1..10}: sequence number taken from the environment of the outer parallel run
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
8
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
9 seg=${1:?usage: $0 SEGMENT} # segment to total; abort with a usage message instead of silently matching an empty segment and writing '.tsv'
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
10
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
11 echo $(date) $seg as 3.$task on start
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
12
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
13 doit () { # doit NNN: print one "total key" line per distinct column-3 value (the language, per the header comment) for segment $seg in top21s_NNN.tsv
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
14 local i=$1 # file index; kept local so it does not leak into the calling shell
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
15 echo $(date) start $task.$seg $i $PARALLEL_SEQ 1>&2 # progress goes to stderr so stdout carries only the totals
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
16 awk -v seg="$seg" '{if ($2==seg) {ll[$3]+=$1}} END {for (l in ll) print ll[l],l}' "top21s_$i.tsv" # sum column 1 per column-3 key over rows whose column 2 equals seg; expansions quoted so whitespace or glob characters cannot split them
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
17 echo $(date) end $task.$seg $i $PARALLEL_SEQ 1>&2
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
18 }
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
19
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
20 export -f doit # export the function so the shells started by parallel can call it
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
21 export task seg # doit reads both of these from its environment
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
22
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
23 seq -f '%03g' 0 299 | parallel -j "$pjobs" doit '{}' | uniq_merge.py > "$seg.tsv" # run doit on indices 000..299, pass the combined output through uniq_merge.py (external helper, not shown) and write the result to <seg>.tsv; expansions quoted so an odd segment name cannot cause an ambiguous redirect
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
24
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
25 echo $(date) task $n.$task on $nodename:$N.$node end 1>&2 # NOTE(review): n, nodename, N and node are never set in this script -- presumably inherited from the caller's environment; confirm, otherwise they expand to empty strings
dfdb95e5d774 catch-up
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
26