annotate bin/mime_by_seg.sh @ 222:ee34498c6762

now using clean 2005 count
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 28 Feb 2024 14:44:59 +0000
parents a5dafc1364ed
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
20
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
#!/bin/bash
# This runs on the compute nodes in nl1_counts inside a _runme doit
# Args:
#   $1  segment to total; compared against column 2 of each top21s_NNN.tsv
#       input and used as the stem of the output file ($1.tsv)
# Computes the by-mime totals for this segment

pjobs=4 # -j value for the inner parallel; 10 parallel invocations of this are running
task=$PARALLEL_SEQ # {1..10}

# Refuse to run without a segment: an empty value would match no rows and
# write the result to a file named just ".tsv"
seg=${1:?usage: $0 SEGMENT}

# $(date) is deliberately left unquoted so the shell collapses date's padding
echo $(date) "$seg as 3.$task on start"
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
12
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
# doit IDX
#   IDX  suffix selecting the input file top21s_IDX.tsv in the current directory
# Reads the variables seg and task (exported by the caller) and PARALLEL_SEQ.
# stdout: one "TOTAL KEY" line per distinct column-3 value among the rows whose
#   column 2 equals $seg, TOTAL being the sum of column 1 over those rows
#   (column 3 is presumably the mime type -- confirm against the input files).
#   Lines come out in awk's array-traversal order, i.e. unsorted.
# stderr: a start and an end trace line, kept out of the data stream.
# Returns awk's exit status, so an unreadable input file is reported as a
#   failure instead of being masked by the final echo.
doit () {
  local i=$1
  local status
  # $(date) is deliberately left unquoted so the shell collapses date's padding
  echo $(date) "start $task.$seg $i $PARALLEL_SEQ" 1>&2
  awk -v seg="$seg" '$2 == seg { ll[$3] += $1 } END { for (l in ll) print ll[l], l }' "top21s_$i.tsv"
  status=$?
  echo $(date) "end $task.$seg $i $PARALLEL_SEQ" 1>&2
  return $status
}
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
19
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
# Make doit and the variables it reads visible to the shells parallel starts
export -f doit
export task seg

# Run doit for the 300 input indices 000..299, $pjobs at a time, and feed the
# concatenated per-file totals through uniq_merge.py into $seg.tsv
# (uniq_merge.py is not shown here; presumably it combines the totals for
# identical keys -- confirm)
seq -f '%03g' 0 299 | parallel -j "$pjobs" doit '{}' | uniq_merge.py > "$seg.tsv"

# Closing trace line on stderr, using the same fields as the start message.
# It reports only seg and task because those are the only identifiers this
# script sets; n, nodename, N and node are never assigned here.
echo $(date) "$seg as 3.$task on end" 1>&2
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
26