#!/bin/bash
# This runs on the compute nodes in nl1_counts inside a _runme doit.
#
# Args:
#   $1 - segment identifier; selects which rows are summed and names the
#        output file.
#
# Computes the by-mime totals for this segment.

# Number of concurrent jobs started by this invocation.  10 parallel
# invocations of this script are running at the same time (presumably why
# the per-invocation job count is kept small -- TODO confirm).
pjobs=4
task=$PARALLEL_SEQ # sequence number of this invocation, {1..10}

seg=$1

echo "$(date) $seg as 3.$task on start"

#######################################
# Sum the counts of one input shard for the current segment.
# Globals:   seg (read)  - segment to select; compared against column 2
#            task (read) - sequence number of the enclosing invocation
#            PARALLEL_SEQ (read) - sequence number of this job, logged only
# Arguments: $1 - shard id; the input file is top21s_<id>.tsv in the
#                 current directory
# Outputs:   stdout - one "<total> <key>" line per distinct column-3 value
#                     among rows whose column 2 equals seg (column 1 is
#                     summed; order of lines is unspecified)
#            stderr - start/end progress lines
#######################################
doit () {
  # local: do not leak the shard id into the calling shell.
  local i=$1
  echo "$(date) start $task.$seg $i $PARALLEL_SEQ" 1>&2
  # Quote the -v value and file name so an unusual segment or shard id is
  # passed through as a single word.
  awk -v seg="$seg" '{if ($2==seg) {ll[$3]+=$1}} END {for (l in ll) print ll[l],l}' "top21s_$i.tsv"
  echo "$(date) end $task.$seg $i $PARALLEL_SEQ" 1>&2
}

# GNU parallel runs each job in a fresh shell, so the function and the
# variables it reads must be exported to be visible there.
export -f doit
export task seg

# Run doit once per shard id 000..299, at most $pjobs at a time, and merge
# the per-shard "<total> <key>" lines into the per-segment output file.
# NOTE(review): uniq_merge.py is an external helper; it presumably combines
# totals for identical keys -- confirm against that script.
seq -f '%03g' 0 299 | parallel -j "$pjobs" doit '{}' | uniq_merge.py > "$seg.tsv"

# NOTE(review): n, nodename, N and node are not assigned anywhere in this
# script; unless the caller exports them they expand to empty strings.
echo "$(date) task $n.$task on $nodename:$N.$node end" 1>&2
