Mercurial > hg > cc > cirrus_work
annotate bin/_nl1.sh @ 2:b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 18 Jul 2022 19:22:42 +0100 |
parents | |
children | f27061e8a9da |
rev | line source |
---|---|
2
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
1 #!/bin/bash |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
2 # This runs on the compute nodes... |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
3 # Args: CC-MAIN-2019-35 nl1_counts langs 0 299 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
4 # count languages (from file named by langs, found in resdir) in parallel, taking input directly from $cc/cdx_counts/xxx.tsv |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
5 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
6 N=$SLURM_JOB_NUM_NODES |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
7 n=$SLURM_NTASKS |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
8 c=$SLURM_CPUS_PER_TASK |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
9 nodename=$SLURMD_NODENAME |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
10 local=$SLURM_LOCALID |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
11 node=$SLURM_NODEID |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
12 task=$SLURM_PROCID |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
13 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
14 threadsPerTask=2 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
15 pjobs=$((c / $threadsPerTask)) |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
16 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
17 cc=$1 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
18 resdir=$W/$USER/results/$cc/$2 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
19 srcdir=$W/hst/results/$cc/cdx_counts |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
20 langs=$3 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
21 langfile=$resdir/$langs |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
22 s1=$4 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
23 sn=$5 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
24 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
25 echo $(date) task $n.$task on $nodename:$N.$node start $(pwd) 1>&2 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
26 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
27 mkdir -p $resdir |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
28 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
29 doit () { |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
30 echo $(date) start $1 $task $PARALLEL_SEQ 1>&2 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
31 fgrep ' w ' $srcdir/$1.tsv | \ |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
32 awk 'BEGIN {while (getline < "'$langfile'") {l[$0]=1}} |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
33 {if (l[$4]) {print $1,$4}}' | uniq -c | \ |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
34 $W/shared/bin/uniq_merge.py > $resdir/${langs}_$1.tsv |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
35 echo $(date) end $1 $task $PARALLEL_SEQ 1>&2 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
36 } |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
37 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
38 export -f doit |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
39 export srcdir resdir task langs langfile |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
40 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
41 seq $s1 $sn | while read i |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
42 do if [ $((i % $n)) -eq $task ] |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
43 then printf '%03g\n' $i |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
44 fi |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
45 done | \ |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
46 parallel -j $pjobs doit '{}' |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
47 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
48 echo $(date) task $n.$task on $nodename:$N.$node end 1>&2 |
b4801f5696b2
compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff
changeset
|
49 |