annotate bin/_ex1.sh @ 2:b4801f5696b2

compute node workers, see cirrus_home/bin repo for login node masters
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 18 Jul 2022 19:22:42 +0100
parents
children 668579197bec
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
1 #!/bin/bash
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
2 # This runs on the compute nodes...
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
3 # count top 21 solitary languages in parallel, taking input directly from /work/dc007/dc007/hst/results/$1/cdx_counts/xxx.tsv
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
4
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
5 N=$SLURM_JOB_NUM_NODES
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
6 n=$SLURM_NTASKS
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
7 c=$SLURM_CPUS_PER_TASK
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
8 nodename=$SLURMD_NODENAME
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
9 local=$SLURM_LOCALID
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
10 node=$SLURM_NODEID
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
11 task=$SLURM_PROCID
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
12
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
13 threadsPerTask=2
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
14 pjobs=$((c / $threadsPerTask))
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
15
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
16 cc=$1
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
17 resdir=$W/$USER/results/$cc/$2
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
18 srcdir=$W/hst/results/$cc/cdx_counts
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
19 langs=$3
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
20 s1=$4
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
21 sn=$5
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
22
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
23 echo $(date) task $n.$task on $nodename:$N.$node start
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
24
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
25 mkdir -p $resdir
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
26
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
27 doit () {
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
28 echo $(date) start $1 $task $PARALLEL_SEQ
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
29 fgrep ' w ' $srcdir/$1.tsv | awk 'BEGIN { | uniq -c | \
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
30 $W/shared/bin/uniq_merge.py > $resdir/${langs}_$1.tsv
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
31 echo $(date) end $1 $task $PARALLEL_SEQ
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
32 }
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
33
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
34 export -f doit
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
35 export srcdir resdir task
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
36
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
37 seq $s1 $sn | while read i
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
38 do if [ $((i % $n)) -eq $task ]
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
39 then printf '%03g\n' $i
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
40 fi
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
41 done | \
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
42 parallel -j $pjobs doit '{}'
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
43
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
44 echo $(date) task $n.$task on $nodename:$N.$node end
b4801f5696b2 compute node workers, see cirrus_home/bin repo for login node masters
Henry S. Thompson <ht@inf.ed.ac.uk>
parents:
diff changeset
45